Getting to the way it's supposed to be!

2024-10-12 00:43:51 +02:00
parent 84729f9d27
commit 8f2dad9cec
2663 changed files with 540071 additions and 14 deletions


@@ -0,0 +1,472 @@
import re
from typing import Optional, Callable, Any, Iterable, NamedTuple, Union, List, Dict, Tuple
import typing
try:
    # re.Pattern is available on Python 3.7+
    regex_type = re.Pattern
except AttributeError:
    # Older versions: use the type of a compiled pattern object
    regex_type = type(re.compile(''))
Matcher = Callable[[str, int], int]
def make_regex_matcher(regex) -> Matcher:
def matcher(text: str, begin: int) -> int:
m = regex.match(text, begin)
if m:
return m.end()
else:
return -1
return matcher
def make_literal_matcher(literal) -> Matcher:
def matcher(text: str, begin: int) -> int:
if text.startswith(literal, begin):
return begin + len(literal)
else:
return -1
return matcher
def never_matcher(text: str, begin: int) -> int:
return -1
def always_one_matcher(text: str, begin: int) -> int:
return begin + 1
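# Illustrative note: a Matcher takes the full source text and a start offset,
# and returns the end offset of the match, or -1 on failure. For example:
#
#   digits = make_regex_matcher(re.compile(r"[0-9]+"))
#   digits("abc123", 3)  # -> 6 (matched "123")
#   digits("abc123", 0)  # -> -1 (no digits at offset 0)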
class Location(NamedTuple):
filename: str
source: str
begin: int
end: int
line: int
column: int
def __str__(self):
if self.filename:
return f"{self.filename}:{self.line}:{self.column}"
else:
return f"{self.line}:{self.column}"
class ParserHint(NamedTuple):
location: Location
message: str
class ParseError(Exception):
def __init__(self, loc: Location, message: str, hints: Iterable[ParserHint]):
msg = f"{loc}: {message}"
if hints:
lines = (f".. while parsing {h.message} at {h.location}" for h in reversed(hints))
msg = msg + "\n" + "\n".join(lines)
super().__init__(msg)
self.loc = loc
class Rule:
"""Rule to match tokens
"""
def __init__(self, name: str, matcher: Matcher=never_matcher, literal: str="", value:Any=None, ignore:bool=False):
self.name = name
self.matcher = matcher
self.literal = literal
if value is None or callable(value):
self.valuer = value
else:
self.valuer = lambda s: value
self.ignore = bool(ignore)
def __repr__(self):
return "Rule({!r})".format(self.name)
def __str__(self):
return "{!r}".format(self.name)
# Special rules for begin and end
Begin = Rule("begin-of-file")
End = Rule("end-of-file")
Synthetic = Rule("synthetic")
Error = Rule("error")
def make_matcher_from_pattern(pattern: Any) -> Matcher:
if isinstance(pattern, str):
# Compile strings to regex
regex = re.compile(pattern, re.ASCII)
return make_regex_matcher(regex)
elif isinstance(pattern, regex_type):
# Already compiled regex
return make_regex_matcher(pattern)
elif callable(pattern):
# Custom matcher function
return pattern
else:
raise TypeError('Invalid type for rule pattern {!r}'.format(type(pattern)))
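# Dispatch sketch: make_matcher_from_pattern accepts three pattern forms.
#
#   make_matcher_from_pattern(r"[0-9]+")          # pattern string, compiled with re.ASCII
#   make_matcher_from_pattern(re.compile("a+"))   # precompiled regex, used as-is
#   make_matcher_from_pattern(never_matcher)      # any callable is used directly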
class Lexer(object):
def __init__(self):
self.global_rules = []
self.prefix_rules = {}
self.lexer_type = SourceLexer
    def add_rule(self, rule: Rule, prefix: Optional[Iterable[str]] = None):
if prefix:
for pre in prefix:
if not isinstance(pre, str):
raise TypeError("Prefixes must be an iterable of str")
if len(pre) > 1:
raise ValueError('Prefixes must be single characters')
rules = self.prefix_rules.setdefault(pre, [])
rules.append(rule)
else:
self.global_rules.append(rule)
def rule(self, name: str, pattern: Any, *, value:Any=None, prefix:Optional[Iterable[str]]=None):
matcher = make_matcher_from_pattern(pattern)
rule = Rule(name, matcher, "", value, ignore=False)
self.add_rule(rule, prefix)
return rule
def ignore(self, name: str, pattern: Any, *, value:Any=None, prefix:Optional[Iterable[str]]=None):
matcher = make_matcher_from_pattern(pattern)
rule = Rule(name, matcher, "", value, ignore=True)
self.add_rule(rule, prefix)
return rule
def ignore_whitespace(self, *, ignore_newline=True):
spaces = " \t\v\r"
if ignore_newline:
spaces += "\n"
regex = re.compile(f"[{re.escape(spaces)}]+")
self.ignore("whitespace", regex, prefix=spaces)
def literal(self, literal: str, value: Any=None):
if not isinstance(literal, str):
raise TypeError('Literals must be strings, got {!r}'.format(type(literal)))
if not literal:
raise ValueError('Empty literal')
if len(literal) == 1:
# Prefix match is full match
matcher = always_one_matcher
else:
matcher = make_literal_matcher(literal)
rule = Rule(repr(literal), matcher, literal, value)
self.add_rule(rule, literal[0])
return rule
def literals(self, *args: str):
return [self.literal(arg) for arg in args]
def make(self, source: str, filename: str=""):
return self.lexer_type(self, source, filename)
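# Usage sketch (hypothetical token names, assuming the API above): build a
# Lexer, register rules and literals, then make() a SourceLexer for a
# concrete source string.
#
#   lex = Lexer()
#   lex.ignore_whitespace()
#   ident = lex.rule("identifier", r"[A-Za-z_][A-Za-z0-9_]*")
#   lex.literals("+", "=")
#   sl = lex.make("x = y + z", filename="demo.txt")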
class Token:
__slots__ = ["rule", "location", "value", "_text"]
def __init__(self, rule: Rule, location: Location):
self.rule = rule
self.location = location
self.value = None
self._text = None
if rule.valuer:
self.value = rule.valuer(self.text())
def text(self) -> str:
if self._text is None:
loc = self.location
self._text = loc.source[loc.begin:loc.end]
return self._text
def __str__(self) -> str:
loc = self.location
length = loc.end - loc.begin
if self.rule.literal or length > 20:
return self.rule.name
else:
return f"{self.rule.name} {self.text()!r}"
def __repr__(self) -> str:
return f"Token({self.rule.name!r}"
def synthetic(text: str):
length = len(text)
loc = Location("", text, 0, length, 1, 1)
return Token(Synthetic, loc)
class SourceLexer:
def __init__(self, lexer: Lexer, source: str, filename:str=""):
self.pos = 0
self.lexer = lexer
self.source = source
self.source_length = len(source)
self.filename = filename
self.line = 1
self.line_end = 0
def scan(self) -> Token:
pos = self.pos
source_end = self.source_length
lexer = self.lexer
source = self.source
global_rules = lexer.global_rules
while pos < source_end:
prefix = source[pos]
prefix_rules = lexer.prefix_rules.get(prefix)
best_rule = None
best_end = -1
if prefix_rules:
for rule in prefix_rules:
end = rule.matcher(source, pos)
if end >= best_end:
best_rule = rule
best_end = end
for rule in global_rules:
end = rule.matcher(source, pos)
if end >= best_end:
best_rule = rule
best_end = end
column = pos - self.line_end + 1
while self.line_end < best_end:
line_end = source.find("\n", self.line_end, best_end)
if line_end < 0: break
self.line_end = line_end + 1
self.line += 1
if best_end < 0:
loc = Location(self.filename, source, pos, pos + 1, self.line, column)
return Token(Error, loc)
if best_rule.ignore:
pos = best_end
else:
self.pos = best_end
loc = Location(self.filename, source, pos, best_end, self.line, column)
return Token(best_rule, loc)
loc = Location(self.filename, source, source_end, source_end + 1, self.line + 1, 1)
return Token(End, loc)
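# Scanning sketch, continuing the example above: scan() returns one
# non-ignored token per call, an Error token for an unmatchable character,
# and finally the End token.
#
#   while True:
#       tok = sl.scan()
#       if tok.rule is End:
#           break
#       print(tok.location, tok)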
def format_rule(rule):
if isinstance(rule, list):
return 'any of ({})'.format(', '.join(format_rule(r) for r in rule))
elif isinstance(rule, Rule):
return rule.name
elif isinstance(rule, str):
return repr(rule)
else:
raise TypeError(f'Unsupported rule type {repr(type(rule))}')
def format_message(msg):
return " " + msg if msg else ""
class ParserHintContext:
def __init__(self, parser: "Parser", token_or_loc: Union[Token, Location], message: str):
self.parser = parser
if hasattr(token_or_loc, "location"):
self.location = token_or_loc.location
else:
self.location = token_or_loc
self.message = message
def __enter__(self):
self.parser.hint_stack.append(ParserHint(self.location, self.message))
def __exit__(self, type, value, traceback):
self.parser.hint_stack.pop()
class Parser:
def __init__(self, lexer: Lexer, source: str, filename:str=""):
begin_loc = Location(filename, source, 0, 0, 1, 1)
self.lexer = lexer
self.source_lexer = lexer.make(source, filename)
self.prev_token = Token(Begin, begin_loc)
self.token = self.source_lexer.scan()
self.hint_stack = []
def scan(self):
if self.token.rule is not End:
self.prev_token = self.token
self.token = self.source_lexer.scan()
if self.token.rule is Error:
self.fail(f"Bad token starting with {self.token.text()!r}")
return self.prev_token
def peek(self, rule: Any) -> Optional[Token]:
if isinstance(rule, list):
for r in rule:
tok = self.peek(r)
if tok: return tok
elif isinstance(rule, Rule):
if self.token.rule == rule:
return self.token
elif isinstance(rule, str):
if self.token.rule.literal == rule:
return self.token
else:
raise TypeError(f'Unsupported rule type {type(rule)!r}')
def accept(self, rule) -> Optional[Token]:
tok = self.peek(rule)
if tok:
self.scan()
return tok
else:
return None
def fail_at(self, location: Location, message: str):
raise ParseError(location, message, self.hint_stack)
def fail(self, message: str):
self.fail_at(self.token.location, message)
def fail_prev(self, message: str):
self.fail_at(self.prev_token.location, message)
def fail_got(self, message: str):
self.fail_at(self.token.location, message + f", got {self.token}")
def fail_prev_got(self, message: str):
self.fail_at(self.prev_token.location, message + f", got {self.prev_token}")
def require(self, rule, message: str="") -> Token:
tok = self.accept(rule)
if tok:
return tok
else:
fr, fm = format_rule, format_message
self.fail_got(f"Expected {fr(rule)}{fm(message)}")
    def sep(self, sep, message="") -> Iterable[int]:
        n = 0
        yield n
        while self.accept(sep):
            n += 1
            yield n
def until(self, end, message="") -> Iterable[int]:
n = 0
while not self.accept(end):
yield n
n += 1
def sep_until(self, sep, end, message="") -> Iterable[int]:
n = 0
while not self.accept(end):
if n > 0 and not self.accept(sep):
fr, fm = format_rule, format_message
self.fail_got(f"Expected {fr(sep)} or {fr(end)}{fm(message)}")
yield n
n += 1
def ignore(self, rule) -> int:
n = 0
while self.accept(rule):
n += 1
return n
def hint(self, token_or_loc: Union[Token, Location], message: str):
return ParserHintContext(self, token_or_loc, message)
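# Parser usage sketch (hypothetical rules, assuming the literals used are
# registered on the lexer): peek/accept/require take a Rule, a registered
# literal string, or a list of either; sep_until() drives a separated list
# up to a closing delimiter.
#
#   p = Parser(lex, "f(a, b)")
#   fn = p.require(ident, "function name")
#   p.require("(")
#   args = [p.require(ident).text() for _ in p.sep_until(",", ")")]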
get_origin = getattr(typing, "get_origin", lambda o: getattr(o, "__origin__", None))
get_args = getattr(typing, "get_args", lambda o: getattr(o, "__args__", None))
class AstField(NamedTuple):
name: str
base: type
optional: bool
sequence: bool
def make_ast_field(name, base):
origin, args = get_origin(base), get_args(base)
optional = sequence = False
if origin == Union and len(args) == 2 and type(None) in args:
base = args[args.index(type(None)) ^ 1]
optional = True
origin, args = get_origin(base), get_args(base)
    if origin in (list, List):
base = args[0]
sequence = True
elif origin:
base = object
return AstField(name, base, optional, sequence)
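# Classification sketch: Optional[...] unwraps to the inner type with
# optional=True, and List[...] unwraps with sequence=True, so
#
#   make_ast_field("xs", Optional[List[int]])
#
# yields AstField("xs", int, optional=True, sequence=True).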
class Ast:
def __init__(self, *args, **kwargs):
cls = type(self)
if len(args) > len(cls.fields):
raise TypeError(f"Too many fields for {cls.__name__}: {len(args)}, expected {len(cls.fields)}")
for field, arg in zip(cls.fields, args):
setattr(self, field.name, arg)
for name, arg in kwargs.items():
setattr(self, name, arg)
for field in cls.fields:
try:
value = getattr(self, field.name)
if field.optional and value is None:
continue
if field.sequence:
for ix, v in enumerate(value):
if not isinstance(v, field.base):
raise TypeError(f"Trying to assign '{type(v).__name__}' to '{cls.__name__}' field '{field.name}: {field.base.__name__}' index [{ix}]")
else:
if not isinstance(value, field.base):
raise TypeError(f"Trying to assign '{type(value).__name__}' to '{cls.__name__}' field '{field.name}: {field.base.__name__}'")
except AttributeError:
raise ValueError(f"'{cls.__name__}' requires field '{field.name}: {field.base.__name__}'")
def __init_subclass__(cls, **kwargs):
fields = getattr(cls, "__annotations__", {})
cls.fields = [make_ast_field(k, v) for k,v in fields.items()]
super().__init_subclass__(**kwargs)
def _imp_dump(self, result, indent):
cls = type(self)
indent_str = " " * indent
result += (cls.__name__, "(")
first = True
num_asts = 0
for field in cls.fields:
if issubclass(field.base, Ast):
num_asts += 1
continue
if not first: result.append(", ")
first = False
result += (field.name, "=", str(getattr(self, field.name, None)))
for field in cls.fields:
if not issubclass(field.base, Ast): continue
if num_asts > 1:
result += ("\n", indent_str, " ")
else:
if not first: result.append(", ")
result.append(field.name)
result.append("=")
attr = getattr(self, field.name, None)
if not attr:
result.append("None")
continue
if field.sequence:
result.append("[")
seq_indent = 1 if num_asts == 1 else 2
for ast in getattr(self, field.name, None):
result += ("\n", indent_str, " " * seq_indent)
ast._imp_dump(result, indent + seq_indent)
result += ("\n", indent_str, " ]")
else:
attr._imp_dump(result, indent + 1)
result += ")"
def dump(self, indent=0):
result = []
self._imp_dump(result, indent)
return "".join(result)

File diff suppressed because it is too large


@@ -0,0 +1,804 @@
import parsette
import string
from typing import List, Optional, NamedTuple, Union
import json
import argparse
import os
import re
lexer = parsette.Lexer()
lexer.ignore_whitespace()
TEnd = parsette.End
TIdent = lexer.rule("identifier", r"[A-Za-z_][A-Za-z0-9_]*", prefix=string.ascii_letters+"_")
TNumber = lexer.rule("number", r"(0[Xx][0-9A-Fa-f]+)|([0-9]+)", prefix=string.digits)
TComment = lexer.rule("comment", r"//[^\r\n]*", prefix="/")
TPreproc = lexer.rule("preproc", r"#[^\n\\]*(\\\r?\n[^\n\\]*?)*\n", prefix="#")
TString = lexer.rule("string", r"\"[^\"]*\"", prefix="\"")
lexer.literals(*"const typedef struct union enum extern ufbx_abi ufbx_inline ufbx_nullable ufbx_unsafe UFBX_LIST_TYPE UFBX_ENUM_REPR UFBX_FLAG_REPR UFBX_ENUM_FORCE_WIDTH UFBX_FLAG_FORCE_WIDTH UFBX_ENUM_TYPE".split())
lexer.literals(*",.*[]{}()<>=-;")
lexer.ignore("disable", re.compile(r"//\s*bindgen-disable.*?//\s*bindgen-enable", flags=re.DOTALL))
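# Illustrative example: with these rules, a line such as
#   "typedef struct ufbx_node ufbx_node;"
# scans as the literals 'typedef' and 'struct', two identifier tokens,
# and the literal ';'.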
Token = parsette.Token
Ast = parsette.Ast
class AType(Ast):
pass
class AName(Ast):
pass
class ATop(Ast):
pass
class AStructDecl(Ast):
pass
class AEnumDecl(Ast):
pass
class ADecl(Ast):
type: AType
names: List[AName]
end_line: Optional[int] = None
class ANamePointer(AName):
inner: AName
class ANameArray(AName):
inner: AName
length: Optional[Token]
class ANameIdent(AName):
ident: Token
class ANameFunction(AName):
inner: AName
args: List[ADecl]
class ANameAnonymous(AName):
pass
class ATypeConst(AType):
inner: AType
class ATypeSpec(AType):
inner: AType
spec: Token
class ATypeIdent(AType):
name: Token
class ATypeStruct(AType):
kind: Token
name: Optional[Token]
decls: Optional[List[AStructDecl]]
class ATypeEnum(AType):
kind: Token
name: Optional[Token]
decls: Optional[List[AEnumDecl]]
class AStructComment(AStructDecl):
comments: List[Token]
class AStructField(AStructDecl):
decl: ADecl
class AEnumComment(AEnumDecl):
comments: List[Token]
class AEnumValue(AEnumDecl):
name: Token
value: Optional[Token]
class ATopPreproc(ATop):
preproc: Token
class ATopComment(ATop):
comments: List[Token]
class ATopDecl(ATop):
decl: ADecl
class ATopExtern(ATop):
decl: ADecl
class ATopTypedef(ATop):
decl: ADecl
class ATopFile(ATop):
tops: List[ATop]
class ATopList(ATop):
name: Token
type: ADecl
class ATopEnumType(ATop):
enum_type: Token
prefix: Token
last_value: Token
class Parser(parsette.Parser):
def __init__(self, source, filename=""):
super().__init__(lexer, source, filename)
def finish_comment(self, comment_type, first):
comments = [first]
line = first.location.line + 1
while self.peek(TComment) and self.token.location.line == line:
comments.append(self.scan())
line += 1
return comment_type(comments)
def accept_impl(self) -> bool:
if self.token.rule != TIdent: return False
text = self.token.text()
if not text.startswith("UFBX_"): return False
if not text.endswith("_IMPL"): return False
self.scan()
return True
def finish_struct(self, kind) -> ATypeStruct:
kn = kind.text()
name = self.accept(TIdent)
if self.accept("{"):
fields = []
loc = name if name else kind
with self.hint(loc, f"{kn} {name.text()}" if name else f"anonymous {kn}"):
while not self.accept("}"):
if self.accept(TComment):
fields.append(self.finish_comment(AStructComment, self.prev_token))
elif self.accept_impl():
self.require("(", "for macro parameters")
self.finish_macro_params()
else:
decl = self.parse_decl(f"{kn} field")
field = AStructField(decl)
fields.append(field)
self.require(";", f"after {kn} field")
else:
fields = None
return ATypeStruct(kind, name, fields)
def parse_enum_decl(self) -> AEnumDecl:
if self.accept(TComment):
return self.finish_comment(AEnumComment, self.prev_token)
else:
name = self.require(TIdent, "enum value name")
value = None
if self.accept("="):
value = self.require([TIdent, TNumber], f"'{name.text()}' value")
return AEnumValue(name, value)
    def finish_enum(self, kind) -> ATypeEnum:
kn = kind.text()
name = self.accept(TIdent)
self.require(["UFBX_ENUM_REPR", "UFBX_FLAG_REPR"], "enum repr macro")
if self.accept("{"):
decls = []
loc = name if name else kind
has_force_width = False
with self.hint(loc, f"{kn} {name.text()}" if name else f"anonymous {kn}"):
while not self.accept("}"):
if self.accept(","):
continue
if self.accept(["UFBX_ENUM_FORCE_WIDTH", "UFBX_FLAG_FORCE_WIDTH"]):
self.require("(", "for FORCE_WIDTH macro parameters")
self.require(TIdent, "for FORCE_WIDTH macro name")
self.require(")", "for FORCE_WIDTH macro parameters")
has_force_width = True
continue
decls.append(self.parse_enum_decl())
if not has_force_width:
                    self.fail_at(self.prev_token.location, "enum missing FORCE_WIDTH macro")
else:
decls = None
return ATypeEnum(kind, name, decls)
def parse_type(self) -> AType:
token = self.token
if self.accept("const"):
inner = self.parse_type()
return ATypeConst(inner)
elif self.accept(["ufbx_nullable", "ufbx_abi", "ufbx_unsafe", "ufbx_inline"]):
inner = self.parse_type()
return ATypeSpec(inner, token)
elif self.accept(["struct", "union"]):
return self.finish_struct(self.prev_token)
elif self.accept("enum"):
return self.finish_enum(self.prev_token)
elif self.accept(TIdent):
return ATypeIdent(self.prev_token)
else:
self.fail_got("expected a type")
def parse_name_non_array(self, ctx, allow_anonymous=False) -> AName:
if self.accept("*"):
inner = self.parse_name_non_array(ctx, allow_anonymous)
return ANamePointer(inner)
if allow_anonymous and not self.peek(TIdent):
return ANameAnonymous()
else:
name = self.require(TIdent, f"for {ctx} name")
return ANameIdent(name)
def parse_name(self, ctx, allow_anonymous=False) -> AName:
ast = self.parse_name_non_array(ctx, allow_anonymous)
while True:
if self.accept("["):
length = self.accept([TIdent, TNumber])
self.require("]", f"for opening [")
ast = ANameArray(ast, length)
elif self.accept("("):
args = []
while not self.accept(")"):
args.append(self.parse_decl("argument", allow_list=False, allow_anonymous=True))
self.accept(",")
ast = ANameFunction(ast, args)
else:
break
return ast
def parse_decl(self, ctx, allow_anonymous=False, allow_list=True) -> ADecl:
typ = self.parse_type()
names = []
if not self.peek(";"):
if allow_list:
for _ in self.sep(","):
names.append(self.parse_name(ctx, allow_anonymous))
else:
names.append(self.parse_name(ctx, allow_anonymous))
return ADecl(typ, names)
def finish_top_list(self) -> ATopList:
self.require("(", "for macro parameters")
name = self.require(TIdent, "for list type name")
self.require(",", "for macro parameters")
decl = self.parse_decl("UFBX_TOP_LIST type", allow_anonymous=True, allow_list=False)
self.require(")", "for macro parameters")
return ATopList(name, decl)
def finish_top_enum_type(self) -> ATopEnumType:
self.require("(", "for macro parameters")
enum_name = self.require(TIdent, "for enum type name")
self.require(",", "for macro parameters")
prefix = self.require(TIdent, "for enum prefix")
self.require(",", "for macro parameters")
last_value = self.require(TIdent, "for enum last value")
self.require(")", "for macro parameters")
return ATopEnumType(enum_name, prefix, last_value)
def finish_macro_params(self):
while not self.accept(")"):
if self.accept(TEnd): self.fail("Unclosed macro parameters")
if self.accept("("):
self.finish_macro_params()
else:
self.scan()
def parse_top(self) -> List[ATop]:
if self.accept(TPreproc):
return [ATopPreproc(self.prev_token)]
elif self.accept(TComment):
return [self.finish_comment(ATopComment, self.prev_token)]
elif self.accept("typedef"):
decl = self.parse_decl("typedef")
self.require(";", "after typedef")
decl.end_line = self.prev_token.location.line
return [ATopTypedef(decl)]
elif self.accept("extern"):
if self.accept(TString):
self.require("{", "for extern ABI block")
tops = []
while not self.accept("}"):
tops += self.parse_top()
return tops
else:
decl = self.parse_decl("extern")
self.require(";", "after extern")
decl.end_line = self.prev_token.location.line
return [ATopExtern(decl)]
elif self.accept("UFBX_LIST_TYPE"):
tl = self.finish_top_list()
self.require(";", "after UFBX_LIST_TYPE()")
return [tl]
elif self.accept("UFBX_ENUM_TYPE"):
tl = self.finish_top_enum_type()
self.require(";", "after UFBX_ENUM_TYPE()")
return [tl]
else:
decl = self.parse_decl("top-level")
if self.accept("{"):
level = 1
while level > 0:
if self.accept("{"):
level += 1
elif self.accept("}"):
level -= 1
else:
self.scan()
decl.end_line = self.prev_token.location.line
else:
self.require(";", "after top-level declaration")
decl.end_line = self.prev_token.location.line
return [ATopDecl(decl)]
def parse_top_file(self) -> ATopFile:
tops = []
while not self.accept(parsette.End):
if self.ignore(TEnd): continue
tops += self.parse_top()
return ATopFile(tops)
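# Usage sketch (illustrative input, not taken from ufbx.h):
#
#   p = Parser("typedef struct ufbx_vec2 { float x, y; } ufbx_vec2;")
#   top = p.parse_top_file()
#   print(top.dump())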
def fmt_type(typ: AType):
    if isinstance(typ, ATypeIdent):
        return typ.name.text()
    elif isinstance(typ, ATypeConst):
        return f"const {fmt_type(typ.inner)}"
    elif isinstance(typ, ATypeSpec):
        return f"{typ.spec.text()} {fmt_type(typ.inner)}"
    else:
        raise TypeError(f"Unhandled type {type(typ).__name__}")
class SMod: pass
class SModConst(SMod): pass
class SModNullable(SMod): pass
class SModInline(SMod): pass
class SModAbi(SMod): pass
class SModUnsafe(SMod): pass
class SModPointer(SMod): pass
class SModArray(SMod):
def __init__(self, length: Optional[str]):
self.length = length
class SModFunction(SMod):
def __init__(self, args: List["SDecl"]):
self.args = args
class SComment(NamedTuple):
line_begin: int
line_end: int
text: List[str]
class SType(NamedTuple):
kind: str
name: Optional[str]
mods: List[SMod] = []
body: Union["SStruct", "SEnum", "SEnumType", None] = None
class SName(NamedTuple):
name: Optional[str]
type: SType
value: Optional[str] = None
class SDecl(NamedTuple):
line_begin: int
line_end: int
kind: str
names: List[SName]
comment: Optional[SComment] = None
comment_inline: bool = False
is_function: bool = False
define_args: Optional[List[str]] = None
value: Optional[str] = None
class SDeclGroup(NamedTuple):
line: int
decls: List[SDecl]
comment: Optional[SComment] = None
comment_inline: bool = False
is_function: bool = False
SCommentDecl = Union[SComment, SDecl, SDeclGroup]
class SStruct(NamedTuple):
line: int
kind: str
name: Optional[str]
decls: List[SCommentDecl]
is_list: bool = False
class SEnum(NamedTuple):
line: int
name: Optional[str]
decls: List[SCommentDecl]
class SEnumType(NamedTuple):
line: int
enum_name: str
enum_prefix: str
last_value: str
def type_line(typ: AType):
if isinstance(typ, ATypeIdent):
return typ.name.location.line
elif isinstance(typ, ATypeConst):
return type_line(typ.inner)
elif isinstance(typ, ATypeStruct):
return typ.kind.location.line
elif isinstance(typ, ATypeEnum):
return typ.kind.location.line
elif isinstance(typ, ATypeSpec):
return type_line(typ.inner)
else:
raise TypeError(f"Unhandled type {type(typ).__name__}")
spec_to_mod = {
"ufbx_abi": SModAbi,
"ufbx_nullable": SModNullable,
"ufbx_inline": SModInline,
"ufbx_unsafe": SModUnsafe,
}
def to_stype(typ: AType) -> SType:
if isinstance(typ, ATypeIdent):
return SType("name", typ.name.text())
elif isinstance(typ, ATypeConst):
st = to_stype(typ.inner)
return st._replace(mods=st.mods + [SModConst()])
elif isinstance(typ, ATypeSpec):
st = to_stype(typ.inner)
spec = typ.spec.text()
return st._replace(mods=st.mods + [spec_to_mod[spec]()])
elif isinstance(typ, ATypeStruct):
body = to_sstruct(typ) if typ.decls is not None else None
return SType(typ.kind.text(), typ.name.text() if typ.name else None, body=body)
elif isinstance(typ, ATypeEnum):
body = to_senum(typ) if typ.decls is not None else None
return SType("enum", typ.name.text() if typ.name else None, body=body)
else:
raise TypeError(f"Unhandled type {type(typ).__name__}")
def name_to_stype(base: SType, name: AName) -> SType:
if isinstance(name, ANamePointer):
st = name_to_stype(base, name.inner)
return st._replace(mods=st.mods + [SModPointer()])
elif isinstance(name, ANameArray):
st = name_to_stype(base, name.inner)
mod = SModArray(name.length.text() if name.length else None)
return st._replace(mods=st.mods + [mod])
elif isinstance(name, ANameFunction):
st = name_to_stype(base, name.inner)
mod = SModFunction([to_sdecl(a, "argument") for a in name.args])
return st._replace(mods=st.mods + [mod])
elif isinstance(name, ANameIdent):
return base
elif isinstance(name, ANameAnonymous):
return base
else:
raise TypeError(f"Unhandled type {type(name)}")
def name_str(name: AName):
if isinstance(name, ANameIdent):
return name.ident.text()
elif isinstance(name, ANameAnonymous):
return None
elif isinstance(name, ANamePointer):
return name_str(name.inner)
elif isinstance(name, ANameArray):
return name_str(name.inner)
elif isinstance(name, ANameFunction):
return name_str(name.inner)
else:
raise TypeError(f"Unhandled type {type(name)}")
def to_sdecl(decl: ADecl, kind: str) -> SDecl:
names = []
is_function = False
base_st = to_stype(decl.type)
for name in decl.names:
st = name_to_stype(base_st, name)
if any(isinstance(mod, SModFunction) for mod in st.mods):
is_function = True
names.append(SName(name_str(name), st))
if not decl.names:
names.append(SName(None, base_st))
line = type_line(decl.type)
end_line = decl.end_line
if end_line is None: end_line = line
return SDecl(line, end_line, kind, names, is_function=is_function)
Comment = List[str]
def to_scomment(comment: Ast):
if not comment: return None
begin = comment.comments[0].location.line
end = comment.comments[-1].location.line
text = [c.text()[3:] for c in comment.comments]
return SComment(begin, end, text)
def to_sstruct(struct: ATypeStruct) -> SStruct:
decls = []
for decl in struct.decls:
if isinstance(decl, AStructComment):
decls.append(to_scomment(decl))
elif isinstance(decl, AStructField):
decls.append(to_sdecl(decl.decl, "field"))
line = struct.kind.location.line
name = struct.name.text() if struct.name else None
kind = struct.kind.text()
return SStruct(line, kind, name, decls)
def to_senum(enum: ATypeEnum) -> SEnum:
decls = []
name = enum.name.text() if enum.name else None
for decl in enum.decls:
if isinstance(decl, AEnumComment):
decls.append(to_scomment(decl))
elif isinstance(decl, AEnumValue):
line = decl.name.location.line
decls.append(SDecl(
line_begin=line,
line_end=line,
kind="enumValue",
value=decl.value.text() if decl.value else None,
names=[
SName(
name=decl.name.text(),
type=SType("enum", name),
value=decl.value)
]))
line = enum.kind.location.line
return SEnum(line, name, decls)
def to_sbody(typ: AType):
if isinstance(typ, ATypeStruct):
return to_sstruct(typ)
elif isinstance(typ, ATypeEnum):
return to_senum(typ)
else:
raise TypeError(f"Unhandled type {type(typ)}")
def top_sdecls(top: ATop) -> List[SCommentDecl]:
if isinstance(top, ATopFile):
decls = []
for t in top.tops:
decls += top_sdecls(t)
return decls
elif isinstance(top, ATopTypedef):
return [to_sdecl(top.decl, "typedef")]
elif isinstance(top, ATopExtern):
return [to_sdecl(top.decl, "extern")]
elif isinstance(top, ATopDecl):
return [to_sdecl(top.decl, "toplevel")]
elif isinstance(top, ATopComment):
return [to_scomment(top)]
elif isinstance(top, ATopList):
line = top.name.location.line
name = top.name.text()
st = to_stype(top.type.type)
st = name_to_stype(st, top.type.names[0])
return [SDecl(line, line, "list", [SName(None, SType("struct", name,
body=SStruct(line, "struct", name, [
SDecl(line, line, "field", [SName("data", st._replace(mods=st.mods+[SModPointer()]))]),
SDecl(line+1, line+1, "field", [SName("count", SType("name", "size_t"))]),
], is_list=True)
))])]
elif isinstance(top, ATopEnumType):
line = top.enum_type.location.line
name = top.prefix.text() + "_COUNT"
return [SDecl(line, line, "enumCount",
[SName(name, SType("enumType", "enumType", body=SEnumType(
line, top.enum_type.text(), top.prefix.text(), top.last_value.text())
)
)]
)]
elif isinstance(top, ATopPreproc):
line = top.preproc.location.line
text = top.preproc.text()
m = re.match(r"#\s*define\s+(\w+)(\([^\)]*\))?\s+(.*)", text)
if m:
name = m.group(1)
args = m.group(2)
if args:
                args = [arg.strip() for arg in args[1:-1].split(",")]
else:
args = None
value = m.group(3)
return [SDecl(line, line, "define", [SName(name, SType("define", "define"))],
define_args=args,
value=value)]
else:
return [] # TODO
else:
raise TypeError(f"Unhandled type {type(top)}")
def collect_decl_comments(decls: List[SCommentDecl]):
n = 0
while n < len(decls):
dc = decls[n:n+3]
if isinstance(dc[0], SComment):
if (len(dc) >= 2 and isinstance(dc[1], SDecl) and dc[0].line_end + 1 == dc[1].line_begin
and (len(dc) < 3 or not (isinstance(dc[2], SComment) and dc[1].line_end == dc[2].line_begin))):
yield dc[1]._replace(comment=dc[0])
n += 2
else:
yield dc[0]
n += 1
else:
if len(dc) >= 2 and isinstance(dc[1], SComment) and dc[0].line_end == dc[1].line_begin:
                comment = dc[1]._replace(text=[re.sub(r"^\s*<\s*", "", t) for t in dc[1].text])
yield dc[0]._replace(comment=comment, comment_inline=True)
n += 2
else:
yield dc[0]
n += 1
def collect_decl_groups(decls: List[SCommentDecl]):
n = 0
while n < len(decls):
dc = decls[n]
if isinstance(dc, SDecl) and not dc.comment_inline and not (dc.names and dc.names[0].type.body):
group = [dc]
line = dc.line_end + 1
n += 1
while n < len(decls):
dc2 = decls[n]
if not isinstance(dc2, SDecl): break
if dc2.comment: break
if dc2.line_begin != line: break
if dc2.names and dc2.names[0].type.body: break
if dc2.is_function != dc.is_function: break
group.append(dc2)
line = dc2.line_end + 1
n += 1
group[0] = dc._replace(comment=None)
comment_inline = len(group) == 1 and dc.comment_inline
yield SDeclGroup(dc.line_begin, group, dc.comment, comment_inline, dc.is_function)
elif isinstance(dc, SDecl) and not (dc.names and dc.names[0].type.body):
group = [dc._replace(comment=None)]
yield SDeclGroup(dc.line_begin, group, dc.comment, dc.comment_inline, dc.is_function)
n += 1
else:
yield dc
n += 1
def collect_decls(decls: List[SCommentDecl], allow_groups: bool) -> List[SCommentDecl]:
decls = list(collect_decl_comments(decls))
if allow_groups:
decls = list(collect_decl_groups(decls))
return decls
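# Grouping sketch (illustrative): a comment line directly above a
# declaration attaches to it, and a trailing comment on the same line
# attaches with comment_inline=True:
#
#   // World-space transform.       <- attaches to the next declaration
#   ufbx_matrix node_to_world;
#   ufbx_vec3 scale; // unit scale  <- inline comment on the same line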
def format_arg(decl: SDecl):
name = decl.names[0]
return {
"type": format_type(name.type),
"name": name.name,
}
def format_mod(mod: SMod):
if isinstance(mod, SModConst):
return { "type": "const" }
elif isinstance(mod, SModNullable):
return { "type": "nullable" }
elif isinstance(mod, SModInline):
return { "type": "inline" }
elif isinstance(mod, SModAbi):
return { "type": "abi" }
elif isinstance(mod, SModPointer):
return { "type": "pointer" }
elif isinstance(mod, SModUnsafe):
return { "type": "unsafe" }
elif isinstance(mod, SModArray):
return { "type": "array", "length": mod.length }
elif isinstance(mod, SModFunction):
return { "type": "function", "args": [format_arg(d) for d in mod.args] }
else:
raise TypeError(f"Unhandled mod {type(mod)}")
def format_type(type: SType):
return {
"kind": type.kind,
"name": type.name,
"mods": [format_mod(mod) for mod in type.mods],
}
def format_name(name: SName):
return {
"type": format_type(name.type),
"name": name.name,
}
def format_decls(decls: List[SCommentDecl], allow_groups: bool):
for decl in collect_decls(decls, allow_groups):
if isinstance(decl, SComment):
yield {
"kind": "paragraph",
"comment": decl.text,
}
elif isinstance(decl, SDecl):
body = None
if decl.names and decl.names[0].type.body:
body = decl.names[0].type.body
if isinstance(body, SStruct):
yield {
"kind": "struct",
"structKind": body.kind,
"line": body.line,
"name": body.name,
"comment": decl.comment.text if decl.comment else [],
"commentInline": decl.comment_inline,
"isList": body.is_list,
"decls": list(format_decls(body.decls, allow_groups=True)),
}
elif isinstance(body, SEnum):
yield {
"kind": "enum",
"line": body.line,
"name": body.name,
"comment": decl.comment.text if decl.comment else [],
"commentInline": decl.comment_inline,
"decls": list(format_decls(body.decls, allow_groups=True)),
}
elif isinstance(body, SEnumType):
yield {
"kind": "enumType",
"line": body.line,
"enumName": body.enum_name,
"countName": body.enum_prefix + "_COUNT",
"lastValue": body.last_value,
"comment": decl.comment.text if decl.comment else [],
"commentInline": decl.comment_inline,
}
else:
for name in decl.names:
yield {
"kind": "decl",
"declKind": decl.kind,
"line": decl.line_begin,
"name": name.name,
"comment": decl.comment.text if decl.comment else [],
"commentInline": decl.comment_inline,
"isFunction": decl.is_function,
"value": decl.value,
"defineArgs": decl.define_args,
"type": format_type(name.type),
}
elif isinstance(decl, SDeclGroup):
yield {
"kind": "group",
"line": decl.line,
"name": None,
"comment": decl.comment.text if decl.comment else [],
"commentInline": decl.comment_inline,
"isFunction": decl.is_function,
"decls": list(format_decls(decl.decls, allow_groups=False)),
}
else:
raise TypeError(f"Unhandled type {type(decl)}")
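# Output shape sketch (abridged, illustrative): each struct becomes a JSON
# object of this form, with its fields and comments nested under "decls".
#
#   { "kind": "struct", "structKind": "struct", "name": "ufbx_vec2",
#     "line": 123, "comment": [], "commentInline": false,
#     "isList": false, "decls": [ ... ] }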
if __name__ == "__main__":
parser = argparse.ArgumentParser("ufbx_parser.py")
parser.add_argument("-i", help="Input file")
parser.add_argument("-o", help="Output file")
argv = parser.parse_args()
src_path = os.path.dirname(os.path.realpath(__file__))
input_file = argv.i
if not input_file:
input_file = os.path.join(src_path, "..", "ufbx.h")
output_file = argv.o
if not output_file:
output_file = os.path.join(src_path, "build", "ufbx.json")
output_path = os.path.dirname(os.path.realpath(output_file))
if not os.path.exists(output_path):
os.makedirs(output_path, exist_ok=True)
with open(input_file) as f:
source = f.read()
    p = Parser(source, os.path.basename(input_file))
top_file = p.parse_top_file()
result = top_sdecls(top_file)
js = list(format_decls(result, allow_groups=True))
with open(output_file, "wt") as f:
json.dump(js, f, indent=2)