Files
coven/modules/ufbx/bindgen/ufbx_parser.py

805 lines
27 KiB
Python

import parsette
import string
from typing import List, Optional, NamedTuple, Union
import json
import argparse
import os
import re
# Module-level lexer shared by every Parser instance (see Parser.__init__).
lexer = parsette.Lexer()
lexer.ignore_whitespace()
# End-of-input token type, aliased for use in the grammar below.
TEnd = parsette.End
# Token rules. `prefix` lists the characters a token of that rule can start with.
TIdent = lexer.rule("identifier", r"[A-Za-z_][A-Za-z0-9_]*", prefix=string.ascii_letters+"_")
# Hexadecimal (0x...) or plain decimal integers only.
TNumber = lexer.rule("number", r"(0[Xx][0-9A-Fa-f]+)|([0-9]+)", prefix=string.digits)
# Line comments only: `//` up to (not including) the end of the line.
TComment = lexer.rule("comment", r"//[^\r\n]*", prefix="/")
# A whole preprocessor directive including its terminating newline; a backslash
# followed by a newline continues the directive onto the next line.
TPreproc = lexer.rule("preproc", r"#[^\n\\]*(\\\r?\n[^\n\\]*?)*\n", prefix="#")
# Double-quoted string without any escape handling.
TString = lexer.rule("string", r"\"[^\"]*\"", prefix="\"")
# Keywords and ufbx-specific macros that the grammar matches by literal text.
# NOTE(review): "ufbx_abi" appears twice in this list - presumably harmless, confirm against parsette.
lexer.literals(*"const typedef struct union enum extern ufbx_abi ufbx_inline ufbx_nullable ufbx_abi ufbx_unsafe UFBX_LIST_TYPE UFBX_ENUM_REPR UFBX_FLAG_REPR UFBX_ENUM_FORCE_WIDTH UFBX_FLAG_FORCE_WIDTH UFBX_ENUM_TYPE".split())
# Single-character punctuation tokens.
lexer.literals(*",.*[]{}()<>=-;")
# Skip everything between a `// bindgen-disable` and the next `// bindgen-enable`
# comment (DOTALL lets the span cover multiple lines).
lexer.ignore("disable", re.compile(r"//\s*bindgen-disable.*?//\s*bindgen-enable", flags=re.DOTALL))
# Short aliases for the parsette types used in annotations below.
Token = parsette.Token
Ast = parsette.Ast
# Base class for type expressions (what Parser.parse_type returns).
class AType(Ast):
    pass


# Base class for declarators (what Parser.parse_name returns).
class AName(Ast):
    pass


# Base class for top-level items (what Parser.parse_top returns).
class ATop(Ast):
    pass


# Base class for entries inside a struct/union body.
class AStructDecl(Ast):
    pass


# Base class for entries inside an enum body.
class AEnumDecl(Ast):
    pass


# One declaration: a base type followed by zero or more declarators.
class ADecl(Ast):
    # Base type shared by every declarator in `names`.
    type: AType
    # Declarators; empty when the type is directly followed by `;`.
    names: List[AName]
    # Line of the token that ended the declaration; only set by Parser.parse_top.
    end_line: Optional[int] = None
# Declarator prefixed by `*`.
class ANamePointer(AName):
    inner: AName


# Declarator followed by `[length]`; `length` is None for an empty `[]`.
class ANameArray(AName):
    inner: AName
    # Identifier or number token between the brackets, if any.
    length: Optional[Token]


# Plain identifier declarator.
class ANameIdent(AName):
    ident: Token


# Declarator followed by a parenthesized argument list.
class ANameFunction(AName):
    inner: AName
    # One ADecl per argument, in source order.
    args: List[ADecl]


# Declarator with no identifier (only produced when anonymous names are allowed).
class ANameAnonymous(AName):
    pass
# `const` applied to the type that follows it.
class ATypeConst(AType):
    inner: AType


# A ufbx specifier (ufbx_nullable / ufbx_abi / ufbx_unsafe / ufbx_inline)
# applied to the type that follows it.
class ATypeSpec(AType):
    inner: AType
    # The specifier token itself.
    spec: Token


# Type referenced by a bare identifier.
class ATypeIdent(AType):
    name: Token


# `struct` or `union` type, with or without a body.
class ATypeStruct(AType):
    # The `struct` / `union` keyword token.
    kind: Token
    # Tag name, None when anonymous.
    name: Optional[Token]
    # Body entries, None when no `{ ... }` body was present.
    decls: Optional[List[AStructDecl]]


# `enum` type, with or without a body.
class ATypeEnum(AType):
    # The `enum` keyword token.
    kind: Token
    # Tag name, None when anonymous.
    name: Optional[Token]
    # Body entries, None when no `{ ... }` body was present.
    decls: Optional[List[AEnumDecl]]
# Run of comment tokens on consecutive lines inside a struct/union body.
class AStructComment(AStructDecl):
    comments: List[Token]


# Field declaration inside a struct/union body.
class AStructField(AStructDecl):
    decl: ADecl


# Run of comment tokens on consecutive lines inside an enum body.
class AEnumComment(AEnumDecl):
    comments: List[Token]


# Enumerator, optionally with `= value`.
class AEnumValue(AEnumDecl):
    name: Token
    # Identifier or number token after `=`, None when no value is given.
    value: Optional[Token]
# Preprocessor directive kept as a single raw token.
class ATopPreproc(ATop):
    preproc: Token


# Run of comment tokens on consecutive lines at top level.
class ATopComment(ATop):
    comments: List[Token]


# Top-level declaration that is neither a typedef nor an extern.
class ATopDecl(ATop):
    decl: ADecl


# Declaration introduced by `extern`.
class ATopExtern(ATop):
    decl: ADecl


# Declaration introduced by `typedef`.
class ATopTypedef(ATop):
    decl: ADecl


# Root node holding every top-level item of the parsed file.
class ATopFile(ATop):
    tops: List[ATop]


# `UFBX_LIST_TYPE(name, type)` macro invocation.
class ATopList(ATop):
    # Name of the generated list type.
    name: Token
    # Element type, parsed as a single (possibly anonymous) declaration.
    type: ADecl


# `UFBX_ENUM_TYPE(enum_type, prefix, last_value)` macro invocation.
class ATopEnumType(ATop):
    enum_type: Token
    prefix: Token
    last_value: Token
class Parser(parsette.Parser):
    """Recursive-descent parser for the C declaration subset handled here.

    Token primitives (`accept`, `require`, `peek`, `scan`, `hint`, `sep`,
    `fail`, `fail_at`, `fail_got`, ...) are inherited from `parsette.Parser`;
    this class only contributes the grammar and builds the `A*` syntax nodes.
    """

    def __init__(self, source, filename=""):
        """Create a parser over `source` using the module-level `lexer`."""
        super().__init__(lexer, source, filename)

    def finish_comment(self, comment_type, first):
        """Gather a comment block that starts with the already-consumed `first`.

        Comment tokens are appended for as long as each one sits on the line
        directly after the previous one. Returns `comment_type(comments)`.
        """
        comments = [first]
        line = first.location.line + 1
        while self.peek(TComment) and self.token.location.line == line:
            comments.append(self.scan())
            line += 1
        return comment_type(comments)

    def accept_impl(self) -> bool:
        """Consume an identifier shaped like `UFBX_..._IMPL` if it is next.

        Returns True when such an identifier was consumed, False otherwise
        (in which case nothing is consumed).
        """
        if self.token.rule != TIdent:
            return False
        text = self.token.text()
        if not text.startswith("UFBX_"):
            return False
        if not text.endswith("_IMPL"):
            return False
        self.scan()
        return True

    def finish_struct(self, kind) -> ATypeStruct:
        """Parse the rest of a struct/union after its keyword token `kind`.

        The tag name and the `{ ... }` body are both optional; without a body
        the resulting node has `decls=None`.
        """
        kn = kind.text()
        name = self.accept(TIdent)
        if self.accept("{"):
            fields = []
            loc = name if name else kind
            with self.hint(loc, f"{kn} {name.text()}" if name else f"anonymous {kn}"):
                while not self.accept("}"):
                    if self.accept(TComment):
                        fields.append(self.finish_comment(AStructComment, self.prev_token))
                    elif self.accept_impl():
                        # `UFBX_*_IMPL(...)` macros are skipped, not recorded.
                        self.require("(", "for macro parameters")
                        self.finish_macro_params()
                    else:
                        decl = self.parse_decl(f"{kn} field")
                        fields.append(AStructField(decl))
                        self.require(";", f"after {kn} field")
        else:
            fields = None
        return ATypeStruct(kind, name, fields)

    def parse_enum_decl(self) -> AEnumDecl:
        """Parse one enum body entry: a comment block or `NAME [= value]`."""
        if self.accept(TComment):
            return self.finish_comment(AEnumComment, self.prev_token)
        else:
            name = self.require(TIdent, "enum value name")
            value = None
            if self.accept("="):
                value = self.require([TIdent, TNumber], f"'{name.text()}' value")
            return AEnumValue(name, value)

    def finish_enum(self, kind) -> ATypeEnum:
        """Parse the rest of an enum after its keyword token `kind`.

        A `UFBX_ENUM_REPR` / `UFBX_FLAG_REPR` token is required after the
        optional tag name. When a body is present it must contain a
        `UFBX_ENUM_FORCE_WIDTH(...)` / `UFBX_FLAG_FORCE_WIDTH(...)` entry,
        otherwise parsing fails. Without a body the node has `decls=None`.
        """
        kn = kind.text()
        name = self.accept(TIdent)
        self.require(["UFBX_ENUM_REPR", "UFBX_FLAG_REPR"], "enum repr macro")
        if self.accept("{"):
            decls = []
            loc = name if name else kind
            has_force_width = False
            with self.hint(loc, f"{kn} {name.text()}" if name else f"anonymous {kn}"):
                while not self.accept("}"):
                    if self.accept(","):
                        continue
                    if self.accept(["UFBX_ENUM_FORCE_WIDTH", "UFBX_FLAG_FORCE_WIDTH"]):
                        self.require("(", "for FORCE_WIDTH macro parameters")
                        self.require(TIdent, "for FORCE_WIDTH macro name")
                        self.require(")", "for FORCE_WIDTH macro parameters")
                        has_force_width = True
                        continue
                    decls.append(self.parse_enum_decl())
                if not has_force_width:
                    self.fail_at(self.prev_token, "enum missing FORCE_WIDTH macro")
        else:
            decls = None
        return ATypeEnum(kind, name, decls)

    def parse_type(self) -> AType:
        """Parse a type expression (qualifiers/specifiers wrap what follows)."""
        token = self.token
        if self.accept("const"):
            inner = self.parse_type()
            return ATypeConst(inner)
        elif self.accept(["ufbx_nullable", "ufbx_abi", "ufbx_unsafe", "ufbx_inline"]):
            inner = self.parse_type()
            # `token` was captured before the accept, so it is the specifier.
            return ATypeSpec(inner, token)
        elif self.accept(["struct", "union"]):
            return self.finish_struct(self.prev_token)
        elif self.accept("enum"):
            return self.finish_enum(self.prev_token)
        elif self.accept(TIdent):
            return ATypeIdent(self.prev_token)
        else:
            self.fail_got("expected a type")

    def parse_name_non_array(self, ctx, allow_anonymous=False) -> AName:
        """Parse the pointer/identifier part of a declarator.

        `ctx` is only used in error messages. With `allow_anonymous` a missing
        identifier yields `ANameAnonymous` instead of an error.
        """
        if self.accept("*"):
            inner = self.parse_name_non_array(ctx, allow_anonymous)
            return ANamePointer(inner)
        if allow_anonymous and not self.peek(TIdent):
            return ANameAnonymous()
        else:
            name = self.require(TIdent, f"for {ctx} name")
            return ANameIdent(name)

    def parse_name(self, ctx, allow_anonymous=False) -> AName:
        """Parse a full declarator including `[...]` and `(...)` suffixes."""
        ast = self.parse_name_non_array(ctx, allow_anonymous)
        while True:
            if self.accept("["):
                length = self.accept([TIdent, TNumber])
                self.require("]", "for opening [")
                ast = ANameArray(ast, length)
            elif self.accept("("):
                args = []
                while not self.accept(")"):
                    args.append(self.parse_decl("argument", allow_list=False, allow_anonymous=True))
                    # The separator is optional so the final argument needs none.
                    self.accept(",")
                ast = ANameFunction(ast, args)
            else:
                break
        return ast

    def parse_decl(self, ctx, allow_anonymous=False, allow_list=True) -> ADecl:
        """Parse `type declarator[, declarator...]` without the terminator.

        With `allow_list=False` exactly one declarator is parsed. When the
        type is directly followed by `;` the declaration has no names.
        """
        typ = self.parse_type()
        names = []
        if not self.peek(";"):
            if allow_list:
                for _ in self.sep(","):
                    names.append(self.parse_name(ctx, allow_anonymous))
            else:
                names.append(self.parse_name(ctx, allow_anonymous))
        return ADecl(typ, names)

    def finish_top_list(self) -> ATopList:
        """Parse `(name, type)` following an already-consumed `UFBX_LIST_TYPE`."""
        self.require("(", "for macro parameters")
        name = self.require(TIdent, "for list type name")
        self.require(",", "for macro parameters")
        decl = self.parse_decl("UFBX_TOP_LIST type", allow_anonymous=True, allow_list=False)
        self.require(")", "for macro parameters")
        return ATopList(name, decl)

    def finish_top_enum_type(self) -> ATopEnumType:
        """Parse `(enum, prefix, last)` following a consumed `UFBX_ENUM_TYPE`."""
        self.require("(", "for macro parameters")
        enum_name = self.require(TIdent, "for enum type name")
        self.require(",", "for macro parameters")
        prefix = self.require(TIdent, "for enum prefix")
        self.require(",", "for macro parameters")
        last_value = self.require(TIdent, "for enum last value")
        self.require(")", "for macro parameters")
        return ATopEnumType(enum_name, prefix, last_value)

    def finish_macro_params(self):
        """Skip tokens up to the `)` matching an already-consumed `(`."""
        while not self.accept(")"):
            if self.accept(TEnd):
                self.fail("Unclosed macro parameters")
            if self.accept("("):
                self.finish_macro_params()
            else:
                self.scan()

    def parse_top(self) -> List[ATop]:
        """Parse one top-level construct and return the nodes it produced.

        A list is returned because an `extern "..." { ... }` block expands to
        all of the items it contains.
        """
        if self.accept(TPreproc):
            return [ATopPreproc(self.prev_token)]
        elif self.accept(TComment):
            return [self.finish_comment(ATopComment, self.prev_token)]
        elif self.accept("typedef"):
            decl = self.parse_decl("typedef")
            self.require(";", "after typedef")
            decl.end_line = self.prev_token.location.line
            return [ATopTypedef(decl)]
        elif self.accept("extern"):
            if self.accept(TString):
                self.require("{", "for extern ABI block")
                tops = []
                while not self.accept("}"):
                    tops += self.parse_top()
                return tops
            else:
                decl = self.parse_decl("extern")
                self.require(";", "after extern")
                decl.end_line = self.prev_token.location.line
                return [ATopExtern(decl)]
        elif self.accept("UFBX_LIST_TYPE"):
            tl = self.finish_top_list()
            self.require(";", "after UFBX_LIST_TYPE()")
            return [tl]
        elif self.accept("UFBX_ENUM_TYPE"):
            tl = self.finish_top_enum_type()
            self.require(";", "after UFBX_ENUM_TYPE()")
            return [tl]
        else:
            decl = self.parse_decl("top-level")
            if self.accept("{"):
                # A body follows the declaration: skip it by counting braces.
                level = 1
                while level > 0:
                    # Same end-of-input guard as finish_macro_params, so an
                    # unbalanced body is reported instead of scanned past.
                    if self.accept(TEnd):
                        self.fail("Unclosed top-level body")
                    if self.accept("{"):
                        level += 1
                    elif self.accept("}"):
                        level -= 1
                    else:
                        self.scan()
                decl.end_line = self.prev_token.location.line
            else:
                self.require(";", "after top-level declaration")
                decl.end_line = self.prev_token.location.line
            return [ATopDecl(decl)]

    def parse_top_file(self) -> ATopFile:
        """Parse top-level constructs until end of input."""
        tops = []
        while not self.accept(parsette.End):
            if self.ignore(TEnd):
                continue
            tops += self.parse_top()
        return ATopFile(tops)
def fmt_type(type: AType):
    """Render a parsed type expression as C-like source text.

    Identifiers render as their name, `const` and ufbx specifiers are
    prefixed to the rendering of the wrapped type, and struct/union/enum
    types render as `<keyword> <tag>` (just the keyword when anonymous).

    Raises:
        TypeError: for any other node, instead of silently returning None
            (which used to leak into results such as "const None").
    """
    if isinstance(type, ATypeIdent):
        return type.name.text()
    elif isinstance(type, ATypeConst):
        return f"const {fmt_type(type.inner)}"
    elif isinstance(type, ATypeSpec):
        return f"{type.spec.text()} {fmt_type(type.inner)}"
    elif isinstance(type, (ATypeStruct, ATypeEnum)):
        keyword = type.kind.text()
        return f"{keyword} {type.name.text()}" if type.name else keyword
    else:
        # The parameter shadows the builtin `type`, hence `__class__`.
        raise TypeError(f"Unhandled type {type.__class__.__name__}")
class SMod:
    """Base class of the modifiers collected in `SType.mods`."""


class SModConst(SMod):
    """Marks a `const` qualifier."""


class SModNullable(SMod):
    """Marks the `ufbx_nullable` specifier."""


class SModInline(SMod):
    """Marks the `ufbx_inline` specifier."""


class SModAbi(SMod):
    """Marks the `ufbx_abi` specifier."""


class SModUnsafe(SMod):
    """Marks the `ufbx_unsafe` specifier."""


class SModPointer(SMod):
    """Marks one level of pointer indirection."""


class SModArray(SMod):
    """Array modifier; `length` is the bracket contents or None."""

    def __init__(self, length: Optional[str]):
        self.length = length


class SModFunction(SMod):
    """Function modifier; `args` holds the argument declarations."""

    def __init__(self, args: List["SDecl"]):
        self.args = args
# Block of comment lines with the source line range it covers.
class SComment(NamedTuple):
    line_begin: int
    line_end: int
    # One entry per comment line, with the comment marker already removed.
    text: List[str]


# Resolved type of a name.
class SType(NamedTuple):
    # Values set in this file: "name", "struct", "union", "enum", "define", "enumType".
    kind: str
    name: Optional[str]
    # Modifiers in application order. The default list is shared between
    # instances; code in this file only ever builds new lists from it.
    mods: List[SMod] = []
    # Inline definition carried by the type, if any.
    body: Union["SStruct", "SEnum", "SEnumType", None] = None


# A declared name together with its fully resolved type.
class SName(NamedTuple):
    # None for anonymous declarators.
    name: Optional[str]
    type: SType
    value: Optional[str] = None


# One declaration, possibly introducing several names.
class SDecl(NamedTuple):
    line_begin: int
    line_end: int
    # Values set in this file: "typedef", "extern", "toplevel", "field",
    # "argument", "enumValue", "list", "enumCount", "define".
    kind: str
    names: List[SName]
    # Comment attached by collect_decl_comments, if any.
    comment: Optional[SComment] = None
    # True when `comment` was found on the declaration's own last line.
    comment_inline: bool = False
    # True when any name carries a function modifier.
    is_function: bool = False
    # Parameter names of a function-like `#define`, else None.
    define_args: Optional[List[str]] = None
    # Value text of an enum value or `#define`.
    value: Optional[str] = None


# Run of adjacent declarations that share one comment.
class SDeclGroup(NamedTuple):
    line: int
    decls: List[SDecl]
    comment: Optional[SComment] = None
    comment_inline: bool = False
    is_function: bool = False


# Anything that can appear in a declaration list.
SCommentDecl = Union[SComment, SDecl, SDeclGroup]


# Body of a struct or union.
class SStruct(NamedTuple):
    line: int
    # Keyword text, "struct" or "union".
    kind: str
    name: Optional[str]
    decls: List[SCommentDecl]
    # True for structs synthesized from UFBX_LIST_TYPE.
    is_list: bool = False


# Body of an enum.
class SEnum(NamedTuple):
    line: int
    name: Optional[str]
    decls: List[SCommentDecl]


# Payload of a UFBX_ENUM_TYPE invocation.
class SEnumType(NamedTuple):
    line: int
    enum_name: str
    enum_prefix: str
    last_value: str
def type_line(typ: AType):
    """Return the source line on which the type expression `typ` starts.

    Wrappers (`const`, ufbx specifiers) report the line of the type they
    wrap; struct/union/enum report the line of their keyword.

    Raises:
        TypeError: if `typ` is not one of the known type nodes.
    """
    if isinstance(typ, (ATypeConst, ATypeSpec)):
        return type_line(typ.inner)
    if isinstance(typ, (ATypeStruct, ATypeEnum)):
        anchor = typ.kind
    elif isinstance(typ, ATypeIdent):
        anchor = typ.name
    else:
        raise TypeError(f"Unhandled type {type(typ).__name__}")
    return anchor.location.line
# Maps the text of a ufbx specifier token to the modifier class it produces.
spec_to_mod = dict(
    ufbx_abi=SModAbi,
    ufbx_nullable=SModNullable,
    ufbx_inline=SModInline,
    ufbx_unsafe=SModUnsafe,
)
def to_stype(typ: AType) -> SType:
    """Convert a parsed type expression into an `SType`.

    `const` and ufbx specifiers become modifiers appended to the converted
    inner type; struct/union/enum types carry their converted body when one
    was present in the source.

    Raises:
        TypeError: if `typ` is not one of the known type nodes.
    """
    if isinstance(typ, ATypeIdent):
        return SType("name", typ.name.text())

    if isinstance(typ, (ATypeConst, ATypeSpec)):
        wrapped = to_stype(typ.inner)
        if isinstance(typ, ATypeConst):
            extra = SModConst()
        else:
            extra = spec_to_mod[typ.spec.text()]()
        return wrapped._replace(mods=wrapped.mods + [extra])

    if isinstance(typ, ATypeStruct):
        body = None
        if typ.decls is not None:
            body = to_sstruct(typ)
        tag = typ.name.text() if typ.name else None
        return SType(typ.kind.text(), tag, body=body)

    if isinstance(typ, ATypeEnum):
        body = None
        if typ.decls is not None:
            body = to_senum(typ)
        tag = typ.name.text() if typ.name else None
        return SType("enum", tag, body=body)

    raise TypeError(f"Unhandled type {type(typ).__name__}")
def name_to_stype(base: SType, name: AName) -> SType:
    """Apply the modifiers implied by declarator `name` to `base`.

    Identifier and anonymous declarators leave `base` untouched. Pointer,
    array and function declarators first resolve their inner declarator and
    then append their own modifier to the result.

    Raises:
        TypeError: if `name` is not one of the known declarator nodes.
    """
    if isinstance(name, (ANameIdent, ANameAnonymous)):
        return base
    if not isinstance(name, (ANamePointer, ANameArray, ANameFunction)):
        raise TypeError(f"Unhandled type {type(name)}")

    resolved = name_to_stype(base, name.inner)
    if isinstance(name, ANamePointer):
        extra = SModPointer()
    elif isinstance(name, ANameArray):
        extra = SModArray(name.length.text() if name.length else None)
    else:
        extra = SModFunction([to_sdecl(arg, "argument") for arg in name.args])
    return resolved._replace(mods=resolved.mods + [extra])
def name_str(name: AName):
    """Return the identifier at the core of declarator `name`.

    Pointer, array and function wrappers are peeled off first. Returns None
    for an anonymous declarator.

    Raises:
        TypeError: if an unknown declarator node is encountered.
    """
    node = name
    while isinstance(node, (ANamePointer, ANameArray, ANameFunction)):
        node = node.inner
    if isinstance(node, ANameIdent):
        return node.ident.text()
    if isinstance(node, ANameAnonymous):
        return None
    raise TypeError(f"Unhandled type {type(node)}")
def to_sdecl(decl: ADecl, kind: str) -> SDecl:
    """Convert a parsed declaration into an `SDecl` of the given `kind`.

    Every declarator becomes an `SName` with its resolved type. A declaration
    without declarators still yields one unnamed entry carrying the base type.
    `line_end` falls back to the starting line when the parser recorded no
    end line.
    """
    base_type = to_stype(decl.type)

    entries = []
    for declarator in decl.names:
        resolved = name_to_stype(base_type, declarator)
        entries.append(SName(name_str(declarator), resolved))

    # Computed before the unnamed fallback entry is added.
    has_function = any(
        isinstance(mod, SModFunction)
        for entry in entries
        for mod in entry.type.mods
    )

    if not decl.names:
        entries.append(SName(None, base_type))

    first_line = type_line(decl.type)
    last_line = first_line if decl.end_line is None else decl.end_line
    return SDecl(first_line, last_line, kind, entries, is_function=has_function)
Comment = List[str]


def to_scomment(comment: Ast) -> Optional[SComment]:
    """Convert a comment node (anything with a `comments` token list).

    Returns None for a falsy `comment`. Each line's text has the leading
    `//` removed together with at most one whitespace character after it.
    The former fixed `[3:]` slice assumed a `// ` prefix and silently
    dropped the first real character of comments written as `//text`.
    """
    if not comment:
        return None
    begin = comment.comments[0].location.line
    end = comment.comments[-1].location.line
    text = []
    for token in comment.comments:
        body = token.text()[2:]
        # Only a separator is stripped, never content.
        if body[:1].isspace():
            body = body[1:]
        text.append(body)
    return SComment(begin, end, text)
def to_sstruct(struct: ATypeStruct) -> SStruct:
    """Convert a struct/union node that has a body into an `SStruct`.

    Comment blocks and fields are converted in source order; any other kind
    of body entry is skipped.
    """
    members = []
    for entry in struct.decls:
        if isinstance(entry, AStructComment):
            members.append(to_scomment(entry))
            continue
        if isinstance(entry, AStructField):
            members.append(to_sdecl(entry.decl, "field"))

    keyword = struct.kind
    return SStruct(
        line=keyword.location.line,
        kind=keyword.text(),
        name=struct.name.text() if struct.name else None,
        decls=members,
    )
def to_senum(enum: ATypeEnum) -> SEnum:
    """Convert an enum node that has a body into an `SEnum`.

    Comment blocks are converted as-is; every enumerator becomes a
    single-line `SDecl` of kind "enumValue" whose one name is typed as the
    enclosing enum. The value is stored as text on both the declaration and
    the name (the name previously received the raw token, contradicting
    `SName.value: Optional[str]`).
    """
    decls = []
    name = enum.name.text() if enum.name else None
    for decl in enum.decls:
        if isinstance(decl, AEnumComment):
            decls.append(to_scomment(decl))
        elif isinstance(decl, AEnumValue):
            line = decl.name.location.line
            value = decl.value.text() if decl.value else None
            decls.append(SDecl(
                line_begin=line,
                line_end=line,
                kind="enumValue",
                value=value,
                names=[
                    SName(
                        name=decl.name.text(),
                        type=SType("enum", name),
                        value=value),
                ]))
    line = enum.kind.location.line
    return SEnum(line, name, decls)
def to_sbody(typ: AType):
    """Convert a struct/union or enum node into its body representation.

    Raises:
        TypeError: for any other type node.
    """
    if isinstance(typ, ATypeStruct):
        return to_sstruct(typ)
    if isinstance(typ, ATypeEnum):
        return to_senum(typ)
    raise TypeError(f"Unhandled type {type(typ)}")
def top_sdecls(top: ATop) -> List[SCommentDecl]:
    """Flatten a top-level node into comment/declaration records.

    - A file node concatenates the records of all its items.
    - typedef / extern / plain declarations become one `SDecl` each.
    - `UFBX_LIST_TYPE(name, T)` is expanded into a synthetic struct `name`
      with a `data` pointer-to-T field and a `size_t count` field.
    - `UFBX_ENUM_TYPE(...)` becomes an "enumCount" declaration named
      `<prefix>_COUNT`.
    - `#define` directives become "define" declarations; every other
      preprocessor directive is dropped.

    Raises:
        TypeError: if `top` is not one of the known top-level nodes.
    """
    if isinstance(top, ATopFile):
        decls = []
        for t in top.tops:
            decls += top_sdecls(t)
        return decls
    elif isinstance(top, ATopTypedef):
        return [to_sdecl(top.decl, "typedef")]
    elif isinstance(top, ATopExtern):
        return [to_sdecl(top.decl, "extern")]
    elif isinstance(top, ATopDecl):
        return [to_sdecl(top.decl, "toplevel")]
    elif isinstance(top, ATopComment):
        return [to_scomment(top)]
    elif isinstance(top, ATopList):
        line = top.name.location.line
        name = top.name.text()
        st = to_stype(top.type.type)
        st = name_to_stype(st, top.type.names[0])
        return [SDecl(line, line, "list", [SName(None, SType("struct", name,
            body=SStruct(line, "struct", name, [
                SDecl(line, line, "field", [SName("data", st._replace(mods=st.mods+[SModPointer()]))]),
                SDecl(line+1, line+1, "field", [SName("count", SType("name", "size_t"))]),
            ], is_list=True)
        ))])]
    elif isinstance(top, ATopEnumType):
        line = top.enum_type.location.line
        name = top.prefix.text() + "_COUNT"
        return [SDecl(line, line, "enumCount",
            [SName(name, SType("enumType", "enumType", body=SEnumType(
                line, top.enum_type.text(), top.prefix.text(), top.last_value.text())
            )
        )]
        )]
    elif isinstance(top, ATopPreproc):
        line = top.preproc.location.line
        text = top.preproc.text()
        m = re.match(r"#\s*define\s+(\w+)(\([^\)]*\))?\s+(.*)", text)
        if m:
            name = m.group(1)
            args = m.group(2)
            if args:
                # group(2) still includes the surrounding parentheses; strip
                # them so the first/last names do not come out as "(a"/"b)".
                # An empty `()` yields an empty parameter list.
                params = (arg.strip() for arg in args[1:-1].split(","))
                args = [param for param in params if param]
            else:
                args = None
            value = m.group(3)
            return [SDecl(line, line, "define", [SName(name, SType("define", "define"))],
                define_args=args,
                value=value)]
        else:
            return []  # TODO
    else:
        raise TypeError(f"Unhandled type {type(top)}")
def collect_decl_comments(decls: List[SCommentDecl]):
    """Attach comments to the declarations they document (generator).

    Two attachments are recognized, looking at up to three adjacent entries:

    - A comment ending on the line directly before a declaration is attached
      to it, unless that declaration is itself followed by a comment starting
      on the declaration's last line (that one wins as an inline comment).
    - A comment starting on a declaration's last line is attached as an
      inline comment, with a leading `<` marker removed from every line.

    Everything else is yielded unchanged.
    """
    n = 0
    while n < len(decls):
        dc = decls[n:n+3]
        if isinstance(dc[0], SComment):
            if (len(dc) >= 2 and isinstance(dc[1], SDecl) and dc[0].line_end + 1 == dc[1].line_begin
                    and (len(dc) < 3 or not (isinstance(dc[2], SComment) and dc[1].line_end == dc[2].line_begin))):
                yield dc[1]._replace(comment=dc[0])
                n += 2
            else:
                yield dc[0]
                n += 1
        else:
            if len(dc) >= 2 and isinstance(dc[1], SComment) and dc[0].line_end == dc[1].line_begin:
                # Raw string: "\s" in a plain literal is an invalid escape.
                comment = dc[1]._replace(text=[re.sub(r"^\s*<\s*", "", t) for t in dc[1].text])
                yield dc[0]._replace(comment=comment, comment_inline=True)
                n += 2
            else:
                yield dc[0]
                n += 1
def collect_decl_groups(decls: List[SCommentDecl]):
    """Wrap body-less declarations into `SDeclGroup`s (generator).

    A declaration without an inline comment starts a group that absorbs the
    following declarations while each one begins on the line right after the
    previous one ended, has no comment and no body, and agrees with the first
    on `is_function`. A declaration with an inline comment forms a group of
    its own. The group takes over the first declaration's comment. Comments
    and declarations with a body pass through unchanged.
    """
    def has_body(candidate):
        return bool(candidate.names and candidate.names[0].type.body)

    pos = 0
    while pos < len(decls):
        head = decls[pos]
        pos += 1

        if not isinstance(head, SDecl) or has_body(head):
            yield head
            continue

        members = [head._replace(comment=None)]

        if head.comment_inline:
            yield SDeclGroup(head.line_begin, members, head.comment,
                             head.comment_inline, head.is_function)
            continue

        expected_line = head.line_end + 1
        while pos < len(decls):
            follower = decls[pos]
            if not isinstance(follower, SDecl):
                break
            if follower.comment or follower.line_begin != expected_line:
                break
            if has_body(follower) or follower.is_function != head.is_function:
                break
            members.append(follower)
            expected_line = follower.line_end + 1
            pos += 1

        inline = len(members) == 1 and head.comment_inline
        yield SDeclGroup(head.line_begin, members, head.comment, inline, head.is_function)
def collect_decls(decls: List[SCommentDecl], allow_groups: bool) -> List[SCommentDecl]:
    """Attach comments to declarations and, if requested, group them."""
    commented = list(collect_decl_comments(decls))
    if not allow_groups:
        return commented
    return list(collect_decl_groups(commented))
def format_arg(decl: SDecl):
    """Serialize a function argument using the first name of `decl`."""
    first = decl.names[0]
    formatted_type = format_type(first.type)
    return {"type": formatted_type, "name": first.name}
def format_mod(mod: SMod):
    """Serialize a type modifier as a dict tagged by its "type".

    Array modifiers additionally carry "length", function modifiers carry
    their serialized "args".

    Raises:
        TypeError: if `mod` is not one of the known modifiers.
    """
    plain_tags = (
        (SModConst, "const"),
        (SModNullable, "nullable"),
        (SModInline, "inline"),
        (SModAbi, "abi"),
        (SModPointer, "pointer"),
        (SModUnsafe, "unsafe"),
    )
    for mod_class, tag in plain_tags:
        if isinstance(mod, mod_class):
            return {"type": tag}

    if isinstance(mod, SModArray):
        return {"type": "array", "length": mod.length}
    if isinstance(mod, SModFunction):
        return {"type": "function", "args": [format_arg(arg) for arg in mod.args]}
    raise TypeError(f"Unhandled mod {type(mod)}")
def format_type(type: SType):
    """Serialize an `SType` as its kind, name and formatted modifiers."""
    formatted_mods = []
    for mod in type.mods:
        formatted_mods.append(format_mod(mod))
    return {"kind": type.kind, "name": type.name, "mods": formatted_mods}
def format_name(name: SName):
    """Serialize an `SName` as its formatted type and its name."""
    formatted_type = format_type(name.type)
    return {"type": formatted_type, "name": name.name}
def format_decls(decls: List[SCommentDecl], allow_groups: bool):
    """Yield JSON-ready dicts for a declaration list.

    The list is first run through `collect_decls`. Free comments become
    "paragraph" entries, groups become "group" entries whose members are
    formatted without further grouping, and declarations become "struct",
    "enum" or "enumType" entries when their first name carries such a body,
    or otherwise one "decl" entry per declared name.

    Raises:
        TypeError: if an entry is of an unknown kind.
    """
    def comment_lines(entry):
        return entry.comment.text if entry.comment else []

    for entry in collect_decls(decls, allow_groups):
        if isinstance(entry, SComment):
            yield {
                "kind": "paragraph",
                "comment": entry.text,
            }
            continue

        if isinstance(entry, SDeclGroup):
            yield {
                "kind": "group",
                "line": entry.line,
                "name": None,
                "comment": comment_lines(entry),
                "commentInline": entry.comment_inline,
                "isFunction": entry.is_function,
                "decls": list(format_decls(entry.decls, allow_groups=False)),
            }
            continue

        if not isinstance(entry, SDecl):
            raise TypeError(f"Unhandled type {type(entry)}")

        body = None
        if entry.names and entry.names[0].type.body:
            body = entry.names[0].type.body

        if isinstance(body, SStruct):
            yield {
                "kind": "struct",
                "structKind": body.kind,
                "line": body.line,
                "name": body.name,
                "comment": comment_lines(entry),
                "commentInline": entry.comment_inline,
                "isList": body.is_list,
                "decls": list(format_decls(body.decls, allow_groups=True)),
            }
        elif isinstance(body, SEnum):
            yield {
                "kind": "enum",
                "line": body.line,
                "name": body.name,
                "comment": comment_lines(entry),
                "commentInline": entry.comment_inline,
                "decls": list(format_decls(body.decls, allow_groups=True)),
            }
        elif isinstance(body, SEnumType):
            yield {
                "kind": "enumType",
                "line": body.line,
                "enumName": body.enum_name,
                "countName": body.enum_prefix + "_COUNT",
                "lastValue": body.last_value,
                "comment": comment_lines(entry),
                "commentInline": entry.comment_inline,
            }
        else:
            for declared in entry.names:
                yield {
                    "kind": "decl",
                    "declKind": entry.kind,
                    "line": entry.line_begin,
                    "name": declared.name,
                    "comment": comment_lines(entry),
                    "commentInline": entry.comment_inline,
                    "isFunction": entry.is_function,
                    "value": entry.value,
                    "defineArgs": entry.define_args,
                    "type": format_type(declared.type),
                }
# Command-line entry point: parse a header and write the declaration JSON.
if __name__ == "__main__":
    parser = argparse.ArgumentParser("ufbx_parser.py")
    parser.add_argument("-i", help="Input file")
    parser.add_argument("-o", help="Output file")
    argv = parser.parse_args()

    # Defaults are resolved relative to this script, not the working directory.
    src_path = os.path.dirname(os.path.realpath(__file__))

    input_file = argv.i
    if not input_file:
        input_file = os.path.join(src_path, "..", "ufbx.h")

    output_file = argv.o
    if not output_file:
        output_file = os.path.join(src_path, "build", "ufbx.json")

    # exist_ok already covers the "directory is there" case.
    output_path = os.path.dirname(os.path.realpath(output_file))
    os.makedirs(output_path, exist_ok=True)

    # Explicit encoding so decoding does not depend on the platform locale.
    with open(input_file, encoding="utf-8") as f:
        source = f.read()

    p = Parser(source, "ufbx.h")
    top_file = p.parse_top_file()
    result = top_sdecls(top_file)
    js = list(format_decls(result, allow_groups=True))

    with open(output_file, "wt", encoding="utf-8") as f:
        json.dump(js, f, indent=2)