# FreeCAD/src/Tools/bindings/model/generateModel_Python.py
# (upload metadata: AbdulElahGwaith via huggingface_hub, revision 985c397)
# SPDX-License-Identifier: LGPL-2.1-or-later
"""Parses Python binding interface files into a typed AST model."""
from dataclasses import dataclass
from enum import Enum
import ast
import re
from typing import List
from model.typedModel import (
GenerateModel,
PythonExport,
Methode,
Attribute,
Documentation,
Author,
Parameter,
ParameterType,
SequenceProtocol,
)
# Splits a docstring at a standalone "--" separator, the CPython
# __text_signature__ convention: signature text before, prose after.
SIGNATURE_SEP = re.compile(r"\s+--\s+", re.DOTALL)
# Matches a leading "self"/"cls" parameter (plus its trailing comma) right
# after the opening parenthesis, so it can be stripped or "$"-prefixed when
# rendering signatures.
SELF_CLS_ARG = re.compile(r"\(\s*(self|cls)(\s*,\s*)?")
class ArgumentKind(Enum):
    """Kinds of arguments in a Python signature, in declaration order."""

    # Positional-only parameters (declared before "/").
    PositionOnly = 0
    # Plain positional-or-keyword parameters.
    Arg = 1
    # The *args parameter.
    VarArg = 2
    # Keyword-only parameters (declared after "*").
    KwOnly = 3
    # The **kwargs parameter.
    KwArg = 4
@dataclass
class FuncArgument:
    """A single argument extracted from a function signature."""

    name: str  # argument name as written in the source
    annotation: str  # unparsed annotation text; "object" when absent
    kind: ArgumentKind  # positional / vararg / kw-only / kwargs classification
class FunctionSignature:
    """
    Parse function arguments with correct classification and order.

    Wraps one ``ast.FunctionDef`` and derives:

    - ``args``: flat list of :class:`FuncArgument` in declaration order,
    - ``has_keywords``: whether the callable accepts keyword arguments,
    - ``annotated_text``: ``name(params) -> ret`` with annotations kept,
    - ``text``: plain signature in ``__text_signature__`` style
      (self/cls rendered as ``$self``/``$cls``),
    - decorator-derived flags (const/static/class/no-args/overload).
    """

    args: list[FuncArgument]
    has_keywords: bool
    docstring: str
    annotated_text: str
    text: str
    # Flags toggled by decorator names; see update_flags().
    const_flag: bool = False
    static_flag: bool = False
    class_flag: bool = False
    noargs_flag: bool = False
    is_overload: bool = False

    def __init__(self, func: ast.FunctionDef):
        self.args = []
        self.has_keywords = False
        self.is_overload = False
        self.docstring = ast.get_docstring(func) or ""
        args = func.args
        self.update_flags(func)
        # Arguments are appended in signature order: positional-only,
        # positional-or-keyword, *args, keyword-only, **kwargs.
        self.args.extend(
            (
                FuncArgument(
                    arg.arg,
                    self.get_annotation_str(arg.annotation),
                    ArgumentKind.PositionOnly,
                )
                for arg in args.posonlyargs
            ),
        )
        self.args.extend(
            (
                FuncArgument(
                    arg.arg,
                    self.get_annotation_str(arg.annotation),
                    ArgumentKind.Arg,
                )
                for arg in args.args
            ),
        )
        # tricky part to determine if there are keyword arguments or not:
        # a lone self/cls does not make the function keyword-callable.
        if args.args:
            if args.args[0].arg in ("self", "cls"):
                instance_args = len(args.args) > 1
            else:
                instance_args = True
        else:
            instance_args = False
        self.has_keywords = bool(instance_args or args.kwonlyargs or args.kwarg)
        if args.vararg:
            self.args.append(
                FuncArgument(
                    args.vararg.arg,
                    self.get_annotation_str(args.vararg.annotation),
                    ArgumentKind.VarArg,
                ),
            )
        self.args.extend(
            (
                FuncArgument(
                    arg.arg,
                    self.get_annotation_str(arg.annotation),
                    ArgumentKind.KwOnly,
                )
                for arg in args.kwonlyargs
            ),
        )
        if args.kwarg:
            self.args.append(
                FuncArgument(
                    args.kwarg.arg,
                    self.get_annotation_str(args.kwarg.annotation),
                    ArgumentKind.KwArg,
                ),
            )
        # Annotated signatures (Not supported by __text_signature__)
        returns = ast.unparse(func.returns) if func.returns else "object"
        parameters = ast.unparse(func.args)
        self.annotated_text = SELF_CLS_ARG.sub("(", f"{func.name}({parameters}) -> {returns}", 1)
        # Not Annotated signatures (supported by __text_signature__).
        # NOTE: annotations are stripped in place from the AST argument nodes
        # before re-unparsing; this must happen AFTER annotated_text has been
        # rendered above.
        all_args = [*args.posonlyargs, *args.args, args.vararg, *args.kwonlyargs, args.kwarg]
        for item in all_args:
            if item:
                item.annotation = None
        parameters = ast.unparse(args)
        # "$self"/"$cls" marks the implicit instance/class argument in the
        # __text_signature__ convention.
        self.text = SELF_CLS_ARG.sub(r"($\1\2", f"{func.name}({parameters})", 1)

    def get_annotation_str(self, node: ast.AST | None) -> str:
        """Unparse an annotation node; default to "object" when absent."""
        if not node:
            return "object"
        return ast.unparse(node)

    def update_flags(self, func: ast.FunctionDef) -> None:
        """Set the boolean flags from the function's decorator names.

        Both bare names (``@constmethod``) and attribute references
        (``@typing.overload``) are recognized; anything else is ignored.
        """
        for deco in func.decorator_list:
            match deco:
                case ast.Name(id, _):
                    name = id
                case ast.Attribute(_, attr, _):
                    name = attr
                case _:
                    continue
            match name:
                case "constmethod":
                    self.const_flag = True
                case "classmethod":
                    self.class_flag = True
                case "no_args":
                    self.noargs_flag = True
                case "staticmethod":
                    self.static_flag = True
                case "overload":
                    self.is_overload = True
class Function:
    """A named function together with all of its (possibly overloaded) signatures."""

    name: str
    signatures: list[FunctionSignature]

    def __init__(self, func: ast.FunctionDef) -> None:
        self.name = func.name
        self.signatures = [FunctionSignature(func)]

    def update(self, func: ast.FunctionDef) -> None:
        """Register an additional definition (overload) of this function."""
        self.signatures.append(FunctionSignature(func))

    @property
    def docstring(self) -> str:
        """All signatures' docstrings joined by newlines."""
        parts = [sig.docstring for sig in self.signatures]
        return "\n".join(parts)

    @property
    def has_keywords(self) -> bool:
        """Whether the callable accepts keyword arguments.

        With overloads present, only the overload signatures are consulted;
        otherwise the single signature decides.
        """
        if len(self.signatures) > 1:
            overload_sigs = (sig for sig in self.signatures if sig.is_overload)
            return any(sig.has_keywords for sig in overload_sigs)
        return self.signatures[0].has_keywords

    @property
    def signature(self) -> FunctionSignature | None:
        """First non overload signature"""
        return next((sig for sig in self.signatures if not sig.is_overload), None)

    @property
    def static_flag(self) -> bool:
        return any(sig.static_flag for sig in self.signatures)

    @property
    def const_flag(self) -> bool:
        return any(sig.const_flag for sig in self.signatures)

    @property
    def class_flag(self) -> bool:
        return any(sig.class_flag for sig in self.signatures)

    @property
    def noargs_flag(self) -> bool:
        return any(sig.noargs_flag for sig in self.signatures)

    def add_signature_docs(self, doc: Documentation) -> None:
        """Prepend the rendered signatures to doc.UserDocu.

        Plain signatures come first, then a "--" marker (the
        __text_signature__ convention), then the annotated signatures,
        then whatever user documentation followed any previous marker.
        """
        if len(self.signatures) == 1:
            plain = [self.signatures[0].text]
            annotated = [self.signatures[0].annotated_text]
        else:
            plain = [sig.text for sig in self.signatures if not sig.is_overload]
            annotated = [sig.annotated_text for sig in self.signatures if sig.is_overload]
        if not plain:
            return
        user_doc = doc.UserDocu or ""
        marker = SIGNATURE_SEP.search(user_doc)
        if marker is not None:
            user_doc = user_doc[marker.end():].strip()
        plain.append("--\n")  # mark __text_signature__
        plain.extend(annotated)  # Include real annotated signature in user docstring
        plain.append(f"\n{user_doc}")  # Rest of the docstring
        doc.UserDocu = "\n".join(plain)
def _extract_decorator_kwargs(decorator: ast.expr) -> dict:
"""
Extract keyword arguments from a decorator call like `@export(Father="...", Name="...")`.
Returns them in a dict.
"""
if not isinstance(decorator, ast.Call):
return {}
result = {}
for kw in decorator.keywords:
match kw.value:
case ast.Constant(value=val):
result[kw.arg] = val
case _:
pass
return result
def _parse_docstring_for_documentation(docstring: str) -> Documentation:
    """
    Build a Documentation object from a free-form docstring.

    Recognizes the line prefixes "DeveloperDocu:", "UserDocu:", "Author:"
    (optionally "Name (email)") and "Licence:". Lines without a known
    prefix become the user documentation, unless an explicit "UserDocu:"
    line overrides them. This is a simple heuristic-based parser.
    """
    if not docstring:
        return Documentation()
    import textwrap

    dev_docu = None
    user_docu = None
    author_name = None
    author_email = None
    author_licence = None
    remaining_lines = []
    # Remove common indentation before scanning line by line.
    for raw_line in textwrap.dedent(docstring).strip().split("\n"):
        line = raw_line.strip()
        if line.startswith("DeveloperDocu:"):
            dev_docu = line.split("DeveloperDocu:", 1)[1].strip()
        elif line.startswith("UserDocu:"):
            user_docu = line.split("UserDocu:", 1)[1].strip()
        elif line.startswith("Author:"):
            # e.g. "Author: John Doe (john@example.com)"
            author_part = line.split("Author:", 1)[1].strip()
            found = re.search(r"(.*?)\s*\((.*?)\)", author_part)
            if found:
                author_name = found.group(1).strip()
                author_email = found.group(2).strip()
            else:
                author_name = author_part
        elif line.startswith("Licence:"):
            author_licence = line.split("Licence:", 1)[1].strip()
        else:
            remaining_lines.append(raw_line)
    if user_docu is None:
        user_docu = "\n".join(remaining_lines)
    author_obj = None
    if author_name or author_email or author_licence:
        author_obj = Author(
            content=docstring,
            Name=author_name or "",
            EMail=author_email or "",
            Licence=author_licence or "LGPL",
        )
    return Documentation(
        Author=author_obj,
        DeveloperDocu=dev_docu,
        UserDocu=user_docu,
    )
def _get_type_str(node):
"""Recursively convert an AST node for a type annotation to its string representation."""
match node:
case ast.Name(id=name):
# Handle qualified names (e.g., typing.List)
return name
case ast.Attribute(value=val, attr=attr):
# For annotations like List[str] (or Final[List[str]]), build the string recursively.
return f"{_get_type_str(val)}.{attr}"
case ast.Subscript(value=val, slice=slice_node):
value_str = _get_type_str(val)
slice_str = _get_type_str(slice_node)
return f"{value_str}[{slice_str}]"
case ast.Tuple(elts=elts):
# For multiple types (e.g., Tuple[int, str])
return ", ".join(_get_type_str(elt) for elt in elts)
case _:
# Fallback for unsupported node types
return "object"
def _python_type_to_parameter_type(py_type: str) -> ParameterType:
    """
    Map a Python type annotation (as a string) to the ParameterType enum if possible.
    Fallback to OBJECT if unrecognized.

    Matching is case-insensitive; generic forms like "list[int]" are
    recognized by prefix.
    """
    lowered = py_type.lower()
    # Exact scalar types first.
    if lowered in ("int", "builtins.int"):
        return ParameterType.LONG
    if lowered in ("float", "builtins.float"):
        return ParameterType.FLOAT
    if lowered in ("str", "builtins.str"):
        return ParameterType.STRING
    if lowered in ("bool", "builtins.bool"):
        return ParameterType.BOOLEAN
    # Container/callable types matched by prefix (covers parameterized forms).
    if lowered.startswith(("list", "typing.list")):
        return ParameterType.LIST
    if lowered.startswith(("dict", "typing.dict")):
        return ParameterType.DICT
    if lowered.startswith(("callable", "typing.callable")):
        return ParameterType.CALLABLE
    if lowered.startswith(("sequence", "typing.sequence")):
        return ParameterType.SEQUENCE
    if lowered.startswith(("tuple", "typing.tuple")):
        return ParameterType.TUPLE
    if lowered.startswith("pycxxvector"):
        return ParameterType.VECTOR
    return ParameterType.OBJECT
def _parse_class_attributes(class_node: ast.ClassDef, source_code: str) -> List[Attribute]:
    """
    Parse top-level attributes (e.g. `TypeId: str = ""`) from the class AST node.
    We'll create an `Attribute` for each. For the `Documentation` of each attribute,
    we might store minimal or none if there's no docstring.

    Only annotated assignments (ast.AnnAssign) are considered. A string
    literal statement immediately following an attribute is treated as its
    documentation. A `Final[...]` (or `typing.Final[...]`) annotation marks
    the attribute as read-only, and the inner type is used.
    NOTE(review): `source_code` is not used here — kept for interface
    stability; confirm against callers before removing.
    """
    attributes = []
    default_doc = Documentation(DeveloperDocu="", UserDocu="", Author=None)
    for idx, stmt in enumerate(class_node.body):
        if isinstance(stmt, ast.AnnAssign):
            # e.g.: `TypeId: Final[str] = ""`
            name = stmt.target.id if isinstance(stmt.target, ast.Name) else "unknown"
            # Evaluate the type annotation and detect Final for read-only attributes
            if isinstance(stmt.annotation, ast.Name):
                # e.g. `str`
                type_name = stmt.annotation.id
                readonly = False
            elif isinstance(stmt.annotation, ast.Subscript):
                # Check if this is a Final type hint, e.g. Final[int] or typing.Final[int]
                is_final = (
                    isinstance(stmt.annotation.value, ast.Name)
                    and stmt.annotation.value.id == "Final"
                ) or (
                    isinstance(stmt.annotation.value, ast.Attribute)
                    and stmt.annotation.value.attr == "Final"
                )
                if is_final:
                    readonly = True
                    # Extract the inner type from the Final[...] annotation
                    type_name = _get_type_str(stmt.annotation.slice)
                else:
                    type_name = _get_type_str(stmt.annotation)
                    readonly = False
            else:
                # Unrecognized annotation shape: fall back to a generic object.
                type_name = "object"
                readonly = False
            param_type = _python_type_to_parameter_type(type_name)
            # Look for a docstring immediately following the attribute definition.
            attr_doc = default_doc
            if idx + 1 < len(class_node.body):
                next_stmt = class_node.body[idx + 1]
                if (
                    isinstance(next_stmt, ast.Expr)
                    and isinstance(next_stmt.value, ast.Constant)
                    and isinstance(next_stmt.value.value, str)
                ):
                    docstring = next_stmt.value.value
                    # Parse the docstring to build a Documentation object.
                    attr_doc = _parse_docstring_for_documentation(docstring)
            param = Parameter(Name=name, Type=param_type)
            attr = Attribute(
                Documentation=attr_doc,
                Parameter=param,
                Name=name,
                ReadOnly=readonly,
            )
            attributes.append(attr)
    return attributes
def _parse_methods(class_node: ast.ClassDef) -> List[Methode]:
    """
    Parse methods from the class AST node, extracting:
    - Method name
    - Parameters (from the function signature / annotations)
    - Docstring

    Multiple definitions with the same name (overloads, including those
    nested inside `if` blocks) are merged into a single Methode entry.
    Functions consisting only of @overload signatures are skipped.
    """
    methods = []

    def collect_function_defs(nodes) -> list[ast.FunctionDef]:
        # Recursively gather function defs, descending into `if` bodies so
        # conditionally-defined overloads are found as well.
        funcs = []
        for node in nodes:
            if isinstance(node, ast.FunctionDef):
                funcs.append(node)
            elif isinstance(node, ast.If):
                funcs.extend(collect_function_defs(node.body))
                funcs.extend(collect_function_defs(node.orelse))
        return funcs

    # Collect including overloads
    functions: dict[str, Function] = {}
    for func_node in collect_function_defs(class_node.body):
        if func := functions.get(func_node.name):
            func.update(func_node)
        else:
            functions[func_node.name] = Function(func_node)
    for func in functions.values():
        doc_obj = _parse_docstring_for_documentation(func.docstring)
        func.add_signature_docs(doc_obj)
        method_params = []
        signature = func.signature
        if signature is None:
            # Only @overload stubs were found; no implementation signature
            # to export for this name.
            continue
        # Process positional parameters (skipping self/cls)
        for arg_i, arg in enumerate(signature.args):
            param_name = arg.name
            if arg_i == 0 and param_name in ("self", "cls"):
                continue
            param_type = _python_type_to_parameter_type(arg.annotation)
            method_params.append(Parameter(Name=param_name, Type=param_type))
        method = Methode(
            Name=func.name,
            Documentation=doc_obj,
            Parameter=method_params,
            Const=func.const_flag,
            Static=func.static_flag,
            Class=func.class_flag,
            Keyword=func.has_keywords,
            NoArgs=func.noargs_flag,
        )
        methods.append(method)
    return methods
def _get_module_from_path(path: str) -> str:
"""
Returns the name of the FreeCAD module from the path.
Examples:
.../src/Base/Persistence.py -> "Base"
.../src/Mod/CAM/Path/__init__.py -> "CAM"
"""
# 1. Split the path by the OS separator.
import os
parts = path.split(os.sep)
# 2. Attempt to find "src" in the path components.
try:
idx_src = len(parts) - 1 - list(reversed(parts)).index("src")
except ValueError:
# If "src" is not found, we cannot determine the module name.
return None
# 3. Check if there is a path component immediately after "src".
# If there isn't, we have nothing to return.
if idx_src + 1 >= len(parts):
return None
next_part = parts[idx_src + 1]
# 4. If the next component is "Mod", then the module name is the
# component AFTER "Mod" (e.g. "CAM" in "Mod/CAM").
if next_part == "Mod":
if idx_src + 2 < len(parts):
return parts[idx_src + 2]
else:
# "Mod" is the last component
return None
else:
# 5. Otherwise, if it's not "Mod", we treat that next component
# itself as the module name (e.g. "Base").
return next_part
def _extract_module_name(import_path: str, default_module: str) -> str:
"""
Given an import_path like "Base.Foo", return "Base".
If import_path has no dot (e.g., "Foo"), return default_module.
Examples:
extract_module_name("Base.Foo", default_module="Fallback") -> "Base"
extract_module_name("Foo", default_module="Fallback") -> "Fallback"
"""
if "." in import_path:
# Take everything before the first dot
return import_path.split(".", 1)[0]
else:
# No dot, return the fallback module name
return default_module
def _get_module_path(module_name: str) -> str:
if module_name in ["Base", "App", "Gui"]:
return module_name
return "Mod/" + module_name
def _parse_imports(tree) -> dict:
"""
Parses the given source_code for import statements and constructs
a mapping from imported name -> module path.
For example, code like:
from Metadata import export, forward_declarations, constmethod
from PyObjectBase import PyObjectBase
from Base.Foo import Foo
from typing import List, Final
yields a mapping of:
{
"export": "Metadata",
"forward_declarations": "Metadata",
"constmethod": "Metadata",
"PyObjectBase": "PyObjectBase",
"Foo": "Base.Foo",
"List": "typing",
"Final": "typing"
}
"""
name_to_module_map = {}
for node in tree.body:
match node:
# Handle 'import X' or 'import X as Y'
case ast.Import(names=names):
# e.g. import foo, import foo as bar
for alias in names:
imported_name = alias.asname if alias.asname else alias.name
name_to_module_map[imported_name] = alias.name
# Handle 'from X import Y, Z as W'
case ast.ImportFrom(module=module, names=names):
module_name = module if module is not None else ""
for alias in names:
imported_name = alias.asname if alias.asname else alias.name
name_to_module_map[imported_name] = module_name
case _:
pass
return name_to_module_map
def _get_native_class_name(klass: str) -> str:
    # Identity mapping: the native (C++) twin class name currently matches
    # the Python class name. Kept as a hook so the naming rule can change
    # in one place, mirroring _get_native_python_class_name.
    return klass
def _get_native_python_class_name(klass: str) -> str:
if klass == "PyObjectBase":
return klass
return klass + "Py"
def _extract_base_class_name(base: ast.expr) -> str:
"""
Extract the base class name from an AST node using ast.unparse.
For generic bases (e.g. GenericParent[T]), it removes the generic part.
For qualified names (e.g. some_module.ParentClass), it returns only the last part.
"""
base_str = ast.unparse(base)
# Remove generic parameters if present.
if "[" in base_str:
base_str = base_str.split("[", 1)[0]
# For qualified names, take only the class name.
if "." in base_str:
base_str = base_str.split(".")[-1]
return base_str
def _parse_class(class_node, source_code: str, path: str, imports_mapping: dict) -> PythonExport:
    """Build a PythonExport model entry from one class definition.

    Args:
        class_node: the ast.ClassDef to convert.
        source_code: full source text (passed through to attribute parsing).
        path: file path, used to derive the owning FreeCAD module name.
        imports_mapping: imported-name -> module mapping from _parse_imports;
            the class's first base class must appear in it (KeyError otherwise).
    """
    base_class_name = None
    for base in class_node.bases:
        base_class_name = _extract_base_class_name(base)
        break  # Only consider the first base class.
    # NOTE(review): assert is stripped under `python -O`; an explicit raise
    # would be more robust for classes declared without a base class.
    assert base_class_name is not None
    is_exported = False
    export_decorator_kwargs = {}
    forward_declarations_text = ""
    class_declarations_text = ""
    sequence_protocol_kwargs = None
    # Scan class decorators for the binding directives.
    for decorator in class_node.decorator_list:
        match decorator:
            case ast.Name(id="export"):
                # Bare @export: exported with all defaults.
                export_decorator_kwargs = {}
                is_exported = True
            case ast.Call(func=ast.Name(id="export"), keywords=_, args=_):
                export_decorator_kwargs = _extract_decorator_kwargs(decorator)
                is_exported = True
            case ast.Call(func=ast.Name(id="forward_declarations"), args=args):
                # First positional argument is the raw C++ forward-declaration text.
                if args:
                    match args[0]:
                        case ast.Constant(value=val):
                            forward_declarations_text = val
            case ast.Call(func=ast.Name(id="class_declarations"), args=args):
                # First positional argument is raw text injected into the class body.
                if args:
                    match args[0]:
                        case ast.Constant(value=val):
                            class_declarations_text = val
            case ast.Call(func=ast.Name(id="sequence_protocol"), keywords=_, args=_):
                sequence_protocol_kwargs = _extract_decorator_kwargs(decorator)
            case _:
                pass
    # Parse imports to compute module metadata
    module_name = _get_module_from_path(path)
    imported_from_module = imports_mapping[base_class_name]
    parent_module_name = _extract_module_name(imported_from_module, module_name)
    class_docstring = ast.get_docstring(class_node) or ""
    doc_obj = _parse_docstring_for_documentation(class_docstring)
    class_attributes = _parse_class_attributes(class_node, source_code)
    class_methods = _parse_methods(class_node)
    # Derive native (C++) names and include paths; any decorator kwarg
    # below overrides the derived default.
    native_class_name = _get_native_class_name(class_node.name)
    native_python_class_name = _get_native_python_class_name(class_node.name)
    include = _get_module_path(module_name) + "/" + native_class_name + ".h"
    father_native_python_class_name = _get_native_python_class_name(base_class_name)
    father_include = (
        _get_module_path(parent_module_name) + "/" + father_native_python_class_name + ".h"
    )
    py_export = PythonExport(
        Documentation=doc_obj,
        ModuleName=module_name,
        Name=export_decorator_kwargs.get("Name", "") or native_python_class_name,
        PythonName=export_decorator_kwargs.get("PythonName", "") or None,
        Include=export_decorator_kwargs.get("Include", "") or include,
        Father=export_decorator_kwargs.get("Father", "") or father_native_python_class_name,
        Twin=export_decorator_kwargs.get("Twin", "") or native_class_name,
        TwinPointer=export_decorator_kwargs.get("TwinPointer", "") or native_class_name,
        Namespace=export_decorator_kwargs.get("Namespace", "") or module_name,
        FatherInclude=export_decorator_kwargs.get("FatherInclude", "") or father_include,
        FatherNamespace=export_decorator_kwargs.get("FatherNamespace", "") or parent_module_name,
        Constructor=export_decorator_kwargs.get("Constructor", False),
        NumberProtocol=export_decorator_kwargs.get("NumberProtocol", False),
        RichCompare=export_decorator_kwargs.get("RichCompare", False),
        Delete=export_decorator_kwargs.get("Delete", False),
        Reference=export_decorator_kwargs.get("Reference", None),
        Initialization=export_decorator_kwargs.get("Initialization", False),
        DisableNotify=export_decorator_kwargs.get("DisableNotify", False),
        DescriptorGetter=export_decorator_kwargs.get("DescriptorGetter", False),
        DescriptorSetter=export_decorator_kwargs.get("DescriptorSetter", False),
        ForwardDeclarations=forward_declarations_text,
        ClassDeclarations=class_declarations_text,
        IsExplicitlyExported=is_exported,
    )
    # Attach sequence protocol metadata if provided.
    if sequence_protocol_kwargs is not None:
        seq_protocol = SequenceProtocol(**sequence_protocol_kwargs)
        py_export.Sequence = seq_protocol
    py_export.Attribute.extend(class_attributes)
    py_export.Methode.extend(class_methods)
    return py_export
def parse_python_code(path: str) -> GenerateModel:
    """
    Parse the given Python source code and build a GenerateModel containing
    PythonExport entries. If any class is explicitly exported using @export,
    only those classes are used. If no classes have the @export decorator,
    then a single non-exported class is assumed to be the export. If there
    are multiple non-exported classes, an exception is raised.

    Raises:
        ValueError: when no class is found, or when several classes exist
            without an explicit @export decorator. (ValueError subclasses
            Exception, so existing `except Exception` callers still work.)
    """
    # Binding interface files are UTF-8; be explicit so reading does not
    # depend on the platform's locale encoding.
    with open(path, "r", encoding="utf-8") as file:
        source_code = file.read()
    tree = ast.parse(source_code)
    imports_mapping = _parse_imports(tree)
    explicit_exports = []
    non_explicit_exports = []
    for node in tree.body:
        if isinstance(node, ast.ClassDef):
            py_export = _parse_class(node, source_code, path, imports_mapping)
            if py_export.IsExplicitlyExported:
                explicit_exports.append(py_export)
            else:
                non_explicit_exports.append(py_export)
    model = GenerateModel()
    if explicit_exports:
        # Use only explicitly exported classes.
        model.PythonExport.extend(explicit_exports)
    elif len(non_explicit_exports) == 1:
        # No explicit exports; allow only one non-exported class.
        model.PythonExport.append(non_explicit_exports[0])
    elif non_explicit_exports:
        raise ValueError(
            "Multiple non explicitly-exported classes were found, please use @export."
        )
    else:
        raise ValueError("No classes found for export.")
    return model
def parse(path):
    """Convenience wrapper: parse the file at *path* into a GenerateModel."""
    return parse_python_code(path)
def main():
    """CLI entry point: parse the file named by the first argument and dump the model."""
    import sys

    # Raises IndexError when no path argument was supplied, as before.
    source_path = sys.argv[1:][0]
    model = parse(source_path)
    model.dump()


if __name__ == "__main__":
    main()