# FreeCAD/src/Tools/bindings/model/generateModel_Python.py
# (upload metadata: AbdulElahGwaith via huggingface_hub, revision 985c397)
# SPDX-License-Identifier: LGPL-2.1-or-later
"""Parses Python binding interface files into a typed AST model."""
from dataclasses import dataclass
from enum import Enum
import ast
import re
from typing import List
from model.typedModel import (
GenerateModel,
PythonExport,
Methode,
Attribute,
Documentation,
Author,
Parameter,
ParameterType,
SequenceProtocol,
)
# Splits a docstring at a standalone "--" separator, the CPython
# __text_signature__ convention: signature text before, prose after.
SIGNATURE_SEP = re.compile(r"\s+--\s+", re.DOTALL)
# Matches a leading "self"/"cls" parameter (plus its trailing comma) right
# after the opening parenthesis, so it can be stripped or "$"-prefixed when
# rendering signatures.
SELF_CLS_ARG = re.compile(r"\(\s*(self|cls)(\s*,\s*)?")
class ArgumentKind(Enum):
    """Kinds of arguments in a Python signature, in declaration order."""

    # Positional-only parameters (declared before "/").
    PositionOnly = 0
    # Plain positional-or-keyword parameters.
    Arg = 1
    # The *args parameter.
    VarArg = 2
    # Keyword-only parameters (declared after "*").
    KwOnly = 3
    # The **kwargs parameter.
    KwArg = 4
@dataclass
class FuncArgument:
    """A single argument extracted from a function signature."""

    name: str  # argument name as written in the source
    annotation: str  # unparsed annotation text; "object" when absent
    kind: ArgumentKind  # positional / vararg / kw-only / kwargs classification
class FunctionSignature:
    """
    Parse function arguments with correct classification and order.

    Wraps one ``ast.FunctionDef`` and derives:

    - ``args``: flat list of :class:`FuncArgument` in declaration order,
    - ``has_keywords``: whether the callable accepts keyword arguments,
    - ``annotated_text``: ``name(params) -> ret`` with annotations kept,
    - ``text``: plain signature in ``__text_signature__`` style
      (self/cls rendered as ``$self``/``$cls``),
    - decorator-derived flags (const/static/class/no-args/overload).
    """

    args: list[FuncArgument]
    has_keywords: bool
    docstring: str
    annotated_text: str
    text: str
    # Flags toggled by decorator names; see update_flags().
    const_flag: bool = False
    static_flag: bool = False
    class_flag: bool = False
    noargs_flag: bool = False
    is_overload: bool = False

    def __init__(self, func: ast.FunctionDef):
        self.args = []
        self.has_keywords = False
        self.is_overload = False
        self.docstring = ast.get_docstring(func) or ""
        args = func.args
        self.update_flags(func)
        # Arguments are appended in signature order: positional-only,
        # positional-or-keyword, *args, keyword-only, **kwargs.
        self.args.extend(
            (
                FuncArgument(
                    arg.arg,
                    self.get_annotation_str(arg.annotation),
                    ArgumentKind.PositionOnly,
                )
                for arg in args.posonlyargs
            ),
        )
        self.args.extend(
            (
                FuncArgument(
                    arg.arg,
                    self.get_annotation_str(arg.annotation),
                    ArgumentKind.Arg,
                )
                for arg in args.args
            ),
        )
        # tricky part to determine if there are keyword arguments or not:
        # a lone self/cls does not make the function keyword-callable.
        if args.args:
            if args.args[0].arg in ("self", "cls"):
                instance_args = len(args.args) > 1
            else:
                instance_args = True
        else:
            instance_args = False
        self.has_keywords = bool(instance_args or args.kwonlyargs or args.kwarg)
        if args.vararg:
            self.args.append(
                FuncArgument(
                    args.vararg.arg,
                    self.get_annotation_str(args.vararg.annotation),
                    ArgumentKind.VarArg,
                ),
            )
        self.args.extend(
            (
                FuncArgument(
                    arg.arg,
                    self.get_annotation_str(arg.annotation),
                    ArgumentKind.KwOnly,
                )
                for arg in args.kwonlyargs
            ),
        )
        if args.kwarg:
            self.args.append(
                FuncArgument(
                    args.kwarg.arg,
                    self.get_annotation_str(args.kwarg.annotation),
                    ArgumentKind.KwArg,
                ),
            )
        # Annotated signatures (Not supported by __text_signature__)
        returns = ast.unparse(func.returns) if func.returns else "object"
        parameters = ast.unparse(func.args)
        self.annotated_text = SELF_CLS_ARG.sub("(", f"{func.name}({parameters}) -> {returns}", 1)
        # Not Annotated signatures (supported by __text_signature__).
        # NOTE: annotations are stripped in place from the AST argument nodes
        # before re-unparsing; this must happen AFTER annotated_text has been
        # rendered above.
        all_args = [*args.posonlyargs, *args.args, args.vararg, *args.kwonlyargs, args.kwarg]
        for item in all_args:
            if item:
                item.annotation = None
        parameters = ast.unparse(args)
        # "$self"/"$cls" marks the implicit instance/class argument in the
        # __text_signature__ convention.
        self.text = SELF_CLS_ARG.sub(r"($\1\2", f"{func.name}({parameters})", 1)

    def get_annotation_str(self, node: ast.AST | None) -> str:
        """Unparse an annotation node; default to "object" when absent."""
        if not node:
            return "object"
        return ast.unparse(node)

    def update_flags(self, func: ast.FunctionDef) -> None:
        """Set the boolean flags from the function's decorator names.

        Both bare names (``@constmethod``) and attribute references
        (``@typing.overload``) are recognized; anything else is ignored.
        """
        for deco in func.decorator_list:
            match deco:
                case ast.Name(id, _):
                    name = id
                case ast.Attribute(_, attr, _):
                    name = attr
                case _:
                    continue
            match name:
                case "constmethod":
                    self.const_flag = True
                case "classmethod":
                    self.class_flag = True
                case "no_args":
                    self.noargs_flag = True
                case "staticmethod":
                    self.static_flag = True
                case "overload":
                    self.is_overload = True
class Function:
    """A named function together with all of its (possibly overloaded) signatures."""

    name: str
    signatures: list[FunctionSignature]

    def __init__(self, func: ast.FunctionDef) -> None:
        self.name = func.name
        self.signatures = [FunctionSignature(func)]

    def update(self, func: ast.FunctionDef) -> None:
        """Register an additional definition (overload) of this function."""
        self.signatures.append(FunctionSignature(func))

    @property
    def docstring(self) -> str:
        """All signatures' docstrings joined by newlines."""
        parts = [sig.docstring for sig in self.signatures]
        return "\n".join(parts)

    @property
    def has_keywords(self) -> bool:
        """Whether the callable accepts keyword arguments.

        With overloads present, only the overload signatures are consulted;
        otherwise the single signature decides.
        """
        if len(self.signatures) > 1:
            overload_sigs = (sig for sig in self.signatures if sig.is_overload)
            return any(sig.has_keywords for sig in overload_sigs)
        return self.signatures[0].has_keywords

    @property
    def signature(self) -> FunctionSignature | None:
        """First non overload signature"""
        return next((sig for sig in self.signatures if not sig.is_overload), None)

    @property
    def static_flag(self) -> bool:
        return any(sig.static_flag for sig in self.signatures)

    @property
    def const_flag(self) -> bool:
        return any(sig.const_flag for sig in self.signatures)

    @property
    def class_flag(self) -> bool:
        return any(sig.class_flag for sig in self.signatures)

    @property
    def noargs_flag(self) -> bool:
        return any(sig.noargs_flag for sig in self.signatures)

    def add_signature_docs(self, doc: Documentation) -> None:
        """Prepend the rendered signatures to doc.UserDocu.

        Plain signatures come first, then a "--" marker (the
        __text_signature__ convention), then the annotated signatures,
        then whatever user documentation followed any previous marker.
        """
        if len(self.signatures) == 1:
            plain = [self.signatures[0].text]
            annotated = [self.signatures[0].annotated_text]
        else:
            plain = [sig.text for sig in self.signatures if not sig.is_overload]
            annotated = [sig.annotated_text for sig in self.signatures if sig.is_overload]
        if not plain:
            return
        user_doc = doc.UserDocu or ""
        marker = SIGNATURE_SEP.search(user_doc)
        if marker is not None:
            user_doc = user_doc[marker.end():].strip()
        plain.append("--\n")  # mark __text_signature__
        plain.extend(annotated)  # Include real annotated signature in user docstring
        plain.append(f"\n{user_doc}")  # Rest of the docstring
        doc.UserDocu = "\n".join(plain)
def _extract_decorator_kwargs(decorator: ast.expr) -> dict:
"""
Extract keyword arguments from a decorator call like `@export(Father="...", Name="...")`.
Returns them in a dict.
"""
if not isinstance(decorator, ast.Call):
return {}
result = {}
for kw in decorator.keywords:
match kw.value:
case ast.Constant(value=val):
result[kw.arg] = val
case _:
pass
return result
def _parse_docstring_for_documentation(docstring: str) -> Documentation:
    """
    Build a Documentation object from a free-form docstring.

    Recognizes the line prefixes "DeveloperDocu:", "UserDocu:", "Author:"
    (optionally "Name (email)") and "Licence:". Lines without a known
    prefix become the user documentation, unless an explicit "UserDocu:"
    line overrides them. This is a simple heuristic-based parser.
    """
    if not docstring:
        return Documentation()
    import textwrap

    dev_docu = None
    user_docu = None
    author_name = None
    author_email = None
    author_licence = None
    remaining_lines = []
    # Remove common indentation before scanning line by line.
    for raw_line in textwrap.dedent(docstring).strip().split("\n"):
        line = raw_line.strip()
        if line.startswith("DeveloperDocu:"):
            dev_docu = line.split("DeveloperDocu:", 1)[1].strip()
        elif line.startswith("UserDocu:"):
            user_docu = line.split("UserDocu:", 1)[1].strip()
        elif line.startswith("Author:"):
            # e.g. "Author: John Doe (john@example.com)"
            author_part = line.split("Author:", 1)[1].strip()
            found = re.search(r"(.*?)\s*\((.*?)\)", author_part)
            if found:
                author_name = found.group(1).strip()
                author_email = found.group(2).strip()
            else:
                author_name = author_part
        elif line.startswith("Licence:"):
            author_licence = line.split("Licence:", 1)[1].strip()
        else:
            remaining_lines.append(raw_line)
    if user_docu is None:
        user_docu = "\n".join(remaining_lines)
    author_obj = None
    if author_name or author_email or author_licence:
        author_obj = Author(
            content=docstring,
            Name=author_name or "",
            EMail=author_email or "",
            Licence=author_licence or "LGPL",
        )
    return Documentation(
        Author=author_obj,
        DeveloperDocu=dev_docu,
        UserDocu=user_docu,
    )
def _get_type_str(node):
"""Recursively convert an AST node for a type annotation to its string representation."""
match node:
case ast.Name(id=name):
# Handle qualified names (e.g., typing.List)
return name
case ast.Attribute(value=val, attr=attr):
# For annotations like List[str] (or Final[List[str]]), build the string recursively.
return f"{_get_type_str(val)}.{attr}"
case ast.Subscript(value=val, slice=slice_node):
value_str = _get_type_str(val)
slice_str = _get_type_str(slice_node)
return f"{value_str}[{slice_str}]"
case ast.Tuple(elts=elts):
# For multiple types (e.g., Tuple[int, str])
return ", ".join(_get_type_str(elt) for elt in elts)
case _:
# Fallback for unsupported node types
return "object"
def _python_type_to_parameter_type(py_type: str) -> ParameterType:
    """
    Map a Python type annotation (as a string) to the ParameterType enum if possible.
    Fallback to OBJECT if unrecognized.

    Matching is case-insensitive; generic forms like "list[int]" are
    recognized by prefix.
    """
    lowered = py_type.lower()
    # Exact scalar types first.
    if lowered in ("int", "builtins.int"):
        return ParameterType.LONG
    if lowered in ("float", "builtins.float"):
        return ParameterType.FLOAT
    if lowered in ("str", "builtins.str"):
        return ParameterType.STRING
    if lowered in ("bool", "builtins.bool"):
        return ParameterType.BOOLEAN
    # Container/callable types matched by prefix (covers parameterized forms).
    if lowered.startswith(("list", "typing.list")):
        return ParameterType.LIST
    if lowered.startswith(("dict", "typing.dict")):
        return ParameterType.DICT
    if lowered.startswith(("callable", "typing.callable")):
        return ParameterType.CALLABLE
    if lowered.startswith(("sequence", "typing.sequence")):
        return ParameterType.SEQUENCE
    if lowered.startswith(("tuple", "typing.tuple")):
        return ParameterType.TUPLE
    if lowered.startswith("pycxxvector"):
        return ParameterType.VECTOR
    return ParameterType.OBJECT
def _parse_class_attributes(class_node: ast.ClassDef, source_code: str) -> List[Attribute]:
    """
    Parse top-level attributes (e.g. `TypeId: str = ""`) from the class AST node.
    We'll create an `Attribute` for each. For the `Documentation` of each attribute,
    we might store minimal or none if there's no docstring.

    Only annotated assignments (ast.AnnAssign) are considered. A string
    literal statement immediately following an attribute is treated as its
    documentation. A `Final[...]` (or `typing.Final[...]`) annotation marks
    the attribute as read-only, and the inner type is used.
    NOTE(review): `source_code` is not used here — kept for interface
    stability; confirm against callers before removing.
    """
    attributes = []
    default_doc = Documentation(DeveloperDocu="", UserDocu="", Author=None)
    for idx, stmt in enumerate(class_node.body):
        if isinstance(stmt, ast.AnnAssign):
            # e.g.: `TypeId: Final[str] = ""`
            name = stmt.target.id if isinstance(stmt.target, ast.Name) else "unknown"
            # Evaluate the type annotation and detect Final for read-only attributes
            if isinstance(stmt.annotation, ast.Name):
                # e.g. `str`
                type_name = stmt.annotation.id
                readonly = False
            elif isinstance(stmt.annotation, ast.Subscript):
                # Check if this is a Final type hint, e.g. Final[int] or typing.Final[int]
                is_final = (
                    isinstance(stmt.annotation.value, ast.Name)
                    and stmt.annotation.value.id == "Final"
                ) or (
                    isinstance(stmt.annotation.value, ast.Attribute)
                    and stmt.annotation.value.attr == "Final"
                )
                if is_final:
                    readonly = True
                    # Extract the inner type from the Final[...] annotation
                    type_name = _get_type_str(stmt.annotation.slice)
                else:
                    type_name = _get_type_str(stmt.annotation)
                    readonly = False
            else:
                # Unrecognized annotation shape: fall back to a generic object.
                type_name = "object"
                readonly = False
            param_type = _python_type_to_parameter_type(type_name)
            # Look for a docstring immediately following the attribute definition.
            attr_doc = default_doc
            if idx + 1 < len(class_node.body):
                next_stmt = class_node.body[idx + 1]
                if (
                    isinstance(next_stmt, ast.Expr)
                    and isinstance(next_stmt.value, ast.Constant)
                    and isinstance(next_stmt.value.value, str)
                ):
                    docstring = next_stmt.value.value
                    # Parse the docstring to build a Documentation object.
                    attr_doc = _parse_docstring_for_documentation(docstring)
            param = Parameter(Name=name, Type=param_type)
            attr = Attribute(
                Documentation=attr_doc,
                Parameter=param,
                Name=name,
                ReadOnly=readonly,
            )
            attributes.append(attr)
    return attributes
def _parse_methods(class_node: ast.ClassDef) -> List[Methode]:
    """
    Parse methods from the class AST node, extracting:
    - Method name
    - Parameters (from the function signature / annotations)
    - Docstring

    Multiple definitions with the same name (overloads, including those
    nested inside `if` blocks) are merged into a single Methode entry.
    Functions consisting only of @overload signatures are skipped.
    """
    methods = []

    def collect_function_defs(nodes) -> list[ast.FunctionDef]:
        # Recursively gather function defs, descending into `if` bodies so
        # conditionally-defined overloads are found as well.
        funcs = []
        for node in nodes:
            if isinstance(node, ast.FunctionDef):
                funcs.append(node)
            elif isinstance(node, ast.If):
                funcs.extend(collect_function_defs(node.body))
                funcs.extend(collect_function_defs(node.orelse))
        return funcs

    # Collect including overloads
    functions: dict[str, Function] = {}
    for func_node in collect_function_defs(class_node.body):
        if func := functions.get(func_node.name):
            func.update(func_node)
        else:
            functions[func_node.name] = Function(func_node)
    for func in functions.values():
        doc_obj = _parse_docstring_for_documentation(func.docstring)
        func.add_signature_docs(doc_obj)
        method_params = []
        signature = func.signature
        if signature is None:
            # Only @overload stubs were found; no implementation signature
            # to export for this name.
            continue
        # Process positional parameters (skipping self/cls)
        for arg_i, arg in enumerate(signature.args):
            param_name = arg.name
            if arg_i == 0 and param_name in ("self", "cls"):
                continue
            param_type = _python_type_to_parameter_type(arg.annotation)
            method_params.append(Parameter(Name=param_name, Type=param_type))
        method = Methode(
            Name=func.name,
            Documentation=doc_obj,
            Parameter=method_params,
            Const=func.const_flag,
            Static=func.static_flag,
            Class=func.class_flag,
            Keyword=func.has_keywords,
            NoArgs=func.noargs_flag,
        )
        methods.append(method)
    return methods
def _get_module_from_path(path: str) -> str:
"""
Returns the name of the FreeCAD module from the path.
Examples:
.../src/Base/Persistence.py -> "Base"
.../src/Mod/CAM/Path/__init__.py -> "CAM"
"""
# 1. Split the path by the OS separator.
import os
parts = path.split(os.sep)
# 2. Attempt to find "src" in the path components.
try:
idx_src = len(parts) - 1 - list(reversed(parts)).index("src")
except ValueError:
# If "src" is not found, we cannot determine the module name.
return None
# 3. Check if there is a path component immediately after "src".
# If there isn't, we have nothing to return.
if idx_src + 1 >= len(parts):
return None
next_part = parts[idx_src + 1]
# 4. If the next component is "Mod", then the module name is the
# component AFTER "Mod" (e.g. "CAM" in "Mod/CAM").
if next_part == "Mod":
if idx_src + 2 < len(parts):
return parts[idx_src + 2]
else:
# "Mod" is the last component
return None
else:
# 5. Otherwise, if it's not "Mod", we treat that next component
# itself as the module name (e.g. "Base").
return next_part
def _extract_module_name(import_path: str, default_module: str) -> str:
"""
Given an import_path like "Base.Foo", return "Base".
If import_path has no dot (e.g., "Foo"), return default_module.
Examples:
extract_module_name("Base.Foo", default_module="Fallback") -> "Base"
extract_module_name("Foo", default_module="Fallback") -> "Fallback"
"""
if "." in import_path:
# Take everything before the first dot
return import_path.split(".", 1)[0]
else:
# No dot, return the fallback module name
return default_module
def _get_module_path(module_name: str) -> str:
if module_name in ["Base", "App", "Gui"]:
return module_name
return "Mod/" + module_name
def _parse_imports(tree) -> dict:
"""
Parses the given source_code for import statements and constructs
a mapping from imported name -> module path.
For example, code like:
from Metadata import export, forward_declarations, constmethod
from PyObjectBase import PyObjectBase
from Base.Foo import Foo
from typing import List, Final
yields a mapping of:
{
"export": "Metadata",
"forward_declarations": "Metadata",
"constmethod": "Metadata",
"PyObjectBase": "PyObjectBase",
"Foo": "Base.Foo",
"List": "typing",
"Final": "typing"
}
"""
name_to_module_map = {}
for node in tree.body:
match node:
# Handle 'import X' or 'import X as Y'
case ast.Import(names=names):
# e.g. import foo, import foo as bar
for alias in names:
imported_name = alias.asname if alias.asname else alias.name
name_to_module_map[imported_name] = alias.name
# Handle 'from X import Y, Z as W'
case ast.ImportFrom(module=module, names=names):
module_name = module if module is not None else ""
for alias in names:
imported_name = alias.asname if alias.asname else alias.name
name_to_module_map[imported_name] = module_name
case _:
pass
return name_to_module_map
def _get_native_class_name(klass: str) -> str:
    # Identity mapping: the native (C++) twin class name currently matches
    # the Python class name. Kept as a hook so the naming rule can change
    # in one place, mirroring _get_native_python_class_name.
    return klass
def _get_native_python_class_name(klass: str) -> str:
if klass == "PyObjectBase":
return klass
return klass + "Py"
def _extract_base_class_name(base: ast.expr) -> str:
"""
Extract the base class name from an AST node using ast.unparse.
For generic bases (e.g. GenericParent[T]), it removes the generic part.
For qualified names (e.g. some_module.ParentClass), it returns only the last part.
"""
base_str = ast.unparse(base)
# Remove generic parameters if present.
if "[" in base_str:
base_str = base_str.split("[", 1)[0]
# For qualified names, take only the class name.
if "." in base_str:
base_str = base_str.split(".")[-1]
return base_str
def _parse_class(class_node, source_code: str, path: str, imports_mapping: dict) -> PythonExport:
    """Build a PythonExport model entry from one class definition.

    Args:
        class_node: the ast.ClassDef to convert.
        source_code: full source text (passed through to attribute parsing).
        path: file path, used to derive the owning FreeCAD module name.
        imports_mapping: imported-name -> module mapping from _parse_imports;
            the class's first base class must appear in it (KeyError otherwise).
    """
    base_class_name = None
    for base in class_node.bases:
        base_class_name = _extract_base_class_name(base)
        break  # Only consider the first base class.
    # NOTE(review): assert is stripped under `python -O`; an explicit raise
    # would be more robust for classes declared without a base class.
    assert base_class_name is not None
    is_exported = False
    export_decorator_kwargs = {}
    forward_declarations_text = ""
    class_declarations_text = ""
    sequence_protocol_kwargs = None
    # Scan class decorators for the binding directives.
    for decorator in class_node.decorator_list:
        match decorator:
            case ast.Name(id="export"):
                # Bare @export: exported with all defaults.
                export_decorator_kwargs = {}
                is_exported = True
            case ast.Call(func=ast.Name(id="export"), keywords=_, args=_):
                export_decorator_kwargs = _extract_decorator_kwargs(decorator)
                is_exported = True
            case ast.Call(func=ast.Name(id="forward_declarations"), args=args):
                # First positional argument is the raw C++ forward-declaration text.
                if args:
                    match args[0]:
                        case ast.Constant(value=val):
                            forward_declarations_text = val
            case ast.Call(func=ast.Name(id="class_declarations"), args=args):
                # First positional argument is raw text injected into the class body.
                if args:
                    match args[0]:
                        case ast.Constant(value=val):
                            class_declarations_text = val
            case ast.Call(func=ast.Name(id="sequence_protocol"), keywords=_, args=_):
                sequence_protocol_kwargs = _extract_decorator_kwargs(decorator)
            case _:
                pass
    # Parse imports to compute module metadata
    module_name = _get_module_from_path(path)
    imported_from_module = imports_mapping[base_class_name]
    parent_module_name = _extract_module_name(imported_from_module, module_name)
    class_docstring = ast.get_docstring(class_node) or ""
    doc_obj = _parse_docstring_for_documentation(class_docstring)
    class_attributes = _parse_class_attributes(class_node, source_code)
    class_methods = _parse_methods(class_node)
    # Derive native (C++) names and include paths; any decorator kwarg
    # below overrides the derived default.
    native_class_name = _get_native_class_name(class_node.name)
    native_python_class_name = _get_native_python_class_name(class_node.name)
    include = _get_module_path(module_name) + "/" + native_class_name + ".h"
    father_native_python_class_name = _get_native_python_class_name(base_class_name)
    father_include = (
        _get_module_path(parent_module_name) + "/" + father_native_python_class_name + ".h"
    )
    py_export = PythonExport(
        Documentation=doc_obj,
        ModuleName=module_name,
        Name=export_decorator_kwargs.get("Name", "") or native_python_class_name,
        PythonName=export_decorator_kwargs.get("PythonName", "") or None,
        Include=export_decorator_kwargs.get("Include", "") or include,
        Father=export_decorator_kwargs.get("Father", "") or father_native_python_class_name,
        Twin=export_decorator_kwargs.get("Twin", "") or native_class_name,
        TwinPointer=export_decorator_kwargs.get("TwinPointer", "") or native_class_name,
        Namespace=export_decorator_kwargs.get("Namespace", "") or module_name,
        FatherInclude=export_decorator_kwargs.get("FatherInclude", "") or father_include,
        FatherNamespace=export_decorator_kwargs.get("FatherNamespace", "") or parent_module_name,
        Constructor=export_decorator_kwargs.get("Constructor", False),
        NumberProtocol=export_decorator_kwargs.get("NumberProtocol", False),
        RichCompare=export_decorator_kwargs.get("RichCompare", False),
        Delete=export_decorator_kwargs.get("Delete", False),
        Reference=export_decorator_kwargs.get("Reference", None),
        Initialization=export_decorator_kwargs.get("Initialization", False),
        DisableNotify=export_decorator_kwargs.get("DisableNotify", False),
        DescriptorGetter=export_decorator_kwargs.get("DescriptorGetter", False),
        DescriptorSetter=export_decorator_kwargs.get("DescriptorSetter", False),
        ForwardDeclarations=forward_declarations_text,
        ClassDeclarations=class_declarations_text,
        IsExplicitlyExported=is_exported,
    )
    # Attach sequence protocol metadata if provided.
    if sequence_protocol_kwargs is not None:
        seq_protocol = SequenceProtocol(**sequence_protocol_kwargs)
        py_export.Sequence = seq_protocol
    py_export.Attribute.extend(class_attributes)
    py_export.Methode.extend(class_methods)
    return py_export
def parse_python_code(path: str) -> GenerateModel:
    """
    Parse the given Python source code and build a GenerateModel containing
    PythonExport entries. If any class is explicitly exported using @export,
    only those classes are used. If no classes have the @export decorator,
    then a single non-exported class is assumed to be the export. If there
    are multiple non-exported classes, an exception is raised.

    Raises:
        ValueError: when no class is found, or when several classes exist
            without an explicit @export decorator. (ValueError subclasses
            Exception, so existing `except Exception` callers still work.)
    """
    # Binding interface files are UTF-8; be explicit so reading does not
    # depend on the platform's locale encoding.
    with open(path, "r", encoding="utf-8") as file:
        source_code = file.read()
    tree = ast.parse(source_code)
    imports_mapping = _parse_imports(tree)
    explicit_exports = []
    non_explicit_exports = []
    for node in tree.body:
        if isinstance(node, ast.ClassDef):
            py_export = _parse_class(node, source_code, path, imports_mapping)
            if py_export.IsExplicitlyExported:
                explicit_exports.append(py_export)
            else:
                non_explicit_exports.append(py_export)
    model = GenerateModel()
    if explicit_exports:
        # Use only explicitly exported classes.
        model.PythonExport.extend(explicit_exports)
    elif len(non_explicit_exports) == 1:
        # No explicit exports; allow only one non-exported class.
        model.PythonExport.append(non_explicit_exports[0])
    elif non_explicit_exports:
        raise ValueError(
            "Multiple non explicitly-exported classes were found, please use @export."
        )
    else:
        raise ValueError("No classes found for export.")
    return model
def parse(path):
    """Convenience wrapper: parse the file at *path* into a GenerateModel."""
    return parse_python_code(path)
def main():
    """CLI entry point: parse the file named by the first argument and dump the model."""
    import sys

    # Raises IndexError when no path argument was supplied, as before.
    source_path = sys.argv[1:][0]
    model = parse(source_path)
    model.dump()


if __name__ == "__main__":
    main()