Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import ast | |
| from pydantic import BaseModel | |
| from parser.ast_parser import ParsedModule | |
class ChunkNode(BaseModel):
    # One unit of source code handled by the chunker: either a whole module
    # (is_chunk=False) or a single piece cut out of a module (is_chunk=True).

    # Identifier of this node. chunk_module() uses the module's own id for a
    # parent node and "<module id>::<name>" for a chunk.
    module_id: str

    # Short display name: the last dotted component of the module id for a
    # parent node, or the definition name (or "module_body") for a chunk.
    name: str

    # Source text carried by this node. May be empty: chunk_module() leaves
    # the parent's code empty when the module has been split into chunks.
    code: str

    # Id of the module this chunk was cut from; None for nodes that are not
    # chunks.
    parent_module_id: str | None = None

    # True when this node is a chunk of a larger module.
    is_chunk: bool = False

    # 1-indexed, inclusive line range of this node within the module source.
    start_line: int = 1
    end_line: int = 1
class ChunkResult(BaseModel):
    # Outcome of chunking one module: the module-level node plus the pieces
    # (possibly none) that were cut out of it.

    # Node describing the module as a whole.
    parent: ChunkNode

    # Chunks extracted from the module; empty when the module was not split.
    chunks: list[ChunkNode]
| def _slice_lines(source: str, start: int, end: int) -> str: | |
| lines = source.splitlines() | |
| start_idx = max(start - 1, 0) | |
| end_idx = min(end, len(lines)) | |
| return "\n".join(lines[start_idx:end_idx]).strip() | |
def _module_level_node(parsed: ParsedModule, line_count: int, code: str) -> ChunkNode:
    """Build the non-chunk node that represents the module as a whole."""
    return ChunkNode(
        module_id=parsed.module_id,
        name=parsed.module_id.split(".")[-1],
        code=code,
        is_chunk=False,
        start_line=1,
        end_line=line_count,
    )


def _definition_span(
    node: ast.FunctionDef | ast.AsyncFunctionDef | ast.ClassDef,
) -> tuple[int, int]:
    """Return the 1-indexed inclusive line span of a definition, decorators included."""
    def_line = int(getattr(node, "lineno", 1))
    # Since Python 3.8 ``lineno`` points at the ``def``/``class`` keyword, so
    # decorator lines have to be folded in explicitly or they are lost.
    decorator_lines = [
        int(dec.lineno)
        for dec in node.decorator_list
        if getattr(dec, "lineno", None) is not None
    ]
    start_line = min([def_line, *decorator_lines])
    end_attr = getattr(node, "end_lineno", None)
    end_line = int(end_attr) if end_attr is not None else def_line
    return start_line, end_line


def chunk_module(parsed: ParsedModule, max_lines: int = 300) -> ChunkResult:
    """Split a parsed module into one chunk per top-level definition.

    Modules with at most ``max_lines`` lines, and modules whose source cannot
    be parsed, are returned whole: the parent node carries the full source
    and the chunk list is empty.

    Otherwise every top-level function, async function and class becomes a
    chunk whose id is ``"<module id>::<name>"`` and whose code and line range
    cover the definition together with its decorators. Module-level
    statements outside those definitions are not placed in any chunk. If the
    module has no top-level definitions at all, a single ``module_body``
    chunk holding the entire source is produced. Whenever chunks are
    produced, the parent node's ``code`` is the empty string.

    Args:
        parsed: Module to split; ``module_id`` and ``raw_code`` are read.
        max_lines: Largest line count that is still kept as a single node.

    Returns:
        The module-level parent node and the list of extracted chunks.
    """
    source = parsed.raw_code
    line_count = len(source.splitlines())

    if line_count <= max_lines:
        whole = _module_level_node(parsed, line_count, code=source)
        return ChunkResult(parent=whole, chunks=[])

    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError):
        # ValueError: older interpreters reject NUL bytes in source with it
        # rather than SyntaxError. Either way, fall back to the whole module.
        whole = _module_level_node(parsed, line_count, code=source)
        return ChunkResult(parent=whole, chunks=[])

    chunks: list[ChunkNode] = []
    for node in tree.body:
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            continue
        start_line, end_line = _definition_span(node)
        chunks.append(
            ChunkNode(
                module_id=f"{parsed.module_id}::{node.name}",
                name=node.name,
                code=_slice_lines(source, start_line, end_line),
                parent_module_id=parsed.module_id,
                is_chunk=True,
                start_line=start_line,
                end_line=end_line,
            )
        )

    if not chunks:
        # No top-level definitions: keep the whole file as a single chunk so
        # the oversized module is still represented.
        chunks.append(
            ChunkNode(
                module_id=f"{parsed.module_id}::module_body",
                name="module_body",
                code=source,
                parent_module_id=parsed.module_id,
                is_chunk=True,
                start_line=1,
                end_line=line_count,
            )
        )

    # The code now lives in the chunks, so the parent carries none of it.
    parent = _module_level_node(parsed, line_count, code="")
    return ChunkResult(parent=parent, chunks=chunks)