# File size: 2,195 Bytes
import ast
from langchain_core.documents import Document
def chunk_code(file_path: str, code: str) -> list:
    """
    Chunk Python code by functions and classes using AST.

    Every function (``def`` or ``async def``) and every class found anywhere
    in the parsed tree becomes its own chunk; nested definitions and methods
    are visited too, so a method is emitted both on its own and as part of
    its class chunk. Falls back to a single whole-file chunk if parsing fails
    or if no function/class chunk was produced.

    Args:
        file_path: Stored verbatim under the ``"file"`` metadata key of
            every chunk.
        code: Source text to split.

    Returns:
        A list of ``Document`` objects. Definition chunks carry the metadata
        keys ``file``, ``type`` (``"function"`` or ``"class"``), ``name`` and
        ``line_start``; the whole-file fallback carries ``file`` and
        ``type="file"``. The list is empty when no definition chunk was
        produced and the stripped source is 100 characters or shorter.
    """
    try:
        tree = ast.parse(code)
    except Exception:
        # Deliberately broad: chunking is best-effort, so any parse failure
        # (invalid syntax or other parser errors) degrades to one
        # whole-file chunk instead of propagating to the caller.
        return _whole_file_chunks(file_path, code)

    documents = []
    for node in ast.walk(tree):
        # Coroutines are a separate node type; without AsyncFunctionDef
        # every ``async def`` would be silently skipped.
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            chunk_type = "function"
        elif isinstance(node, ast.ClassDef):
            chunk_type = "class"
        else:
            continue

        source = ast.get_source_segment(code, node)
        if not source:
            # get_source_segment returns None when location info is
            # missing; skip rather than emit an empty chunk.
            continue

        documents.append(
            Document(
                page_content=source,
                metadata={
                    "file": file_path,
                    "type": chunk_type,
                    "name": node.name,
                    "line_start": node.lineno,
                },
            )
        )

    # If no functions/classes were chunked, keep the whole file.
    return documents or _whole_file_chunks(file_path, code)


def _whole_file_chunks(file_path: str, code: str, min_chars: int = 100) -> list:
    """Return ``code`` as one ``"file"`` chunk, or ``[]`` if it is too short.

    The source is kept only when its whitespace-stripped length exceeds
    ``min_chars``; the stored ``page_content`` is the unstripped text.
    """
    if len(code.strip()) <= min_chars:
        return []
    return [
        Document(
            page_content=code,
            metadata={
                "file": file_path,
                "type": "file",
            },
        )
    ]
|