File size: 2,195 Bytes
089f9ae
6d9a945
 
21a69f9
089f9ae
 
 
 
 
3bc9c63
089f9ae
 
 
 
 
 
3bc9c63
089f9ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3bc9c63
089f9ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3bc9c63
089f9ae
21a69f9
089f9ae
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import ast
from langchain_core.documents import Document

def chunk_code(file_path: str, code: str) -> "list[Document]":
    """
    Chunk Python code by functions and classes using AST.

    Every ``def``, ``async def`` and ``class`` found anywhere in the module
    becomes its own Document. Because the whole tree is walked, methods and
    nested functions are emitted as separate chunks in addition to the chunk
    of the class/function that contains them.

    Falls back to a single whole-file chunk if parsing fails or if no
    function/class is found; files whose stripped content is 100 characters
    or fewer produce no fallback chunk.

    Args:
        file_path: Path recorded in each chunk's ``"file"`` metadata entry.
        code: Source text to split.

    Returns:
        A list of Documents. Function/class chunks carry the metadata keys
        ``file``, ``type`` (``"function"`` or ``"class"``), ``name`` and
        ``line_start``; the whole-file fallback carries ``file`` and
        ``type`` (``"file"``). The list may be empty.
    """

    def whole_file_chunks():
        # Tiny files (<= 100 non-whitespace-trimmed characters) are dropped.
        if len(code.strip()) <= 100:
            return []
        return [
            Document(
                page_content=code,
                metadata={
                    "file": file_path,
                    "type": "file",
                },
            )
        ]

    try:
        tree = ast.parse(code)
    except Exception:
        # Deliberately broad: chunking is best-effort, so any parse failure
        # (syntax error, null bytes, excessive nesting, ...) degrades to the
        # whole-file fallback instead of propagating to the caller.
        return whole_file_chunks()

    documents = []
    for node in ast.walk(tree):
        # Coroutines are reported as "function" too, so `async def` code is
        # chunked the same way as regular functions.
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            chunk_type = "function"
        elif isinstance(node, ast.ClassDef):
            chunk_type = "class"
        else:
            continue

        source = ast.get_source_segment(code, node)
        if not source:
            # No usable source text for this node; skip it.
            continue

        documents.append(
            Document(
                page_content=source,
                metadata={
                    "file": file_path,
                    "type": chunk_type,
                    "name": node.name,
                    "line_start": node.lineno,
                },
            )
        )

    # If no functions/classes found, keep whole file
    return documents or whole_file_chunks()