File size: 3,467 Bytes
463fc7e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
"""
chunk_schema.py - Literal types and dataclasses describing code chunks and their hierarchy.
"""

from typing import Dict, List, Optional, Literal, Union
from dataclasses import dataclass, field


# Chunk categories: Python constructs plus docs, config, notebooks, scripts and other file types.
ChunkType = Literal[
    # Order is kept stable so typing.get_args(ChunkType) does not change.
    "module",         # Python module
    "class",          # Python class
    "function",       # Python function
    "method",         # Python method
    "context",        # General context
    "documentation",  # Markdown/RST docs
    "configuration",  # Config files (JSON, YAML, TOML)
    "notebook",       # Jupyter notebook
    "script",         # Shell scripts
    "dockerfile",     # Docker files
    "typescript",     # TypeScript files
    "javascript",     # JavaScript files
    "text",           # Plain text
    "imports",        # Import statements
    "unknown",        # Unknown file type
]

# AST symbol types use exactly the same vocabulary as chunk types. Alias the
# literal instead of keeping a second hand-maintained copy, so the two lists
# cannot drift apart when a new category is added.
ASTSymbolType = ChunkType


# @dataclass  
# class ChunkHierarchy:
#     """Enhanced hierarchical relationship metadata"""
#     parent_id: Optional[str] = None
#     children_ids: List[str] = field(default_factory=list)
#     depth: int = 0
#     is_primary: bool = True
#     is_extracted: bool = False
#     lineage: List[str] = field(default_factory=list)  # Path from root
#     sibling_index: int = 0  # Position among siblings

@dataclass
class ChunkHierarchy:
    """Parent/child relationship metadata for a single chunk.

    Every field has a default, so ``ChunkHierarchy()`` yields an entry with no
    parent, no children, depth 0 and an empty lineage.
    """

    # ID of the parent chunk; None until one is set.
    parent_id: Optional[str] = None
    # IDs of child chunks, in the order they were added.
    children_ids: List[str] = field(default_factory=list)
    # Nesting level counter; starts at 0.
    depth: int = 0
    is_primary: bool = True
    is_extracted: bool = False
    # Path from the root.
    lineage: List[str] = field(default_factory=list)
    # Position among siblings.
    sibling_index: int = 0

    def add_child(self, child_id: str) -> None:
        """Append ``child_id`` to ``children_ids`` unless it is already present."""
        if child_id in self.children_ids:
            return
        self.children_ids.append(child_id)

    def remove_child(self, child_id: str) -> None:
        """Remove ``child_id`` from ``children_ids``; do nothing if it is absent."""
        try:
            self.children_ids.remove(child_id)
        except ValueError:
            # list.remove signals a missing item with ValueError; a missing
            # child is not an error here.
            pass

    def set_parent(self, parent_id: Optional[str]) -> None:
        """Replace ``parent_id`` with the given value (``None`` clears it)."""
        self.parent_id = parent_id

    def increment_depth(self) -> None:
        """Raise ``depth`` by one."""
        self.depth = self.depth + 1


@dataclass
class ChunkAST:
    """AST-related details attached to a chunk.

    Every field defaults to ``None`` or an empty list, so ``ChunkAST()`` is a
    valid "nothing recorded" value.
    """

    # One of the ASTSymbolType literals, or None when not set.
    symbol_type: Optional[ASTSymbolType] = None
    # presumably the symbol's identifier — confirm against the producer
    name: Optional[str] = None
    # presumably the enclosing symbol's name — confirm against the producer
    parent: Optional[str] = None
    docstring: Optional[str] = None
    # Each instance gets its own fresh list (default_factory).
    decorators: List[str] = field(default_factory=list)
    imports: List[str] = field(default_factory=list)
    node_type: Optional[str] = None  # Original AST node type


@dataclass
class ChunkSpan:
    """Byte/line extent of a chunk; every field is optional and defaults to None."""

    # NOTE(review): whether the byte and line bounds are 0- or 1-based, and
    # whether the end values are inclusive, is not visible here — confirm
    # against the code that fills these in.
    start_byte: Optional[int] = None
    end_byte: Optional[int] = None
    start_line: Optional[int] = None
    end_line: Optional[int] = None
    char_count: Optional[int] = None  # Character count for quick reference



@dataclass
class CodeChunk:
    """A single chunk record: identity, source text, AST info, span and hierarchy.

    The first seven fields are required; ``metadata`` and ``hierarchy`` fall
    back to a fresh empty dict and a default ``ChunkHierarchy`` per instance.
    """

    chunk_id: str
    file_path: str
    language: str
    chunk_type: ChunkType  # One of the ChunkType literals
    code: str
    ast: ChunkAST
    span: ChunkSpan
    # Free-form dict; its key/value schema is not defined in this file.
    metadata: Dict = field(default_factory=dict)
    hierarchy: ChunkHierarchy = field(default_factory=ChunkHierarchy)