Spaces:
Sleeping
Sleeping
File size: 3,467 Bytes
463fc7e | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 | """
chunk_schema.py - Literal types and dataclasses describing code chunks, including enhanced hierarchy metadata.
"""
from typing import Dict, List, Optional, Literal, Union
from dataclasses import dataclass, field
# Allowed values for a chunk's type, covering Python constructs as well as
# documentation, configuration and other kinds of files.
ChunkType = Literal[
    # Python code structure
    "module",
    "class",
    "function",
    "method",
    # General context
    "context",
    # Documentation and configuration
    "documentation",  # Markdown / RST docs
    "configuration",  # config files (JSON, YAML, TOML)
    # Other file kinds
    "notebook",  # Jupyter notebook
    "script",  # shell scripts
    "dockerfile",  # Docker files
    "typescript",  # TypeScript files
    "javascript",  # JavaScript files
    "text",  # plain text
    # Fragments and fallback
    "imports",  # import statements
    "unknown",  # unknown file type
]
# Allowed values for the symbol type recorded on a chunk's AST info.
ASTSymbolType = Literal[
    "module",
    "class",
    "function",
    "method",
    "context",
    "documentation",
    "configuration",
    "notebook",
    "script",
    "dockerfile",
    "typescript",
    "javascript",
    "text",
    "imports",
    "unknown",
]
# @dataclass
# class ChunkHierarchy:
# """Enhanced hierarchical relationship metadata"""
# parent_id: Optional[str] = None
# children_ids: List[str] = field(default_factory=list)
# depth: int = 0
# is_primary: bool = True
# is_extracted: bool = False
# lineage: List[str] = field(default_factory=list) # Path from root
# sibling_index: int = 0 # Position among siblings
@dataclass
class ChunkHierarchy:
    """Enhanced hierarchical relationship metadata for a chunk.

    Every field has a default, so ``ChunkHierarchy()`` yields an entry with
    no parent, no children and a depth of zero. The list fields use
    ``default_factory`` so each instance gets its own list.
    """

    parent_id: Optional[str] = None
    children_ids: List[str] = field(default_factory=list)
    depth: int = 0
    is_primary: bool = True
    is_extracted: bool = False
    lineage: List[str] = field(default_factory=list)  # path from root
    sibling_index: int = 0  # position among siblings

    def add_child(self, child_id: str) -> None:
        """Append ``child_id`` to ``children_ids`` unless already present."""
        if child_id in self.children_ids:
            return  # keep the list free of duplicates
        self.children_ids.append(child_id)

    def remove_child(self, child_id: str) -> None:
        """Remove ``child_id`` from ``children_ids``; no-op when absent."""
        if child_id not in self.children_ids:
            return
        self.children_ids.remove(child_id)

    def set_parent(self, parent_id: Optional[str]) -> None:
        """Set ``parent_id``; ``None`` clears it."""
        self.parent_id = parent_id

    def increment_depth(self) -> None:
        """Increase ``depth`` by one."""
        self.depth += 1
@dataclass
class ChunkAST:
    """AST-related information recorded for a chunk.

    All fields are optional: scalar fields default to ``None`` and the list
    fields default to a fresh empty list per instance.
    """

    symbol_type: Optional[ASTSymbolType] = None
    name: Optional[str] = None
    parent: Optional[str] = None
    docstring: Optional[str] = None
    decorators: List[str] = field(default_factory=list)
    imports: List[str] = field(default_factory=list)
    node_type: Optional[str] = None  # original AST node type
@dataclass
class ChunkSpan:
    """Byte, line and character extent of a chunk.

    Every field defaults to ``None``, so a span can be created before any
    position is known.
    """

    start_byte: Optional[int] = None
    end_byte: Optional[int] = None
    start_line: Optional[int] = None
    end_line: Optional[int] = None
    char_count: Optional[int] = None  # character count for quick reference
@dataclass
class CodeChunk:
    """One chunk of a file together with its AST, span and hierarchy data.

    The first seven fields are required. ``metadata`` defaults to a fresh
    empty dict and ``hierarchy`` to a fresh ``ChunkHierarchy`` per instance.
    """

    chunk_id: str
    file_path: str
    language: str
    chunk_type: ChunkType  # any value listed in ChunkType
    code: str
    ast: ChunkAST
    span: ChunkSpan
    metadata: Dict = field(default_factory=dict)
    hierarchy: ChunkHierarchy = field(default_factory=ChunkHierarchy)
|