Spaces:
Sleeping
Sleeping
| """ | |
| chunk_schema.py - UPDATED with enhanced hierarchy | |
| """ | |
| from typing import Dict, List, Optional, Literal, Union | |
| from dataclasses import dataclass, field | |
| # ✅ EXPANDED ChunkType to support ALL file types | |
| ChunkType = Literal[ | |
| "module", # Python module | |
| "class", # Python class | |
| "function", # Python function | |
| "method", # Python method | |
| "context", # General context | |
| "documentation", # Markdown/RST docs | |
| "configuration", # Config files (JSON, YAML, TOML) | |
| "notebook", # Jupyter notebook | |
| "script", # Shell scripts | |
| "dockerfile", # Docker files | |
| "typescript", # TypeScript files | |
| "javascript", # JavaScript files | |
| "text", # Plain text | |
| "imports", # Import statements | |
| "unknown" # Unknown file type | |
| ] | |
| # For AST symbol types | |
| ASTSymbolType = Literal[ | |
| "module", "class", "function", "method", "context", | |
| "documentation", "configuration", "notebook", "script", | |
| "dockerfile", "typescript", "javascript", "text", | |
| "imports", | |
| "unknown" | |
| ] | |
| # @dataclass | |
| # class ChunkHierarchy: | |
| # """Enhanced hierarchical relationship metadata""" | |
| # parent_id: Optional[str] = None | |
| # children_ids: List[str] = field(default_factory=list) | |
| # depth: int = 0 | |
| # is_primary: bool = True | |
| # is_extracted: bool = False | |
| # lineage: List[str] = field(default_factory=list) # Path from root | |
| # sibling_index: int = 0 # Position among siblings | |
| class ChunkHierarchy: | |
| """Enhanced hierarchical relationship metadata""" | |
| parent_id: Optional[str] = None | |
| children_ids: List[str] = field(default_factory=list) | |
| depth: int = 0 | |
| is_primary: bool = True | |
| is_extracted: bool = False | |
| lineage: List[str] = field(default_factory=list) # Path from root | |
| sibling_index: int = 0 # Position among siblings | |
| # Optional: Add methods for type-safe operations | |
| def add_child(self, child_id: str) -> None: | |
| """Type-safe method to add child""" | |
| if child_id not in self.children_ids: | |
| self.children_ids.append(child_id) | |
| def remove_child(self, child_id: str) -> None: | |
| """Type-safe method to remove child""" | |
| if child_id in self.children_ids: | |
| self.children_ids.remove(child_id) | |
| def set_parent(self, parent_id: Optional[str]) -> None: | |
| """Type-safe method to set parent""" | |
| self.parent_id = parent_id | |
| def increment_depth(self) -> None: | |
| """Increment depth by 1""" | |
| self.depth += 1 | |
| class ChunkAST: | |
| symbol_type: Optional[ASTSymbolType] = None | |
| name: Optional[str] = None | |
| parent: Optional[str] = None | |
| docstring: Optional[str] = None | |
| decorators: List[str] = field(default_factory=list) | |
| imports: List[str] = field(default_factory=list) | |
| node_type: Optional[str] = None # Original AST node type | |
| class ChunkSpan: | |
| start_byte: Optional[int] = None | |
| end_byte: Optional[int] = None | |
| start_line: Optional[int] = None | |
| end_line: Optional[int] = None | |
| char_count: Optional[int] = None # Character count for quick reference | |
| class CodeChunk: | |
| chunk_id: str | |
| file_path: str | |
| language: str | |
| chunk_type: ChunkType # ✅ Now accepts ALL types | |
| code: str | |
| ast: ChunkAST | |
| span: ChunkSpan | |
| metadata: Dict = field(default_factory=dict) | |
| hierarchy: ChunkHierarchy = field(default_factory=ChunkHierarchy) | |