CodeMode / scripts /core /ingestion /chunk_schema.py
CodeMode Agent
Deploy CodeMode via Agent
463fc7e
"""
chunk_schema.py - UPDATED with enhanced hierarchy
"""
from typing import Dict, List, Optional, Literal, Union
from dataclasses import dataclass, field
# ✅ EXPANDED ChunkType to support ALL file types
ChunkType = Literal[
"module", # Python module
"class", # Python class
"function", # Python function
"method", # Python method
"context", # General context
"documentation", # Markdown/RST docs
"configuration", # Config files (JSON, YAML, TOML)
"notebook", # Jupyter notebook
"script", # Shell scripts
"dockerfile", # Docker files
"typescript", # TypeScript files
"javascript", # JavaScript files
"text", # Plain text
"imports", # Import statements
"unknown" # Unknown file type
]
# For AST symbol types
ASTSymbolType = Literal[
"module", "class", "function", "method", "context",
"documentation", "configuration", "notebook", "script",
"dockerfile", "typescript", "javascript", "text",
"imports",
"unknown"
]
# @dataclass
# class ChunkHierarchy:
# """Enhanced hierarchical relationship metadata"""
# parent_id: Optional[str] = None
# children_ids: List[str] = field(default_factory=list)
# depth: int = 0
# is_primary: bool = True
# is_extracted: bool = False
# lineage: List[str] = field(default_factory=list) # Path from root
# sibling_index: int = 0 # Position among siblings
@dataclass
class ChunkHierarchy:
"""Enhanced hierarchical relationship metadata"""
parent_id: Optional[str] = None
children_ids: List[str] = field(default_factory=list)
depth: int = 0
is_primary: bool = True
is_extracted: bool = False
lineage: List[str] = field(default_factory=list) # Path from root
sibling_index: int = 0 # Position among siblings
# Optional: Add methods for type-safe operations
def add_child(self, child_id: str) -> None:
"""Type-safe method to add child"""
if child_id not in self.children_ids:
self.children_ids.append(child_id)
def remove_child(self, child_id: str) -> None:
"""Type-safe method to remove child"""
if child_id in self.children_ids:
self.children_ids.remove(child_id)
def set_parent(self, parent_id: Optional[str]) -> None:
"""Type-safe method to set parent"""
self.parent_id = parent_id
def increment_depth(self) -> None:
"""Increment depth by 1"""
self.depth += 1
@dataclass
class ChunkAST:
symbol_type: Optional[ASTSymbolType] = None
name: Optional[str] = None
parent: Optional[str] = None
docstring: Optional[str] = None
decorators: List[str] = field(default_factory=list)
imports: List[str] = field(default_factory=list)
node_type: Optional[str] = None # Original AST node type
@dataclass
class ChunkSpan:
start_byte: Optional[int] = None
end_byte: Optional[int] = None
start_line: Optional[int] = None
end_line: Optional[int] = None
char_count: Optional[int] = None # Character count for quick reference
@dataclass
class CodeChunk:
chunk_id: str
file_path: str
language: str
chunk_type: ChunkType # ✅ Now accepts ALL types
code: str
ast: ChunkAST
span: ChunkSpan
metadata: Dict = field(default_factory=dict)
hierarchy: ChunkHierarchy = field(default_factory=ChunkHierarchy)