context-thread-agent / src\parser.py
mozzic's picture
Upload src\parser.py with huggingface_hub
7b56cbf verified
raw
history blame
1.64 kB
"""
Jupyter notebook parsing utilities
"""
import json
from typing import Dict, List, Any
from pathlib import Path
from src.models import Cell, CellType
class NotebookParser:
    """Turn Jupyter notebook JSON into a list of ``Cell`` objects plus metadata."""

    def parse_file(self, file_path: str) -> Dict[str, Any]:
        """Load the notebook at ``file_path`` and parse each of its cells.

        Args:
            file_path: Location of the notebook on disk. The file is read as
                UTF-8 text and decoded as JSON.

        Returns:
            A dict with three keys:
              * ``'cells'``    - one ``Cell`` per entry of the notebook's
                top-level ``'cells'`` list, in file order (empty when the
                key is missing).
              * ``'metadata'`` - the notebook's ``'metadata'`` value, or
                ``{}`` when the key is missing.
              * ``'nbformat'`` - the notebook's ``'nbformat'`` value, or
                ``4`` when the key is missing.

        Raises:
            FileNotFoundError: If nothing exists at ``file_path``.
        """
        notebook_path = Path(file_path)
        if not notebook_path.exists():
            raise FileNotFoundError(f"Notebook file not found: {file_path}")

        with notebook_path.open('r', encoding='utf-8') as handle:
            notebook = json.load(handle)

        raw_cells = notebook.get('cells', [])
        return {
            # The position in the notebook doubles as the seed for the cell ID.
            'cells': [
                self._parse_cell(raw_cell, position)
                for position, raw_cell in enumerate(raw_cells)
            ],
            'metadata': notebook.get('metadata', {}),
            'nbformat': notebook.get('nbformat', 4),
        }

    def _parse_cell(self, cell_data: Dict[str, Any], index: int) -> Cell:
        """Build a ``Cell`` from one raw cell dict.

        Args:
            cell_data: One entry of the notebook's ``'cells'`` list.
            index: Zero-based position of the cell; used to form the
                ``cell_{index}`` identifier.

        Returns:
            A ``Cell`` carrying the generated ID, the cell type, the source
            joined into a single string, and the raw outputs list (only
            populated for code cells; every other type gets ``[]``).
        """
        # A missing 'cell_type' is treated as a code cell. The string is
        # handed to CellType(...); how unrecognised values are handled is up
        # to that type.
        kind = CellType(cell_data.get('cell_type', 'code'))

        # Joining with '' collapses a list of lines into one string and
        # leaves an already-flat string unchanged.
        text = ''.join(cell_data.get('source', []))

        is_code = kind == CellType.CODE
        return Cell(
            cell_id=f"cell_{index}",
            cell_type=kind,
            source=text,
            outputs=cell_data.get('outputs', []) if is_code else [],
        )