Spaces:
Sleeping
Sleeping
File size: 1,636 Bytes
7b56cbf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
"""
Jupyter notebook parsing utilities
"""
import json
from typing import Dict, List, Any
from pathlib import Path
from src.models import Cell, CellType
class NotebookParser:
"""Parser for Jupyter notebooks."""
def parse_file(self, file_path: str) -> Dict[str, Any]:
"""Parse a Jupyter notebook file."""
path = Path(file_path)
if not path.exists():
raise FileNotFoundError(f"Notebook file not found: {file_path}")
with open(path, 'r', encoding='utf-8') as f:
nb_data = json.load(f)
cells = []
for i, cell_data in enumerate(nb_data.get('cells', [])):
cell = self._parse_cell(cell_data, i)
cells.append(cell)
return {
'cells': cells,
'metadata': nb_data.get('metadata', {}),
'nbformat': nb_data.get('nbformat', 4)
}
def _parse_cell(self, cell_data: Dict[str, Any], index: int) -> Cell:
"""Parse a single cell."""
cell_type_str = cell_data.get('cell_type', 'code')
cell_type = CellType(cell_type_str)
source = ''.join(cell_data.get('source', []))
# Generate cell ID
cell_id = f"cell_{index}"
# For code cells, include outputs
outputs = []
if cell_type == CellType.CODE:
outputs = cell_data.get('outputs', [])
return Cell(
cell_id=cell_id,
cell_type=cell_type,
source=source,
outputs=outputs
) |