Spaces:

mozzic
/

context-thread-agent

Sleeping

context-thread-agent / src\parser.py

Upload src\parser.py with huggingface_hub

7b56cbf verified 20 days ago

1.64 kB

	"""
	Jupyter notebook parsing utilities
	"""

	import json
	from typing import Dict, List, Any
	from pathlib import Path
	from src.models import Cell, CellType


	class NotebookParser:
	    """Turn Jupyter notebook JSON into ``Cell`` objects plus notebook-level info."""

	    def parse_file(self, file_path: str) -> Dict[str, Any]:
	        """Load the notebook stored at *file_path* and parse each of its cells.

	        Args:
	            file_path: Location of the notebook file on disk. The file is
	                read as UTF-8 text and decoded as JSON.

	        Returns:
	            A dict with three keys:
	            ``'cells'`` - the parsed ``Cell`` objects, in notebook order;
	            ``'metadata'`` - the notebook's ``metadata`` entry (``{}`` if absent);
	            ``'nbformat'`` - the notebook's ``nbformat`` entry (``4`` if absent).

	        Raises:
	            FileNotFoundError: If nothing exists at *file_path*.
	        """
	        notebook_path = Path(file_path)
	        if not notebook_path.exists():
	            raise FileNotFoundError(f"Notebook file not found: {file_path}")

	        with notebook_path.open('r', encoding='utf-8') as handle:
	            notebook = json.load(handle)

	        # The position of a cell in the notebook is what its ID is derived from.
	        parsed_cells = [
	            self._parse_cell(raw_cell, position)
	            for position, raw_cell in enumerate(notebook.get('cells', []))
	        ]

	        return {
	            'cells': parsed_cells,
	            'metadata': notebook.get('metadata', {}),
	            'nbformat': notebook.get('nbformat', 4)
	        }

	    def _parse_cell(self, cell_data: Dict[str, Any], index: int) -> Cell:
	        """Build a ``Cell`` from one raw cell mapping.

	        Args:
	            cell_data: The raw cell mapping taken from the notebook JSON.
	                Its ``'cell_type'`` entry defaults to ``'code'`` and its
	                ``'source'`` entry defaults to an empty list when missing.
	            index: Zero-based position of the cell; used to form the
	                ``cell_<index>`` identifier.

	        Returns:
	            A ``Cell`` whose ``source`` is the concatenation of the cell's
	            source fragments and whose ``outputs`` is the cell's
	            ``'outputs'`` entry for code cells, or an empty list otherwise.
	        """
	        kind = CellType(cell_data.get('cell_type', 'code'))

	        # Source fragments are joined with no separator.
	        text = ''.join(cell_data.get('source', []))

	        # Only code cells carry outputs; every other cell type gets an empty list.
	        cell_outputs = cell_data.get('outputs', []) if kind == CellType.CODE else []

	        return Cell(
	            cell_id=f"cell_{index}",
	            cell_type=kind,
	            source=text,
	            outputs=cell_outputs
	        )