Spaces:

mozzic
/

context-thread-agent

Sleeping

Upload src\parser.py with huggingface_hub

7b56cbf verified about 1 month ago

1.64 kB

	"""
	Jupyter notebook parsing utilities
	"""

	import json
	from typing import Dict, List, Any
	from pathlib import Path
	from src.models import Cell, CellType


	class NotebookParser:
	    """Parser for Jupyter notebooks.

	    Reads a notebook's JSON document and converts each entry of its
	    ``cells`` array into a ``Cell`` object.
	    """

	    def parse_file(self, file_path: str) -> Dict[str, Any]:
	        """Parse a Jupyter notebook file.

	        Args:
	            file_path: Path of the notebook file to read (UTF-8 JSON).

	        Returns:
	            A dict with three keys:
	            ``'cells'`` - list of parsed ``Cell`` objects, in file order;
	            ``'metadata'`` - the notebook-level metadata (``{}`` if absent);
	            ``'nbformat'`` - the format version stored in the file
	            (``4`` if absent).

	        Raises:
	            FileNotFoundError: If ``file_path`` is not an existing regular
	                file.
	            ValueError: If the file is not valid JSON (``json.load`` raises
	                ``json.JSONDecodeError``, a ``ValueError`` subclass) or if
	                its top-level JSON value is not an object.
	        """
	        path = Path(file_path)

	        # is_file() instead of exists(): a directory passes exists() and
	        # would only fail later inside open() with a platform-dependent
	        # OSError instead of the clear error below.
	        if not path.is_file():
	            raise FileNotFoundError(f"Notebook file not found: {file_path}")

	        with path.open('r', encoding='utf-8') as f:
	            nb_data = json.load(f)

	        # Every lookup below uses dict.get(); fail with a clear message
	        # rather than an AttributeError when the JSON is e.g. a list.
	        if not isinstance(nb_data, dict):
	            raise ValueError(
	                f"Notebook file is not a JSON object: {file_path}"
	            )

	        cells = [
	            self._parse_cell(cell_data, i)
	            for i, cell_data in enumerate(nb_data.get('cells', []))
	        ]

	        return {
	            'cells': cells,
	            'metadata': nb_data.get('metadata', {}),
	            'nbformat': nb_data.get('nbformat', 4),
	        }

	    def _parse_cell(self, cell_data: Dict[str, Any], index: int) -> Cell:
	        """Parse a single cell.

	        Args:
	            cell_data: One entry of the notebook's ``cells`` array.
	            index: Zero-based position of the cell in the notebook; used
	                to build the generated cell ID.

	        Returns:
	            A ``Cell`` whose ID is ``cell_<index>``, whose source is the
	            cell text as a single string, and whose outputs are the raw
	            ``outputs`` list for code cells and an empty list otherwise.
	        """
	        # A missing cell_type is treated as a code cell.
	        # NOTE(review): CellType(...) presumably rejects strings it does
	        # not define - confirm against src.models.
	        cell_type = CellType(cell_data.get('cell_type', 'code'))

	        # The source may be stored either as one string or as a list of
	        # line strings; both are normalised to a single string.
	        raw_source = cell_data.get('source', [])
	        if isinstance(raw_source, str):
	            source = raw_source
	        else:
	            source = ''.join(raw_source)

	        # Only code cells keep their outputs; other cell types get [].
	        if cell_type == CellType.CODE:
	            outputs = cell_data.get('outputs', [])
	        else:
	            outputs = []

	        return Cell(
	            cell_id=f"cell_{index}",
	            cell_type=cell_type,
	            source=source,
	            outputs=outputs,
	        )