mozzic committed on
Commit
7b56cbf
·
verified ·
1 Parent(s): d788958

Upload src\parser.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. src//parser.py +55 -0
src//parser.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Jupyter notebook parsing utilities
3
+ """
4
+
5
+ import json
6
+ from typing import Dict, List, Any
7
+ from pathlib import Path
8
+ from src.models import Cell, CellType
9
+
10
+
11
+ class NotebookParser:
12
+ """Parser for Jupyter notebooks."""
13
+
14
+ def parse_file(self, file_path: str) -> Dict[str, Any]:
15
+ """Parse a Jupyter notebook file."""
16
+ path = Path(file_path)
17
+
18
+ if not path.exists():
19
+ raise FileNotFoundError(f"Notebook file not found: {file_path}")
20
+
21
+ with open(path, 'r', encoding='utf-8') as f:
22
+ nb_data = json.load(f)
23
+
24
+ cells = []
25
+ for i, cell_data in enumerate(nb_data.get('cells', [])):
26
+ cell = self._parse_cell(cell_data, i)
27
+ cells.append(cell)
28
+
29
+ return {
30
+ 'cells': cells,
31
+ 'metadata': nb_data.get('metadata', {}),
32
+ 'nbformat': nb_data.get('nbformat', 4)
33
+ }
34
+
35
+ def _parse_cell(self, cell_data: Dict[str, Any], index: int) -> Cell:
36
+ """Parse a single cell."""
37
+ cell_type_str = cell_data.get('cell_type', 'code')
38
+ cell_type = CellType(cell_type_str)
39
+
40
+ source = ''.join(cell_data.get('source', []))
41
+
42
+ # Generate cell ID
43
+ cell_id = f"cell_{index}"
44
+
45
+ # For code cells, include outputs
46
+ outputs = []
47
+ if cell_type == CellType.CODE:
48
+ outputs = cell_data.get('outputs', [])
49
+
50
+ return Cell(
51
+ cell_id=cell_id,
52
+ cell_type=cell_type,
53
+ source=source,
54
+ outputs=outputs
55
+ )