Spaces:
Paused
Paused
| import json | |
| import os | |
| from parser import parse_source_to_graph | |
| from datetime import datetime | |
# JSONL file that create_dataset_entry() appends to, one JSON object per line.
# The path is relative, so it resolves against the current working directory.
OUTPUT_FILE = "pystructure_dataset.jsonl"
def create_dataset_entry(code):
    """
    Parse code and append one training example to the JSONL file.

    Args:
        code: Source text passed unchanged to ``parse_source_to_graph`` and
            stored verbatim under the ``"source_code"`` key of the entry.

    Returns:
        ``{"status": "error", "message": ...}`` when the parser result
        contains an ``"error"`` key; nothing is written in that case.
        Otherwise ``{"status": "success", "file": OUTPUT_FILE,
        "entry_id": ...}`` once the entry has been appended.

    Raises:
        KeyError: If the parser result or one of its nodes lacks a key read
            here (``nodes``, ``connections``, ``id``, ``vector``, ``level``).
        OSError: If OUTPUT_FILE cannot be opened or written.
    """
    graph_data = parse_source_to_graph(code)
    if "error" in graph_data:
        return {"status": "error", "message": graph_data["error"]}

    nodes = graph_data["nodes"]

    # Read the clock once so the id and the ISO timestamp describe the same
    # instant instead of two slightly different ones.
    now = datetime.now()

    entry = {
        # Microsecond resolution: a whole-second id is shared by every entry
        # created within the same second, which makes ids ambiguous.
        "id": f"sample_{int(now.timestamp() * 1_000_000)}",
        "timestamp": now.isoformat(),
        "source_code": code,
        "graph_structure": {
            "nodes": [node["id"] for node in nodes],
            "edges": graph_data["connections"],
        },
        "structural_vectors": [node["vector"] for node in nodes],
        "meta": {
            "node_count": len(nodes),
            # default=0 covers a graph with no nodes.
            "max_depth": max((node["level"] for node in nodes), default=0),
        },
    }

    # Append to JSONL file; the encoding is pinned so the output does not
    # depend on the platform's default locale.
    with open(OUTPUT_FILE, "a", encoding="utf-8") as f:
        f.write(json.dumps(entry) + "\n")

    return {"status": "success", "file": OUTPUT_FILE, "entry_id": entry["id"]}