"""Append parsed-code training examples to a JSONL dataset file."""

import json
import os
from datetime import datetime

from parser import parse_source_to_graph

# Path of the JSONL file that receives one JSON object per line.
OUTPUT_FILE = "pystructure_dataset.jsonl"


def create_dataset_entry(code) -> dict:
    """Parse ``code`` and append one training example to ``OUTPUT_FILE``.

    ``code`` is handed unchanged to ``parse_source_to_graph`` and stored
    verbatim under ``"source_code"`` (presumably a source-code string --
    confirm against the parser's contract).

    Returns:
        ``{"status": "error", "message": ...}`` when the parser result
        contains an ``"error"`` key; nothing is written in that case.
        Otherwise ``{"status": "success", "file": OUTPUT_FILE,
        "entry_id": ...}`` after the entry has been appended.
    """
    graph_data = parse_source_to_graph(code)

    # The parser signals failure through an "error" key in its result.
    if "error" in graph_data:
        return {"status": "error", "message": graph_data["error"]}

    nodes = graph_data["nodes"]

    # Take a single clock reading so the id and the timestamp describe the
    # same instant.
    now = datetime.now()

    entry = {
        # Microsecond resolution: a whole-second id collides as soon as two
        # entries are created within the same second. This greatly reduces
        # collisions but is not a hard uniqueness guarantee.
        "id": f"sample_{int(now.timestamp() * 1_000_000)}",
        "timestamp": now.isoformat(),
        "source_code": code,
        "graph_structure": {
            "nodes": [node["id"] for node in nodes],
            "edges": graph_data["connections"],
        },
        "structural_vectors": [node["vector"] for node in nodes],
        "meta": {
            "node_count": len(nodes),
            # default=0 covers an empty node list without a separate branch.
            "max_depth": max((node["level"] for node in nodes), default=0),
        },
    }

    # Append to the JSONL file: one JSON document per line. The encoding is
    # explicit so the result does not depend on the platform default.
    with open(OUTPUT_FILE, "a", encoding="utf-8") as f:
        f.write(json.dumps(entry) + "\n")

    return {"status": "success", "file": OUTPUT_FILE, "entry_id": entry["id"]}