File size: 1,194 Bytes
2b890a2
 
 
 
 
a2c589c
 
 
2b890a2
a2c589c
2b890a2
a2c589c
2b890a2
a2c589c
 
2b890a2
a2c589c
 
 
 
 
 
 
 
 
 
 
 
 
 
2b890a2
a2c589c
2b890a2
a2c589c
 
 
2b890a2
a2c589c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import json
import os
from parser import parse_source_to_graph
from datetime import datetime

# JSONL file that create_dataset_entry appends one record per call to.
# The name has no directory component, so it resolves against the current
# working directory.
OUTPUT_FILE = "pystructure_dataset.jsonl"

def create_dataset_entry(code):
    """
    Parse ``code`` into a graph and append one training example to the JSONL file.

    The parser result is expected to be a mapping with a ``'nodes'`` list
    (each node providing ``'id'``, ``'vector'`` and ``'level'``) and a
    ``'connections'`` entry; those are the only fields read here.

    Args:
        code: Source text handed to ``parse_source_to_graph`` and stored
            verbatim in the entry under ``"source_code"``.

    Returns:
        ``{"status": "error", "message": ...}`` when the parser result
        contains an ``"error"`` key (nothing is written in that case);
        otherwise ``{"status": "success", "file": OUTPUT_FILE,
        "entry_id": ...}`` after the entry has been appended.
    """
    graph_data = parse_source_to_graph(code)

    if "error" in graph_data:
        return {"status": "error", "message": graph_data["error"]}

    nodes = graph_data['nodes']
    vectors = [node['vector'] for node in nodes]

    # Read the clock once so "id" and "timestamp" describe the same instant.
    now = datetime.now()

    entry = {
        # NOTE(review): the id has one-second resolution, so two entries
        # created within the same second share an id. Format kept unchanged
        # for compatibility with existing dataset files.
        "id": f"sample_{int(now.timestamp())}",
        "timestamp": now.isoformat(),
        "source_code": code,
        "graph_structure": {
            "nodes": [node['id'] for node in nodes],
            "edges": graph_data['connections'],
        },
        "structural_vectors": vectors,
        "meta": {
            "node_count": len(nodes),
            # default=0 covers a graph with no nodes.
            "max_depth": max((node['level'] for node in nodes), default=0),
        },
    }

    # Serialize before opening the file so that a non-serializable entry
    # raises without creating or touching the output file.
    line = json.dumps(entry) + '\n'

    # Append to JSONL file
    with open(OUTPUT_FILE, 'a', encoding='utf-8') as f:
        f.write(line)

    return {"status": "success", "file": OUTPUT_FILE, "entry_id": entry['id']}