File size: 660 Bytes
ac33de7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import json
import random
import sys

# Share of records written to train.json; the remainder goes to dev.json.
_TRAIN_FRACTION = 0.8


def _split_records(records: list, train_fraction: float = _TRAIN_FRACTION) -> tuple:
    """Return ``(train, dev)`` lists from a shuffled copy of *records*.

    The first ``int(len(records) * train_fraction)`` shuffled records form
    the train split and the rest form the dev split. The caller's list is
    not modified.
    """
    shuffled = list(records)
    random.shuffle(shuffled)
    split_index = int(len(shuffled) * train_fraction)
    return shuffled[:split_index], shuffled[split_index:]


def _write_json(path: str, records: list) -> None:
    """Write *records* to *path* as UTF-8 JSON indented by two spaces."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(records, f, indent=2)


def main(argv: list) -> int:
    """Split the JSON dataset named by ``argv[1]`` into train and dev files.

    Reads a JSON array from the given path, shuffles it, and writes the
    first 80% to ``train.json`` and the remaining 20% to ``dev.json`` in
    the current working directory, then prints the size of each split.

    Args:
        argv: Command-line arguments in ``sys.argv`` layout (program name
            first, dataset path second). Extra arguments are ignored.

    Returns:
        Process exit code: 0 on success, 2 when the dataset path is
        missing, 1 when the file's top-level JSON value is not a list.

    Raises:
        OSError: If the input cannot be read or an output cannot be written.
        json.JSONDecodeError: If the input is not valid JSON.
    """
    if len(argv) < 2:
        program = argv[0] if argv else "split_dataset.py"
        print(f"Usage: {program} <dataset.json>", file=sys.stderr)
        return 2

    json_file_path = argv[1]

    # Load the full dataset; it must be a list of records.
    with open(json_file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Fail early with a clear message: random.shuffle on a dict or scalar
    # would otherwise raise an unrelated-looking KeyError/TypeError.
    if not isinstance(data, list):
        print(
            f"Error: {json_file_path} must contain a JSON array of records, "
            f"got {type(data).__name__}",
            file=sys.stderr,
        )
        return 1

    # Shuffle and then split the data 80/20.
    train_data, dev_data = _split_records(data)

    # Save the train and dev JSON files.
    _write_json("train.json", train_data)
    _write_json("dev.json", dev_data)

    print(f"Train examples: {len(train_data)}, Dev examples: {len(dev_data)}")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))