File size: 660 Bytes
ac33de7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 | import json
import random
import sys
# Share of the records written to train.json; the remainder goes to dev.json.
TRAIN_FRACTION = 0.8


def split_dataset(records, train_fraction=TRAIN_FRACTION, seed=None):
    """Shuffle *records* and split them into train and dev portions.

    Args:
        records: List of examples. It is copied before shuffling, so the
            caller's list is left untouched.
        train_fraction: Fraction of the examples placed in the train
            portion; the count is truncated toward zero.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            module-level ``random`` generator is used.

    Returns:
        A ``(train, dev)`` tuple of lists that together contain every
        input record exactly once.

    Raises:
        TypeError: If *records* is not a list.
        ValueError: If *train_fraction* is outside ``[0, 1]``.
    """
    if not isinstance(records, list):
        raise TypeError(
            f"expected a list of records, got {type(records).__name__}"
        )
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError(
            f"train_fraction must be between 0 and 1, got {train_fraction!r}"
        )

    shuffled = list(records)
    # A dedicated generator keeps a seeded run from disturbing global state.
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(shuffled)

    split_index = int(len(shuffled) * train_fraction)
    return shuffled[:split_index], shuffled[split_index:]


def main(argv=None):
    """Split a JSON list file into ``train.json`` and ``dev.json``.

    Args:
        argv: Command-line arguments without the program name:
            ``<dataset.json> [seed]``. Defaults to ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 on success, 1 when the input file does not
        hold a JSON list, 2 on a usage error.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not 1 <= len(args) <= 2:
        print(f"Usage: {sys.argv[0]} <dataset.json> [seed]", file=sys.stderr)
        return 2

    json_file_path = args[0]
    seed = None
    if len(args) == 2:
        try:
            seed = int(args[1])
        except ValueError:
            print(
                f"error: seed must be an integer, got {args[1]!r}",
                file=sys.stderr,
            )
            return 2

    # Load the full dataset (it must be a list of records).
    with open(json_file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    if not isinstance(data, list):
        print(
            f"error: {json_file_path} must contain a JSON list of records, "
            f"found {type(data).__name__}",
            file=sys.stderr,
        )
        return 1

    # Shuffle and then split the data 80/20.
    train_data, dev_data = split_dataset(data, seed=seed)

    # Save the train and dev JSON files in the current working directory.
    with open("train.json", "w", encoding="utf-8") as f:
        json.dump(train_data, f, indent=2)
    with open("dev.json", "w", encoding="utf-8") as f:
        json.dump(dev_data, f, indent=2)

    print(f"Train examples: {len(train_data)}, Dev examples: {len(dev_data)}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|