File size: 718 Bytes
fec0cbc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
import json
import random
# Split the records in output.jsonl into a held-out test set and a training
# set. The shuffle is seeded, so rerunning on the same input reproduces the
# same split.

# Number of shuffled records held out for the test set.
# NOTE(review): an earlier comment here said 128 samples while the code
# sliced at 112. The value the code actually used is kept -- confirm which
# one is intended.
TEST_SET_SIZE = 112

# Load data from the JSONL file (one JSON document per line).
with open('output.jsonl', 'r', encoding='utf-8') as f:
    # Skip blank lines (e.g. a trailing empty line), which json.loads rejects.
    data = [json.loads(line) for line in f if line.strip()]

# Shuffle in place with a fixed seed so the split is deterministic.
random.seed(42)
random.shuffle(data)

# The first TEST_SET_SIZE shuffled records form the test set; the rest are
# the training set. If fewer records exist, the training set is empty.
test_set = data[:TEST_SET_SIZE]
train_set = data[TEST_SET_SIZE:]

# Save each subset as JSONL: one JSON document per line.
for path, subset in (
    ('test_metadata.jsonl', test_set),
    ('train_metadata.jsonl', train_set),
):
    with open(path, 'w', encoding='utf-8') as f:
        for item in subset:
            json.dump(item, f)
            f.write('\n')

print(f"Test set size: {len(test_set)}")
print(f"Training set size: {len(train_set)}")
|