"""Split a JSONL dataset into a fixed-size test file and a training file.

Reads every record from ``output.jsonl``, shuffles them deterministically,
writes the first ``TEST_SET_SIZE`` shuffled records to
``test_metadata.jsonl`` and the remainder to ``train_metadata.jsonl``.
"""
import json
import random

INPUT_PATH = 'output.jsonl'
TEST_PATH = 'test_metadata.jsonl'
TRAIN_PATH = 'train_metadata.jsonl'

# Number of shuffled records held out for the test split.
# NOTE(review): an earlier comment claimed 128 samples, but the slice has
# always used 112; the code's value is kept so existing splits stay the same.
TEST_SET_SIZE = 112

# Fixed seed so the split is reproducible from run to run.
SEED = 42


def read_jsonl(path: str) -> list:
    """Return the JSON records stored one per line in *path*.

    Blank (whitespace-only) lines are skipped instead of being handed to the
    JSON parser, which would reject them.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]


def write_jsonl(path: str, records: list) -> None:
    """Write *records* to *path* as JSON, one record per line."""
    with open(path, 'w', encoding='utf-8') as f:
        for item in records:
            json.dump(item, f)
            f.write('\n')


def split_dataset(records: list, test_size: int = TEST_SET_SIZE,
                  seed: int = SEED) -> tuple:
    """Shuffle a copy of *records* and split it into ``(test, train)``.

    The first *test_size* shuffled records form the test set and the rest
    form the training set. If there are fewer than *test_size* records, all
    of them land in the test set and the training set is empty.

    Raises:
        ValueError: if *test_size* is negative.
    """
    if test_size < 0:
        raise ValueError(f"test_size must be non-negative, got {test_size}")
    shuffled = list(records)
    # A private generator seeded with `seed` produces the same permutation as
    # random.seed(seed) followed by random.shuffle(...), without touching the
    # process-wide random state.
    random.Random(seed).shuffle(shuffled)
    return shuffled[:test_size], shuffled[test_size:]


def main() -> None:
    """Load the dataset, split it, save both splits, and report their sizes."""
    data = read_jsonl(INPUT_PATH)
    test_set, train_set = split_dataset(data)

    write_jsonl(TEST_PATH, test_set)
    write_jsonl(TRAIN_PATH, train_set)

    print(f"Test set size: {len(test_set)}")
    print(f"Training set size: {len(train_set)}")


if __name__ == "__main__":
    main()