visual_pruning_kl / merge_json.py
ZachSun's picture
Upload merge_json.py with huggingface_hub
03a77f1 verified
raw
history blame contribute delete
835 Bytes
import os
import json
def merge_json_files(folder_path, output_file):
    """Merge every ``.jsonl`` file in a folder into one JSON array file.

    Each non-blank line of each ``*.jsonl`` file directly inside
    ``folder_path`` is parsed as one JSON value. The parsed values are
    concatenated (files in sorted filename order, lines in file order) and
    written to ``output_file`` as a single indented JSON list.

    Args:
        folder_path: Directory to scan for ``.jsonl`` files (not recursive).
        output_file: Path of the merged JSON file; overwritten if it exists.

    Raises:
        OSError: If the folder cannot be listed or a file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    merged_data = []
    output_abs = os.path.abspath(output_file)

    # os.listdir() returns names in arbitrary order; sorting makes the merged
    # output reproducible across runs and machines.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".jsonl"):
            continue
        file_path = os.path.join(folder_path, filename)
        # Never feed a previous output back in as input when the output file
        # lives in the scanned folder and also carries a .jsonl suffix.
        if os.path.abspath(file_path) == output_abs:
            continue
        print(file_path)
        # Read explicitly as UTF-8 to match how the output is written, rather
        # than depending on the platform's locale default.
        with open(file_path, encoding="utf-8") as f:
            # Blank / whitespace-only lines carry no record and would make
            # json.loads raise, so they are skipped.
            merged_data.extend(json.loads(line) for line in f if line.strip())

    # Write the merged data to the output file
    with open(output_file, "w", encoding="utf-8") as out_f:
        json.dump(merged_data, out_f, indent=4, ensure_ascii=False)

    print(f"Merged {len(merged_data)} entries into {output_file}")
# Example usage: runs only when this file is executed as a script, so that
# importing merge_json_files from another module does not trigger a merge.
if __name__ == "__main__":
    merge_json_files(
        "/home/gs4288/visual_pruning_kl/data/playground",
        "/home/gs4288/visual_pruning_kl/data/playground/merged.json",
    )