AutoLLMAnnotation / tools /merge_json_outputs.py
ayh015's picture
Update modified code
73df34b
import os
import json
import glob
import argparse
def main(argv=None):
    """Merge several JSON list files into a single JSON list file.

    Every path matching the glob ``--pattern`` inside ``--input-dir`` is
    loaded in sorted path order; each file must hold a top-level JSON list.
    The lists are concatenated and written to ``--output-path`` as UTF-8 JSON
    (non-ASCII characters kept verbatim, 2-space indent). The output
    directory is created when it does not exist, and a two-line summary is
    printed once the file has been written.

    Args:
        argv: Command-line arguments to parse instead of ``sys.argv[1:]``.
            ``None`` (the default) keeps the normal command-line behaviour.

    Raises:
        FileNotFoundError: If no input file is left to merge.
        ValueError: If an input file's top-level JSON value is not a list.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input-dir", type=str, required=True)
    parser.add_argument("--pattern", type=str, required=True)
    parser.add_argument("--output-path", type=str, required=True)
    args = parser.parse_args(argv)

    input_pattern = os.path.join(args.input_dir, args.pattern)
    output_real = os.path.realpath(args.output_path)

    # The output of an earlier run may itself match the pattern (for example
    # "*.json" with the output stored in the input directory). Merging it
    # again would duplicate every item, so it is never treated as an input.
    input_paths = []
    for path in sorted(glob.glob(input_pattern)):
        if os.path.realpath(path) == output_real:
            print(f"Skipping {path}: it is the output file")
            continue
        input_paths.append(path)
    if not input_paths:
        raise FileNotFoundError(f"No files matched pattern: {input_pattern}")

    merged = []
    for path in input_paths:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
        # Only lists can be concatenated; refuse anything else rather than
        # silently producing a malformed merge.
        if not isinstance(data, list):
            raise ValueError(f"{path} is not a JSON list, got {type(data)}")
        merged.extend(data)

    # dirname is "" when the output goes to the current directory; makedirs
    # would fail on an empty path, so only create a directory when one is named.
    output_dir = os.path.dirname(args.output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(args.output_path, "w", encoding="utf-8") as f:
        json.dump(merged, f, ensure_ascii=False, indent=2)

    print(f"Merged {len(input_paths)} files into {args.output_path}")
    print(f"Total items: {len(merged)}")
# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()