"""Export validated public traces to a JSONL file.""" from __future__ import annotations import argparse import json import sys from pathlib import Path PROJECT_ROOT = Path(__file__).resolve().parents[1] if str(PROJECT_ROOT) not in sys.path: sys.path.insert(0, str(PROJECT_ROOT)) from src.models.schema import TraceRecord DEFAULT_INPUT_DIR = Path("data/traces/samples") DEFAULT_OUTPUT_PATH = Path("data/traces/samples/objectverse_public_mock_traces.jsonl") def collect_traces(input_dir: Path = DEFAULT_INPUT_DIR) -> list[TraceRecord]: paths = sorted(input_dir.glob("*.json")) if not paths: raise FileNotFoundError(f"No trace JSON files found under {input_dir}") traces: list[TraceRecord] = [] for path in paths: payload = json.loads(path.read_text(encoding="utf-8")) traces.append(TraceRecord.model_validate(payload)) return traces def export_trace_jsonl( input_dir: Path = DEFAULT_INPUT_DIR, output_path: Path = DEFAULT_OUTPUT_PATH, ) -> int: traces = collect_traces(input_dir) output_path.parent.mkdir(parents=True, exist_ok=True) lines = [ json.dumps(trace.model_dump(mode="json"), ensure_ascii=False, sort_keys=True) for trace in traces ] output_path.write_text("\n".join(lines) + "\n", encoding="utf-8") return len(traces) def _parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--input-dir", type=Path, default=DEFAULT_INPUT_DIR) parser.add_argument("--output", type=Path, default=DEFAULT_OUTPUT_PATH) return parser.parse_args() def main() -> None: args = _parse_args() count = export_trace_jsonl(args.input_dir, args.output) print(f"exported {count} traces to {args.output}") if __name__ == "__main__": main()