Spaces:
Running on Zero
Running on Zero
| """Export validated public traces to a JSONL file.""" | |
| from __future__ import annotations | |
| import argparse | |
| import json | |
| import sys | |
| from pathlib import Path | |
# PROJECT_ROOT is the parent of the directory that contains this file. It is
# put at the front of sys.path (once) so that the `src` package import below
# resolves when this file is executed directly as a script.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

# Must come after the sys.path adjustment above.
from src.models.schema import TraceRecord

# Default locations. These are relative paths, so they resolve against the
# current working directory rather than PROJECT_ROOT.
DEFAULT_INPUT_DIR = Path("data/traces/samples")
DEFAULT_OUTPUT_PATH = DEFAULT_INPUT_DIR / "objectverse_public_mock_traces.jsonl"
def collect_traces(input_dir: Path = DEFAULT_INPUT_DIR) -> list[TraceRecord]:
    """Load and validate every ``*.json`` trace file directly inside *input_dir*.

    Files are processed in sorted path order so the result is deterministic.

    Args:
        input_dir: Directory searched (non-recursively) for ``*.json`` files.

    Returns:
        One ``TraceRecord`` per file, in sorted path order.

    Raises:
        FileNotFoundError: If no ``*.json`` file is found under *input_dir*.
        json.JSONDecodeError: If a file does not contain valid JSON. The
            message names the offending file.

    Errors raised by ``TraceRecord.model_validate`` propagate unchanged.
    """
    paths = sorted(input_dir.glob("*.json"))
    if not paths:
        raise FileNotFoundError(f"No trace JSON files found under {input_dir}")

    traces: list[TraceRecord] = []
    for path in paths:
        try:
            payload = json.loads(path.read_text(encoding="utf-8"))
        except json.JSONDecodeError as exc:
            # Keep the exception type so existing handlers still match, but
            # say which of the many input files is broken.
            raise json.JSONDecodeError(
                f"{exc.msg} (in {path})", exc.doc, exc.pos
            ) from exc
        traces.append(TraceRecord.model_validate(payload))
    return traces
def export_trace_jsonl(
    input_dir: Path = DEFAULT_INPUT_DIR,
    output_path: Path = DEFAULT_OUTPUT_PATH,
) -> int:
    """Write every trace found in *input_dir* to *output_path* as JSON Lines.

    Each trace becomes one line: its ``model_dump(mode="json")`` form encoded
    with sorted keys and without ASCII escaping. Missing parent directories of
    *output_path* are created, and the file ends with a trailing newline.

    Args:
        input_dir: Directory handed to ``collect_traces``.
        output_path: Destination file, written as UTF-8.

    Returns:
        The number of traces written.
    """
    records = collect_traces(input_dir)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    rows: list[str] = []
    for record in records:
        document = record.model_dump(mode="json")
        rows.append(json.dumps(document, ensure_ascii=False, sort_keys=True))

    output_path.write_text("\n".join(rows) + "\n", encoding="utf-8")
    return len(records)
def _parse_args() -> argparse.Namespace:
    """Parse the command line.

    Recognised options, both converted to ``Path``:
        --input-dir  defaults to ``DEFAULT_INPUT_DIR``
        --output     defaults to ``DEFAULT_OUTPUT_PATH``

    The module docstring is used as the parser description.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    path_options = (
        ("--input-dir", DEFAULT_INPUT_DIR),
        ("--output", DEFAULT_OUTPUT_PATH),
    )
    for flag, fallback in path_options:
        cli.add_argument(flag, type=Path, default=fallback)
    return cli.parse_args()
def main() -> None:
    """Command-line entry point: export the traces and report how many were written."""
    args = _parse_args()
    count = export_trace_jsonl(input_dir=args.input_dir, output_path=args.output)
    summary = f"exported {count} traces to {args.output}"
    print(summary)


# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()