ObjectverseDiary / scripts /export_traces.py
qqyule's picture
feat: add initial mock mvp
bc02199
"""Export validated public traces to a JSONL file."""
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
# Directory one level above the folder that contains this file.
# NOTE(review): presumably the repository root -- confirm against the layout.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
# Put that directory at the front of sys.path (only if it is not already
# listed) so the `src` package import that follows can be resolved when this
# file is executed directly.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))
from src.models.schema import TraceRecord
# Default locations used by the functions and CLI options in this module.
# Both are relative paths, so they are interpreted relative to the current
# working directory of the process.
DEFAULT_INPUT_DIR = Path("data/traces/samples")
# The default output lives inside the default input directory; its ".jsonl"
# suffix does not match the "*.json" glob used by collect_traces.
DEFAULT_OUTPUT_PATH = Path("data/traces/samples/objectverse_public_mock_traces.jsonl")
def collect_traces(input_dir: Path = DEFAULT_INPUT_DIR) -> list[TraceRecord]:
    """Load and validate every ``*.json`` file found directly in *input_dir*.

    Files are handled in sorted path order. Each one is read as UTF-8,
    decoded as JSON and passed to ``TraceRecord.model_validate``. Errors
    raised while reading, decoding or validating a file are not caught here.

    Args:
        input_dir: Directory searched (non-recursively) for ``*.json`` files.

    Returns:
        The validated records, in the same order as the sorted file paths.

    Raises:
        FileNotFoundError: If the glob yields no ``*.json`` entries.
    """
    trace_files = sorted(input_dir.glob("*.json"))
    if len(trace_files) == 0:
        raise FileNotFoundError(f"No trace JSON files found under {input_dir}")

    return [
        TraceRecord.model_validate(json.loads(trace_file.read_text(encoding="utf-8")))
        for trace_file in trace_files
    ]
def export_trace_jsonl(
    input_dir: Path = DEFAULT_INPUT_DIR,
    output_path: Path = DEFAULT_OUTPUT_PATH,
) -> int:
    """Write the traces collected from *input_dir* to *output_path* as JSONL.

    Each record is dumped with ``model_dump(mode="json")`` and serialised on
    its own line with sorted keys and non-ASCII characters left unescaped.
    The file is written as UTF-8 and ends with a trailing newline. Missing
    parent directories of *output_path* are created first.

    Args:
        input_dir: Directory handed to ``collect_traces``.
        output_path: Destination file; overwritten if it already exists.

    Returns:
        The number of traces written.
    """
    records = collect_traces(input_dir)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    serialized: list[str] = []
    for record in records:
        as_json = record.model_dump(mode="json")
        serialized.append(json.dumps(as_json, ensure_ascii=False, sort_keys=True))

    jsonl_text = "\n".join(serialized) + "\n"
    output_path.write_text(jsonl_text, encoding="utf-8")
    return len(records)
def _parse_args() -> argparse.Namespace:
    """Parse this script's command-line options.

    Recognised options, both converted to ``Path``:
      --input-dir  defaults to ``DEFAULT_INPUT_DIR``
      --output     defaults to ``DEFAULT_OUTPUT_PATH``

    Returns:
        The parsed namespace with ``input_dir`` and ``output`` attributes.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    path_options = (
        ("--input-dir", DEFAULT_INPUT_DIR),
        ("--output", DEFAULT_OUTPUT_PATH),
    )
    for flag, fallback in path_options:
        cli.add_argument(flag, type=Path, default=fallback)
    return cli.parse_args()
def main() -> None:
    """Command-line entry point: parse options, export, and report the count."""
    options = _parse_args()
    exported = export_trace_jsonl(
        input_dir=options.input_dir,
        output_path=options.output,
    )
    print(f"exported {exported} traces to {options.output}")
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()