Spaces:
Running on Zero
Running on Zero
File size: 1,822 Bytes
"""Export validated public traces to a JSONL file."""
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
# Directory two levels above this file (the parent of the directory containing
# this script). It is put at the front of sys.path so the `src` package import
# that follows resolves even when the script is run directly.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
    # Insert at index 0 so this checkout wins over any other `src` on the path.
    sys.path.insert(0, str(PROJECT_ROOT))
from src.models.schema import TraceRecord
# Default directory scanned for "*.json" trace files. The path is relative, so
# it is resolved against the current working directory at call time.
DEFAULT_INPUT_DIR = Path("data/traces/samples")
# Default JSONL destination. It sits inside the default input directory, but its
# ".jsonl" suffix is not matched by the "*.json" glob used when collecting traces.
DEFAULT_OUTPUT_PATH = Path("data/traces/samples/objectverse_public_mock_traces.jsonl")
def collect_traces(input_dir: Path = DEFAULT_INPUT_DIR) -> list[TraceRecord]:
    """Load and validate every ``*.json`` trace directly under *input_dir*.

    Files are processed in sorted path order, so the returned list has a
    stable ordering. Only the top level of *input_dir* is searched.

    Args:
        input_dir: Directory that holds one JSON document per trace.

    Returns:
        One validated ``TraceRecord`` per file, in sorted path order.

    Raises:
        FileNotFoundError: If *input_dir* contains no ``*.json`` files.

    Errors raised while reading, JSON-decoding, or validating a file are not
    caught here and propagate to the caller.
    """
    trace_files = sorted(input_dir.glob("*.json"))
    if trace_files:
        return [
            TraceRecord.model_validate(
                json.loads(trace_file.read_text(encoding="utf-8"))
            )
            for trace_file in trace_files
        ]
    raise FileNotFoundError(f"No trace JSON files found under {input_dir}")
def export_trace_jsonl(
    input_dir: Path = DEFAULT_INPUT_DIR,
    output_path: Path = DEFAULT_OUTPUT_PATH,
) -> int:
    """Validate the traces under *input_dir* and write them to *output_path* as JSONL.

    Each trace becomes one line of JSON with sorted keys and non-ASCII
    characters written verbatim (UTF-8). Lines are always terminated with a
    single ``"\\n"`` regardless of platform, so the exported file is
    byte-for-byte reproducible across operating systems.

    Args:
        input_dir: Directory containing the ``*.json`` trace files.
        output_path: Destination JSONL file. Missing parent directories are
            created; an existing file is overwritten.

    Returns:
        The number of traces written.

    Raises:
        FileNotFoundError: Propagated from ``collect_traces`` when *input_dir*
            holds no ``*.json`` files.
    """
    traces = collect_traces(input_dir)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Serialize everything before opening the file so a serialization error
    # cannot leave a truncated export behind.
    lines = [
        json.dumps(trace.model_dump(mode="json"), ensure_ascii=False, sort_keys=True)
        for trace in traces
    ]
    # newline="\n" disables text-mode newline translation; without it the file
    # would get "\r\n" line endings on Windows.
    with output_path.open("w", encoding="utf-8", newline="\n") as handle:
        handle.write("\n".join(lines) + "\n")
    return len(traces)
def _parse_args() -> argparse.Namespace:
    """Parse the command-line options of the export script.

    Recognized options (both parsed as ``Path``):
        --input-dir: directory to read traces from (default ``DEFAULT_INPUT_DIR``).
        --output: JSONL file to write (default ``DEFAULT_OUTPUT_PATH``).

    Returns:
        The parsed namespace with ``input_dir`` and ``output`` attributes.
    """
    # The module docstring doubles as the --help description.
    cli = argparse.ArgumentParser(description=__doc__)
    path_options = (
        ("--input-dir", DEFAULT_INPUT_DIR),
        ("--output", DEFAULT_OUTPUT_PATH),
    )
    for flag, fallback in path_options:
        cli.add_argument(flag, type=Path, default=fallback)
    return cli.parse_args()
def main() -> None:
    """Run the export from the command line and print a one-line summary."""
    args = _parse_args()
    source_dir, destination = args.input_dir, args.output
    count = export_trace_jsonl(source_dir, destination)
    print(f"exported {count} traces to {args.output}")
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|