# iris/scripts/publish_trace.py
# Last commit (nextmarte, 04c91be):
#   docs: link the agent trace (Sharing is Caring) + clarify the inference log is separate
"""Publish Iris's runtime inference traces as a Hugging Face Dataset (optional).
This is an OPEN INFERENCE LOG of the running app — a privacy-safe operational
record (only the produced text + metadata, never raw images or audio; see
core/trace.py). It is complementary to the *coding-agent* build trace at
build-small-hackathon/iris-agent-trace, which is what earns the 'Sharing is
Caring' merit badge.
How to run (on Marcus's machine, with an HF token in the `build-small-hackathon` org):
pip install datasets
huggingface-cli login # or export HF_TOKEN=...
IRIS_TRACE=1 python app.py # use the app a bit to produce traces
python scripts/publish_trace.py
It reads traces/iris_traces.jsonl and pushes a parquet dataset to
`build-small-hackathon/iris-traces`. Then add that dataset link to the README.
"""
import os
import sys
# Target dataset repository on the Hub; IRIS_TRACE_REPO overrides the default.
REPO = os.getenv("IRIS_TRACE_REPO", "build-small-hackathon/iris-traces")
# Local JSONL trace log to publish; IRIS_TRACE_FILE overrides the default.
JSONL = os.getenv("IRIS_TRACE_FILE", "traces/iris_traces.jsonl")
def main() -> None:
    """Load the local JSONL trace log and push it to the Hub as a dataset.

    The file named by ``JSONL`` is read with the ``datasets`` JSON loader and
    the result is pushed to the ``REPO`` dataset repository with
    ``private=False``.

    Raises:
        SystemExit: with an explanatory message when the trace file is
            missing or empty, or when the ``datasets`` package is not
            installed.
    """
    if not os.path.exists(JSONL):
        sys.exit(f"No trace file at {JSONL!r}. Run the app with IRIS_TRACE=1 first.")
    # A zero-byte log means there is nothing to publish: stop with a clear
    # message instead of handing an empty file to the loader and the Hub.
    if os.path.getsize(JSONL) == 0:
        sys.exit(
            f"Trace file {JSONL!r} is empty. "
            "Use the app with IRIS_TRACE=1 to record some traces first."
        )

    # Imported lazily so the file checks above work without `datasets`
    # installed, and a missing package yields a hint rather than a traceback.
    try:
        from datasets import load_dataset
    except ImportError:
        sys.exit("Missing dependency: pip install datasets")

    ds = load_dataset("json", data_files=JSONL, split="train")
    print(f"Loaded {len(ds)} traces. Columns: {ds.column_names}")
    print(f"Pushing to https://huggingface.co/datasets/{REPO} ...")
    ds.push_to_hub(REPO, private=False)
    # NOTE(review): the module docstring attributes the 'Sharing is Caring'
    # badge to the separate agent-trace dataset; confirm this wording.
    print("Done. Add the dataset link to the README (Sharing is Caring badge).")
# Script entry point: publish only when executed directly, never on import.
if __name__ == "__main__":
    sys.exit(main())