File size: 1,651 Bytes
04c91be
f7a11cd
04c91be
 
 
 
 
f7a11cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
"""Publish Iris's runtime inference traces as a Hugging Face Dataset (optional).

This is an OPEN INFERENCE LOG of the running app — a privacy-safe operational
record (only the produced text + metadata, never raw images or audio; see
core/trace.py). It is complementary to the *coding-agent* build trace at
build-small-hackathon/iris-agent-trace, which is what earns the 'Sharing is
Caring' merit badge.

How to run (on Marcus's machine, with an HF token in the `build-small-hackathon` org):

    pip install datasets
    huggingface-cli login                 # or export HF_TOKEN=...
    IRIS_TRACE=1 python app.py            # use the app a bit to produce traces
    python scripts/publish_trace.py

By default it reads traces/iris_traces.jsonl and pushes a parquet dataset to
`build-small-hackathon/iris-traces`; set IRIS_TRACE_FILE or IRIS_TRACE_REPO to
override either one. Then add that dataset link to the README.
"""
import os
import sys

# Hub dataset repo the traces are pushed to; override with IRIS_TRACE_REPO.
REPO = os.getenv("IRIS_TRACE_REPO", "build-small-hackathon/iris-traces")
# Local JSONL trace log that gets published; override with IRIS_TRACE_FILE.
JSONL = os.getenv("IRIS_TRACE_FILE", "traces/iris_traces.jsonl")


def main(trace_file=None, repo=None) -> None:
    """Load the JSONL trace log and push it to the Hugging Face Hub.

    The dataset is pushed with ``private=False``, i.e. as a public dataset.

    Args:
        trace_file: Path of the JSONL trace log to publish. When omitted, the
            module-level ``JSONL`` setting is used.
        repo: Hub dataset repo id to push to. When omitted, the module-level
            ``REPO`` setting is used.

    Raises:
        SystemExit: With an explanatory message when the trace file is
            missing, is not a regular file, or is empty, or when the
            ``datasets`` package is not installed.
    """
    # Fall back to the module settings only when no explicit value is given.
    path = JSONL if trace_file is None else trace_file
    target = REPO if repo is None else repo

    # isfile rather than exists: a directory at this path is not a trace log.
    if not os.path.isfile(path):
        sys.exit(f"No trace file at {path!r}. Run the app with IRIS_TRACE=1 first.")
    # A zero-byte log holds no traces, so there is nothing to publish.
    if os.path.getsize(path) == 0:
        sys.exit(
            f"Trace file {path!r} is empty. "
            "Use the app with IRIS_TRACE=1 to record some traces first."
        )

    # Imported lazily so the file checks above run without the optional
    # dependency, and a missing install gets a clear message.
    try:
        from datasets import load_dataset
    except ImportError:
        sys.exit("Missing dependency: pip install datasets")

    ds = load_dataset("json", data_files=path, split="train")
    print(f"Loaded {len(ds)} traces. Columns: {ds.column_names}")
    print(f"Pushing to https://huggingface.co/datasets/{target} ...")
    ds.push_to_hub(target, private=False)
    print("Done. Add the dataset link to the README (Sharing is Caring badge).")


# Publish only when executed as a script; importing this module does nothing.
if __name__ == "__main__":
    main()