monsimas committed on
Commit
52b30dc
·
verified ·
1 Parent(s): 0e5aa07

Deploy ingestion server + website (app.py, scrub.py, index.html, Dockerfile)

Browse files
Files changed (6) hide show
  1. Dockerfile +15 -0
  2. README.md +45 -5
  3. app.py +178 -0
  4. index.html +750 -0
  5. requirements.txt +4 -0
  6. scrub.py +202 -0
Dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.12-slim

# Hugging Face Spaces inject secrets as env vars at runtime (HF_TOKEN, DATASET_REPO, ...).

# Run the server as an unprivileged user rather than root. UID 1000 is the
# UID Hugging Face Spaces documents for Docker Spaces.
RUN useradd --create-home --uid 1000 user

WORKDIR /app

# Dependencies are installed before the app code is copied, so editing the
# app does not invalidate the pip layer.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# App code + the website it serves at "/"
COPY --chown=user:user app.py scrub.py index.html ./

# Everything below (and the running container) executes as the unprivileged user.
USER user

# HF Spaces route traffic to port 7860.
EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,50 @@
1
  ---
2
- title: Web
3
- emoji: 📉
4
- colorFrom: purple
5
- colorTo: yellow
6
  sdk: docker
 
7
  pinned: false
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Trace Commons
3
+ emoji: 🧵
4
+ colorFrom: indigo
5
+ colorTo: gray
6
  sdk: docker
7
+ app_port: 7860
8
  pinned: false
9
+ short_description: Open commons of anonymized coding-agent traces
10
  ---
11
 
12
+ # Trace Commons website + ingestion server
13
+
14
+ This Space serves two things from one origin:
15
+
16
+ - **`GET /`** — the Trace Commons website (`index.html`).
17
+ - **`POST /donate`** — the anonymous ingestion endpoint used by the
18
+ [`donate-trace`](https://github.com/trace-commons/donate-trace) skill.
19
+
20
+ A donation is a single, already-locally-scrubbed coding-agent session. The
21
+ server re-runs the **exact same** deterministic scrubber (`scrub.py`, kept
22
+ byte-identical to the skill's) as a backstop, refuses anything that still trips
23
+ a high-confidence secret pattern, and opens a **pull request** (never a direct
24
+ push) to the dataset under a project-owned token. Contributors need no Hugging
25
+ Face account.
26
+
27
+ ## Endpoints
28
+
29
+ | Method | Path | Purpose |
30
+ |--------|------------|------------------------------------------------------|
31
+ | GET | `/` | The website |
32
+ | GET | `/health` | `{service, configured, dataset}` — `configured` is true once both `HF_TOKEN` and `DATASET_REPO` are set |
33
+ | POST | `/donate` | Accept a scrubbed trace, backstop-scrub, open a PR |
34
+
35
+ ## Configuration (Space secrets)
36
+
37
+ | Secret | Required | Meaning |
38
+ |-----------------|----------|-----------------------------------------------------|
39
+ | `HF_TOKEN` | yes | Write token for the project bot account |
40
+ | `DATASET_REPO` | yes | e.g. `trace-commons/agent-traces` |
41
+ | `MAX_BYTES` | no | Max payload size in bytes (default 5,000,000) |
42
+ | `RATE_PER_HOUR` | no | Donations per IP per hour (default 20) |
43
+
44
+ If `HF_TOKEN`/`DATASET_REPO` are unset, `/donate` validates and scrubs the
45
+ payload but returns `503 validated_not_published` instead of opening a PR.
46
+
47
+ ## Dataset
48
+
49
+ Donations land in [`trace-commons/agent-traces`](https://huggingface.co/datasets/trace-commons/agent-traces)
50
+ under `sessions/<harness>/<filename>`, licensed **CC-BY-4.0**.
app.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Trace Commons ingestion server.
3
+
4
+ Receives anonymous donations from the donate-trace skill, re-runs the same
5
+ deterministic scrubber as a backstop, and opens a pull request to the dataset
6
+ under a single project-owned token. Contributors need no Hugging Face account.
7
+
8
+ Designed to run as a Hugging Face Space (Docker SDK) or any host that can keep
9
+ a secret. Set these as Space secrets / environment variables:
10
+
11
+ HF_TOKEN write-scoped token for the project bot account (required)
12
+ DATASET_REPO e.g. "trace-commons/agent-traces" (required)
13
+ MAX_BYTES max accepted payload size (optional, default 5_000_000)
14
+ RATE_PER_HOUR donations allowed per IP per hour (optional, default 20)
15
+
16
+ This is intentionally small. The skill already scrubbed and the user already
17
+ reviewed; the server's job is to never trust the client, re-scrub as a
18
+ backstop, refuse anything that still trips the scrubber, and submit.
19
+ """
20
+
21
+ import io
22
+ import os
23
+ import re
24
+ import time
25
+ import json
26
+ import uuid
27
+ import pathlib
28
+ from collections import defaultdict, deque
29
+
30
+ from fastapi import FastAPI, Request
31
+ from fastapi.responses import JSONResponse, FileResponse, HTMLResponse
32
+ from fastapi.middleware.cors import CORSMiddleware
33
+
34
+ from scrub import scrub_text # the exact same scrubber the skill runs
35
+
36
# --- runtime configuration, read once at import time -------------------------
HF_TOKEN = os.environ.get("HF_TOKEN")          # None when the secret is not set
DATASET_REPO = os.environ.get("DATASET_REPO")  # None when the secret is not set
MAX_BYTES = int(os.environ.get("MAX_BYTES", "5000000"))
RATE_PER_HOUR = int(os.environ.get("RATE_PER_HOUR", "20"))

# Harness identifiers a donation may declare; also used as the dataset subfolder.
VALID_HARNESS = {"claude_code", "codex", "pi", "opencode"}

# Client-supplied filenames: 1-200 characters from [A-Za-z0-9._-].
# - `\Z` (not `$`) anchors at the true end of the string; `$` would also match
#   just before a trailing newline and let "name\n" through.
# - The negative lookahead rejects names made only of dots (".", ".."), which
#   are directory references rather than file names.
SAFE_FILENAME = re.compile(r"^(?!\.+\Z)[A-Za-z0-9._\-]{1,200}\Z")
43
+
44
# A single FastAPI application carries every route defined in this module.
app = FastAPI(title="Trace Commons ingestion")

# CORS: any origin may issue GET/POST requests, with any request headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["POST", "GET"],
    allow_headers=["*"],
)
48
+
49
+ # --- simple in-memory rate limiting (per IP, sliding hour) ------------------
50
+ # For a single-process Space this is enough. Behind multiple replicas, move
51
+ # this to a shared store.
52
+ _hits = defaultdict(deque)
53
+
54
+
55
+ def _rate_ok(ip):
56
+ now = time.time()
57
+ window = _hits[ip]
58
+ while window and now - window[0] > 3600:
59
+ window.popleft()
60
+ if len(window) >= RATE_PER_HOUR:
61
+ return False
62
+ window.append(now)
63
+ return True
64
+
65
+
66
# index.html is expected in the same directory as this module.
SITE_FILE = pathlib.Path(__file__).with_name("index.html")


@app.get("/", response_class=HTMLResponse)
def home():
    """Return the website's index.html, or a stub page when the file is absent."""
    if not SITE_FILE.exists():
        # The stub is still answered with status 200.
        return HTMLResponse("<h1>Trace Commons</h1><p>Site file not found.</p>", status_code=200)
    return FileResponse(str(SITE_FILE))
75
+
76
+
77
@app.get("/health")
def health():
    """Report the service name, whether both secrets are set, and the dataset target."""
    status = {"service": "trace-commons-ingestion"}
    # "configured" is true only when HF_TOKEN and DATASET_REPO are both non-empty.
    status["configured"] = bool(HF_TOKEN and DATASET_REPO)
    status["dataset"] = DATASET_REPO if DATASET_REPO else "(unset)"
    return status
85
+
86
+
87
@app.post("/donate")
async def donate(request: Request):
    """Accept one donated trace, re-scrub it, and open a dataset pull request.

    The body must be a JSON object with ``harness`` (a member of
    VALID_HARNESS), ``trace`` (a non-blank string), ``consent`` (exactly
    ``true``) and optionally ``filename``. A filename that is missing, not a
    string, or not acceptable to SAFE_FILENAME is replaced by ``<uuid>.jsonl``.

    Every call counts against the caller's rate limit, including calls that
    are rejected later by validation.

    Responses:
        200  {"status": "submitted", "pr_url", "path"}
        400  bad_json, bad_harness, empty_trace or no_consent
        413  too_large
        422  secrets_found (the backstop scrubber still found secrets)
        429  rate_limited
        502  publish_failed (opening the pull request raised)
        503  validated_not_published (HF_TOKEN / DATASET_REPO not set)
    """
    ip = request.client.host if request.client else "unknown"
    if not _rate_ok(ip):
        return JSONResponse(
            {"error": "rate_limited", "detail": "Too many donations from this address this hour."},
            status_code=429,
        )

    body = await request.body()
    if len(body) > MAX_BYTES:
        return JSONResponse(
            {"error": "too_large", "detail": f"Payload exceeds {MAX_BYTES} bytes."},
            status_code=413,
        )

    try:
        data = json.loads(body)
    except (ValueError, RecursionError):
        # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
        # raised for bytes that are not valid UTF-8/16/32; RecursionError is
        # what extremely deep nesting produces. All are client errors.
        return JSONResponse({"error": "bad_json"}, status_code=400)
    if not isinstance(data, dict):
        # Valid JSON that is not an object (list, string, number, null) has no fields.
        return JSONResponse(
            {"error": "bad_json", "detail": "Request body must be a JSON object."},
            status_code=400,
        )

    harness = data.get("harness")
    filename = data.get("filename")
    consent = data.get("consent")
    trace = data.get("trace")

    # --- validation ---------------------------------------------------------
    # The isinstance guard keeps unhashable values (lists, objects) from
    # raising TypeError in the set membership test.
    if not isinstance(harness, str) or harness not in VALID_HARNESS:
        return JSONResponse({"error": "bad_harness", "detail": f"harness must be one of {sorted(VALID_HARNESS)}"}, status_code=400)
    if not isinstance(trace, str) or not trace.strip():
        return JSONResponse({"error": "empty_trace"}, status_code=400)
    if consent is not True:
        return JSONResponse({"error": "no_consent", "detail": "consent must be true; the contributor must agree to open publication."}, status_code=400)
    if (
        not isinstance(filename, str)
        or not SAFE_FILENAME.fullmatch(filename)  # fullmatch: no trailing-newline loophole
        or not filename.strip(".")                # "." / ".." are directories, not files
    ):
        # generate a safe one rather than trusting client input
        filename = f"{uuid.uuid4().hex}.jsonl"

    # --- backstop scrub: never trust the client ----------------------------
    cleaned, report = scrub_text(trace, harness)
    # The skill should have already removed everything. If the backstop still
    # finds high-confidence secrets, refuse: something slipped through.
    # Redactions of kind "home_path" and "email" do not cause a rejection.
    secret_kinds = {
        kind: count
        for kind, count in report["redactions"].items()
        if kind not in ("home_path", "email")
    }
    if secret_kinds:
        return JSONResponse(
            {
                "error": "secrets_found",
                "detail": "The server's backstop scrubber found secrets the client should have removed. Donation rejected.",
                "found": secret_kinds,
            },
            status_code=422,
        )

    if not HF_TOKEN or not DATASET_REPO:
        # Not yet configured — accept-validate but don't pretend to publish.
        return JSONResponse(
            {
                "status": "validated_not_published",
                "detail": "Server is not yet configured with a dataset target. Trace passed all checks but was not published.",
                "redactions": report["redactions"],
            },
            status_code=503,
        )

    # --- open the PR on the contributor's behalf ---------------------------
    try:
        pr_url = _open_pr(cleaned, harness, filename)
    except Exception as e:  # noqa: BLE001 — surface a clean message to the skill
        return JSONResponse({"error": "publish_failed", "detail": str(e)}, status_code=502)

    return {"status": "submitted", "pr_url": pr_url, "path": f"sessions/{harness}/{filename}"}
158
+
159
+
160
def _open_pr(cleaned_text, harness, filename):
    """Commit the scrubbed trace to the dataset as a pull request.

    The file is added at ``sessions/<harness>/<filename>`` in DATASET_REPO,
    authenticated with HF_TOKEN.

    Returns:
        The commit result's ``pr_url`` when it is set, otherwise ``str()`` of
        the commit result.
    """
    # Function-scope import: huggingface_hub is loaded on the first call.
    from huggingface_hub import CommitOperationAdd, HfApi

    target_path = f"sessions/{harness}/{filename}"
    payload = io.BytesIO(cleaned_text.encode("utf-8"))
    add_file = CommitOperationAdd(path_in_repo=target_path, path_or_fileobj=payload)

    client = HfApi(token=HF_TOKEN)
    result = client.create_commit(
        repo_id=DATASET_REPO,
        repo_type="dataset",
        operations=[add_file],
        commit_message=f"Donate {harness} trace ({filename})",
        commit_description="Anonymous donation via Trace Commons ingestion server.",
        create_pr=True,  # open a pull request instead of committing directly
    )

    # Fall back to the string form of the result if pr_url is missing or empty.
    pr_url = getattr(result, "pr_url", None)
    if pr_url:
        return pr_url
    return str(result)
index.html ADDED
@@ -0,0 +1,750 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Trace Commons · An open dataset for code agent traces</title>
7
+ <link rel="preconnect" href="https://fonts.googleapis.com">
8
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
9
+ <link href="https://fonts.googleapis.com/css2?family=Fraunces:opsz,wght@9..144,400;9..144,500;9..144,600&family=Spline+Sans:wght@400;500;600&family=Spline+Sans+Mono:wght@400;500;600&display=swap" rel="stylesheet">
10
+ <style>
11
+ :root{
12
+ --ink:#0e1c1b; /* deep archival teal-black */
13
+ --surface:#0f201f;
14
+ --panel:#13302d;
15
+ --panel-edge:#1e423d;
16
+ --paper-pure:#f6f1e7;
17
+ --paper-dim:#cdbfa8;
18
+ --signal:#e8743b; /* restrained ember — the one bold color */
19
+ --signal-soft:#f0a878;
20
+ --leaf:#7fb09a; /* muted verdigris, commons/green nod */
21
+ --line:rgba(243,236,224,0.14);
22
+ --line-strong:rgba(243,236,224,0.28);
23
+ --mono:'Spline Sans Mono',ui-monospace,monospace;
24
+ --sans:'Spline Sans',system-ui,sans-serif;
25
+ --display:'Fraunces',Georgia,serif;
26
+ }
27
+ *{box-sizing:border-box;margin:0;padding:0}
28
+ html{scroll-behavior:smooth}
29
+ body{
30
+ background:var(--surface);
31
+ color:var(--paper-pure);
32
+ font-family:var(--sans);
33
+ font-size:17px;line-height:1.6;
34
+ -webkit-font-smoothing:antialiased;
35
+ overflow-x:hidden;
36
+ }
37
+ body{background:
38
+ radial-gradient(900px 500px at 82% -8%, rgba(232,116,59,0.10), transparent 60%),
39
+ radial-gradient(700px 500px at 5% 8%, rgba(127,176,154,0.07), transparent 55%),
40
+ var(--surface);
41
+ }
42
+ .wrap{max-width:1080px;margin:0 auto;padding:0 28px}
43
+ a{color:inherit}
44
+ ::selection{background:var(--signal);color:var(--ink)}
45
+
46
+ /* ---------- nav ---------- */
47
+ nav{
48
+ display:flex;align-items:center;justify-content:space-between;
49
+ padding:22px 28px;max-width:1080px;margin:0 auto;
50
+ position:relative;z-index:5;
51
+ }
52
+ .brand{display:flex;align-items:center;gap:11px;font-family:var(--mono);
53
+ font-weight:500;letter-spacing:.02em;font-size:15px}
54
+ .brand .mark{
55
+ width:24px;height:24px;border:1.5px solid var(--signal);border-radius:4px;
56
+ display:grid;place-items:center;position:relative;flex:none;
57
+ }
58
+ .brand .mark::before{content:"";position:absolute;inset:5px 4px;
59
+ background:
60
+ linear-gradient(var(--signal),var(--signal)) 0 0/100% 1.5px no-repeat,
61
+ linear-gradient(var(--leaf),var(--leaf)) 0 4px/70% 1.5px no-repeat,
62
+ linear-gradient(var(--paper-dim),var(--paper-dim)) 0 8px/85% 1.5px no-repeat;
63
+ }
64
+ .nav-right{display:flex;gap:18px;align-items:center}
65
+ .nav-dataset{font-family:var(--mono);font-size:14px;color:var(--paper-dim);text-decoration:none;transition:color .2s}
66
+ .nav-dataset:hover{color:var(--paper-pure)}
67
+ .nav-cta{padding:10px 18px;font-size:14px}
68
+ @media(max-width:520px){.nav-dataset{display:none}}
69
+
70
+ /* ---------- hero ---------- */
71
+ header{padding:60px 0 30px;position:relative}
72
+ .eyebrow{
73
+ font-family:var(--mono);font-size:13px;letter-spacing:.16em;
74
+ text-transform:uppercase;color:var(--leaf);margin-bottom:26px;
75
+ display:flex;align-items:center;gap:12px;
76
+ }
77
+ .eyebrow::before{content:"";width:34px;height:1px;background:var(--leaf);opacity:.6}
78
+ h1{
79
+ font-family:var(--display);font-weight:400;
80
+ font-size:clamp(44px,7.2vw,82px);line-height:1.02;
81
+ letter-spacing:-0.02em;margin-bottom:26px;
82
+ font-optical-sizing:auto;
83
+ }
84
+ h1 em{font-style:italic;color:var(--signal-soft)}
85
+ .lede{
86
+ font-size:clamp(18px,2.1vw,21px);line-height:1.55;
87
+ color:var(--paper-dim);max-width:580px;margin-bottom:38px;
88
+ }
89
+ .lede strong{color:var(--paper-pure);font-weight:500}
90
+ .cta-row{display:flex;gap:14px;flex-wrap:wrap;align-items:center}
91
+ .btn{
92
+ font-family:var(--mono);font-size:15px;font-weight:500;
93
+ padding:14px 22px;border-radius:8px;text-decoration:none;
94
+ display:inline-flex;align-items:center;gap:10px;cursor:pointer;
95
+ border:1px solid transparent;transition:transform .15s, background .2s, border-color .2s;
96
+ }
97
+ .btn-primary{background:var(--signal);color:var(--ink)}
98
+ .btn-primary:hover{transform:translateY(-2px);background:var(--signal-soft)}
99
+ .btn-ghost{border-color:var(--line-strong);color:var(--paper-pure)}
100
+ .btn-ghost:hover{border-color:var(--paper-dim);transform:translateY(-2px)}
101
+
102
+ /* ---------- hero install command ---------- */
103
+ .hero-install{max-width:560px}
104
+ .hero-code{
105
+ padding:16px 16px 16px 18px;font-size:14px;
106
+ border-color:var(--panel-edge);
107
+ box-shadow:0 16px 40px -24px rgba(0,0,0,.7);
108
+ }
109
+ .hero-code code{overflow-x:auto;scrollbar-width:none}
110
+ .hero-code code::-webkit-scrollbar{display:none}
111
+ .hero-code .copy{font-size:13px;padding:8px 15px}
112
+ .hero-links{display:flex;flex-direction:column;gap:10px;align-items:flex-start;margin-top:18px}
113
+ .hero-install-link{
114
+ display:inline-block;font-family:var(--mono);font-size:13.5px;
115
+ color:var(--paper-dim);text-decoration:none;border-bottom:1px solid var(--line);
116
+ padding-bottom:2px;transition:color .2s,border-color .2s;
117
+ }
118
+ .hero-install-link:hover{color:var(--paper-pure);border-color:var(--paper-dim)}
119
+ .oss-pledge{
120
+ display:flex;gap:11px;align-items:flex-start;margin-top:22px;max-width:520px;
121
+ border:1px solid rgba(127,176,154,.32);border-radius:10px;padding:13px 15px;
122
+ background:rgba(127,176,154,.06);font-size:14px;line-height:1.5;color:var(--paper-pure);
123
+ }
124
+ .oss-pledge .oss-ic{color:var(--leaf);flex:none;font-size:12px;margin-top:3px}
125
+
126
+ /* ---------- hero ledger ---------- */
127
+ .hero-grid{display:grid;grid-template-columns:1.05fr .95fr;gap:54px;align-items:start}
128
+ @media(max-width:860px){.hero-grid{grid-template-columns:1fr;gap:40px}}
129
+ .ledger{
130
+ background:linear-gradient(180deg,var(--panel),var(--surface));
131
+ border:1px solid var(--panel-edge);border-radius:14px;
132
+ font-family:var(--mono);font-size:13px;overflow:hidden;
133
+ box-shadow:0 30px 60px -30px rgba(0,0,0,.6);
134
+ }
135
+ .ledger-head{
136
+ display:flex;align-items:center;gap:8px;padding:13px 16px;
137
+ border-bottom:1px solid var(--line);color:var(--paper-dim);
138
+ font-size:12px;letter-spacing:.04em;
139
+ }
140
+ .dot{width:9px;height:9px;border-radius:50%;background:var(--panel-edge)}
141
+ .ledger-head .ttl{margin-left:6px}
142
+ .ledger-head .live{margin-left:auto;display:flex;align-items:center;gap:7px;color:var(--leaf)}
143
+ .ledger-head .live::before{content:"";width:7px;height:7px;border-radius:50%;
144
+ background:var(--leaf);box-shadow:0 0 0 0 var(--leaf);animation:pulse 2.2s infinite}
145
+ @keyframes pulse{0%{box-shadow:0 0 0 0 rgba(127,176,154,.5)}70%{box-shadow:0 0 0 7px rgba(127,176,154,0)}100%{box-shadow:0 0 0 0 rgba(127,176,154,0)}}
146
+ .entry{
147
+ display:grid;grid-template-columns:auto 1fr auto;gap:14px;align-items:center;
148
+ padding:11px 16px;border-bottom:1px solid var(--line);
149
+ opacity:0;transform:translateY(6px);animation:enter .5s forwards;
150
+ }
151
+ @keyframes enter{to{opacity:1;transform:none}}
152
+ .entry .seq{color:var(--panel-edge)}
153
+ .entry .who{color:var(--paper-pure)}
154
+ .entry .who span{color:var(--paper-dim)}
155
+ .entry .tag{
156
+ font-size:11px;padding:3px 8px;border-radius:20px;border:1px solid var(--line-strong);
157
+ color:var(--leaf);letter-spacing:.03em;
158
+ }
159
+ .entry .tag.cc{color:var(--signal-soft)}
160
+ .ledger-foot{padding:13px 16px;color:var(--panel-edge);font-size:12px;display:flex;justify-content:space-between}
161
+
162
+ /* ---------- counter band ---------- */
163
+ .counter{
164
+ display:grid;grid-template-columns:repeat(3,1fr);gap:1px;
165
+ background:var(--line);border:1px solid var(--line);border-radius:14px;
166
+ overflow:hidden;margin:64px 0 0;
167
+ }
168
+ @media(max-width:640px){.counter{grid-template-columns:1fr}}
169
+ .stat{background:var(--surface);padding:26px 28px}
170
+ .stat .n{font-family:var(--display);font-size:40px;font-weight:500;letter-spacing:-.02em;line-height:1}
171
+ .stat .n .unit{font-size:18px;color:var(--paper-dim);font-family:var(--mono);margin-left:4px}
172
+ .stat .l{font-family:var(--mono);font-size:12.5px;letter-spacing:.06em;text-transform:uppercase;
173
+ color:var(--paper-dim);margin-top:12px}
174
+ .stat.is-signal .n{color:var(--signal-soft)}
175
+
176
+ /* ---------- section scaffold ---------- */
177
+ section{padding:90px 0;position:relative}
178
+ .sec-label{font-family:var(--mono);font-size:13px;letter-spacing:.14em;text-transform:uppercase;
179
+ color:var(--leaf);margin-bottom:18px;display:flex;align-items:center;gap:12px}
180
+ .sec-label .ix{color:var(--panel-edge)}
181
+ h2{font-family:var(--display);font-weight:400;font-size:clamp(30px,4.4vw,46px);
182
+ line-height:1.08;letter-spacing:-.018em;margin-bottom:18px;max-width:16ch}
183
+ h2 em{font-style:italic;color:var(--signal-soft)}
184
+ .sec-intro{color:var(--paper-dim);max-width:560px;font-size:18px;margin-bottom:46px}
185
+
186
+ /* ---------- why / argument ---------- */
187
+ .why-grid{display:grid;grid-template-columns:repeat(3,1fr);gap:22px}
188
+ @media(max-width:860px){.why-grid{grid-template-columns:1fr}}
189
+ .card{
190
+ border:1px solid var(--line);border-radius:13px;padding:28px 26px;
191
+ background:linear-gradient(180deg,rgba(19,48,45,.4),transparent);
192
+ transition:border-color .25s,transform .25s;
193
+ }
194
+ .card:hover{border-color:var(--line-strong);transform:translateY(-3px)}
195
+ .card .ic{width:38px;height:38px;border-radius:9px;border:1px solid var(--panel-edge);
196
+ display:grid;place-items:center;margin-bottom:20px;color:var(--signal-soft);font-family:var(--mono)}
197
+ .card h3{font-family:var(--display);font-size:22px;font-weight:500;margin-bottom:10px;letter-spacing:-.01em}
198
+ .card p{font-size:15.5px;color:var(--paper-dim);line-height:1.55}
199
+ .inline-link{color:var(--signal-soft);text-decoration:none;border-bottom:1px solid rgba(232,116,59,.4);transition:border-color .2s}
200
+ .inline-link:hover{border-color:var(--signal-soft)}
201
+ .why-foot{margin-top:30px;font-family:var(--mono);font-size:13px;color:var(--paper-dim);
202
+ border-left:2px solid var(--leaf);padding-left:16px;max-width:620px;line-height:1.55}
203
+
204
+ /* ---------- install ---------- */
205
+ .install-shell{
206
+ border:1px solid var(--panel-edge);border-radius:16px;overflow:hidden;
207
+ background:linear-gradient(180deg,var(--panel),var(--surface));
208
+ }
209
+ .tabs{display:flex;border-bottom:1px solid var(--line);font-family:var(--mono);font-size:14px;flex-wrap:wrap}
210
+ .tab{padding:15px 22px;cursor:pointer;color:var(--paper-dim);border-right:1px solid var(--line);
211
+ transition:color .2s,background .2s;user-select:none;position:relative}
212
+ .tab:hover{color:var(--paper-pure)}
213
+ .tab.active{color:var(--paper-pure);background:rgba(232,116,59,.07)}
214
+ .tab.active::after{content:"";position:absolute;left:0;right:0;bottom:-1px;height:2px;background:var(--signal)}
215
+ .panel-body{padding:30px 30px 34px}
216
+ .step{display:flex;gap:18px;padding:16px 0;border-bottom:1px solid var(--line)}
217
+ .step:last-child{border-bottom:none}
218
+ .step .num{font-family:var(--mono);color:var(--signal-soft);font-size:14px;flex:none;width:26px;
219
+ border:1px solid var(--panel-edge);height:26px;border-radius:6px;display:grid;place-items:center}
220
+ .step .body{flex:1}
221
+ .step .body h4{font-size:16px;font-weight:600;margin-bottom:9px}
222
+ .step .body p{font-size:14.5px;color:var(--paper-dim);margin-bottom:12px}
223
+ .code{
224
+ display:flex;align-items:center;gap:14px;background:var(--ink);
225
+ border:1px solid var(--line);border-radius:9px;padding:13px 15px;
226
+ font-family:var(--mono);font-size:13.5px;color:var(--paper-pure);
227
+ overflow-x:auto;white-space:nowrap;
228
+ }
229
+ .code .pmt{color:var(--leaf);flex:none}
230
+ .code code{flex:1}
231
+ .copy{
232
+ flex:none;font-family:var(--mono);font-size:12px;color:var(--paper-dim);
233
+ border:1px solid var(--line-strong);background:none;border-radius:6px;
234
+ padding:6px 11px;cursor:pointer;transition:all .2s
235
+ }
236
+ .copy:hover{color:var(--ink);background:var(--signal);border-color:var(--signal)}
237
+ .copy.done{color:var(--ink);background:var(--leaf);border-color:var(--leaf)}
238
+ .install-note{font-size:13.5px;color:var(--paper-dim);margin-top:22px;font-family:var(--mono);
239
+ display:flex;gap:10px;align-items:flex-start}
240
+ .install-note .b{color:var(--leaf);flex:none}
241
+
242
+ /* ---------- agent target buttons ---------- */
243
+ .agent-targets{display:flex;flex-wrap:wrap;gap:8px;margin-top:16px;align-items:center}
244
+ .at-label{font-family:var(--mono);font-size:12px;color:var(--paper-dim);letter-spacing:.04em;margin-right:4px}
245
+ .agent-btn{
246
+ font-family:var(--mono);font-size:12.5px;color:var(--paper-dim);
247
+ border:1px solid var(--line-strong);background:none;border-radius:7px;
248
+ padding:7px 13px;cursor:pointer;transition:all .18s;
249
+ }
250
+ .agent-btn:hover{color:var(--paper-pure);border-color:var(--paper-dim)}
251
+ .agent-btn.active{background:var(--leaf);color:var(--ink);border-color:var(--leaf)}
252
+
253
+ /* ---------- pipeline strip ---------- */
254
+ .flow{display:grid;grid-template-columns:repeat(4,1fr);gap:0;margin-top:8px;
255
+ border:1px solid var(--line);border-radius:13px;overflow:hidden}
256
+ @media(max-width:760px){.flow{grid-template-columns:1fr}}
257
+ .flow-step{padding:24px 22px;border-right:1px solid var(--line);position:relative}
258
+ .flow-step:last-child{border-right:none}
259
+ @media(max-width:760px){.flow-step{border-right:none;border-bottom:1px solid var(--line)}}
260
+ .flow-step .fi{font-family:var(--mono);font-size:12px;color:var(--leaf);letter-spacing:.08em;margin-bottom:12px}
261
+ .flow-step h4{font-size:16px;font-weight:600;margin-bottom:7px;font-family:var(--display);font-weight:500}
262
+ .flow-step p{font-size:13.5px;color:var(--paper-dim);line-height:1.5}
263
+ .flow-step.local{background:rgba(127,176,154,.05)}
264
+ .flow-step.remote{background:rgba(232,116,59,.05)}
265
+
266
+ /* ---------- explorer ---------- */
267
+ .explorer{border:1px solid var(--panel-edge);border-radius:16px;overflow:hidden;
268
+ background:linear-gradient(180deg,var(--panel),var(--surface))}
269
+ .exp-head{display:flex;align-items:center;justify-content:space-between;padding:18px 22px;
270
+ border-bottom:1px solid var(--line);font-family:var(--mono);font-size:13px;color:var(--paper-dim);flex-wrap:wrap;gap:12px}
271
+ .exp-head .filters{display:flex;gap:8px}
272
+ .chip{padding:5px 12px;border:1px solid var(--line-strong);border-radius:20px;cursor:pointer;
273
+ color:var(--paper-dim);transition:all .2s;font-size:12px}
274
+ .chip.active{background:var(--paper-pure);color:var(--ink);border-color:var(--paper-pure)}
275
+ .exp-rows{max-height:340px;overflow-y:auto}
276
+ .erow{display:grid;grid-template-columns:90px 1fr auto auto;gap:16px;align-items:center;
277
+ padding:13px 22px;border-bottom:1px solid var(--line);font-family:var(--mono);font-size:13px}
278
+ @media(max-width:640px){.erow{grid-template-columns:1fr auto;gap:8px}.erow .em-hide{display:none}}
279
+ .erow:hover{background:rgba(243,236,224,.02)}
280
+ .erow .hash{color:var(--panel-edge)}
281
+ .erow .desc{color:var(--paper-pure);white-space:nowrap;overflow:hidden;text-overflow:ellipsis}
282
+ .erow .harness{color:var(--leaf)}
283
+ .erow .when{color:var(--paper-dim)}
284
+ .exp-foot{padding:16px 22px;text-align:center}
285
+
286
+ /* ---------- growth chart ---------- */
287
+ .growth{border:1px solid var(--panel-edge);border-radius:16px;overflow:hidden;
288
+ background:linear-gradient(180deg,var(--panel),var(--surface));margin-bottom:28px;padding:24px 26px 20px}
289
+ .growth-head{display:flex;justify-content:space-between;align-items:flex-start;gap:16px;flex-wrap:wrap;margin-bottom:18px}
290
+ .growth-label{font-family:var(--mono);font-size:12.5px;letter-spacing:.06em;text-transform:uppercase;color:var(--paper-dim)}
291
+ .growth-total{font-family:var(--display);font-size:38px;font-weight:500;letter-spacing:-.02em;color:var(--signal-soft);line-height:1.1;margin-top:6px}
292
+ .growth-unit{font-family:var(--mono);font-size:14px;color:var(--paper-dim);margin-left:8px;letter-spacing:0}
293
+ .growth-range{display:flex;gap:6px}
294
+ .rbtn{font-family:var(--mono);font-size:12.5px;color:var(--paper-dim);border:1px solid var(--line-strong);
295
+ background:none;border-radius:7px;padding:6px 13px;cursor:pointer;transition:all .18s}
296
+ .rbtn:hover{color:var(--paper-pure);border-color:var(--paper-dim)}
297
+ .rbtn.active{background:var(--paper-pure);color:var(--ink);border-color:var(--paper-pure)}
298
+ .growth-chart{position:relative;width:100%;height:300px}
299
+ .growth-chart svg{width:100%;height:100%;display:block;overflow:visible}
300
+ .growth-tip{position:absolute;pointer-events:none;opacity:0;transform:translate(-50%,-100%);
301
+ background:var(--ink);border:1px solid var(--panel-edge);border-radius:8px;padding:8px 11px;
302
+ font-family:var(--mono);font-size:12px;white-space:nowrap;transition:opacity .15s;color:var(--paper-pure);z-index:3}
303
+ .growth-tip .tipn{color:var(--signal-soft);font-weight:500}
304
+ .growth-tip .tipd{color:var(--paper-dim);font-size:11px;margin-top:2px}
305
+ .growth-axis{display:flex;justify-content:space-between;font-family:var(--mono);font-size:11.5px;
306
+ color:var(--panel-edge);margin-top:10px;letter-spacing:.03em}
307
+ .final{text-align:center;padding:100px 0 90px}
308
+ .final h2{margin:0 auto 22px;max-width:18ch}
309
+ .final .cta-row{justify-content:center}
310
+
311
+ footer{border-top:1px solid var(--line);padding:40px 0;color:var(--paper-dim);font-family:var(--mono);font-size:13px}
312
+ .foot-grid{display:flex;justify-content:space-between;align-items:center;gap:20px;flex-wrap:wrap}
313
+ .foot-links{display:flex;gap:22px}
314
+ .foot-links a{text-decoration:none;transition:color .2s}
315
+ .foot-links a:hover{color:var(--paper-pure)}
316
+
317
+ .placeholder-flag{
318
+ display:inline-block;font-family:var(--mono);font-size:10px;letter-spacing:.05em;
319
+ color:var(--signal-soft);border:1px solid rgba(232,116,59,.4);border-radius:4px;
320
+ padding:1px 6px;margin-left:8px;vertical-align:middle;text-transform:uppercase;
321
+ }
322
+ @media(prefers-reduced-motion:reduce){*{animation:none!important;transition:none!important}}
323
+ </style>
324
+ </head>
325
+ <body>
326
+
327
+ <nav>
328
+ <div class="brand"><span class="mark"></span>trace&nbsp;commons</div>
329
+ <div class="nav-right">
330
+ <a href="#" class="nav-dataset js-dataset">Dataset ↗</a>
331
+ <a href="#install" class="btn btn-primary nav-cta">Donate a trace</a>
332
+ </div>
333
+ </nav>
334
+
335
+ <header>
336
+ <div class="wrap">
337
+ <div class="hero-grid">
338
+ <div>
339
+ <div class="eyebrow">An open commons for code agent traces</div>
340
+ <h1>The data that trains coding models should belong to <em>everyone</em>.</h1>
341
+ <p class="lede">Every session you run with a coding agent teaches the next models how to write software. Right now that signal goes to a handful of companies. Donate yours instead, anonymized on your own machine, to an open dataset <strong>anyone can train on</strong>.</p>
342
+ <div class="hero-install">
343
+ <div class="code hero-code">
344
+ <span class="pmt">$</span><code id="heroCmd">npx skills add simonaszilinskas/donate-trace</code>
345
+ <button class="copy" data-copy="heroCmd">copy</button>
346
+ </div>
347
+ <div class="hero-links">
348
+ <a href="#install" class="hero-install-link">installs into any agent. see per-agent commands →</a>
349
+ <a href="#" class="hero-install-link js-dataset">or browse the dataset on Hugging Face ↗</a>
350
+ </div>
351
+ <div class="oss-pledge">
352
+ <span class="oss-ic">◆</span>
353
+ <span>Open-source work only. Donate traces from public, openly-licensed repositories. Never from private, proprietary, or client code.</span>
354
+ </div>
355
+ </div>
356
+ </div>
357
+
358
+ <div class="ledger" aria-label="Live donation ledger">
359
+ <div class="ledger-head">
360
+ <span class="dot"></span><span class="dot"></span><span class="dot"></span>
361
+ <span class="ttl">commons.ledger</span>
362
+ <span class="live">recording</span>
363
+ </div>
364
+ <div id="ledgerRows"></div>
365
+ <div class="ledger-foot"><span>append-only</span><span>live feed</span></div>
366
+ </div>
367
+ </div>
368
+ </div>
369
+ </header>
370
+
371
+ <!-- WHY -->
372
+ <section id="why">
373
+ <div class="wrap">
374
+ <div class="sec-label"><span class="ix">01</span> Why donate</div>
375
+ <h2>An open commons breaks the <em>cycle</em>.</h2>
376
+ <p class="sec-intro">Coding models get better on the data their makers can see. The more everyone leans on the same few tools, the bigger that head start gets, and the harder it becomes for anyone new to catch up. A shared, openly-licensed dataset is how that gap closes. It gives open models and independent labs the same kind of real-world data to learn from.</p>
377
+ <div class="why-grid">
378
+ <div class="card">
379
+ <div class="ic">⌘</div>
380
+ <h3>Level the training data</h3>
381
+ <p>A diff shows where you ended up. A trace shows how you got there. Pooled, those sessions hold the reasoning behind real outcomes, the kind of data that's currently locked inside a few companies.</p>
382
+ </div>
383
+ <div class="card">
384
+ <div class="ic">◇</div>
385
+ <h3>A commons, not a moat</h3>
386
+ <p>Not a private bucket, not a vendor's advantage. One openly-licensed <a href="#" class="inline-link js-dataset">dataset on Hugging Face</a> that any person, lab, or institution can download, study, and build on. It belongs to the community that fills it.</p>
387
+ </div>
388
+ <div class="card">
389
+ <div class="ic">⛉</div>
390
+ <h3>Cleaned before it leaves</h3>
391
+ <p>Anonymization runs locally, inside your agent. Paths, usernames, secrets, and personal data are stripped on your own machine, and you review what's left before anything gets sent.</p>
392
+ </div>
393
+ </div>
394
+ <p class="why-foot">Built in the open, for the open-source community — so the data that trains coding models can belong to everyone.</p>
395
+ </div>
396
+ </section>
397
+
398
+ <!-- INSTALL -->
399
+ <section id="install">
400
+ <div class="wrap">
401
+ <div class="sec-label"><span class="ix">02</span> Install</div>
402
+ <h2>One command. <em>Every</em> agent.</h2>
403
+ <p class="sec-intro">The skill follows the open Agent Skills standard, so one installer drops it into whatever coding agent you already use. No Hugging Face account required to donate. Run the universal command, or target a single agent below.</p>
404
+
405
+ <div class="install-shell">
406
+ <div class="panel-body">
407
+ <div class="step">
408
+ <div class="num">1</div>
409
+ <div class="body">
410
+ <h4>Add the skill</h4>
411
+ <p>Auto-detects your installed agents and installs to each. Works with Claude Code, Codex, pi, opencode, and 50+ others.</p>
412
+ <div class="code">
413
+ <span class="pmt">$</span><code id="installCmd">npx skills add simonaszilinskas/donate-trace</code>
414
+ <button class="copy" data-copy="installCmd">copy</button>
415
+ </div>
416
+ <div class="agent-targets">
417
+ <span class="at-label">target one agent:</span>
418
+ <button class="agent-btn active" data-agent="all" data-cmd="npx skills add simonaszilinskas/donate-trace">all detected</button>
419
+ <button class="agent-btn" data-agent="claude-code" data-cmd="npx skills add simonaszilinskas/donate-trace -a claude-code">Claude Code</button>
420
+ <button class="agent-btn" data-agent="codex" data-cmd="npx skills add simonaszilinskas/donate-trace -a codex">Codex</button>
421
+ <button class="agent-btn" data-agent="pi" data-cmd="npx skills add simonaszilinskas/donate-trace -a pi">pi</button>
422
+ <button class="agent-btn" data-agent="opencode" data-cmd="npx skills add simonaszilinskas/donate-trace -a opencode">opencode</button>
423
+ </div>
424
+ </div>
425
+ </div>
426
+ <div class="step">
427
+ <div class="num">2</div>
428
+ <div class="body">
429
+ <h4>Run it after a session</h4>
430
+ <p>Invoke the command after a session spent on open-source work. Most agents use <code style="color:var(--paper-pure)">/donate-trace</code>; pi uses <code style="color:var(--paper-pure)">/skill:donate-trace</code>. The skill will ask you to confirm the repository is public and openly licensed before it does anything.</p>
431
+ <div class="code">
432
+ <span class="pmt">›</span><code id="runCmd">/donate-trace</code>
433
+ <button class="copy" data-copy="runCmd">copy</button>
434
+ </div>
435
+ </div>
436
+ </div>
437
+ <div class="step">
438
+ <div class="num">3</div>
439
+ <div class="body">
440
+ <h4>Review, then send</h4>
441
+ <p>The skill shows you exactly what it removed and what it will contribute. Nothing is uploaded until you confirm. No Hugging Face account is needed. If you're logged in, you can attribute the donation to your own account; if not, it's submitted anonymously through the Trace Commons server. Either way it opens as a pull request a maintainer reviews before anything goes public.</p>
442
+ </div>
443
+ </div>
444
+ <div class="install-note">
445
+ <span class="b">↳</span>
446
+ <span>Prefer to let your agent do it? Paste the setup prompt and your agent installs the skill, then walks you through your first donation.
447
+ <button class="copy" data-copy="setupPrompt" style="margin-left:6px">copy setup prompt</button>
448
+ <span id="setupPrompt" style="display:none">Install the Trace Commons donation skill by running: npx skills add simonaszilinskas/donate-trace. Then, only if my current project is an open-source public repository, help me anonymize and donate my most recent session.</span>
449
+ </span>
450
+ </div>
451
+ </div>
452
+ </div>
453
+ </div>
454
+ </section>
455
+
456
+ <!-- EXPLORE -->
457
+ <section id="explore">
458
+ <div class="wrap">
459
+ <div class="sec-label"><span class="ix">03</span> Explore the commons</div>
460
+ <h2>Browse what's been <em>donated</em>.</h2>
461
+ <p class="sec-intro">Every donation is one row in a single public dataset. Watch it grow below, filter the feed by agent, or open the whole thing on Hugging Face.</p>
462
+
463
+ <div class="growth">
464
+ <div class="growth-head">
465
+ <div>
466
+ <div class="growth-label">Cumulative traces donated</div>
467
+ <div class="growth-total"><span id="growthTotal">12,480</span><span class="growth-unit">total</span></div>
468
+ </div>
469
+ <div class="growth-range" id="growthRange">
470
+ <button class="rbtn" data-r="30">30d</button>
471
+ <button class="rbtn active" data-r="90">90d</button>
472
+ <button class="rbtn" data-r="all">all</button>
473
+ </div>
474
+ </div>
475
+ <div class="growth-chart">
476
+ <svg id="growthSvg" viewBox="0 0 880 300" preserveAspectRatio="none" aria-label="Cumulative contributions over time"></svg>
477
+ <div class="growth-tip" id="growthTip"></div>
478
+ </div>
479
+ <div class="growth-axis" id="growthAxis"></div>
480
+ </div>
481
+
482
+ <div class="explorer">
483
+ <div class="exp-head">
484
+ <span>commons/agent-traces · <span style="color:var(--paper-pure)">recent donations</span></span>
485
+ <div class="filters" id="expFilters">
486
+ <span class="chip active" data-f="all">all</span>
487
+ <span class="chip" data-f="claude_code">claude</span>
488
+ <span class="chip" data-f="codex">codex</span>
489
+ <span class="chip" data-f="pi">pi</span>
490
+ <span class="chip" data-f="opencode">opencode</span>
491
+ </div>
492
+ </div>
493
+ <div class="exp-rows" id="expRows"></div>
494
+ <div class="exp-foot">
495
+ <a href="#" class="btn btn-ghost js-dataset">Open full dataset on Hugging Face ↗</a>
496
+ </div>
497
+ </div>
498
+ </div>
499
+ </section>
500
+
501
+ <!-- FINAL -->
502
+ <section class="final">
503
+ <div class="wrap">
504
+ <h2>The commons only exists if <em>we</em> build it.</h2>
505
+ <p class="sec-intro" style="margin:0 auto 38px">Open infrastructure doesn't just appear. People build it, one session at a time. Donate a trace and help keep the next coding models open to everyone.</p>
506
+ <div class="cta-row">
507
+ <a href="#install" class="btn btn-primary">Install the skill →</a>
508
+ <a href="#" class="btn btn-ghost js-dataset">Browse the dataset ↗</a>
509
+ </div>
510
+ </div>
511
+ </section>
512
+
513
+ <footer>
514
+ <div class="wrap foot-grid">
515
+ <div class="brand"><span class="mark"></span>trace&nbsp;commons</div>
516
+ <div class="foot-links">
517
+ <a href="#why">Why donate</a>
518
+ <a href="#install">Install</a>
519
+ <a href="#explore">Explore</a>
520
+ <a href="#" class="js-dataset">Dataset ↗</a>
521
+ <a href="https://github.com/simonaszilinskas/donate-trace" class="js-github" target="_blank" rel="noopener">GitHub ↗</a>
522
+ </div>
523
+ <div>Open data · public commons</div>
524
+ </div>
525
+ </footer>
526
+
527
+ <script>
528
/* ---- single source for the dataset URL: change DATASET_URL to point everywhere ---- */
const DATASET_URL = "https://huggingface.co/datasets/trace-commons/agent-traces";
const DATASET_API = "https://huggingface.co/api/datasets/trace-commons/agent-traces";
// Point every .js-dataset link at the dataset. Any real URL (anything other
// than the "#" placeholder) opens in a new tab with rel="noopener".
for (const link of document.querySelectorAll('.js-dataset')) {
  link.setAttribute('href', DATASET_URL);
  if (DATASET_URL === "#") continue;
  link.setAttribute('target', '_blank');
  link.setAttribute('rel', 'noopener');
}
535
+
536
/* ---- agent target buttons: swap displayed command ---- */
const targets = document.querySelector('.agent-targets');
if (targets) {
  // One delegated listener on the container handles every agent button.
  targets.addEventListener('click', event => {
    const chosen = event.target.closest('.agent-btn');
    if (!chosen) return;
    for (const btn of targets.querySelectorAll('.agent-btn')) {
      btn.classList.remove('active');
    }
    chosen.classList.add('active');
    const { cmd, agent } = chosen.dataset;
    document.getElementById('installCmd').textContent = cmd;
    const heroCmd = document.getElementById('heroCmd');
    if (heroCmd) heroCmd.textContent = cmd;
    // pi invokes skills with a different slash-command than the other agents.
    let runCommand = '/donate-trace';
    if (agent === 'pi') runCommand = '/skill:donate-trace';
    document.getElementById('runCmd').textContent = runCommand;
  });
}
549
+
550
/* ---- copy buttons ---- */
// Each .copy button names, via data-copy, the id of the element whose text it copies.
for (const button of document.querySelectorAll('.copy')) {
  button.addEventListener('click', () => {
    const source = document.getElementById(button.dataset.copy);
    navigator.clipboard.writeText(source.textContent).then(() => {
      // Flash a confirmation, then restore the original label after 1.4s.
      const label = button.textContent;
      button.textContent = 'copied ✓';
      button.classList.add('done');
      setTimeout(() => {
        button.textContent = label;
        button.classList.remove('done');
      }, 1400);
    });
  });
}
560
+
561
/* ---- live data from the HF dataset (real tree + commit history) ---- */
// Donated session files, one {harness, file} object per file.
let TRACES = [];
// Cumulative donation counts, one {t: Date, n} point per counted commit.
let SERIES = [];
// Currently selected chart range and explorer filter chip.
let currentRange = '90';
let currentFilter = 'all';
// Container for the hero ledger rows.
const rowsEl = document.getElementById('ledgerRows');
567
+
568
// Extra CSS class for a harness tag: 'cc' for claude_code, nothing for the rest.
function harnessTag(h) {
  if (h === 'claude_code') return 'cc';
  return '';
}
569
// Short display id for a trace file: drop the last extension, keep at most 12 characters.
function shortName(file) {
  const withoutExt = String(file).replace(/\.[^.]+$/,'');
  return withoutExt.slice(0, 12);
}
570
+
571
// Render the hero ledger: the last five entries of TRACES, newest-listed first,
// or an empty-state row when nothing has been loaded.
function renderLedger(){
  if(!rowsEl)return;
  // File and harness names come from the remote dataset tree, so they are
  // untrusted: escape them before they are placed into innerHTML.
  const esc = v => String(v).replace(/[&<>"']/g, c => (
    {'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[c]
  ));
  rowsEl.innerHTML='';
  if(!TRACES.length){
    const el=document.createElement('div');el.className='entry';
    el.innerHTML=`<span class="who">No donations yet — yours could be the first.</span>`;
    rowsEl.appendChild(el);
    return;
  }
  TRACES.slice(-5).reverse().forEach((t,i)=>{
    const el=document.createElement('div');el.className='entry';
    // Stagger the entry animation so rows appear one after another.
    el.style.animationDelay=(i*0.08)+'s';
    // harnessTag() only ever returns 'cc' or '', so it is safe inside the class attribute.
    el.innerHTML=`<span class="seq">tr:${esc(shortName(t.file))}</span>
<span class="who">anonymous <span>· donated session</span></span>
<span class="tag ${harnessTag(t.harness)}">${esc(t.harness)}</span>`;
    rowsEl.appendChild(el);
  });
}
589
+
590
// Fetch the dataset's file tree and commit history, rebuild TRACES and SERIES
// from them, then re-render the ledger, chart and explorer. Any failure is
// swallowed on purpose so the page keeps its empty state instead of breaking.
async function loadDataset(){
  try{
    const [treeR, commitsR] = await Promise.all([
      fetch(DATASET_API + "/tree/main?recursive=true"),
      fetch(DATASET_API + "/commits/main")
    ]);
    if(treeR.ok){
      const tree = await treeR.json();
      if(Array.isArray(tree)){
        TRACES = tree
          .filter(f=>f && f.type==='file' && typeof f.path==='string'
            && f.path.indexOf('sessions/')===0 && !f.path.endsWith('.gitkeep'))
          .map(f=>f.path.split('/'))
          // A trace lives at sessions/<harness>/<file...>. Anything directly
          // under sessions/ has no harness folder and is not a donated trace.
          .filter(p=>p.length>=3)
          .map(p=>({harness:p[1], file:p.slice(2).join('/')}));
      }
    }
    if(commitsR.ok){
      const commits = await commitsR.json();
      if(Array.isArray(commits)){
        // count commits that add a trace: anonymous path -> "Donate ...",
        // attributed path (hf upload) -> "Upload sessions/...". Exclude infra
        // commits ("Create ... folder", "Add dataset card", README, etc.).
        const dates = commits
          .filter(c=>/^(donate|upload)\b/i.test(c.title||''))
          .map(c=>new Date(c.date))
          // Drop unparseable dates: an Invalid Date would break the sort and the axis labels.
          .filter(d=>!Number.isNaN(d.getTime()))
          .sort((a,b)=>a-b);
        let n=0; SERIES = dates.map(d=>({t:d, n:++n}));
      }
    }
  }catch(e){/* offline / API down — keep the honest empty state */}
  renderLedger();
  draw(currentRange);
  renderEx(currentFilter);
}
618
+
619
/* ---- growth chart: real cumulative donations from commit history ---- */
// DOM handles for the chart, its hover tooltip and the date axis below it.
const svg = document.getElementById('growthSvg');
const tip = document.getElementById('growthTip');
const axisEl = document.getElementById('growthAxis');
// SVG namespace used for every createElementNS call.
const NS = 'http://www.w3.org/2000/svg';
// Chart coordinate space (matches the svg viewBox "0 0 880 300") and inner padding.
const VW = 880;
const VH = 300;
const PAD = 8;
// Points currently plotted; reassigned by draw().
let series = [];
626
// Format a number for display using the runtime's default locale.
function fmt(n) {
  const text = n.toLocaleString();
  return text;
}
627
// Format a Date as a short en-US "month day" label.
function fmtDate(d) {
  const options = {month:'short',day:'numeric'};
  return d.toLocaleDateString('en-US', options);
}
628
// Pick the points to plot for a range button: 'all', or a number of days as a
// string. Falls back to the full series when fewer than two points fall
// inside the window. Always returns a fresh array.
function seriesForRange(range) {
  if (SERIES.length === 0) return [];
  const everything = () => SERIES.slice();
  if (range === 'all') return everything();

  const cutoff = new Date();
  cutoff.setDate(cutoff.getDate() - Number(range));
  const recent = SERIES.filter(point => point.t >= cutoff);
  return recent.length < 2 ? everything() : recent;
}
635
// Replace the chart with a centered message when there is nothing to plot.
function drawEmpty() {
  svg.innerHTML = '';
  axisEl.innerHTML = '';

  const label = document.createElementNS(NS, 'text');
  const attrs = {
    'x': VW / 2,
    'y': VH / 2,
    'text-anchor': 'middle',
    'fill': 'rgba(243,236,224,0.35)',
    'font-size': '15',
  };
  for (const [key, value] of Object.entries(attrs)) label.setAttribute(key, value);

  // A single recorded donation gets its own message; otherwise invite the first one.
  if (SERIES.length === 1) {
    label.textContent = 'First donation in — the commons has begun.';
  } else {
    label.textContent = 'No donations yet — be the first to contribute.';
  }
  svg.appendChild(label);
}
645
// Redraw the cumulative-donations chart for the given range ('30', '90' or
// 'all') and stash the scale functions on the svg element for onMove().
function draw(days) {
  // The headline total is always the true trace count, whatever range is shown.
  document.getElementById('growthTotal').textContent = fmt(TRACES.length);
  series = seriesForRange(days);
  if (series.length < 2) {
    drawEmpty();
    return;
  }
  svg.innerHTML = '';

  // Create an SVG element and set its attributes in one step.
  const make = (tag, attrs) => {
    const node = document.createElementNS(NS, tag);
    Object.entries(attrs).forEach(([key, value]) => node.setAttribute(key, value));
    return node;
  };

  // Scales: X spreads the points evenly by index; Y maps counts into the plot height.
  const xs = series.length - 1;
  const minN = series[0].n;
  const maxN = series[xs].n;
  const plotH = VH - 2 * PAD - 10;
  const spanN = (maxN - minN) || 1;
  const X = i => PAD + (i / xs) * (VW - 2 * PAD);
  const Y = n => VH - PAD - ((n - minN) / spanN) * plotH;

  // Gradient definition used to fill the area under the line.
  const defs = make('defs', {});
  defs.innerHTML=`<linearGradient id="gfill" x1="0" y1="0" x2="0" y2="1">
<stop offset="0%" stop-color="rgba(232,116,59,0.34)"/>
<stop offset="100%" stop-color="rgba(232,116,59,0)"/></linearGradient>`;
  svg.appendChild(defs);

  // Four evenly spaced horizontal gridlines.
  [0, 1, 2, 3].forEach(g => {
    const y = PAD + (g / 3) * plotH;
    svg.appendChild(make('line', {
      'x1': PAD, 'x2': VW - PAD,
      'y1': y, 'y2': y,
      'stroke': 'rgba(243,236,224,0.07)', 'stroke-width': '1',
    }));
  });

  // Line path through every point, plus the same path closed along the baseline.
  const dLine = series
    .map((p, i) => (i === 0 ? 'M ' : 'L ') + X(i) + ' ' + Y(p.n))
    .join(' ');
  const dArea = dLine + ` L ${X(xs)} ${VH-PAD} L ${X(0)} ${VH-PAD} Z`;
  const area = svg.appendChild(make('path', {'d': dArea, 'fill': 'url(#gfill)'}));
  const line = svg.appendChild(make('path', {
    'd': dLine, 'fill': 'none',
    'stroke': 'var(--signal)', 'stroke-width': '2.5',
    'stroke-linejoin': 'round', 'vector-effect': 'non-scaling-stroke',
  }));

  // Draw-on animation: start with the dash offset equal to the path length,
  // force a layout read, then transition the offset to zero.
  const len = line.getTotalLength();
  line.style.strokeDasharray = len;
  line.style.strokeDashoffset = len;
  line.getBoundingClientRect();
  line.style.transition = 'stroke-dashoffset 1.1s ease-out';
  line.style.strokeDashoffset = 0;
  area.style.opacity = 0;
  area.style.transition = 'opacity 1s ease-out .3s';
  requestAnimationFrame(() => area.style.opacity = 1);

  // Endpoint dot at the final point (offset 1.5 units to the left).
  svg.appendChild(make('circle', {
    'cx': X(xs) - 1.5, 'cy': Y(maxN), 'r': '4',
    'fill': 'var(--signal)', 'stroke': 'var(--surface)', 'stroke-width': '2',
    'vector-effect': 'non-scaling-stroke',
  }));

  // Axis labels: first, middle and last plotted dates.
  const mid = Math.floor(series.length / 2);
  axisEl.innerHTML = [series[0], series[mid], series[xs]]
    .map(p => `<span>${fmtDate(p.t)}</span>`)
    .join('');

  // Scale functions read by the hover handler.
  svg._X = X;
  svg._Y = Y;
  svg._xs = xs;
}
699
// Hover handler for the chart: position the tooltip over the data point
// nearest to the pointer and show that point's count and date.
function onMove(ev){
  // draw() only installs svg._X/_Y/_xs when it plots at least two points. With
  // zero or one point the chart shows the empty-state message and there is
  // nothing to hover, so bail out instead of dereferencing missing scales.
  if(series.length<2 || typeof svg._X!=='function' || typeof svg._Y!=='function'){
    tip.style.opacity=0;
    return;
  }
  const rect=svg.getBoundingClientRect();
  const rel=(ev.clientX-rect.left)/rect.width;
  const i=Math.round(rel*svg._xs);
  // Number.isInteger also rejects the NaN/Infinity produced by a zero-width rect.
  if(!Number.isInteger(i)||i<0||i>svg._xs||!series[i]){tip.style.opacity=0;return}
  const p=series[i];
  // Convert viewBox coordinates to on-screen pixels inside the chart box.
  const px=(svg._X(i)/VW)*rect.width;
  const py=(svg._Y(p.n)/VH)*rect.height;
  tip.style.left=px+'px';tip.style.top=(py-12)+'px';tip.style.opacity=1;
  tip.innerHTML=`<span class="tipn">${fmt(p.n)} traces</span><div class="tipd">${fmtDate(p.t)}</div>`;
}
711
// Tooltip follows the pointer over the chart and hides when it leaves.
svg.addEventListener('mousemove', onMove);
svg.addEventListener('mouseleave', () => tip.style.opacity = 0);
// Range buttons (30d / 90d / all): mark the clicked one active and redraw.
document.getElementById('growthRange').addEventListener('click', event => {
  const picked = event.target.closest('.rbtn');
  if (!picked) return;
  for (const btn of document.querySelectorAll('#growthRange .rbtn')) {
    btn.classList.remove('active');
  }
  picked.classList.add('active');
  currentRange = picked.dataset.r;
  draw(currentRange);
});
718
const expRows=document.getElementById('expRows');
// Render the explorer list: the last ten entries of TRACES that match harness
// filter `f` ('all' matches everything), newest-listed first, or an
// empty-state row when none match.
function renderEx(f){
  if(!expRows)return;
  // File and harness names come from the remote dataset tree, so they are
  // untrusted: escape them before they are placed into innerHTML.
  const esc = v => String(v).replace(/[&<>"']/g, c => (
    {'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[c]
  ));
  // Percent-encode each path segment so characters such as '#', '?' or spaces
  // in a file name cannot change where the link points.
  const encPath = p => String(p).split('/').map(encodeURIComponent).join('/');
  expRows.innerHTML='';
  const rows=TRACES.filter(r=>f==='all'||r.harness===f).slice(-10).reverse();
  if(!rows.length){
    const el=document.createElement('div');el.className='erow';
    el.innerHTML=`<span class="desc">No donated traces here yet. Run <code>/donate-trace</code> on an open-source session to be among the first.</span>`;
    expRows.appendChild(el);
    return;
  }
  rows.forEach(r=>{
    const el=document.createElement('div');el.className='erow';
    const href=`${DATASET_URL}/blob/main/sessions/${encPath(r.harness)}/${encPath(r.file)}`;
    el.innerHTML=`<span class="hash em-hide">tr:${esc(shortName(r.file))}</span>
<span class="desc">${esc(r.file)}</span>
<span class="harness">${esc(r.harness)}</span>
<span class="when em-hide"><a class="js-dataset-row" href="${esc(href)}" target="_blank" rel="noopener">view ↗</a></span>`;
    expRows.appendChild(el);
  });
}
738
// Explorer filter chips: mark the clicked chip active and re-render the list.
document.getElementById('expFilters').addEventListener('click', event => {
  const chip = event.target.closest('.chip');
  if (!chip) return;
  for (const other of document.querySelectorAll('#expFilters .chip')) {
    other.classList.remove('active');
  }
  chip.classList.add('active');
  currentFilter = chip.dataset.f;
  renderEx(currentFilter);
});
// initial render (empty/honest state) then hydrate from the live dataset
renderLedger();
draw(currentRange);
renderEx(currentFilter);
loadDataset();
748
+ </script>
749
+ </body>
750
+ </html>
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastapi==0.115.6
2
+ uvicorn[standard]==0.34.0
3
+ huggingface_hub==0.27.1
4
+ python-multipart==0.0.20
scrub.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ scrub.py — deterministic anonymization pass for Trace Commons donations.
4
+
5
+ Removes the high-confidence, crisply-patterned leaks from a coding-agent
6
+ session before it is reviewed and donated:
7
+ - home-directory paths and the username embedded in them
8
+ - common secret formats (API keys, tokens, PEM blocks, JWTs, env assignments)
9
+ - email addresses
10
+
11
+ This is intentionally NOT the whole anonymization story. Fuzzy things
12
+ (personal names in prose, company names, internal codenames) are left to the
13
+ review pass that the skill performs afterwards. The split is deliberate:
14
+ code handles the patterns that have signatures; a human/LLM handles meaning.
15
+
16
+ The script walks the parsed JSON of each session line and rewrites string
17
+ values in place, so it works regardless of where in the structure a string
18
+ sits. It writes a cleaned file plus a JSON report of every redaction.
19
+
20
+ Usage:
21
+ python scrub.py --in session.jsonl --harness claude_code \
22
+ --out cleaned.jsonl --report report.json
23
+ """
24
+
25
+ import argparse
26
+ import json
27
+ import re
28
+ import sys
29
+ from collections import Counter
30
+
31
# --- redaction patterns -----------------------------------------------------
# Order matters: redact_string() applies SECRET_PATTERNS from top to bottom, so
# a more specific pattern must be listed before any more general pattern that
# would also match the same text.

# POSIX home directories. Group 1 is the "/Users" or "/home" root, group 2 the
# username segment that follows it.
HOME_PATH = re.compile(r'(/(?:Users|home))/([^/\s"\'\\]+)')
# Windows user paths too. Group 1 is the "<drive>:\Users\" prefix, group 2 the
# username segment.
WIN_PATH = re.compile(r'([A-Za-z]:\\Users\\)([^\\\s"\']+)', re.IGNORECASE)

EMAIL = re.compile(r'\b[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}\b')

# Secrets — each tuple is (name, compiled regex). Keep these conservative
# enough to avoid mangling ordinary prose but broad enough to catch real keys.
SECRET_PATTERNS = [
    ("aws_access_key", re.compile(r'\bAKIA[0-9A-Z]{16}\b')),
    ("aws_secret", re.compile(r'\b(?i:aws_secret_access_key)\s*[=:]\s*["\']?[A-Za-z0-9/+=]{40}["\']?')),
    ("github_token", re.compile(r'\bgh[pousr]_[A-Za-z0-9]{36,}\b')),
    # anthropic_key must come before openai_key: the generic "sk-" pattern also
    # matches "sk-ant-..." keys, so listing it first would consume every
    # Anthropic key and this entry would never be tallied.
    ("anthropic_key", re.compile(r'\bsk-ant-[A-Za-z0-9_\-]{20,}\b')),
    ("openai_key", re.compile(r'\bsk-[A-Za-z0-9_\-]{20,}\b')),
    ("slack_token", re.compile(r'\bxox[baprs]-[A-Za-z0-9\-]{10,}\b')),
    ("google_api_key", re.compile(r'\bAIza[0-9A-Za-z_\-]{35}\b')),
    ("jwt", re.compile(r'\beyJ[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+\b')),
    ("private_key_block", re.compile(r'-----BEGIN (?:RSA |EC |OPENSSH |DSA |PGP )?PRIVATE KEY-----.*?-----END (?:RSA |EC |OPENSSH |DSA |PGP )?PRIVATE KEY-----', re.DOTALL)),
    ("bearer_token", re.compile(r'\b(?i:bearer)\s+[A-Za-z0-9_\-\.=]{20,}')),
    ("connection_string", re.compile(r'\b(?:postgres|postgresql|mysql|mongodb(?:\+srv)?|redis|amqp)://[^\s"\'<>]+:[^\s"\'<>@]+@[^\s"\'<>]+')),
    # generic KEY=secret env assignments where the value looks secret-ish
    ("env_secret", re.compile(r'\b([A-Z][A-Z0-9_]*(?:KEY|TOKEN|SECRET|PASSWORD|PASSWD|PWD|CREDENTIAL|API)[A-Z0-9_]*)\s*=\s*["\']?([^\s"\']{8,})["\']?')),
]
57
+
58
+
59
def redact_string(s, counts):
    """Apply all redactions to a single string, tallying what was changed.

    Args:
        s: The value to scrub. Anything that is not a non-empty ``str`` is
            returned unchanged.
        counts: A ``collections.Counter``-style mapping (missing keys count
            as 0). It is incremented once per redaction, keyed by category:
            a SECRET_PATTERNS name, ``"home_path"`` or ``"email"``.

    Returns:
        The scrubbed string.

    The tallies are idempotent: text that already carries this function's own
    placeholders (``KEY=[REDACTED_SECRET]``, ``/Users/USER``) is not counted
    as a fresh redaction. Without that, scrubbing already-scrubbed text (as
    the server backstop does) would report redactions where nothing leaked,
    and an ``env_secret`` hit would double-count a value that an earlier
    pattern had already replaced. The returned text is the same either way.
    """
    if not isinstance(s, str) or not s:
        return s

    secret_placeholder = "[REDACTED_SECRET]"

    # Secrets first (before paths/emails, since some secrets contain those shapes)
    for name, pat in SECRET_PATTERNS:
        # _name is bound as a default so each callback keeps its own pattern name.
        def _sub(m, _name=name):
            if _name == "env_secret":
                # Count only values that are not already the placeholder, but
                # always emit the normalized form: keep the key name, redact
                # the value.
                if m.group(2) != secret_placeholder:
                    counts[_name] += 1
                return f"{m.group(1)}={secret_placeholder}"
            counts[_name] += 1
            return secret_placeholder
        s = pat.sub(_sub, s)

    # Home paths -> normalize the username segment
    def _home(m):
        if m.group(2) == "USER":
            # Already normalized; leave it alone and do not count it.
            return m.group(0)
        counts["home_path"] += 1
        return f"{m.group(1)}/USER"
    s = HOME_PATH.sub(_home, s)

    def _win(m):
        if m.group(2) == "USER":
            return m.group(0)
        counts["home_path"] += 1
        return f"{m.group(1)}USER"
    s = WIN_PATH.sub(_win, s)

    # Emails
    def _email(m):
        counts["email"] += 1
        return "[REDACTED_EMAIL]"
    s = EMAIL.sub(_email, s)

    return s
92
+
93
+
94
def walk(obj, counts):
    """Recursively scrub every string in a parsed JSON structure.

    Args:
        obj: Any value produced by ``json.loads`` (str, list, dict, number,
            bool or None).
        counts: Redaction tally, passed through to ``redact_string``.

    Returns:
        A new structure of the same shape with every string scrubbed.
        Non-string scalars are returned unchanged.

    Dict keys are scrubbed as well as values: a path, email or secret used as
    an object key would otherwise pass through untouched. If two distinct keys
    redact to the same text, the later entry replaces the earlier one.
    """
    if isinstance(obj, str):
        return redact_string(obj, counts)
    if isinstance(obj, list):
        return [walk(x, counts) for x in obj]
    if isinstance(obj, dict):
        # redact_string returns non-string keys unchanged.
        return {redact_string(k, counts): walk(v, counts) for k, v in obj.items()}
    return obj
103
+
104
+
105
def scrub_text(raw, harness):
    """Scrub a raw session string and report what was redacted.

    Importable so the server can run the exact same detection as the skill,
    as a backstop. main() below is the file-based wrapper around it.

    Args:
        raw: Full text of a session, either JSONL (one JSON value per line)
            or a single multi-line JSON object.
        harness: Harness name; copied into the report unchanged.

    Returns:
        A ``(cleaned_text, report)`` tuple. ``cleaned_text`` has one output
        line per processed input and ends with a newline. ``report`` is a
        dict with ``harness``, ``lines_processed``, ``redactions`` (per
        category counts) and ``total_redactions``.
    """
    counts = Counter()
    out_lines = []
    n_lines = 0

    body = raw.strip()
    # A multi-line text that opens with "{" and does not look like JSONL is
    # first tried as one JSON document.
    single_doc = (
        body.startswith("{")
        and body.count("\n") > 0
        and not _looks_like_jsonl(body)
    )

    if single_doc:
        try:
            parsed = json.loads(body)
        except json.JSONDecodeError:
            # Not one valid document after all: fall back to line-by-line.
            single_doc = False
        else:
            out_lines.append(json.dumps(walk(parsed, counts), ensure_ascii=False))
            n_lines = 1

    if not single_doc:
        for candidate in (ln.strip() for ln in raw.splitlines()):
            if not candidate:
                continue
            n_lines += 1
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError:
                # Not JSON: scrub the raw text of the line instead.
                scrubbed = redact_string(candidate, counts)
            else:
                scrubbed = json.dumps(walk(parsed, counts), ensure_ascii=False)
            out_lines.append(scrubbed)

    report = {
        "harness": harness,
        "lines_processed": n_lines,
        "redactions": dict(counts),
        "total_redactions": sum(counts.values()),
    }
    cleaned_text = "\n".join(out_lines) + "\n"
    return cleaned_text, report
148
+
149
+
150
def main():
    """Command-line entry point: scrub one session file and write the results.

    Reads the ``--in`` file, runs scrub_text() with the ``--harness`` name,
    writes the cleaned text to ``--out`` and the JSON report to ``--report``,
    then prints a human-readable summary to stdout.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--in", dest="inp", required=True)
    for flag in ("--harness", "--out", "--report"):
        parser.add_argument(flag, required=True)
    args = parser.parse_args()

    # Undecodable bytes are replaced rather than aborting the scrub.
    with open(args.inp, "r", encoding="utf-8", errors="replace") as src:
        raw = src.read()

    cleaned_text, report = scrub_text(raw, args.harness)

    with open(args.out, "w", encoding="utf-8") as dst:
        dst.write(cleaned_text)

    with open(args.report, "w", encoding="utf-8") as dst:
        json.dump(report, dst, indent=2)

    # Human-readable summary to stdout for the skill to relay
    tally = Counter(report["redactions"])
    summary = [f"Scrubbed {report['lines_processed']} lines from {args.harness} session."]
    if tally:
        summary.extend(f" {v}× {k}" for k, v in tally.most_common())
    else:
        summary.append(" No high-confidence secrets or paths found by the automated pass.")
    summary.append(f"\nCleaned file: {args.out}")
    summary.append(f"Report: {args.report}")
    summary.append("\nThis is the automated pass only. Now do the review pass for names,")
    summary.append("company names, and internal references before showing the user.")
    for text in summary:
        print(text)
182
+
183
+
184
+ def _looks_like_jsonl(text):
185
+ """Heuristic: if the first two non-empty lines each parse as JSON, it's JSONL."""
186
+ parsed = 0
187
+ for line in text.splitlines():
188
+ line = line.strip()
189
+ if not line:
190
+ continue
191
+ try:
192
+ json.loads(line)
193
+ parsed += 1
194
+ except json.JSONDecodeError:
195
+ return False
196
+ if parsed >= 2:
197
+ return True
198
+ return False
199
+
200
+
201
+ if __name__ == "__main__":
202
+ main()