| """InferenceClient calls: map (per-session digests) + reduce (bulletin).""" |
|
|
| import datetime as dt |
| import hashlib |
| import json |
| import os |
| from concurrent.futures import ThreadPoolExecutor |
|
|
| from huggingface_hub import InferenceClient |
|
|
| from extract import event_role, event_tool_names |
|
|
# Hub model id the InferenceClient is constructed with in get_client().
MODEL = "Qwen/Qwen3.5-9B"

# Payload passed as `extra_body=` on the chat_completion calls in this module;
# it sets the chat template's `enable_thinking` flag to False.
_NO_THINK = {"chat_template_kwargs": {"enable_thinking": False}}
|
|
|
|
def get_client(token: str | None = None) -> InferenceClient:
    """Construct the shared InferenceClient for MODEL.

    Kept as the single construction point so a later switch to OAuth only
    touches this function.

    Args:
        token: Access token to use. When ``None`` the ``HF_TOKEN`` environment
            variable is consulted instead.

    Returns:
        An ``InferenceClient`` configured with ``MODEL`` and the resolved token.

    Raises:
        RuntimeError: If no non-empty token could be resolved.
    """
    resolved = os.environ.get("HF_TOKEN") if token is None else token
    if resolved:
        return InferenceClient(model=MODEL, token=resolved)
    raise RuntimeError(
        "HF_TOKEN is not set. Export it in your shell or pass token= explicitly."
    )
|
|
|
|
| |
|
|
# System prompt for the per-session "map" call made in digest_session(): it
# asks the model for a strict-JSON digest (session_id, intent, top_quotes,
# tells, mood) that describes the human user only, never the agent.
_DIGEST_SYSTEM = """You are analysing a single coding-agent session transcript. The TRANSCRIPT shows messages between a HUMAN USER and an AGENT (the AI). Return signals about the HUMAN USER only — never about the agent.

Return STRICT JSON:
{
"session_id": <echo>,
"intent": "<one sentence: what the user was trying to do>",
"top_quotes": [<1-3 short verbatim quotes from USER messages only>],
"tells": [<3-5 short strings: signals about the user — frustration, confidence, knowledge gaps, communication style, premature optimization, doc-avoidance, etc.>],
"mood": "<one short phrase: the session's emotional arc>"
}

Hard rules:
- Only include things the user actually said or did. Do not attribute agent behaviour to the user.
- top_quotes must literally appear in user messages.
- Be concise and specific. No invented quotes."""
|
|
|
|
def digest_session(client: InferenceClient, transcript: str, session_id: str) -> dict:
    """Ask the model for a JSON digest of one session transcript (the "map" step).

    Args:
        client: Object exposing ``chat_completion`` (normally an InferenceClient).
        transcript: Transcript text of a single session.
        session_id: Identifier of that session; it is sent to the model and
            stamped onto the returned digest.

    Returns:
        The parsed digest dict with ``session_id`` set to the caller's value.
        If the request or the parsing fails, ``{"session_id": ..., "error": ...}``
        is returned instead of raising, so one bad session cannot abort a batch.
    """
    user_prompt = f"session_id: {session_id}\n\nTranscript:\n{transcript}"
    try:
        resp = client.chat_completion(
            messages=[
                {"role": "system", "content": _DIGEST_SYSTEM},
                {"role": "user", "content": user_prompt},
            ],
            response_format={"type": "json_object"},
            max_tokens=800,
            temperature=0,
            extra_body=_NO_THINK,
        )
        raw = resp.choices[0].message.content or "{}"
        data = json.loads(raw)
        if not isinstance(data, dict):
            # Valid JSON that is not an object (list, string, ...) is unusable
            # as a digest; report it clearly instead of via an AttributeError.
            raise ValueError(
                f"expected a JSON object, got {type(data).__name__}"
            )
        # The caller's id is authoritative: the model is only asked to echo it
        # and may truncate or alter it, which would break later lookups that
        # match quotes back to sessions by this id.
        data["session_id"] = session_id
        return data
    except Exception as e:
        # Deliberately broad: this is a best-effort step and failures are
        # reported in-band via the "error" key.
        return {"session_id": session_id, "error": str(e)}
|
|
|
|
def digest_all(
    client: InferenceClient,
    transcripts: list[tuple[str, str]],
    max_workers: int = 8,
) -> list[dict]:
    """Digest every ``(session_id, transcript)`` pair on a thread pool.

    Results keep the order of ``transcripts``; any digest carrying an
    ``"error"`` key is left out of the returned list.
    """
    def _digest_pair(pair):
        session_id, text = pair
        return digest_session(client, text, session_id)

    pool = ThreadPoolExecutor(max_workers=max_workers)
    with pool:
        digests = list(pool.map(_digest_pair, transcripts))

    kept = []
    for digest in digests:
        if "error" in digest:
            continue
        kept.append(digest)
    return kept
|
|
|
|
| |
|
|
|
|
| def _parse_ts(ts: str) -> dt.datetime | None: |
| try: |
| return dt.datetime.fromisoformat(ts.replace("Z", "+00:00")) |
| except Exception: |
| return None |
|
|
|
|
def compute_stats(sessions: list[tuple[str, list[dict]]]) -> dict:
    """Summarise sessions as user-turn count, distinct-tool count and date span.

    Works on both Claude-Code style and pi-sessions style events because role
    and tool extraction are delegated to ``event_role`` / ``event_tool_names``.

    Returns:
        ``{"turns": int, "tools": int, "span": str}`` where ``span`` is ``""``
        when no event carried a parseable string ``timestamp``.
    """
    user_turns = 0
    tool_names: set[str] = set()
    stamps: list[dt.datetime] = []

    for _, events in sessions:
        for event in events:
            if event_role(event) == "user":
                user_turns += 1
            raw_ts = event.get("timestamp")
            parsed = _parse_ts(raw_ts) if isinstance(raw_ts, str) else None
            if parsed:
                stamps.append(parsed)
            tool_names.update(event_tool_names(event))

    if not stamps:
        span = ""
    else:
        ordered = sorted(stamps)
        earliest = ordered[0]
        latest = ordered[-1]
        # The year is printed once when both ends fall in the same year.
        start_fmt = "%b %d" if earliest.year == latest.year else "%b %d, %Y"
        span = f"{earliest.strftime(start_fmt)} → {latest.strftime('%b %d, %Y')}"

    return {"turns": user_turns, "tools": len(tool_names), "span": span}
|
|
|
|
def serial_for(user: str) -> str:
    """Return a deterministic ``PR-NNNN`` serial derived from the user handle.

    The SHA-256 digest of the UTF-8 encoded handle is read as a big-endian
    integer and reduced modulo 10000, then zero-padded to four digits.
    """
    digest = hashlib.sha256(user.encode("utf-8")).digest()
    bucket = int.from_bytes(digest, "big") % 10000
    return "PR-" + str(bucket).zfill(4)
|
|
|
|
| |
|
|
| |
# System prompt for the "reduce" call made in bulletin(): it tells the model to
# turn the per-session digests into one JSON bulletin (user, archetype,
# tagline, exactly three sins, forecast) and spells out per-field length
# budgets, voice, hard rules and the drafting procedure.
_BULLETIN_SYSTEM = """You are the Hugging Face Roastery. You read agent-trace dataset digests and write a gently savage personality bulletin about the HUMAN USER who was prompting the agent — never about the agent itself. The output is a vintage printed card; every field has a strict length budget. Be specific, be funny, never punch down.

You will receive:
- user: the Hugging Face handle of the operator.
- dataset: the Hub dataset ID being analysed.
- digests: a JSON list of per-session digests already extracted from the traces (intent, top_quotes, tells, mood).

Return EXACTLY one JSON object, no prose, no markdown:
{
"user": "<bare handle, no @>",
"archetype": ["The <adjective>", "<Noun>"],
"tagline": "<130-170 chars, 2-3 italic lines, sentences only, end on a punchline>",
"sins": [
{"n":"01","title":"<50-90 chars: one concrete user behaviour, sentence case, no quotes>","meta":"<30-110 chars: a VERBATIM quote from a top_quotes entry — raw text only, NO surrounding quote marks (render adds them)>","source":"<the exact session_id of the digest the quote was taken from>"},
{"n":"02","title":"...","meta":"...","source":"..."},
{"n":"03","title":"...","meta":"...","source":"..."}
],
"forecast": {"headline":"The week ahead","body":"<270-340 chars, horoscope-style, end with 'Lucky <x>: <y>. Avoid: <z>.'>"}
}

Field budgets (hard limits — overflow breaks the layout):
- archetype[0]: 8-18 chars (line 1, usually "The <adjective>")
- archetype[1]: 6-14 chars (line 2, title-cased punch noun)
- tagline: 130-170 chars
- sins[].title: 50-90 chars
- sins[].meta: 30-110 chars (raw verbatim user quote, no surrounding quote marks)
- sins[].source: the session_id from the digest the quote came from
- forecast.body: 270-340 chars, ends with "Lucky <x>: <y>. Avoid: <z>."

The sins array MUST contain exactly 3 objects. Do not emit fewer.

Voice:
- Sharp but loving — group-chat energy, not insult-comic. Roast habits a thoughtful friend would call out.
- Sentence case for titles. Smart quotes ( " " ), en-dashes ( – ), em-dashes ( — ). No exclamation marks. No emojis.
- Specific, not generic. Every observation must be grounded in something the digests actually contain.

Hard rules:
1. Roast the USER, not the agent. The user cannot run code; only the agent can. Wrong: "Parsed JSON with a regex twice." (that's the agent). Right: "Asked the agent to parse JSON with a regex twice." / "Demanded a regex over a JSON parser, against advice."
2. EVERY sins[].meta MUST be a verbatim top_quote from one of the digests. Emit the raw text only — NO surrounding quote marks (the renderer wraps it). No paraphrasing, no rewording, no analysis. Just the user's own words. If no top_quote fits a sin you've drafted, pick a different sin that does have a fitting quote.
3. EVERY sins[].source MUST be the exact `session_id` value of the digest the quote came from. Copy it verbatim — do not shorten, rename, or invent.
4. The title is the roast sentence (no quotes inside it); the meta below it is the receipt — the user's own words that prove the sin. Title and meta must be different content, not paraphrases of each other.
5. No PII. No emails, no real names, no private repos. Public handles and public dataset names are fine.
6. No identity punching. Roast process and habits — not who the user is. Off-limits: appearance, nationality, gender, politics, illness. Fair game: ignoring docs, refactor addiction, regex misuse, vibes-driven coding, asking the same thing six ways, premature optimisation, late-night commits.

Procedure:
1. Skim the digests for recurring patterns (repeated questions, premature optimisation, doc avoidance, tone, tool misuse, mood arc).
2. Pick ONE crisp archetype. Examples: The Premature Optimizer · The Vibes Driver · The Doc Avoider · The Refactor Romantic · The Confidence Auditor · The Apology Engineer · The TODO Composer. Invent freely.
3. Pick three sins the digests support. For each: write a roast sentence as the title, pick a verbatim top_quote that proves the sin and place it (raw, no quote marks) as the meta, and set source to that digest's session_id.
4. Tagline: 2-3 short sentences piling on the archetype with concrete examples. End on a punchline.
5. Horoscope: one absurd technical prediction grounded in a real user pattern. Close with "Lucky <something>: <x>. Avoid: <y>."
6. Validate lengths against budgets. Trim or pad before emitting.
7. Emit JSON only. No code fences. No commentary."""
|
|
|
|
def bulletin(
    client: InferenceClient,
    digests: list[dict],
    user: str,
    dataset_id: str,
) -> dict:
    """Run the single "reduce" call that writes the bulletin content.

    The digests are serialised to indented JSON and sent together with the
    user handle, the dataset id and a short reminder of the key constraints.
    The reply is parsed with ``json.loads`` (an empty reply is read as
    ``"{}"``). Errors from the client or the JSON parser are not caught here.

    Returns:
        The decoded model output (archetype, tagline, sins, forecast).
    """
    digests_json = json.dumps(digests, ensure_ascii=False, indent=2)
    reminder = (
        "Reminder: emit EXACTLY 3 sins. Each sin needs `title` (the roast), "
        "`meta` (a VERBATIM top_quote, raw text only — no surrounding quote "
        "marks; the renderer wraps them), and `source` (the session_id of the "
        "digest the quote was taken from, copied verbatim). "
        "Tagline ≤170 chars; forecast.body ≤340 chars."
    )
    prompt_text = (
        f"user: {user}\n"
        f"dataset: {dataset_id}\n\n"
        f"digests (JSON list):\n{digests_json}\n\n"
        f"{reminder}"
    )
    messages = [
        {"role": "system", "content": _BULLETIN_SYSTEM},
        {"role": "user", "content": prompt_text},
    ]
    completion = client.chat_completion(
        messages=messages,
        response_format={"type": "json_object"},
        max_tokens=1500,
        temperature=0,
        extra_body=_NO_THINK,
    )
    content = completion.choices[0].message.content
    return json.loads(content if content else "{}")
|
|
|
|
def build_report(
    client: InferenceClient,
    digests: list[dict],
    user: str,
    dataset_id: str,
    stats: dict,
) -> dict:
    """Merge the model's bulletin with computed stats into the report for render.py.

    Model output is untrusted: any field that is missing or has the wrong
    shape (payload not an object, ``sins`` not a list, a sin or the forecast
    not an object) is replaced by a placeholder instead of raising, so a
    sloppy completion still yields a renderable report.

    Args:
        client: Client forwarded to ``bulletin``.
        digests: Per-session digests forwarded to ``bulletin``.
        user: Handle used as fallback for the ``user`` field and for the serial.
        dataset_id: Dataset id, forwarded to ``bulletin`` and copied to the report.
        stats: Mapping with optional ``turns``, ``tools`` and ``span`` entries.

    Returns:
        Dict with keys ``user``, ``archetype`` (two strings), ``tagline``,
        ``sins`` (exactly three dicts with ``n``/``title``/``meta``/``source``),
        ``forecast`` (``headline``/``body``), ``dataset``, ``turns``, ``tools``,
        ``span``, ``generated`` and ``serial``.
    """
    data = bulletin(client, digests, user, dataset_id)
    if not isinstance(data, dict):
        # json.loads may legally return a list, string or number.
        data = {}
    today = dt.date.today().strftime("%b %d, %Y")

    archetype = data.get("archetype")
    if not isinstance(archetype, list) or len(archetype) < 2:
        archetype = ["The", "Unreadable"]

    raw_sins = data.get("sins")
    if not isinstance(raw_sins, list):
        raw_sins = []
    sins = []
    for i in range(3):
        # Missing or malformed entries become the same placeholder row the
        # padding produces: numbered, with dash title/meta and empty source.
        entry = raw_sins[i] if i < len(raw_sins) else None
        if not isinstance(entry, dict):
            entry = {}
        sins.append(
            {
                "n": str(entry.get("n") or f"{i+1:02d}"),
                "title": str(entry.get("title") or "—"),
                "meta": str(entry.get("meta") or "—"),
                "source": str(entry.get("source") or ""),
            }
        )

    forecast = data.get("forecast")
    if not isinstance(forecast, dict) or not forecast:
        forecast = {"headline": "The week ahead", "body": "The cards are quiet today."}

    return {
        "user": str(data.get("user") or user),
        "archetype": [str(archetype[0]), str(archetype[1])],
        "tagline": str(data.get("tagline") or ""),
        "sins": sins,
        "forecast": {
            "headline": str(forecast.get("headline") or "The week ahead"),
            "body": str(forecast.get("body") or ""),
        },
        "dataset": dataset_id,
        "turns": int(stats.get("turns") or 0),
        "tools": int(stats.get("tools") or 0),
        "span": str(stats.get("span") or ""),
        "generated": today,
        "serial": serial_for(user),
    }
|
|