File size: 11,784 Bytes
2d3963a
1252cb9
 
 
 
2d3963a
 
1252cb9
2d3963a
1252cb9
2f1a55a
 
1caf1df
1252cb9
2d3963a
11a0e99
 
2d3963a
 
 
 
 
 
 
 
 
1252cb9
 
 
 
 
 
2d3963a
1252cb9
 
 
 
 
 
 
 
 
 
 
2d3963a
1252cb9
 
2d3963a
1252cb9
2d3963a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1252cb9
 
 
 
 
 
 
 
 
 
 
 
 
2f1a55a
 
1252cb9
 
 
 
 
2f1a55a
1252cb9
 
 
 
 
 
2f1a55a
1252cb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d3963a
1252cb9
 
 
 
 
 
 
 
 
 
 
 
 
fabd7ab
 
 
1252cb9
 
 
 
 
 
 
 
 
fabd7ab
 
1252cb9
 
2d3963a
 
1252cb9
 
 
 
 
 
 
fabd7ab
 
 
 
 
1252cb9
 
 
 
fabd7ab
1252cb9
 
 
 
 
 
2d3963a
 
 
 
 
 
 
1252cb9
 
 
8844088
fabd7ab
 
 
 
 
1252cb9
2d3963a
 
 
 
 
 
 
 
 
 
 
 
1252cb9
 
 
2d3963a
1252cb9
 
 
 
 
 
2d3963a
1252cb9
 
 
 
 
fabd7ab
1252cb9
 
 
 
 
 
fabd7ab
 
 
 
 
 
1252cb9
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
"""InferenceClient calls: map (per-session digests) + reduce (bulletin)."""

import datetime as dt
import hashlib
import json
import os
from concurrent.futures import ThreadPoolExecutor

from huggingface_hub import InferenceClient

from extract import event_role, event_tool_names

# Hub model ID that get_client() passes as `model=` when building the client.
MODEL = "Qwen/Qwen3.5-9B"

# Sent as `extra_body` on the chat_completion calls below; sets the chat
# template's `enable_thinking` flag to False.
_NO_THINK = {"chat_template_kwargs": {"enable_thinking": False}}


def get_client(token: str | None = None) -> InferenceClient:
    """Return an InferenceClient bound to MODEL.

    This is the single construction point for the client, so a later switch
    to OAuth only has to touch this function.

    Args:
        token: Access token. When None, the HF_TOKEN environment variable is
            used instead.

    Raises:
        RuntimeError: If the resolved token is missing or empty.
    """
    resolved = os.environ.get("HF_TOKEN") if token is None else token
    if resolved:
        return InferenceClient(model=MODEL, token=resolved)
    raise RuntimeError(
        "HF_TOKEN is not set. Export it in your shell or pass token= explicitly."
    )


# ---------- map: per-session digest ----------

# System prompt for the map step (used by digest_session). It asks the model
# for one strict-JSON digest per transcript, describing the human user only.
_DIGEST_SYSTEM = """You are analysing a single coding-agent session transcript. The TRANSCRIPT shows messages between a HUMAN USER and an AGENT (the AI). Return signals about the HUMAN USER only — never about the agent.

Return STRICT JSON:
{
  "session_id": <echo>,
  "intent": "<one sentence: what the user was trying to do>",
  "top_quotes": [<1-3 short verbatim quotes from USER messages only>],
  "tells": [<3-5 short strings: signals about the user — frustration, confidence, knowledge gaps, communication style, premature optimization, doc-avoidance, etc.>],
  "mood": "<one short phrase: the session's emotional arc>"
}

Hard rules:
- Only include things the user actually said or did. Do not attribute agent behaviour to the user.
- top_quotes must literally appear in user messages.
- Be concise and specific. No invented quotes."""


def digest_session(client: InferenceClient, transcript: str, session_id: str) -> dict:
    """Digest one session transcript with a single JSON-mode chat call.

    Args:
        client: Client used to issue the chat completion.
        transcript: Text of the session, embedded verbatim in the user prompt.
        session_id: Identifier sent to the model and used as the fallback
            `session_id` when the reply omits one.

    Returns:
        The parsed JSON reply with `session_id` filled in if it was missing.
        On any failure (request error, invalid JSON, unexpected reply shape)
        the result is `{"session_id": ..., "error": "<message>"}` rather than
        an exception, so one bad session cannot abort a batch.
    """
    prompt = f"session_id: {session_id}\n\nTranscript:\n{transcript}"
    try:
        completion = client.chat_completion(
            messages=[
                {"role": "system", "content": _DIGEST_SYSTEM},
                {"role": "user", "content": prompt},
            ],
            response_format={"type": "json_object"},
            max_tokens=800,
            temperature=0,
            extra_body=_NO_THINK,
        )
        content = completion.choices[0].message.content
        # An empty/None reply is parsed as an empty object.
        digest = json.loads(content if content else "{}")
        digest.setdefault("session_id", session_id)
    except Exception as exc:
        return {"session_id": session_id, "error": str(exc)}
    return digest


def digest_all(
    client: InferenceClient,
    transcripts: list[tuple[str, str]],
    max_workers: int = 8,
) -> list[dict]:
    """Digest every transcript concurrently and keep only the successful ones.

    Args:
        client: Client shared by all worker threads.
        transcripts: `(session_id, transcript_text)` pairs.
        max_workers: Size of the thread pool.

    Returns:
        Digests in input order, minus any that carry an "error" key.
    """
    def _digest_pair(pair):
        session_id, transcript = pair
        return digest_session(client, transcript, session_id)

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # pool.map yields results in submission order.
        return [
            digest
            for digest in pool.map(_digest_pair, transcripts)
            if "error" not in digest
        ]


# ---------- stats from raw events ----------


def _parse_ts(ts: str) -> dt.datetime | None:
    try:
        return dt.datetime.fromisoformat(ts.replace("Z", "+00:00"))
    except Exception:
        return None


def compute_stats(sessions: list[tuple[str, list[dict]]]) -> dict:
    """Count user turns, distinct tool names, and the first–last timestamp span.

    Role and tool-name detection are delegated to `event_role` and
    `event_tool_names`, which keeps this function independent of the trace
    format.

    Args:
        sessions: `(path, events)` pairs; only `events` is read. Each event is
            a dict whose optional "timestamp" entry is an ISO-8601 string.

    Returns:
        A dict with:
        - "turns": number of events whose role is "user";
        - "tools": number of distinct tool names seen;
        - "span": e.g. "Jan 05 – Jan 12, 2025" (the year is shown once when
          both ends share it), or "" when no timestamp could be parsed.
    """
    turns = 0
    tools: set[str] = set()
    first: dt.datetime | None = None
    last: dt.datetime | None = None
    for _path, events in sessions:
        for ev in events:
            if event_role(ev) == "user":
                turns += 1
            ts = ev.get("timestamp")
            tools.update(event_tool_names(ev))

            parsed = _parse_ts(ts) if isinstance(ts, str) else None
            if parsed is None:
                continue
            if parsed.utcoffset() is None:
                # Naive and aware datetimes cannot be compared (TypeError), so
                # give naive values an offset.
                # NOTE(review): assumes offset-less timestamps are UTC — confirm.
                parsed = parsed.replace(tzinfo=dt.timezone.utc)
            # Only the extremes are needed, so track them instead of sorting.
            if first is None or parsed < first:
                first = parsed
            if last is None or parsed > last:
                last = parsed

    span = ""
    if first is not None and last is not None:
        # Drop the year from the start date when both ends share it.
        start_format = "%b %d" if first.year == last.year else "%b %d, %Y"
        span = f"{first.strftime(start_format)} – {last.strftime('%b %d, %Y')}"
    return {"turns": turns, "tools": len(tools), "span": span}


def serial_for(user: str) -> str:
    """Derive a deterministic "PR-NNNN" serial from a user handle.

    The handle's SHA-256 digest is read as a big-endian integer and reduced
    modulo 10000, so the same handle always yields the same zero-padded
    four-digit number.
    """
    digest = hashlib.sha256(user.encode("utf-8")).digest()
    number = int.from_bytes(digest, "big") % 10000
    return "PR-" + str(number).zfill(4)


# ---------- reduce: bulletin generation ----------

# System prompt for the reduce step (used by bulletin). The JSON keys it
# requests — user, archetype, tagline, sins (n/title/meta/source) and
# forecast (headline/body) — are the ones build_report reads from the reply.
# Adapted from the design handoff's CONTENT_PROMPT.md.
_BULLETIN_SYSTEM = """You are the Hugging Face Roastery. You read agent-trace dataset digests and write a gently savage personality bulletin about the HUMAN USER who was prompting the agent — never about the agent itself. The output is a vintage printed card; every field has a strict length budget. Be specific, be funny, never punch down.

You will receive:
- user: the Hugging Face handle of the operator.
- dataset: the Hub dataset ID being analysed.
- digests: a JSON list of per-session digests already extracted from the traces (intent, top_quotes, tells, mood).

Return EXACTLY one JSON object, no prose, no markdown:
{
  "user":      "<bare handle, no @>",
  "archetype": ["The <adjective>", "<Noun>"],
  "tagline":   "<130-170 chars, 2-3 italic lines, sentences only, end on a punchline>",
  "sins": [
    {"n":"01","title":"<50-90 chars: one concrete user behaviour, sentence case, no quotes>","meta":"<30-110 chars: a VERBATIM quote from a top_quotes entry — raw text only, NO surrounding quote marks (render adds them)>","source":"<the exact session_id of the digest the quote was taken from>"},
    {"n":"02","title":"...","meta":"...","source":"..."},
    {"n":"03","title":"...","meta":"...","source":"..."}
  ],
  "forecast": {"headline":"The week ahead","body":"<270-340 chars, horoscope-style, end with 'Lucky <x>: <y>. Avoid: <z>.'>"}
}

Field budgets (hard limits — overflow breaks the layout):
- archetype[0]:  8-18 chars  (line 1, usually "The <adjective>")
- archetype[1]:  6-14 chars  (line 2, title-cased punch noun)
- tagline:       130-170 chars
- sins[].title:  50-90 chars
- sins[].meta:   30-110 chars  (raw verbatim user quote, no surrounding quote marks)
- sins[].source: the session_id from the digest the quote came from
- forecast.body: 270-340 chars, ends with "Lucky <x>: <y>. Avoid: <z>."

The sins array MUST contain exactly 3 objects. Do not emit fewer.

Voice:
- Sharp but loving — group-chat energy, not insult-comic. Roast habits a thoughtful friend would call out.
- Sentence case for titles. Smart quotes ( " " ), en-dashes ( – ), em-dashes ( — ). No exclamation marks. No emojis.
- Specific, not generic. Every observation must be grounded in something the digests actually contain.

Hard rules:
1. Roast the USER, not the agent. The user cannot run code; only the agent can. Wrong: "Parsed JSON with a regex twice." (that's the agent). Right: "Asked the agent to parse JSON with a regex twice." / "Demanded a regex over a JSON parser, against advice."
2. EVERY sins[].meta MUST be a verbatim top_quote from one of the digests. Emit the raw text only — NO surrounding quote marks (the renderer wraps it). No paraphrasing, no rewording, no analysis. Just the user's own words. If no top_quote fits a sin you've drafted, pick a different sin that does have a fitting quote.
3. EVERY sins[].source MUST be the exact `session_id` value of the digest the quote came from. Copy it verbatim — do not shorten, rename, or invent.
4. The title is the roast sentence (no quotes inside it); the meta below it is the receipt — the user's own words that prove the sin. Title and meta must be different content, not paraphrases of each other.
5. No PII. No emails, no real names, no private repos. Public handles and public dataset names are fine.
6. No identity punching. Roast process and habits — not who the user is. Off-limits: appearance, nationality, gender, politics, illness. Fair game: ignoring docs, refactor addiction, regex misuse, vibes-driven coding, asking the same thing six ways, premature optimisation, late-night commits.

Procedure:
1. Skim the digests for recurring patterns (repeated questions, premature optimisation, doc avoidance, tone, tool misuse, mood arc).
2. Pick ONE crisp archetype. Examples: The Premature Optimizer · The Vibes Driver · The Doc Avoider · The Refactor Romantic · The Confidence Auditor · The Apology Engineer · The TODO Composer. Invent freely.
3. Pick three sins the digests support. For each: write a roast sentence as the title, pick a verbatim top_quote that proves the sin and place it (raw, no quote marks) as the meta, and set source to that digest's session_id.
4. Tagline: 2-3 short sentences piling on the archetype with concrete examples. End on a punchline.
5. Horoscope: one absurd technical prediction grounded in a real user pattern. Close with "Lucky <something>: <x>. Avoid: <y>."
6. Validate lengths against budgets. Trim or pad before emitting.
7. Emit JSON only. No code fences. No commentary."""


def bulletin(
    client: InferenceClient,
    digests: list[dict],
    user: str,
    dataset_id: str,
) -> dict:
    """Ask the model for the bulletin content in one JSON-mode chat call.

    Args:
        client: Client used to issue the chat completion.
        digests: Per-session digests, serialised as indented JSON in the prompt.
        user: Handle placed on the prompt's `user:` line.
        dataset_id: Dataset ID placed on the prompt's `dataset:` line.

    Returns:
        The parsed JSON reply (archetype, tagline, sins, forecast as requested
        by the system prompt). An empty reply parses to `{}`.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON. Errors raised by
            the client call itself also propagate.
    """
    header = (
        f"user: {user}\n"
        f"dataset: {dataset_id}\n\n"
    )
    digests_json = json.dumps(digests, ensure_ascii=False, indent=2)
    # Restates the most frequently violated constraints right next to the data.
    reminder = (
        "Reminder: emit EXACTLY 3 sins. Each sin needs `title` (the roast), "
        "`meta` (a VERBATIM top_quote, raw text only — no surrounding quote "
        "marks; the renderer wraps them), and `source` (the session_id of the "
        "digest the quote was taken from, copied verbatim). "
        "Tagline ≤170 chars; forecast.body ≤340 chars."
    )
    prompt = f"{header}digests (JSON list):\n{digests_json}\n\n{reminder}"

    conversation = [
        {"role": "system", "content": _BULLETIN_SYSTEM},
        {"role": "user", "content": prompt},
    ]
    reply = client.chat_completion(
        messages=conversation,
        response_format={"type": "json_object"},
        max_tokens=1500,
        temperature=0,
        extra_body=_NO_THINK,
    )
    content = reply.choices[0].message.content
    return json.loads(content if content else "{}")


def build_report(
    client: InferenceClient,
    digests: list[dict],
    user: str,
    dataset_id: str,
    stats: dict,
) -> dict:
    """Combine model output + computed stats into the full report dict for render.py.

    The bulletin is model-generated, so every field is treated as untrusted:
    a missing or wrongly-typed value falls back to a placeholder instead of
    raising, and the result always has exactly three sins.

    Args:
        client: Client forwarded to `bulletin`.
        digests: Per-session digests forwarded to `bulletin`.
        user: Handle; used as the fallback "user" value and to derive the serial.
        dataset_id: Dataset ID; forwarded to `bulletin` and echoed as "dataset".
        stats: Dict with optional "turns", "tools" and "span" entries.

    Returns:
        A dict with string-valued "user", "tagline", "dataset", "span",
        "generated" and "serial"; a two-string "archetype" list; three "sins"
        dicts (n/title/meta/source); a "forecast" dict (headline/body); and
        integer "turns" and "tools".

    Raises:
        Whatever `bulletin` raises (e.g. a failed request or a non-JSON reply).
    """
    data = bulletin(client, digests, user, dataset_id)
    if not isinstance(data, dict):
        # Valid JSON that is not an object (list, string, number) has no fields.
        data = {}

    archetype = data.get("archetype")
    if not isinstance(archetype, list) or len(archetype) < 2:
        archetype = ["The", "Unreadable"]

    raw_sins = data.get("sins")
    if not isinstance(raw_sins, list):
        raw_sins = []
    # Keep at most three entries, blank out non-dict ones and pad to three;
    # empty dicts pick up the placeholder values in the normalisation below.
    sins = [s if isinstance(s, dict) else {} for s in raw_sins[:3]]
    sins += [{} for _ in range(3 - len(sins))]

    forecast = data.get("forecast")
    if not isinstance(forecast, dict) or not forecast:
        forecast = {"headline": "The week ahead", "body": "The cards are quiet today."}

    today = dt.date.today().strftime("%b %d, %Y")
    return {
        "user": str(data.get("user") or user),
        "archetype": [str(archetype[0]), str(archetype[1])],
        "tagline": str(data.get("tagline") or ""),
        "sins": [
            {
                "n": str(s.get("n") or f"{i+1:02d}"),
                "title": str(s.get("title") or "—"),
                "meta": str(s.get("meta") or "—"),
                "source": str(s.get("source") or ""),
            }
            for i, s in enumerate(sins)
        ],
        "forecast": {
            "headline": str(forecast.get("headline") or "The week ahead"),
            "body": str(forecast.get("body") or ""),
        },
        "dataset": dataset_id,
        "turns": int(stats.get("turns") or 0),
        "tools": int(stats.get("tools") or 0),
        "span": str(stats.get("span") or ""),
        "generated": today,
        "serial": serial_for(user),
    }