File size: 3,822 Bytes
4c3cbe1
 
 
 
 
 
1252cb9
2d3963a
4c3cbe1
1252cb9
4c3cbe1
 
1252cb9
2d3963a
1252cb9
 
 
 
4448d5a
1252cb9
4c3cbe1
 
 
 
1252cb9
 
 
 
 
4c3cbe1
c8d596f
 
ab999e5
4c3cbe1
 
1252cb9
 
2d3963a
 
 
 
 
 
1252cb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d3963a
1252cb9
 
2d3963a
1252cb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d3963a
1252cb9
 
 
 
 
 
 
 
 
 
 
 
 
4c3cbe1
1252cb9
 
 
 
 
 
 
 
 
4c3cbe1
 
 
1252cb9
 
 
2d3963a
 
4c3cbe1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
"""Gradio Server app: custom HTML frontend + Gradio-managed API endpoint.

The input UI lives in `index.html` and talks to the `generate_bulletin`
endpoint below via `@gradio/client`. Report generation logic is unchanged
from the original Blocks app.
"""

import os
from pathlib import Path

from fastapi.responses import HTMLResponse
from gradio import Server

from analyze import build_report, compute_stats, digest_all, get_client
from dataset import fetch_sessions, list_sessions
from extract import events_to_transcript, truncate_transcript
from render import bulletin_html, empty_bulletin_html

# Dataset repo id kept as a module-level default. Nothing in the visible part
# of this module reads it — presumably a leftover from the earlier app or a
# value mirrored by the frontend (TODO confirm before removing).
DEFAULT_REPO = "julien-c/pi-sessions"

# The custom frontend page lives next to this module on disk.
_INDEX = Path(__file__).with_name("index.html")

# Server instance that the endpoint/route decorators in this module attach to.
app = Server()


def _owner_from(repo_id: str) -> str:
    """Return the part of *repo_id* that precedes its first ``/``.

    An id that contains no ``/`` is returned unchanged.
    """
    owner, _slash, _remainder = repo_id.partition("/")
    return owner


@app.api(name="generate_bulletin", concurrency_limit=1)
def generate_bulletin(
    repo_id: str, max_sessions: int
) -> tuple[str, str]:
    """Stream ``(status, html)`` updates while building a bulletin for a repo.

    This is a generator. Each step yields a short status string together with
    an HTML fragment: progress and failure ticks carry placeholder markup from
    ``empty_bulletin_html``; only the last tick of a successful run carries the
    rendered bulletin from ``bulletin_html``. Failures are reported as a
    yielded status rather than raised, so the stream always ends cleanly.

    Args:
        repo_id: Repo id handed to ``list_sessions`` / ``fetch_sessions``; the
            text before its first ``/`` is used as the report's user.
        max_sessions: Upper bound on how many sessions to read. Coerced with
            ``int()`` and required to be at least 1; it is also capped at the
            number of sessions that were listed.

    Yields:
        ``(status, html)`` pairs as described above.
    """
    # NOTE(review): the return annotation describes a single yielded pair, not
    # the generator itself. It is left untouched in case the API layer
    # inspects it to shape the endpoint's outputs — confirm before changing.

    yield "Connecting…", empty_bulletin_html("Connecting…")

    # Client construction gets its own handler so the placeholder can point at
    # the likely cause (a missing token) instead of a generic error.
    try:
        client = get_client()
    except Exception as e:
        yield f"❌ {e}", empty_bulletin_html("HF_TOKEN missing")
        return

    try:
        yield "Listing sessions…", empty_bulletin_html("Listing sessions…")
        paths = list_sessions(repo_id)
        if not paths:
            yield (
                "No sessions found in `sessions/**/*.jsonl`.",
                empty_bulletin_html("No sessions to roast."),
            )
            return

        # A zero or negative count would otherwise flow into fetch_sessions
        # and show up as "Fetching 0 of N sessions…" (or a negative number),
        # so reject it here with an explicit message.
        requested = int(max_sessions)
        if requested < 1:
            yield (
                f"Max sessions must be at least 1 (got {requested}).",
                empty_bulletin_html("Nothing to fetch."),
            )
            return

        n = min(requested, len(paths))
        yield (
            f"Fetching {n} of {len(paths)} sessions…",
            empty_bulletin_html(f"Fetching {n} sessions…"),
        )
        sessions = fetch_sessions(repo_id, n)
        if not sessions:
            yield (
                "Found session files but couldn't parse any.",
                empty_bulletin_html("Parse error."),
            )
            return

        stats = compute_stats(sessions)
        # Each transcript is passed through truncate_transcript with a 40_000
        # limit before being sent for digestion.
        transcripts = [
            (path, truncate_transcript(events_to_transcript(evs), 40_000))
            for path, evs in sessions
        ]

        yield (
            f"Reading {len(transcripts)} sessions in parallel…",
            empty_bulletin_html("Consulting the traces…"),
        )
        digests = digest_all(client, transcripts)
        if not digests:
            yield (
                "Every per-session digest failed. Try again or lower max sessions.",
                empty_bulletin_html("Digest error."),
            )
            return

        yield (
            f"Drafting bulletin from {len(digests)} digests…",
            empty_bulletin_html("Drafting bulletin…"),
        )

        owner = _owner_from(repo_id)
        # Report building has a dedicated handler so its failure gets a more
        # specific status than the catch-all at the bottom.
        try:
            report = build_report(
                client=client,
                digests=digests,
                user=owner,
                dataset_id=repo_id,
                stats=stats,
            )
        except Exception as e:
            yield (
                f"❌ Bulletin generation failed: {e}",
                empty_bulletin_html("The presses jammed."),
            )
            return

        # NOTE(review): there is no separator between the user handle and
        # archetype[0] in this message; kept as-is because the shape of
        # report['archetype'] is not visible here — confirm it reads well.
        yield (
            f"Bulletin issued for @{report['user']}{report['archetype'][0]} {report['archetype'][1]}.",
            bulletin_html(report),
        )
    except Exception as e:
        # Endpoint boundary: turn anything unexpected (bad repo id, network
        # failure, non-numeric max_sessions, malformed report) into a final
        # status tick instead of letting the stream die with a traceback.
        yield (
            f"❌ {type(e).__name__}: {e}",
            empty_bulletin_html("Error."),
        )


@app.get("/", response_class=HTMLResponse)
async def homepage():
    """Return the contents of the frontend page (``_INDEX``) as the root route's body."""
    # Read from disk on every request, decoding as UTF-8.
    with _INDEX.open(encoding="utf-8") as page:
        return page.read()


if __name__ == "__main__":
    # Warn early when the token is unset or empty; the server is still started
    # so the page itself remains reachable.
    if not os.getenv("HF_TOKEN"):
        print("warning: HF_TOKEN not set; the app will error on the first click.")
    app.launch(show_error=True)