"""Gradio Server app: custom HTML frontend + Gradio-managed API endpoint.
The input UI lives in `index.html` and talks to the `generate_bulletin`
endpoint below via `@gradio/client`. Report generation logic is unchanged
from the original Blocks app.
"""
import os
from pathlib import Path
from fastapi.responses import HTMLResponse
from gradio import Server
from analyze import build_report, compute_stats, digest_all, get_client
from dataset import fetch_sessions, list_sessions
from extract import events_to_transcript, truncate_transcript
from render import bulletin_html, empty_bulletin_html
# NOTE(review): DEFAULT_REPO is not referenced by the code visible in this
# module -- presumably other modules or the frontend rely on it; confirm.
DEFAULT_REPO = "julien-c/pi-sessions"

# Server instance that the API endpoint and the "/" route are registered on.
app = Server()

# index.html sits in the same directory as this module.
_INDEX = Path(__file__).with_name("index.html")
def _owner_from(repo_id: str) -> str:
return repo_id.split("/")[0] if "/" in repo_id else repo_id
def _progress(status: str, headline: str) -> tuple[str, str]:
    """Build one (status, html) tick whose HTML is the empty-bulletin placeholder."""
    return status, empty_bulletin_html(headline)


@app.api(name="generate_bulletin", concurrency_limit=1)
def generate_bulletin(
    repo_id: str, max_sessions: int
) -> tuple[str, str]:
    """Stream (status, html) updates; the final tick carries the bulletin HTML.

    Args:
        repo_id: Repo whose sessions are listed and fetched. The part before
            its first "/" is handed to the report builder as the user.
        max_sessions: Upper bound on the number of sessions to fetch. It is
            coerced with ``int()`` and clamped to between 1 and the number
            of sessions that were listed.

    Yields:
        ``(status, html)`` pairs. Progress and error ticks carry placeholder
        HTML from ``empty_bulletin_html``; the last tick of a successful run
        carries ``bulletin_html(report)``.

    Exceptions raised by the pipeline steps are caught and reported as a
    final "❌ ..." tick instead of propagating to the caller.
    """
    # NOTE(review): the return annotation describes one yielded item, not the
    # generator object. Presumably Gradio derives the endpoint's output schema
    # from it -- confirm before changing it to Iterator[...].
    yield _progress("Connecting…", "Connecting…")

    try:
        client = get_client()
    except Exception as e:
        # NOTE(review): any client failure is shown under the "HF_TOKEN
        # missing" headline; the status line carries the actual message.
        yield _progress(f"❌ {e}", "HF_TOKEN missing")
        return

    try:
        yield _progress("Listing sessions…", "Listing sessions…")
        paths = list_sessions(repo_id)
        if not paths:
            yield _progress(
                "No sessions found in `sessions/**/*.jsonl`.",
                "No sessions to roast.",
            )
            return

        # Clamp to at least one session: a zero or negative request would
        # otherwise be announced as "Fetching 0 ..." and passed on to
        # fetch_sessions as a non-positive limit. `paths` is non-empty here,
        # so 1 is always a valid lower bound.
        n = max(1, min(int(max_sessions), len(paths)))
        yield _progress(
            f"Fetching {n} of {len(paths)} sessions…",
            f"Fetching {n} sessions…",
        )
        sessions = fetch_sessions(repo_id, n)
        if not sessions:
            yield _progress(
                "Found session files but couldn't parse any.",
                "Parse error.",
            )
            return

        stats = compute_stats(sessions)
        # 40_000 is the limit handed to truncate_transcript
        # (presumably a character budget -- TODO confirm).
        transcripts = [
            (path, truncate_transcript(events_to_transcript(evs), 40_000))
            for path, evs in sessions
        ]
        yield _progress(
            f"Reading {len(transcripts)} sessions in parallel…",
            "Consulting the traces…",
        )
        digests = digest_all(client, transcripts)
        if not digests:
            yield _progress(
                "Every per-session digest failed. Try again or lower max sessions.",
                "Digest error.",
            )
            return

        yield _progress(
            f"Drafting bulletin from {len(digests)} digests…",
            "Drafting bulletin…",
        )
        owner = _owner_from(repo_id)
        # Report-building failures get their own message rather than the
        # generic handler at the bottom.
        try:
            report = build_report(
                client=client,
                digests=digests,
                user=owner,
                dataset_id=repo_id,
                stats=stats,
            )
        except Exception as e:
            yield _progress(
                f"❌ Bulletin generation failed: {e}",
                "The presses jammed.",
            )
            return

        yield (
            f"Bulletin issued for @{report['user']} — {report['archetype'][0]} {report['archetype'][1]}.",
            bulletin_html(report),
        )
    except Exception as e:
        # Top-level boundary: turn any other failure into a final status tick.
        yield _progress(f"❌ {type(e).__name__}: {e}", "Error.")
@app.get("/", response_class=HTMLResponse)
async def homepage():
    """Serve the custom frontend: the UTF-8 text of ``index.html``."""
    # The file is read inside the handler, i.e. on every request.
    page = _INDEX.read_text(encoding="utf-8")
    return page
if __name__ == "__main__":
    # An unset HF_TOKEN and an empty one are treated the same way.
    token_present = bool(os.environ.get("HF_TOKEN"))
    if not token_present:
        print("warning: HF_TOKEN not set; the app will error on the first click.")
    app.launch(show_error=True)
|