File size: 17,242 Bytes
7563305
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
847a58e
7563305
 
 
 
 
1eec565
8182c0f
9124577
7563305
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc47444
 
 
 
 
9124577
dc47444
 
 
 
 
 
 
7563305
 
dc47444
 
 
 
 
 
 
 
 
 
 
 
b722e62
 
 
 
 
 
dc47444
 
 
 
 
 
 
 
 
 
9124577
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8182c0f
 
 
 
 
4e70816
8182c0f
7563305
 
4e70816
 
 
 
 
 
 
 
 
 
 
7563305
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02c841f
 
 
 
 
7563305
 
 
 
 
 
 
 
 
 
 
02c841f
7563305
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
058157a
 
 
7563305
 
 
 
 
 
dc47444
 
 
 
 
7563305
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
058157a
 
 
 
 
 
 
 
 
 
7563305
 
 
 
 
058157a
 
 
 
 
 
 
 
 
 
 
 
 
7563305
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9124577
 
 
 
 
 
 
7563305
 
 
 
 
efca112
847a58e
 
 
4d74f1c
847a58e
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
"""
Recall — custom frontend server (NAH-36).

Serves the polished `Recall.dc.html` design (frontend/index.html) and exposes a
thin JSON API over the EXISTING backend. The learning/content logic and the
`schema.py` data contract are treated as an API and are NOT modified here.

The Session dict lives server-side, keyed by a short id the client carries
around (mirrors the single-session gr.State model the Gradio app uses) — so the
reference answers in the deck never leave the server.

Run it (stub mode is on by default):

    pip install -r requirements.txt
    python server.py            # http://127.0.0.1:7860

Flip RECALL_STUB=0 once the real model is wired:

    RECALL_STUB=0 python server.py

The legacy Gradio UI is still mounted at /gradio.
"""
from __future__ import annotations

import os
import tempfile
import threading
import time
import uuid
from collections import OrderedDict
from pathlib import Path

import gradio as gr
from fastapi import File, Form, UploadFile
from fastapi.responses import FileResponse, JSONResponse, Response
from pydantic import BaseModel
from starlette.concurrency import run_in_threadpool

import content_pipeline as cp
import learning_engine as le

# Directory holding the static frontend; its index.html is what "/" serves.
FRONTEND = Path(__file__).parent / "frontend"

# Single-process session store. Bounded so a public Space can't be OOM'd by a
# client looping /api/generate: sessions are evicted least-recently-used past
# MAX_SESSIONS and expire after SESSION_TTL_SECONDS of inactivity.
# All four limits below are read from the environment once, at import time; a
# value that isn't an integer makes int() raise ValueError right here.
MAX_SESSIONS = int(os.getenv("RECALL_MAX_SESSIONS", "500"))
SESSION_TTL_SECONDS = int(os.getenv("RECALL_SESSION_TTL", str(2 * 60 * 60)))  # 2h
# Caps on input size so a single request can't exhaust memory/disk.
# RECALL_MAX_UPLOAD_MB is given in MiB and converted to bytes here.
MAX_UPLOAD_BYTES = int(os.getenv("RECALL_MAX_UPLOAD_MB", "10")) * 1024 * 1024
MAX_TEXT_CHARS = int(os.getenv("RECALL_MAX_TEXT_CHARS", "200000"))  # ~50k tokens

# sid -> (session, last_access_epoch). OrderedDict gives O(1) LRU eviction.
# Insertion order doubles as recency order: get_session/put_session move the
# touched sid to the end, so the front entry is always the eviction candidate.
SESSIONS: "OrderedDict[str, tuple[dict, float]]" = OrderedDict()


def _purge_expired(now: float | None = None) -> None:
    """Drop every session whose last access is older than the TTL.

    Args:
        now: Epoch seconds to measure idleness against; defaults to the
            current ``time.time()``.
    """
    if now is None:
        now = time.time()
    # Iterate over a snapshot so entries can be removed while walking.
    for sid, (_, last_access) in list(SESSIONS.items()):
        if now - last_access > SESSION_TTL_SECONDS:
            del SESSIONS[sid]


def get_session(sid: str) -> dict | None:
    """Look up a live session by id and refresh its recency.

    Expired sessions are purged first. A hit gets a new last-access timestamp
    and moves to the most-recently-used end of the store.

    Returns:
        The session dict, or ``None`` when the id is unknown or has expired
        (callers answer that with their own "session expired" response).
    """
    _purge_expired()
    if sid not in SESSIONS:
        return None
    session = SESSIONS[sid][0]
    SESSIONS[sid] = (session, time.time())
    SESSIONS.move_to_end(sid)
    return session


def put_session(sid: str, session: dict) -> None:
    """Insert or refresh a session, then trim the store back to the cap.

    The entry is stamped with the current time and placed at the
    most-recently-used end; any surplus over ``MAX_SESSIONS`` is evicted from
    the least-recently-used end.
    """
    _purge_expired()
    SESSIONS[sid] = (session, time.time())
    SESSIONS.move_to_end(sid)
    surplus = len(SESSIONS) - MAX_SESSIONS
    for _ in range(surplus):
        SESSIONS.popitem(last=False)

# The photosynthesis notes the design's "sample" affordances load: this is the
# deck source /api/generate uses when the form field sample == "bio". Lets the
# Upload screen's sample chip work even with no real PDF on disk.
SAMPLE_NOTES = (
    "Photosynthesis happens in the chloroplast. The light-dependent reactions "
    "occur in the thylakoid membranes, where water is split, ATP and NADPH are "
    "produced, and oxygen is released. The Calvin cycle takes place in the "
    "stroma, where the enzyme RuBisCO fixes CO2 onto RuBP. Cellular respiration "
    "occurs in the mitochondria; most ATP is made during oxidative "
    "phosphorylation, as the electron transport chain pumps protons and oxygen "
    "acts as the final electron acceptor, forming water."
)

# The "scanned-slides.pdf" sample renders a slide to an IMAGE (no text layer) so
# the chip demonstrates the real image-PDF OCR path end-to-end: the vision model
# reads the rendered page and writes a deck from it (image PDFs are now supported,
# so the old "image-only = error" copy no longer applies).
# One entry per rendered text line, drawn top to bottom by _render_scan_sample:
# the first entry is drawn in the larger title font, and the empty string draws
# nothing but still advances the cursor, leaving a blank line under the title.
SCAN_SAMPLE_LINES = (
    "Photosynthesis - Lecture 3",
    "",
    "Occurs in the chloroplast.",
    "Light-dependent reactions happen in the thylakoid",
    "membranes: water is split, ATP and NADPH are made,",
    "and oxygen is released.",
    "The Calvin cycle runs in the stroma, where RuBisCO",
    "fixes CO2 onto RuBP to produce glucose.",
)


def _render_scan_sample():
    """Draw the sample 'scanned slide' as a raster image with no text layer.

    Each entry of ``SCAN_SAMPLE_LINES`` is painted onto a white 1024x720 RGB
    canvas, the first one as a title. The result feeds the vision/OCR path so
    the sample chip exercises it with one click.

    Returns:
        The rendered ``PIL.Image.Image``.
    """
    from PIL import Image, ImageDraw, ImageFont

    def _font(size):
        # Prefer a real DejaVu TTF: first the usual Linux location, then a bare
        # name for Pillow to resolve itself.
        candidates = (
            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
            "DejaVuSans.ttf",
        )
        for font_path in candidates:
            try:
                return ImageFont.truetype(font_path, size)
            except Exception:  # noqa: BLE001 — no TTF on this host; use the bitmap default
                continue
        # Pillow >=10.1 lets load_default scale; without it the 5px bitmap font is
        # too small for the model to OCR reliably.
        try:
            return ImageFont.load_default(size=size)
        except TypeError:
            return ImageFont.load_default()

    canvas = Image.new("RGB", (1024, 720), "white")
    pen = ImageDraw.Draw(canvas)
    title_font = _font(40)
    body_font = _font(30)

    cursor_y = 70
    for index, text_line in enumerate(SCAN_SAMPLE_LINES):
        if index == 0:
            face, advance = title_font, 64
        else:
            face, advance = body_font, 52
        pen.text((70, cursor_y), text_line, fill=(15, 15, 15), font=face)
        cursor_y += advance
    return canvas


# Lazily-filled cache of the encoded sample slide; None until first requested.
_SCAN_SAMPLE_PNG: bytes | None = None


def _scan_sample_png() -> bytes:
    """Return the synthetic scanned slide encoded as PNG bytes.

    The slide comes from ``_render_scan_sample()`` — the same renderer the
    vision/OCR sample path calls — and is encoded only on the first call;
    later calls return the cached bytes.
    """
    global _SCAN_SAMPLE_PNG
    if _SCAN_SAMPLE_PNG is not None:
        return _SCAN_SAMPLE_PNG

    from io import BytesIO

    png_buffer = BytesIO()
    slide = _render_scan_sample()
    slide.save(png_buffer, format="PNG")
    _SCAN_SAMPLE_PNG = png_buffer.getvalue()
    return _SCAN_SAMPLE_PNG

# `gradio.Server` extends FastAPI: we keep all our own routes (custom frontend +
# JSON API) AND get gradio's launcher, which owns port 7860 on a Hugging Face
# gradio-SDK Space and wires up the queue + ZeroGPU GPU allocation. A plain
# FastAPI + manual `uvicorn.run(7860)` collides with the Space's own gradio
# server ("address already in use") — `gradio.Server` is the supported way to run
# a custom frontend on a gradio Space.
# Module-level server object: every `@app.api` / `@app.post` / `@app.get` route
# below registers on it, and `app.launch(...)` at the bottom of the file starts it.
app = gr.Server(title="Recall")


# A gradio.Server MUST register at least one `@app.api(...)` endpoint: launch()
# builds its internal Blocks from these, and on a Space an *empty* Blocks doesn't
# stay "running" (launch returns and the process exits → RUNTIME_ERROR). This is
# also the gradio-native handle (`gradio_client` / the JS Client) onto the same
# backend the custom frontend drives over plain JSON `/api/*` routes below.
@app.api(name="health")
def health() -> str:
    """Liveness probe + the Server's required gradio endpoint.

    Returns:
        The constant string ``"ok"``; no state is read or changed.
    """
    return "ok"


# ---- serialization ---------------------------------------------------------

def _card_out(card: dict | None) -> dict | None:
    """The client never needs (or should see) the reference answer or the raw
    source chunk — strip the card down to what the UI renders."""
    if not card:
        return None
    return {
        "id": card["id"],
        "question": card["question"],
        "topic": card["topic"],
        "difficulty": card["difficulty"],
        "parent_id": card.get("parent_id"),
    }


def _view(session: dict) -> dict:
    """Display state the header / mastery bars / deck rail are built from."""
    deck = session["deck"]
    history = session["history"]
    answered = len(history)
    total = len(deck)
    # Cards that have graduated out of the queue (mastered this session). Progress
    # tracks this rather than `answered` so the bar climbs as cards are mastered
    # and reaches total exactly when the queue drains — instead of pinning at
    # total/total the moment you've seen every card once.
    mastered = total - len(session["queue"])

    stats: dict[str, dict] = {}
    for h in history:
        s = stats.setdefault(h["topic"], {"correct": 0, "total": 0})
        s["total"] += 1
        if h["grade"] >= 3:
            s["correct"] += 1

    return {
        "total": total,
        "answered": answered,
        "posDisplay": min(mastered + 1, total) if total else 0,
        "streak": session["streak"],
        "topicStats": stats,
        "rail": [
            {"id": c["id"], "topic": c["topic"], "injected": bool(c.get("parent_id"))}
            for c in deck
        ],
    }


# ---- request models --------------------------------------------------------

class SidBody(BaseModel):
    # JSON body for the endpoints that only need to name the caller's session
    # (/api/next, /api/recap, /api/restart).
    sid: str  # session id handed out by /api/generate


class GradeBody(BaseModel):
    # JSON body for /api/grade.
    sid: str  # session id handed out by /api/generate
    answer: str = ""  # the learner's answer; optional, defaults to empty


class RegenBody(BaseModel):
    # JSON body for /api/regenerate.
    sid: str  # session id handed out by /api/generate
    # Not validated here: the value is passed through to cp.regenerate as-is,
    # and /api/regenerate labels anything other than "harder" as "easier".
    direction: str  # "harder" | "easier"


# ---- API -------------------------------------------------------------------

def _deck_failure(exc: Exception) -> JSONResponse:
    """Build the friendly 422 sent when deck generation raises.

    Only the exception's class name is surfaced to the client, never its
    message or traceback.
    """
    return JSONResponse(
        {"error": f"Couldn't build a deck from that ({type(exc).__name__}). "
                  "Try different material."},
        status_code=422,
    )


@app.post("/api/generate")
async def api_generate(
    text: str = Form(""),
    sample: str = Form(""),
    file: UploadFile | None = File(None),
):
    """Build a deck from the submitted material and open a new session.

    Exactly one input is used, checked in this order: non-blank ``text``
    (truncated to ``MAX_TEXT_CHARS``), ``sample == "bio"``, ``sample == "scan"``,
    then ``file``.

    Returns:
        On success ``{"sid", "card", "view"}``: the new session id, the first
        card (stripped by ``_card_out``) and the display state from ``_view``.
        Otherwise a JSON ``{"error": ...}`` response with status
        400 (no usable input, or pasted text shorter than 40 characters),
        413 (upload larger than ``MAX_UPLOAD_BYTES``), or
        422 (text extraction failed, deck generation raised, or the deck came
        back empty).
    """
    need_more = ("I need a little more to work with — paste a paragraph of "
                 "notes or pick a PDF, and I'll build your deck.")

    source = ""
    # Set when a scanned/image-only PDF is turned into a deck by the vision model
    # (no text to chunk); `None` means "fall through to the text path below".
    deck_from_images = None
    if text and text.strip():
        source = text.strip()[:MAX_TEXT_CHARS]  # cap to bound chunking work
        if len(source) < 40:
            return JSONResponse({"error": need_more}, status_code=400)
    elif sample == "bio":
        source = SAMPLE_NOTES
    elif sample == "scan":
        # Render a slide to an image and run the OCR/vision path — demonstrates
        # image-PDF support end-to-end (stub returns the canned deck).
        try:
            deck_from_images = await run_in_threadpool(
                cp.generate_deck_from_images, [_render_scan_sample()])
        except Exception as e:  # noqa: BLE001 — same friendly copy as the text path
            return _deck_failure(e)
    elif file is not None:
        suffix = Path(file.filename or "upload").suffix or ".txt"
        # The temp file is created with delete=False, so this handler owns its
        # cleanup. Everything from creation onwards sits inside one try/finally:
        # a failed read or write mid-upload must not leave the file on disk.
        tmp_path = None
        too_large = False
        try:
            # Stream to a temp file in capped chunks so an oversized upload never
            # gets fully buffered in memory.
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                tmp_path = tmp.name
                size = 0
                while chunk := await file.read(1024 * 1024):
                    size += len(chunk)
                    if size > MAX_UPLOAD_BYTES:
                        too_large = True
                        break
                    tmp.write(chunk)
            if too_large:
                return JSONResponse(
                    {"error": f"That file is too large (limit "
                              f"{MAX_UPLOAD_BYTES // (1024 * 1024)} MB). Upload a "
                              "smaller PDF or paste the notes instead."},
                    status_code=413,
                )
            # Extraction (PDF parsing) is blocking — keep it off the event loop.
            source = await run_in_threadpool(cp.extract_text, tmp_path)
        except cp.ExtractionError as e:
            # A scanned/image-only PDF has no text to chunk — render its pages and
            # let the multimodal model read them directly. Fall back to the
            # original error if it's not image-only or no pages render.
            try:
                if await run_in_threadpool(cp.is_image_only_pdf, tmp_path):
                    images = await run_in_threadpool(cp.render_pdf_images, tmp_path)
                    if images:
                        deck_from_images = await run_in_threadpool(
                            cp.generate_deck_from_images, images)
            except Exception as vision_err:  # noqa: BLE001 — friendly copy, never crash
                return _deck_failure(vision_err)
            if deck_from_images is None:
                return JSONResponse({"error": str(e)}, status_code=422)
        finally:
            # tmp_path stays None only if the temp file was never created.
            if tmp_path is not None:
                os.unlink(tmp_path)
    else:
        return JSONResponse({"error": need_more}, status_code=400)

    if deck_from_images is not None:
        deck = deck_from_images
    else:
        try:
            # Deck generation hits the model — run it in a worker thread so a slow
            # call doesn't block every other request on the event loop.
            deck = await run_in_threadpool(cp.generate_deck, source)
        except Exception as e:  # noqa: BLE001 — surface as friendly copy, never crash
            return _deck_failure(e)
    if not deck:
        return JSONResponse(
            {"error": "Couldn't generate questions from that. Try different material."},
            status_code=422,
        )

    session = le.init_session(deck)
    card = le.next_card(session)
    sid = uuid.uuid4().hex
    put_session(sid, session)
    return {"sid": sid, "card": _card_out(card), "view": _view(session)}


@app.post("/api/grade")
async def api_grade(body: GradeBody):
    """Grade the learner's answer and report the updated session state.

    Returns:
        404 ``{"error": "session expired"}`` for an unknown/expired ``sid``;
        ``{"done": True, "view"}`` when ``le.grade_and_adapt`` yields no grade;
        otherwise ``{"grade", "injectedIds", "view"}``, where ``injectedIds``
        lists the ids of the follow-up cards returned alongside the grade.
    """
    if (session := get_session(body.sid)) is None:
        return JSONResponse({"error": "session expired"}, status_code=404)

    # The study step (grading + follow-up generation) calls the model, so it
    # runs in a worker thread to keep the event loop free.
    answer = body.answer or ""
    grade, follow_ups = await run_in_threadpool(le.grade_and_adapt, session, answer)
    if grade is None:
        return {"done": True, "view": _view(session)}

    injected_ids = [follow_up["id"] for follow_up in follow_ups]
    put_session(body.sid, session)
    grade_payload = {
        "score": grade["score"],
        "correct": grade["correct"],
        "explanation": grade["explanation"],
        "missed": grade["missed_concept"],
    }
    return {
        "grade": grade_payload,
        "injectedIds": injected_ids,
        "view": _view(session),
    }


@app.post("/api/regenerate")
async def api_regenerate(body: RegenBody):
    """Swap the current card for a regenerated one in the requested direction.

    Returns:
        404 ``{"error": "session expired"}`` for an unknown/expired ``sid``;
        ``{"card": None, "view"}`` when the session has no current card;
        otherwise ``{"card", "view"}`` where the card carries a ``diffLabel``
        of ``"harder"`` or ``"easier"``.
    """
    if (session := get_session(body.sid)) is None:
        return JSONResponse({"error": "session expired"}, status_code=404)

    current = le.next_card(session)
    if current is None:
        return {"card": None, "view": _view(session)}

    # Regeneration hits the model — keep it off the event loop.
    replacement = await run_in_threadpool(cp.regenerate, current, body.direction)
    session = le.replace_card(session, current["id"], replacement)
    put_session(body.sid, session)

    payload = _card_out(replacement)
    # Anything other than an exact "harder" is labelled "easier".
    if body.direction == "harder":
        payload["diffLabel"] = "harder"
    else:
        payload["diffLabel"] = "easier"
    return {"card": payload, "view": _view(session)}


@app.post("/api/next")
async def api_next(body: SidBody):
    """Return the session's next card together with the refreshed view.

    Responds 404 ``{"error": "session expired"}`` for an unknown/expired ``sid``.
    """
    if (session := get_session(body.sid)) is None:
        return JSONResponse({"error": "session expired"}, status_code=404)
    upcoming = le.next_card(session)
    return {"card": _card_out(upcoming), "view": _view(session)}


@app.post("/api/recap")
async def api_recap(body: SidBody):
    """Return the session recap from ``le.recap`` plus the current view.

    Responds 404 ``{"error": "session expired"}`` for an unknown/expired ``sid``.
    """
    if (session := get_session(body.sid)) is None:
        return JSONResponse({"error": "session expired"}, status_code=404)
    # The recap's reflection line hits the model, so build it in a worker thread.
    summary = await run_in_threadpool(le.recap, session)
    return {"recap": summary, "view": _view(session)}


@app.post("/api/restart")
async def api_restart(body: SidBody):
    """Restart study on the same source deck with a clean session (Recap → restart).

    Cards carrying a ``parent_id`` (injected follow-ups) are dropped; the fresh
    session replaces the old one under the same ``sid``. Responds 404
    ``{"error": "session expired"}`` for an unknown/expired ``sid``.
    """
    if (previous := get_session(body.sid)) is None:
        return JSONResponse({"error": "session expired"}, status_code=404)

    # Keep only the original cards — anything with a parent was injected.
    originals = []
    for deck_card in previous["deck"]:
        if not deck_card.get("parent_id"):
            originals.append(deck_card)

    restarted = le.init_session(originals)
    first_card = le.next_card(restarted)
    put_session(body.sid, restarted)
    return {"card": _card_out(first_card), "view": _view(restarted)}


# ---- frontend --------------------------------------------------------------

@app.get("/api/sample/scan.png")
async def sample_scan_png():
    """Serve the scanned-slides sample preview as a PNG.

    The bytes come from ``_scan_sample_png()``, which uses the same renderer as
    the vision/OCR sample path, so the thumbnail shows what actually gets OCR'd.
    """
    png_bytes = _scan_sample_png()
    return Response(content=png_bytes, media_type="image/png")


@app.get("/")
async def index():
    """Serve the custom frontend's ``index.html`` from the ``FRONTEND`` directory."""
    page = FRONTEND.joinpath("index.html")
    return FileResponse(page)


# HF runs `python server.py` (README app_file) and proxies port 7860. `gradio.Server`
# launches at MODULE TOP LEVEL; on a Space gradio binds 0.0.0.0:7860 automatically.
# We launch non-blocking and hold the main thread ourselves so the (daemon) uvicorn
# server keeps serving. (Locally, set GRADIO_SERVER_PORT to use a different port.)
#
# Two deploy gotchas, both learned the hard way:
#   * gradio is pinned to 6.10.0. On 6.17.x a custom gradio.Server doesn't stay up
#     under the Space runtime. 6.10.0 is the version gradio's own `Server` example
#     ships, and it still allows huggingface-hub<1.0 (needed by the real model).
#   * Stub mode registers no `@spaces.GPU` function, which makes a ZeroGPU Space's
#     `spaces` reload server error out and flip the Space to RUNTIME_ERROR even
#     though uvicorn serves fine. So the stub demo runs on CPU-basic hardware. Once
#     the real model (with `@spaces.GPU`) is wired, switch hardware back to ZeroGPU.
# Keep the README sdk_version and requirements*.txt gradio pins in lockstep.
# prevent_thread_lock=True: launch() returns instead of blocking, per the
# "launch non-blocking" note above.
app.launch(show_error=True, prevent_thread_lock=True)
# Park the main thread on an Event that nothing ever sets, so the process stays
# alive for the server started above.
threading.Event().wait()