File size: 12,651 Bytes
da26903
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40c272a
da26903
40c272a
da26903
 
 
 
 
40c272a
da26903
 
 
 
 
 
40c272a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
da26903
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40c272a
 
 
 
da26903
 
 
40c272a
da26903
40c272a
da26903
 
 
 
40c272a
 
 
da26903
 
40c272a
 
 
 
 
 
 
da26903
40c272a
 
 
 
 
 
 
 
 
da26903
40c272a
da26903
 
 
40c272a
da26903
 
40c272a
 
 
da26903
 
 
 
40c272a
da26903
 
 
 
40c272a
da26903
 
 
 
 
40c272a
da26903
 
 
 
 
40c272a
da26903
 
 
40c272a
 
 
 
da26903
 
40c272a
 
da26903
 
 
 
 
 
 
 
 
 
 
 
 
 
40c272a
da26903
 
 
 
 
40c272a
da26903
40c272a
da26903
 
 
 
 
 
 
40c272a
da26903
 
 
 
 
 
 
 
40c272a
da26903
 
40c272a
da26903
 
 
 
 
 
40c272a
da26903
 
 
 
 
 
40c272a
da26903
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c1871d1
da26903
c1871d1
 
 
 
 
 
 
 
da26903
 
 
 
 
 
 
c1871d1
 
 
e92c089
b7ffc8d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
"""
Recall — custom frontend server (NAH-36).

Serves the polished `Recall.dc.html` design (frontend/index.html) and exposes a
thin JSON API over the EXISTING backend. The learning/content logic and the
`schema.py` data contract are treated as an API and are NOT modified here.

The Session dict lives server-side, keyed by a short id the client carries
around (mirrors the single-session gr.State model the Gradio app uses) — so the
reference answers in the deck never leave the server.

Run it (stub mode is on by default):

    pip install -r requirements.txt
    python server.py            # http://127.0.0.1:7860

Flip RECALL_STUB=0 once the real model is wired:

    RECALL_STUB=0 python server.py

The legacy Gradio UI is still mounted at /gradio.
"""
from __future__ import annotations

import os
import tempfile
import time
import uuid
from collections import OrderedDict
from pathlib import Path

from fastapi import FastAPI, File, Form, UploadFile
from fastapi.responses import FileResponse, JSONResponse
from pydantic import BaseModel
from starlette.concurrency import run_in_threadpool

import content_pipeline as cp
import learning_engine as le

# Directory holding the static frontend; resolved next to this file.
FRONTEND = Path(__file__).parent / "frontend"

# Single-process session store. Bounded so a public Space can't be OOM'd by a
# client looping /api/generate: sessions are evicted least-recently-used past
# MAX_SESSIONS and expire after SESSION_TTL_SECONDS of inactivity.
# All limits below are read once, at import time, from the environment; a
# non-integer value makes int() raise and the module fail to import.
MAX_SESSIONS = int(os.getenv("RECALL_MAX_SESSIONS", "500"))
SESSION_TTL_SECONDS = int(os.getenv("RECALL_SESSION_TTL", str(2 * 60 * 60)))  # 2h
# Caps on input size so a single request can't exhaust memory/disk.
# RECALL_MAX_UPLOAD_MB is given in megabytes and converted to bytes here.
MAX_UPLOAD_BYTES = int(os.getenv("RECALL_MAX_UPLOAD_MB", "10")) * 1024 * 1024
MAX_TEXT_CHARS = int(os.getenv("RECALL_MAX_TEXT_CHARS", "200000"))  # ~50k tokens

# sid -> (session, last_access_epoch). OrderedDict gives O(1) LRU eviction:
# every read/write moves the entry to the end, so the front is always the
# least-recently-used session.
SESSIONS: "OrderedDict[str, tuple[dict, float]]" = OrderedDict()


def _purge_expired(now: float | None = None) -> None:
    """Remove every session idle for longer than SESSION_TTL_SECONDS.

    `now` is the reference epoch time used for the age check; when omitted
    the current wall-clock time is used.
    """
    if now is None:
        now = time.time()
    # Gather the ids first — the store must not change size mid-iteration.
    expired_ids = []
    for key, entry in SESSIONS.items():
        last_access = entry[1]
        if now - last_access > SESSION_TTL_SECONDS:
            expired_ids.append(key)
    for key in expired_ids:
        SESSIONS.pop(key, None)


def get_session(sid: str) -> dict | None:
    """Return the live session stored under `sid`, refreshing its recency.

    Expired sessions are purged before the lookup, so an id that has outlived
    the TTL yields None exactly like an unknown id (callers turn None into the
    friendly 'session expired' response).
    """
    _purge_expired()
    try:
        live, _last_seen = SESSIONS[sid]
    except KeyError:
        return None
    # Re-stamp the entry and push it to the most-recently-used end.
    SESSIONS[sid] = (live, time.time())
    SESSIONS.move_to_end(sid)
    return live


def put_session(sid: str, session: dict) -> None:
    """Save `session` under `sid` as the most-recently-used entry.

    Expired sessions are purged first; afterwards the oldest entries are
    dropped from the front of the store until it fits within MAX_SESSIONS.
    """
    _purge_expired()
    SESSIONS[sid] = (session, time.time())
    SESSIONS.move_to_end(sid)
    # Number of entries over the cap (zero or negative means nothing to evict).
    overflow = len(SESSIONS) - MAX_SESSIONS
    for _ in range(overflow):
        SESSIONS.popitem(last=False)

# Built-in photosynthesis / cellular-respiration notes. /api/generate uses
# them as the source text when called with sample="bio", so the Upload
# screen's sample chip works even with no real PDF on disk.
SAMPLE_NOTES = (
    "Photosynthesis happens in the chloroplast. The light-dependent reactions "
    "occur in the thylakoid membranes, where water is split, ATP and NADPH are "
    "produced, and oxygen is released. The Calvin cycle takes place in the "
    "stroma, where the enzyme RuBisCO fixes CO2 onto RuBP. Cellular respiration "
    "occurs in the mitochondria; most ATP is made during oxidative "
    "phosphorylation, as the electron transport chain pumps protons and oxygen "
    "acts as the final electron acceptor, forming water."
)

# Error copy returned (HTTP 422) when /api/generate is called with
# sample="scan". Reused verbatim from content_pipeline's image-only branch so
# the "scanned PDF" sample chip demonstrates the real error copy even in stub
# mode.
# NOTE(review): the two strings are kept in sync by hand — confirm against
# content_pipeline if that message changes.
IMAGE_ONLY_MSG = (
    "This PDF has no selectable text (looks scanned/image-only). "
    "Try a text-based PDF, or paste the notes instead."
)

# The ASGI application every route below is registered on. It is rebound near
# the end of this module if the optional legacy Gradio UI mounts successfully.
app = FastAPI(title="Recall")


# ---- serialization ---------------------------------------------------------

def _card_out(card: dict | None) -> dict | None:
    """The client never needs (or should see) the reference answer or the raw
    source chunk — strip the card down to what the UI renders."""
    if not card:
        return None
    return {
        "id": card["id"],
        "question": card["question"],
        "topic": card["topic"],
        "difficulty": card["difficulty"],
        "parent_id": card.get("parent_id"),
    }


def _view(session: dict) -> dict:
    """Display state the header / mastery bars / deck rail are built from."""
    deck = session["deck"]
    history = session["history"]
    answered = len(history)
    total = len(deck)

    stats: dict[str, dict] = {}
    for h in history:
        s = stats.setdefault(h["topic"], {"correct": 0, "total": 0})
        s["total"] += 1
        if h["grade"] >= 3:
            s["correct"] += 1

    return {
        "total": total,
        "answered": answered,
        "posDisplay": min(answered + 1, total) if total else 0,
        "streak": session["streak"],
        "topicStats": stats,
        "rail": [
            {"id": c["id"], "topic": c["topic"], "injected": bool(c.get("parent_id"))}
            for c in deck
        ],
    }


# ---- request models --------------------------------------------------------

class SidBody(BaseModel):
    # JSON body for the endpoints that only need to identify the session
    # (/api/next, /api/recap, /api/restart).
    sid: str  # session id handed out by /api/generate


class GradeBody(BaseModel):
    # JSON body for /api/grade.
    sid: str  # session id handed out by /api/generate
    answer: str = ""  # the learner's answer; may be omitted/empty


class RegenBody(BaseModel):
    # JSON body for /api/regenerate.
    sid: str  # session id handed out by /api/generate
    # Not validated here: the value is passed straight to cp.regenerate, and
    # anything other than "harder" is labelled "easier" in the response.
    direction: str  # "harder" | "easier"


# ---- API -------------------------------------------------------------------

@app.post("/api/generate")
async def api_generate(
    text: str = Form(""),
    sample: str = Form(""),
    file: UploadFile | None = File(None),
):
    """Build a study deck and open a new server-side session for it.

    The source material is chosen in this order: non-blank pasted `text`
    (truncated to MAX_TEXT_CHARS), the built-in notes when `sample == "bio"`,
    then the uploaded `file`. `sample == "scan"` short-circuits to the
    image-only error so the UI can demonstrate that state.

    Returns {"sid", "card", "view"} on success. Failures come back as JSON
    {"error": ...} with status 400 (nothing usable supplied, or pasted text
    shorter than 40 characters), 413 (upload larger than MAX_UPLOAD_BYTES) or
    422 (text extraction failed, or no deck could be generated).
    """
    # The scanned-slides sample shows the real image-only error state.
    if sample == "scan":
        return JSONResponse({"error": IMAGE_ONLY_MSG}, status_code=422)

    need_more = ("I need a little more to work with — paste a paragraph of "
                 "notes or pick a PDF, and I'll build your deck.")

    source = ""
    if text and text.strip():
        source = text.strip()[:MAX_TEXT_CHARS]  # cap to bound chunking work
        if len(source) < 40:
            return JSONResponse({"error": need_more}, status_code=400)
    elif sample == "bio":
        source = SAMPLE_NOTES
    elif file is not None:
        suffix = Path(file.filename or "upload").suffix or ".txt"
        # delete=False: the file is closed before extraction opens it again by
        # path, so removal is done explicitly in the `finally` below.
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        tmp_path = tmp.name
        too_large = False
        try:
            # Stream to the temp file in capped chunks so an oversized upload
            # never gets fully buffered in memory. The copy sits inside the
            # try so a failed read/write (e.g. disk full) still removes the
            # partially written file instead of leaking it.
            with tmp:
                size = 0
                while chunk := await file.read(1024 * 1024):
                    size += len(chunk)
                    if size > MAX_UPLOAD_BYTES:
                        too_large = True
                        break
                    tmp.write(chunk)
            if too_large:
                return JSONResponse(
                    {"error": f"That file is too large (limit "
                              f"{MAX_UPLOAD_BYTES // (1024 * 1024)} MB). Upload a "
                              "smaller PDF or paste the notes instead."},
                    status_code=413,
                )
            # Extraction (PDF parsing) is blocking — keep it off the event loop.
            source = await run_in_threadpool(cp.extract_text, tmp_path)
        except cp.ExtractionError as e:
            return JSONResponse({"error": str(e)}, status_code=422)
        finally:
            os.unlink(tmp_path)
    else:
        return JSONResponse({"error": need_more}, status_code=400)

    try:
        # Deck generation hits the model — run it in a worker thread so a slow
        # call doesn't block every other request on the event loop.
        deck = await run_in_threadpool(cp.generate_deck, source)
    except Exception as e:  # noqa: BLE001 — surface as friendly copy, never crash
        return JSONResponse(
            {"error": f"Couldn't build a deck from that ({type(e).__name__}). "
                      "Try different material."},
            status_code=422,
        )
    if not deck:
        return JSONResponse(
            {"error": "Couldn't generate questions from that. Try different material."},
            status_code=422,
        )

    # The full deck (including reference answers) stays server-side; the
    # client only receives the session id and the stripped-down first card.
    session = le.init_session(deck)
    card = le.next_card(session)
    sid = uuid.uuid4().hex
    put_session(sid, session)
    return {"sid": sid, "card": _card_out(card), "view": _view(session)}


@app.post("/api/grade")
async def api_grade(body: GradeBody):
    """Grade the submitted answer and report any follow-up cards injected.

    Responds 404 {"error": "session expired"} for an unknown/expired sid, and
    {"done": True, "view"} when the learning engine returns no grade.
    Otherwise returns the grade summary, the ids of the follow-up cards and
    the refreshed view.
    """
    session = get_session(body.sid)
    if session is None:
        return JSONResponse({"error": "session expired"}, status_code=404)

    # Grading + follow-up generation hit the model; the whole study step runs
    # in a worker thread so the event loop stays free.
    answer = body.answer or ""
    grade, followups = await run_in_threadpool(le.grade_and_adapt, session, answer)
    if grade is None:
        return {"done": True, "view": _view(session)}

    followup_ids = []
    for extra in followups:
        followup_ids.append(extra["id"])
    put_session(body.sid, session)

    # Rename the engine's fields to the names the client expects.
    verdict = {
        "score": grade["score"],
        "correct": grade["correct"],
        "explanation": grade["explanation"],
        "missed": grade["missed_concept"],
    }
    return {"grade": verdict, "injectedIds": followup_ids, "view": _view(session)}


@app.post("/api/regenerate")
async def api_regenerate(body: RegenBody):
    """Swap the session's current card for a harder/easier variant.

    Responds 404 {"error": "session expired"} for an unknown/expired sid,
    {"card": None, "view"} when the session has no current card, and 422 with
    an error message when regeneration produced no card. On success returns
    the new card (plus a "diffLabel" of "harder" or "easier") and the
    refreshed view.
    """
    session = get_session(body.sid)
    if session is None:
        return JSONResponse({"error": "session expired"}, status_code=404)
    card = le.next_card(session)
    if card is None:
        return {"card": None, "view": _view(session)}
    new = await run_in_threadpool(cp.regenerate, card, body.direction)  # hits the model
    if not new:
        # An empty result would otherwise be written into the deck and then
        # crash on `out["diffLabel"]` below (_card_out returns None for it).
        # Leave the session untouched so the current card stays usable.
        return JSONResponse(
            {"error": "Couldn't regenerate that card. Try again."},
            status_code=422,
        )
    session = le.replace_card(session, card["id"], new)
    put_session(body.sid, session)
    out = _card_out(new)
    # Any direction other than "harder" is reported to the client as "easier".
    out["diffLabel"] = "harder" if body.direction == "harder" else "easier"
    return {"card": out, "view": _view(session)}


@app.post("/api/next")
async def api_next(body: SidBody):
    """Return the card `le.next_card` selects for the session, plus the view.

    Responds 404 {"error": "session expired"} for an unknown/expired sid.
    """
    session = get_session(body.sid)
    if session is not None:
        upcoming = le.next_card(session)
        return {"card": _card_out(upcoming), "view": _view(session)}
    return JSONResponse({"error": "session expired"}, status_code=404)


@app.post("/api/recap")
async def api_recap(body: SidBody):
    """Return the learning engine's recap for the session, plus the view.

    Responds 404 {"error": "session expired"} for an unknown/expired sid.
    """
    session = get_session(body.sid)
    if session is not None:
        # The recap's reflection line hits the model, so it runs in a worker
        # thread rather than on the event loop.
        summary = await run_in_threadpool(le.recap, session)
        return {"recap": summary, "view": _view(session)}
    return JSONResponse({"error": "session expired"}, status_code=404)


@app.post("/api/restart")
async def api_restart(body: SidBody):
    """Start the same source deck over in a clean session (Recap → restart).

    The fresh session replaces the old one under the same sid. Responds 404
    {"error": "session expired"} for an unknown/expired sid.
    """
    current = get_session(body.sid)
    if current is None:
        return JSONResponse({"error": "session expired"}, status_code=404)

    # Keep only the original cards: anything with a parent_id was injected as
    # a follow-up during the previous run.
    originals = []
    for item in current["deck"]:
        if item.get("parent_id"):
            continue
        originals.append(item)

    restarted = le.init_session(originals)
    first_card = le.next_card(restarted)
    put_session(body.sid, restarted)
    return {"card": _card_out(first_card), "view": _view(restarted)}


# ---- frontend --------------------------------------------------------------

@app.get("/")
async def index():
    """Serve the frontend page (frontend/index.html) at the site root."""
    page = FRONTEND / "index.html"
    return FileResponse(page)


# Keep the original Gradio Blocks app available as a fallback / debug surface.
# Optional: the custom frontend + API don't need Gradio, so a broken/missing
# Gradio install never takes the new server down.
try:
    import gradio as gr

    # `app` here is the legacy module being imported from (presumably app.py),
    # not the FastAPI instance bound to the name `app` in this file.
    from app import demo as _gradio_demo

    # Rebinds `app` to whatever mount_gradio_app returns, with the legacy UI
    # served under /gradio.
    app = gr.mount_gradio_app(app, _gradio_demo, path="/gradio")

    # On HuggingFace Spaces, the Gradio SDK looks for a module-level `demo`
    # object.  `gr.mount_gradio_app` returns a FastAPI app, not a Gradio demo,
    # so the SDK would fall back to running `python server.py` — which would
    # then call uvicorn and collide with the Space's own port.  Expose the
    # underlying Gradio Blocks as `demo` so the SDK picks it up directly.
    demo = _gradio_demo
except Exception as _e:  # noqa: BLE001
    # Deliberately broad: any failure importing or mounting the legacy UI is
    # reported on stdout and otherwise ignored, leaving `app` as it was.
    print(f"[recall] legacy Gradio UI not mounted ({type(_e).__name__}: {_e})")


if __name__ == "__main__":
    import uvicorn

    # HuggingFace Spaces sets SPACE_ID and serves the app through the Gradio
    # SDK; starting uvicorn there would fight the Space's own server for the
    # port. Only a local run (no SPACE_ID) launches uvicorn itself.
    on_spaces = bool(os.getenv("SPACE_ID"))
    if not on_spaces:
        port = int(os.getenv("PORT", "7860"))
        uvicorn.run(app, host="0.0.0.0", port=port)