"""
Recall — custom frontend server (NAH-36).
Serves the polished `Recall.dc.html` design (frontend/index.html) and exposes a
thin JSON API over the EXISTING backend. The learning/content logic and the
`schema.py` data contract are treated as an API and are NOT modified here.
The Session dict lives server-side, keyed by a short id the client carries
around (mirrors the single-session gr.State model the Gradio app uses) — so the
reference answers in the deck never leave the server.

Run it (stub mode is on by default):

    pip install -r requirements.txt
    python server.py                # http://127.0.0.1:7860

Flip RECALL_STUB=0 once the real model is wired:

    RECALL_STUB=0 python server.py

The legacy Gradio UI is still mounted at /gradio whenever Gradio and the
original `app` module import cleanly.
"""
from __future__ import annotations
import os
import tempfile
import time
import uuid
from collections import OrderedDict
from pathlib import Path
from fastapi import FastAPI, File, Form, UploadFile
from fastapi.responses import FileResponse, JSONResponse
from pydantic import BaseModel
from starlette.concurrency import run_in_threadpool
import content_pipeline as cp
import learning_engine as le
def _env_int(name: str, default: int) -> int:
    """Read an integer setting from the environment, falling back to *default*.

    Evaluated once at import time; a non-integer value raises ValueError.
    """
    return int(os.getenv(name, str(default)))


# Directory holding the static frontend (sits next to this module).
FRONTEND = Path(__file__).parent.joinpath("frontend")

# Single-process session store. Bounded so a public Space can't be OOM'd by a
# client looping /api/generate: sessions are evicted least-recently-used past
# MAX_SESSIONS and expire after SESSION_TTL_SECONDS of inactivity.
MAX_SESSIONS = _env_int("RECALL_MAX_SESSIONS", 500)
SESSION_TTL_SECONDS = _env_int("RECALL_SESSION_TTL", 2 * 60 * 60)  # 2h

# Caps on input size so a single request can't exhaust memory/disk.
MAX_UPLOAD_BYTES = _env_int("RECALL_MAX_UPLOAD_MB", 10) * 1024 * 1024
MAX_TEXT_CHARS = _env_int("RECALL_MAX_TEXT_CHARS", 200000)  # ~50k tokens

# sid -> (session, last_access_epoch). OrderedDict gives O(1) LRU eviction.
SESSIONS: "OrderedDict[str, tuple[dict, float]]" = OrderedDict()
def _purge_expired(now: float | None = None) -> None:
    """Drop every session idle for longer than SESSION_TTL_SECONDS.

    Args:
        now: Reference time in epoch seconds; defaults to ``time.time()``.
            Mainly useful for passing a fixed clock.
    """
    if now is None:
        now = time.time()
    # Iterate over a snapshot: the store can't be resized while it is iterated.
    for sid, (_session, last_seen) in list(SESSIONS.items()):
        if now - last_seen > SESSION_TTL_SECONDS:
            SESSIONS.pop(sid, None)
def get_session(sid: str) -> dict | None:
    """Look up a live session and refresh its position in the LRU order.

    Expired sessions are purged first, so an expired id behaves exactly like an
    unknown one.

    Returns:
        The session dict, or None when the id is unknown/expired (callers
        already turn that into a friendly 'session expired' response).
    """
    _purge_expired()
    try:
        session, _last_seen = SESSIONS[sid]
    except KeyError:
        return None
    # Re-stamp the access time and move the entry to the most-recent end.
    SESSIONS[sid] = (session, time.time())
    SESSIONS.move_to_end(sid)
    return session
def put_session(sid: str, session: dict) -> None:
    """Store/refresh a session, evicting the least-recently-used past the cap.

    Expired sessions are purged first. The entry for *sid* is then (re)stamped
    with the current time and moved to the most-recent end of the store.

    Args:
        sid: Session id the client carries around.
        session: Session dict to store under that id.
    """
    _purge_expired()
    SESSIONS[sid] = (session, time.time())
    SESSIONS.move_to_end(sid)
    # Never let a misconfigured cap (RECALL_MAX_SESSIONS <= 0) evict the entry
    # that was just stored, or pop from an already-empty store (KeyError).
    cap = max(1, MAX_SESSIONS)
    while len(SESSIONS) > cap:
        SESSIONS.popitem(last=False)
# The photosynthesis notes the design's "sample" affordances load. Lets the
# Upload screen's sample chip work even with no real PDF on disk.
# Used as the deck source when /api/generate receives sample == "bio".
SAMPLE_NOTES = (
    "Photosynthesis happens in the chloroplast. The light-dependent reactions "
    "occur in the thylakoid membranes, where water is split, ATP and NADPH are "
    "produced, and oxygen is released. The Calvin cycle takes place in the "
    "stroma, where the enzyme RuBisCO fixes CO2 onto RuBP. Cellular respiration "
    "occurs in the mitochondria; most ATP is made during oxidative "
    "phosphorylation, as the electron transport chain pumps protons and oxygen "
    "acts as the final electron acceptor, forming water."
)
# Reused verbatim from content_pipeline's image-only branch so the "scanned PDF"
# sample chip demonstrates the real error copy even in stub mode.
# Returned with HTTP 422 when /api/generate receives sample == "scan".
IMAGE_ONLY_MSG = (
    "This PDF has no selectable text (looks scanned/image-only). "
    "Try a text-based PDF, or paste the notes instead."
)
# The ASGI application every route below registers on. Rebound further down
# to the result of gr.mount_gradio_app() when the legacy Gradio UI mounts.
app = FastAPI(title="Recall")
# ---- serialization ---------------------------------------------------------
def _card_out(card: dict | None) -> dict | None:
    """Project a card down to the fields the UI renders.

    The reference answer and the raw source chunk are deliberately left out so
    they never reach the client.

    Returns:
        A dict with ``id``, ``question``, ``topic``, ``difficulty`` and
        ``parent_id`` (None when the card has no parent), or None when *card*
        is None or empty.
    """
    if not card:
        return None
    public = {key: card[key] for key in ("id", "question", "topic", "difficulty")}
    public["parent_id"] = card.get("parent_id")
    return public
def _view(session: dict) -> dict:
    """Build the display state for the header, mastery bars and deck rail.

    Reads ``deck``, ``history`` and ``streak`` from *session*. A history entry
    counts as correct when its ``grade`` is 3 or higher. ``posDisplay`` is the
    1-based position of the next card, capped at the deck size (0 for an empty
    deck). A rail entry is flagged ``injected`` when its card has a truthy
    ``parent_id``.
    """
    deck, history = session["deck"], session["history"]
    deck_size = len(deck)
    answered_count = len(history)

    # Per-topic tallies, in first-seen order.
    per_topic: dict[str, dict] = {}
    for entry in history:
        tally = per_topic.setdefault(entry["topic"], {"correct": 0, "total": 0})
        tally["total"] += 1
        if entry["grade"] >= 3:
            tally["correct"] += 1

    if deck_size:
        position = min(answered_count + 1, deck_size)
    else:
        position = 0

    return {
        "total": deck_size,
        "answered": answered_count,
        "posDisplay": position,
        "streak": session["streak"],
        "topicStats": per_topic,
        "rail": [
            {
                "id": card["id"],
                "topic": card["topic"],
                "injected": bool(card.get("parent_id")),
            }
            for card in deck
        ],
    }
# ---- request models --------------------------------------------------------
# JSON body for the endpoints that only need to identify a session
# (/api/next, /api/recap, /api/restart).
class SidBody(BaseModel):
    sid: str  # session id handed out by /api/generate
# JSON body for /api/grade.
class GradeBody(BaseModel):
    sid: str  # session id handed out by /api/generate
    answer: str = ""  # the learner's answer; an omitted field is graded as ""
# JSON body for /api/regenerate.
class RegenBody(BaseModel):
    sid: str  # session id handed out by /api/generate
    # Plain str, so the value is not validated by the model itself.
    direction: str  # "harder" | "easier"
# ---- API -------------------------------------------------------------------
@app.post("/api/generate")
async def api_generate(
    text: str = Form(""),
    sample: str = Form(""),
    file: UploadFile | None = File(None),
):
    """Build a deck from pasted text, a sample, or an uploaded file.

    Source precedence: non-blank ``text`` wins, then ``sample == "bio"``, then
    ``file``. ``sample == "scan"`` short-circuits to the image-only error.

    Returns:
        ``{"sid", "card", "view"}`` on success. Otherwise a JSON ``{"error"}``
        body with status 400 (not enough input), 413 (upload over
        MAX_UPLOAD_BYTES) or 422 (extraction/generation failed or produced no
        cards).
    """
    # The scanned-slides sample shows the real image-only error state.
    if sample == "scan":
        return JSONResponse({"error": IMAGE_ONLY_MSG}, status_code=422)

    need_more = ("I need a little more to work with — paste a paragraph of "
                 "notes or pick a PDF, and I'll build your deck.")
    source = ""
    if text and text.strip():
        source = text.strip()[:MAX_TEXT_CHARS]  # cap to bound chunking work
        if len(source) < 40:
            return JSONResponse({"error": need_more}, status_code=400)
    elif sample == "bio":
        source = SAMPLE_NOTES
    elif file is not None:
        suffix = Path(file.filename or "upload").suffix or ".txt"
        too_large = False
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        tmp_path = tmp.name
        # The temp file is created with delete=False, so everything from here
        # on sits under one try/finally: a failed read/write (client
        # disconnect, disk full) must not leave the file behind.
        try:
            # Stream to the temp file in capped chunks so an oversized upload
            # never gets fully buffered in memory. The `with` closes the file
            # before extraction reads it and before it is unlinked.
            with tmp:
                size = 0
                while chunk := await file.read(1024 * 1024):
                    size += len(chunk)
                    if size > MAX_UPLOAD_BYTES:
                        too_large = True
                        break
                    tmp.write(chunk)
            if too_large:
                return JSONResponse(
                    {"error": f"That file is too large (limit "
                              f"{MAX_UPLOAD_BYTES // (1024 * 1024)} MB). Upload a "
                              "smaller PDF or paste the notes instead."},
                    status_code=413,
                )
            # Extraction (PDF parsing) is blocking — keep it off the event loop.
            source = await run_in_threadpool(cp.extract_text, tmp_path)
        except cp.ExtractionError as e:
            return JSONResponse({"error": str(e)}, status_code=422)
        finally:
            os.unlink(tmp_path)
    else:
        return JSONResponse({"error": need_more}, status_code=400)

    try:
        # Deck generation hits the model — run it in a worker thread so a slow
        # call doesn't block every other request on the event loop.
        deck = await run_in_threadpool(cp.generate_deck, source)
    except Exception as e:  # noqa: BLE001 — surface as friendly copy, never crash
        return JSONResponse(
            {"error": f"Couldn't build a deck from that ({type(e).__name__}). "
                      "Try different material."},
            status_code=422,
        )
    if not deck:
        return JSONResponse(
            {"error": "Couldn't generate questions from that. Try different material."},
            status_code=422,
        )

    session = le.init_session(deck)
    card = le.next_card(session)
    sid = uuid.uuid4().hex
    put_session(sid, session)
    return {"sid": sid, "card": _card_out(card), "view": _view(session)}
@app.post("/api/grade")
async def api_grade(body: GradeBody):
    """Grade the submitted answer and report any follow-up cards injected.

    Returns:
        404 ``{"error": "session expired"}`` for an unknown/expired sid;
        ``{"done": True, "view"}`` when the engine returns no grade
        (presumably nothing left to grade — confirm in learning_engine);
        otherwise ``{"grade", "injectedIds", "view"}``.
    """
    if (session := get_session(body.sid)) is None:
        return JSONResponse({"error": "session expired"}, status_code=404)

    # Grading + follow-up generation hit the model; run the whole study step in
    # a worker thread so it doesn't block the event loop.
    answer = body.answer or ""
    grade, follow_ups = await run_in_threadpool(le.grade_and_adapt, session, answer)
    if grade is None:
        return {"done": True, "view": _view(session)}

    new_ids = [follow_up["id"] for follow_up in follow_ups]
    put_session(body.sid, session)
    verdict = {
        "score": grade["score"],
        "correct": grade["correct"],
        "explanation": grade["explanation"],
        "missed": grade["missed_concept"],
    }
    return {"grade": verdict, "injectedIds": new_ids, "view": _view(session)}
@app.post("/api/regenerate")
async def api_regenerate(body: RegenBody):
    """Swap the current card for a regenerated harder/easier variant.

    Returns:
        404 ``{"error": "session expired"}`` for an unknown/expired sid;
        ``{"card": None, "view"}`` when the session has no current card;
        otherwise the new card (plus a ``diffLabel``) and the refreshed view.
    """
    if (session := get_session(body.sid)) is None:
        return JSONResponse({"error": "session expired"}, status_code=404)

    current = le.next_card(session)
    if current is None:
        return {"card": None, "view": _view(session)}

    # hits the model
    replacement = await run_in_threadpool(cp.regenerate, current, body.direction)
    session = le.replace_card(session, current["id"], replacement)
    put_session(body.sid, session)

    # NOTE(review): assumes cp.regenerate always returns a non-empty card; a
    # falsy result would make _card_out return None and the next line fail.
    payload = _card_out(replacement)
    # Anything other than "harder" is labelled "easier".
    if body.direction == "harder":
        payload["diffLabel"] = "harder"
    else:
        payload["diffLabel"] = "easier"
    return {"card": payload, "view": _view(session)}
@app.post("/api/next")
async def api_next(body: SidBody):
    """Return the session's next card (client-safe fields only) and its view.

    Responds 404 ``{"error": "session expired"}`` for an unknown/expired sid.
    """
    if (session := get_session(body.sid)) is None:
        return JSONResponse({"error": "session expired"}, status_code=404)
    upcoming = le.next_card(session)
    return {"card": _card_out(upcoming), "view": _view(session)}
@app.post("/api/recap")
async def api_recap(body: SidBody):
    """Return the end-of-session recap together with the current view.

    Responds 404 ``{"error": "session expired"}`` for an unknown/expired sid.
    """
    if (session := get_session(body.sid)) is None:
        return JSONResponse({"error": "session expired"}, status_code=404)
    # The reflection line hits the model, so build the recap off the event loop.
    summary = await run_in_threadpool(le.recap, session)
    return {"recap": summary, "view": _view(session)}
@app.post("/api/restart")
async def api_restart(body: SidBody):
    """Study the same source deck again from a clean session (Recap → restart).

    The fresh session replaces the old one under the same sid. Responds 404
    ``{"error": "session expired"}`` for an unknown/expired sid.
    """
    if (session := get_session(body.sid)) is None:
        return JSONResponse({"error": "session expired"}, status_code=404)

    # Rebuild from the original cards only: anything carrying a parent_id was
    # injected during study and is dropped.
    originals = [card for card in session["deck"] if not card.get("parent_id")]
    restarted = le.init_session(originals)
    first_card = le.next_card(restarted)
    put_session(body.sid, restarted)
    return {"card": _card_out(first_card), "view": _view(restarted)}
# ---- frontend --------------------------------------------------------------
@app.get("/")
async def index():
    """Serve the frontend's index.html from the FRONTEND directory."""
    page = FRONTEND / "index.html"
    return FileResponse(page)
# Keep the original Gradio Blocks app available as a fallback / debug surface.
# Optional: the custom frontend + API don't need Gradio, so a broken/missing
# Gradio install never takes the new server down.
try:
    import gradio as gr
    from app import demo as _gradio_demo
    # Rebinds the module-level `app` to whatever mount_gradio_app returns; this
    # only happens when both imports and the mount itself succeed.
    app = gr.mount_gradio_app(app, _gradio_demo, path="/gradio")
    # On HuggingFace Spaces, the Gradio SDK looks for a module-level `demo`
    # object. `gr.mount_gradio_app` returns a FastAPI app, not a Gradio demo,
    # so the SDK would fall back to running `python server.py` — which would
    # then call uvicorn and collide with the Space's own port. Expose the
    # underlying Gradio Blocks as `demo` so the SDK picks it up directly.
    demo = _gradio_demo
except Exception as _e:  # noqa: BLE001
    # Deliberately broad: any import or mount failure is reported on stdout and
    # skipped, leaving `app` as the FastAPI instance with the routes above.
    print(f"[recall] legacy Gradio UI not mounted ({type(_e).__name__}: {_e})")
if __name__ == "__main__":
    import uvicorn

    # On HuggingFace Spaces the Gradio SDK serves the app — running uvicorn
    # here would collide with the Space's own server on the same port.
    # Locally (no SPACE_ID), start uvicorn directly.
    running_on_spaces = bool(os.getenv("SPACE_ID"))
    if not running_on_spaces:
        port = int(os.getenv("PORT", "7860"))
        uvicorn.run(app, host="0.0.0.0", port=port)
|