""" Recall — Module B: Learning Engine. OWNER: Nikolai The brain: scheduling (SM-2-lite), grading, adaptation, follow-up generation, and the recap. Runs in STUB mode out of the box. Public signatures are fixed — app.py depends on them. """ from __future__ import annotations import llm from schema import ( Card, GradeResult, Session, new_card, new_card_state, new_grade, validate_card, ) # STUB is owned by llm (single source of truth) and read dynamically as # `llm.STUB` so every module agrees and runtime/reload changes are honored. # ---- Session lifecycle ----------------------------------------------------- def init_session(deck: list[Card]) -> Session: states = {c["id"]: new_card_state(c["id"]) for c in deck} return Session( deck=list(deck), states=states, queue=[c["id"] for c in deck], history=[], streak=0, ) WEAK_TOPIC_THRESHOLD = 3.0 # avg grade below this = a topic the user is weak on WEAK_LOOKAHEAD = 4 # how far down the queue we'll reach to surface a weak card def next_card(session: Session) -> Card | None: """ Return the next card to study. Among the next few due cards we bias toward the user's weakest topic (lowest average grade so far) — so once the model sees you're shaky on a topic, that topic comes back sooner. With no history yet this is a no-op and we serve the queue in order. The chosen card is rotated to the front of the queue so `apply_result`'s "pop the front" contract still holds. """ queue = session["queue"] if not queue: return None idx = _weak_biased_index(session) if idx > 0: queue.insert(0, queue.pop(idx)) # bring the weak-topic card to the front return _find(session, queue[0]) # ---- Grading --------------------------------------------------------------- def grade_answer(card: Card, user_answer: str) -> GradeResult: if llm.STUB: # Trivial heuristic so the stub demo "feels" responsive. ans = (user_answer or "").strip().lower() ref = card["answer"].strip().lower() overlap = len(set(ans.split()) & set(ref.split())) score = 5 if overlap >= 2 else (3 if overlap == 1 else 1) expl = ("Correct — you hit the key idea." if score >= 3 else f"Not quite. Expected something like: {card['answer']}") return new_grade(score, expl, missed_concept=card["topic"]) messages = [ {"role": "system", "content": "You grade a student's answer against a reference answer. " "Return ONLY a JSON object with keys: " "score (integer 0-5), explanation (string for the student), " "missed_concept (short string naming what they got wrong, or \"\")."}, {"role": "user", "content": f"Question: {card['question']}\nReference answer: {card['answer']}\n" f"Student answer: {user_answer}\nGrade it."}, ] # Parser + one repair retry; safe default if the model never returns JSON. data = llm.chat_json(messages, max_tokens=256) if not _valid_grade(data): return new_grade( 2, "Couldn't grade automatically — compare your answer to the " f"reference: {card['answer']}", card["topic"], ) return new_grade( int(data["score"]), str(data.get("explanation", "")).strip() or f"Reference answer: {card['answer']}", str(data.get("missed_concept") or card["topic"]).strip(), ) def _valid_grade(data) -> bool: """A grade is usable only if it carries a numeric, in-range score.""" if not isinstance(data, dict) or "score" not in data: return False try: return 0 <= int(data["score"]) <= 5 except (TypeError, ValueError): return False # ---- Adaptation: SM-2-lite ------------------------------------------------- def apply_result(session: Session, card: Card, grade: GradeResult, user_answer: str = "") -> Session: st = session["states"][card["id"]] st["reps"] += 1 st["last_grade"] = grade["score"] # remove this card from the front of the queue if session["queue"] and session["queue"][0] == card["id"]: session["queue"].pop(0) if grade["correct"]: st["ease"] = min(3.0, st["ease"] + 0.1) st["interval"] = max(2, int(st["interval"] * st["ease"])) session["streak"] += 1 _insert_at(session, card["id"], st["interval"]) # comes back later else: st["lapses"] += 1 st["ease"] = max(1.3, st["ease"] - 0.2) st["interval"] = 1 session["streak"] = 0 _insert_at(session, card["id"], 2) # comes back soon session["history"].append({ "card_id": card["id"], "user_answer": user_answer, "grade": grade["score"], "topic": card["topic"], }) return session def generate_followups(card: Card, grade: GradeResult, n: int = 2) -> list[Card]: """The money feature: new cards drilling exactly what was missed.""" if llm.STUB: # Two canned drills so the demo shows the design's "+2 new questions" # adaptive moment. The real path below returns up to `n`. prompts = [ f"[follow-up] In your own words, what's the key idea behind: {card['question']}", f"[follow-up] Restate: {card['question']}", ] return [ new_card( p, card["answer"], topic=card["topic"], source_chunk=card["source_chunk"], difficulty=max(1, card["difficulty"] - 1), parent_id=card["id"], ) for p in prompts[:n] ] messages = [ {"role": "system", "content": "The student missed a concept. Generate follow-up quiz questions that " "drill it. Return ONLY a JSON array with keys: question, answer, topic."}, {"role": "user", "content": f"Original question: {card['question']}\n" f"Missed concept: {grade['missed_concept']}\n" f"Source: {card['source_chunk']}\nGenerate {n} simpler follow-ups."}, ] data = llm.extract_json(llm.chat(messages, max_tokens=400)) out: list[Card] = [] if isinstance(data, list): for item in data[:n]: if not isinstance(item, dict): continue c = new_card( str(item.get("question", "")).strip(), str(item.get("answer", "")).strip(), topic=str(item.get("topic", card["topic"])).strip() or card["topic"], source_chunk=card["source_chunk"], difficulty=max(1, card["difficulty"] - 1), parent_id=card["id"], ) if validate_card(c): out.append(c) return out def add_followups(session: Session, cards: list[Card]) -> Session: """Register generated follow-ups into the deck + queue (near-term).""" for c in cards: session["deck"].append(c) session["states"][c["id"]] = new_card_state(c["id"]) _insert_at(session, c["id"], 1) return session def grade_and_adapt(session: Session, user_answer: str) -> tuple[GradeResult | None, list[Card]]: """One full study step: grade the current card, apply the result, and on a miss generate + enqueue follow-ups. Returns (grade, injected_cards), with grade None only when the queue is empty. This is the canonical study-loop sequence. Both the Gradio app and the JSON server call it instead of re-implementing the next_card → grade → apply → follow-up dance, so the loop can never drift between the two frontends. """ card = next_card(session) if card is None: return None, [] grade = grade_answer(card, user_answer or "") apply_result(session, card, grade, user_answer=user_answer or "") injected: list[Card] = [] if not grade["correct"]: fups = generate_followups(card, grade) if fups: add_followups(session, fups) injected = fups return grade, injected def replace_card(session: Session, old_id: str, new: Card) -> Session: """Swap a card in place (used by the difficulty toggle, NAH-32). Replaces the deck entry, resets its CardState (it's effectively a new question), and rewrites every queue occurrence so the queue's "pop the front" contract still holds. """ session["deck"] = [new if c["id"] == old_id else c for c in session["deck"]] session["states"].pop(old_id, None) session["states"][new["id"]] = new_card_state(new["id"]) session["queue"] = [new["id"] if cid == old_id else cid for cid in session["queue"]] return session # ---- Recap ----------------------------------------------------------------- def recap(session: Session) -> dict: grades_by_topic: dict[str, list[int]] = {} for h in session["history"]: grades_by_topic.setdefault(h["topic"], []).append(h["grade"]) # Same threshold the scheduler uses to decide what to resurface, so a topic # the recap calls "weak" is exactly one next_card brings back sooner. mastered = [t for t, g in grades_by_topic.items() if _avg(g) >= WEAK_TOPIC_THRESHOLD] weak = [t for t, g in grades_by_topic.items() if _avg(g) < WEAK_TOPIC_THRESHOLD] if llm.STUB: reflection = ("Solid start. You're strong on " f"{', '.join(mastered) or 'nothing yet'}; " f"{', '.join(weak) or 'no weak spots'} could use another pass.") else: msg = [ {"role": "system", "content": "Write one encouraging sentence reflecting on a study session."}, {"role": "user", "content": f"Mastered: {mastered}. Weak: {weak}. Streak: {session['streak']}."}, ] reflection = llm.chat(msg, max_tokens=80) return { "mastered": mastered, "weak_topics": weak, "reflection": reflection, "streak": session["streak"], "answered": len(session["history"]), } # ---- helpers --------------------------------------------------------------- def _find(session: Session, card_id: str) -> Card | None: return next((c for c in session["deck"] if c["id"] == card_id), None) def _topic_averages(session: Session) -> dict[str, float]: """Average grade per topic across answered history (empty until first answer).""" grades: dict[str, list[int]] = {} for h in session["history"]: grades.setdefault(h["topic"], []).append(h["grade"]) return {t: _avg(g) for t, g in grades.items()} def _weak_biased_index(session: Session) -> int: """ Index into the queue of the card to serve next. Looks at the next WEAK_LOOKAHEAD cards and picks the one whose topic has the lowest average grade, as long as that topic is actually weak (avg < threshold). Returns 0 (keep normal order) when nothing in reach is weak or there's no history yet. """ queue = session["queue"] averages = _topic_averages(session) if not averages: return 0 best_idx, best_avg = 0, None for i, card_id in enumerate(queue[:WEAK_LOOKAHEAD]): card = _find(session, card_id) if card is None: continue avg = averages.get(card["topic"]) if avg is None or avg >= WEAK_TOPIC_THRESHOLD: continue if best_avg is None or avg < best_avg: best_idx, best_avg = i, avg return best_idx def _insert_at(session: Session, card_id: str, pos: int) -> None: pos = max(0, min(pos, len(session["queue"]))) session["queue"].insert(pos, card_id) def _avg(xs: list[int]) -> float: return sum(xs) / len(xs) if xs else 0.0