update
#2
by Clementio - opened
- .gitattributes +36 -1
- README.md +3 -52
- app.py +394 -483
- data/skill_encoder.csv +0 -0
- data/skill_encoder_real.csv +20 -0
- data/skill_encoder_v2.csv +0 -0
- index.html +0 -609
- {data/knowledge_maps → knowledge_maps}/cs_dag.json +0 -0
- {data/knowledge_maps → knowledge_maps}/math_dag.json +0 -0
- plrs/__init__.py +0 -30
- plrs/constraints/__init__.py +0 -3
- plrs/constraints/dag.py +0 -201
- plrs/curriculum/__init__.py +0 -3
- plrs/curriculum/loader.py +0 -144
- plrs/model/__init__.py +0 -5
- plrs/model/evaluator.py +0 -374
- plrs/model/model_loader.py +0 -116
- plrs/model/sakt.py +0 -219
- plrs/model/sakt_decay.py +0 -253
- plrs/model/trainer.py +0 -437
- plrs/pipeline.py +0 -236
- plrs/ranking/__init__.py +0 -3
- plrs/ranking/ranker.py +0 -189
- requirements.txt +3 -5
- sakt_decay_best.pt +3 -0
- models/sakt_model.pt → sakt_model.pt +0 -0
- sakt_vanilla_best.pt +3 -0
- training_curves.png +3 -0
.gitattributes
CHANGED
|
@@ -1 +1,36 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
training_curves.png filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -1,52 +1,3 @@
|
|
| 1 |
-
---
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
colorFrom: blue
|
| 5 |
-
colorTo: indigo
|
| 6 |
-
sdk: streamlit
|
| 7 |
-
sdk_version: 1.33.0
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: true
|
| 10 |
-
license: mit
|
| 11 |
-
tags:
|
| 12 |
-
- education
|
| 13 |
-
- knowledge-tracing
|
| 14 |
-
- recommendation-system
|
| 15 |
-
- pytorch
|
| 16 |
-
- transformers
|
| 17 |
-
---
|
| 18 |
-
|
| 19 |
-
# PLRS — Personalized Learning Recommendation System
|
| 20 |
-
|
| 21 |
-
> Constraint-aware personalized learning recommendations powered by Self-Attentive Knowledge Tracing (SAKT) and DAG prerequisite constraints.
|
| 22 |
-
|
| 23 |
-
## What it does
|
| 24 |
-
|
| 25 |
-
PLRS combines a SAKT transformer model with a curriculum knowledge graph to generate recommendations that are both **personalized** and **pedagogically sound**. Topics are classified into three tiers:
|
| 26 |
-
|
| 27 |
-
- ✅ **Approved** — prerequisites met, ready to learn
|
| 28 |
-
- ⚠️ **Challenging** — prerequisites partially met
|
| 29 |
-
- ❌ **Vetoed** — prerequisites not met, blocked
|
| 30 |
-
|
| 31 |
-
## Key results
|
| 32 |
-
|
| 33 |
-
| Metric | PLRS | Collaborative Filtering |
|
| 34 |
-
|--------|------|------------------------|
|
| 35 |
-
| Val AUC | **0.7692** | — |
|
| 36 |
-
| Prerequisite Violation Rate | **0.0%** | 81.3% |
|
| 37 |
-
|
| 38 |
-
## Bundled curricula
|
| 39 |
-
|
| 40 |
-
- **Nigerian Secondary School Mathematics** (38 topics, 45 edges, JSS3–SS2)
|
| 41 |
-
- **CS Fundamentals / Digital Technologies** (31 topics, 39 edges)
|
| 42 |
-
|
| 43 |
-
## Architecture
|
| 44 |
-
|
| 45 |
-
```
|
| 46 |
-
Student History → SAKT → Mastery Vector → DAG Constraint Layer → Ranker → Recommendations
|
| 47 |
-
```
|
| 48 |
-
|
| 49 |
-
## Links
|
| 50 |
-
|
| 51 |
-
- 📦 GitHub: [clementina-tom/plrs](https://github.com/clementina-tom/plrs)
|
| 52 |
-
- 📄 Paper/Report: Final Year Project, Computer Science
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
|
@@ -1,492 +1,403 @@
|
|
| 1 |
-
"""
|
| 2 |
-
PLRS — Logic Engine
|
| 3 |
-
HuggingFace Space entry point.
|
| 4 |
-
|
| 5 |
-
Loads SAKT model weights from HF Hub (Clementio/PLRS).
|
| 6 |
-
Bundles the plrs package inline (until PyPI release).
|
| 7 |
-
"""
|
| 8 |
-
|
| 9 |
-
import json
|
| 10 |
-
import sys
|
| 11 |
-
from pathlib import Path
|
| 12 |
-
|
| 13 |
-
import numpy as np
|
| 14 |
import streamlit as st
|
| 15 |
import torch
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
from
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
st.set_page_config(
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
)
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
.
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
.
|
| 60 |
-
.
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
.stat-card.green::before { --accent: #22c55e; }
|
| 76 |
-
.stat-card.amber::before { --accent: #f59e0b; }
|
| 77 |
-
.stat-card.red::before { --accent: #ef4444; }
|
| 78 |
-
.stat-card.blue::before { --accent: #3d8bcd; }
|
| 79 |
-
.stat-label { font-family: 'DM Mono', monospace; font-size: 0.62rem; color: #4a5568; letter-spacing: 0.12em; text-transform: uppercase; margin-bottom: 0.25rem; }
|
| 80 |
-
.stat-value { font-size: 1.6rem; font-weight: 700; color: #e8edf5; line-height: 1; }
|
| 81 |
-
.stat-sub { font-family: 'DM Mono', monospace; font-size: 0.65rem; color: #4a5568; margin-top: 0.2rem; }
|
| 82 |
-
|
| 83 |
-
.rec-card {
|
| 84 |
-
background: #0d1221; border: 1px solid #1e2a40; border-radius: 4px;
|
| 85 |
-
padding: 0.9rem 1rem; margin-bottom: 0.5rem;
|
| 86 |
-
}
|
| 87 |
-
.rec-card.approved { border-left: 3px solid #22c55e; }
|
| 88 |
-
.rec-card.challenging { border-left: 3px solid #f59e0b; }
|
| 89 |
-
.rec-card.vetoed { border-left: 3px solid #ef4444; opacity: 0.6; }
|
| 90 |
-
.rec-title { font-size: 0.95rem; font-weight: 700; color: #e8edf5; margin-bottom: 0.15rem; }
|
| 91 |
-
.rec-meta { font-family: 'DM Mono', monospace; font-size: 0.65rem; color: #4a5568; letter-spacing: 0.06em; }
|
| 92 |
-
.rec-reason { font-size: 0.75rem; color: #8899aa; margin-top: 0.35rem; padding-top: 0.35rem; border-top: 1px solid #1e2a40; }
|
| 93 |
-
.score-bar-wrap { background: #131a2e; border-radius: 2px; height: 3px; margin-top: 0.5rem; overflow: hidden; }
|
| 94 |
-
.score-bar { height: 100%; border-radius: 2px; background: var(--bar-color, #3d8bcd); }
|
| 95 |
-
|
| 96 |
-
.section-label {
|
| 97 |
-
font-family: 'DM Mono', monospace; font-size: 0.65rem; letter-spacing: 0.14em;
|
| 98 |
-
text-transform: uppercase; color: #4a5568; border-bottom: 1px solid #1e2a40;
|
| 99 |
-
padding-bottom: 0.4rem; margin-bottom: 0.75rem; margin-top: 1.25rem;
|
| 100 |
-
}
|
| 101 |
-
.unlock-chip {
|
| 102 |
-
display: inline-block; font-family: 'DM Mono', monospace; font-size: 0.65rem;
|
| 103 |
-
background: #131a2e; border: 1px solid #1e3a5f; border-radius: 2px;
|
| 104 |
-
padding: 2px 7px; margin: 2px 3px 2px 0; color: #3d8bcd;
|
| 105 |
-
}
|
| 106 |
-
.blocked-chip {
|
| 107 |
-
display: inline-block; font-family: 'DM Mono', monospace; font-size: 0.65rem;
|
| 108 |
-
background: #1a1010; border: 1px solid #3f1e1e; border-radius: 2px;
|
| 109 |
-
padding: 2px 7px; margin: 2px 3px 2px 0; color: #ef4444;
|
| 110 |
-
}
|
| 111 |
-
|
| 112 |
-
.stTabs [data-baseweb="tab-list"] { gap: 0; border-bottom: 1px solid #1e2a40; background: transparent; }
|
| 113 |
-
.stTabs [data-baseweb="tab"] { font-family: 'DM Mono', monospace; font-size: 0.7rem; letter-spacing: 0.08em; color: #4a5568; padding: 0.5rem 1.25rem; border-bottom: 2px solid transparent; }
|
| 114 |
-
.stTabs [aria-selected="true"] { color: #3d8bcd; border-bottom-color: #3d8bcd; background: transparent; }
|
| 115 |
-
</style>
|
| 116 |
-
""", unsafe_allow_html=True)
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
# ── Model + pipeline loading ──────────────────────────────────────────────────
|
| 120 |
-
|
| 121 |
-
@st.cache_resource(show_spinner="Loading curriculum & model from HuggingFace...")
|
| 122 |
-
def load_pipelines():
|
| 123 |
-
from plrs.model.model_loader import load_model_from_hub
|
| 124 |
-
|
| 125 |
-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 126 |
-
maps = ROOT / "data" / "knowledge_maps"
|
| 127 |
-
|
| 128 |
-
# Load model (tries decay, vanilla, then base)
|
| 129 |
-
model, model_type = load_model_from_hub(device=str(device))
|
| 130 |
-
|
| 131 |
-
pipelines = {}
|
| 132 |
-
for domain, fname in [("math", "math_dag.json"), ("cs", "cs_dag.json")]:
|
| 133 |
-
path = maps / fname
|
| 134 |
-
if path.exists():
|
| 135 |
-
curriculum = load_dag(path)
|
| 136 |
-
pipeline = PLRSPipeline(curriculum)
|
| 137 |
-
if model:
|
| 138 |
-
pipeline._model = model
|
| 139 |
-
pipelines[domain] = pipeline
|
| 140 |
-
|
| 141 |
-
return pipelines, model is not None, model_type
|
| 142 |
-
|
| 143 |
|
| 144 |
@st.cache_data
|
| 145 |
def load_skill_encoder():
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
else:
|
| 245 |
-
|
| 246 |
-
seed
|
| 247 |
np.random.seed(int(seed))
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
else:
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
else:
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
mastery_pct = int(summary["mastery_rate"] * 100)
|
| 326 |
-
vrate_pct = int(stats["prerequisite_violation_rate"] * 100)
|
| 327 |
-
|
| 328 |
-
st.markdown(f"""
|
| 329 |
-
<div class="stat-row">
|
| 330 |
-
<div class="stat-card blue">
|
| 331 |
-
<div class="stat-label">Mastered</div>
|
| 332 |
-
<div class="stat-value">{summary['mastered']}<span style="font-size:0.9rem;color:#4a5568;">/{summary['total_topics']}</span></div>
|
| 333 |
-
<div class="stat-sub">{mastery_pct}% rate</div>
|
| 334 |
-
</div>
|
| 335 |
-
<div class="stat-card green">
|
| 336 |
-
<div class="stat-label">Approved</div>
|
| 337 |
-
<div class="stat-value">{stats['approved_count']}</div>
|
| 338 |
-
<div class="stat-sub">ready to learn</div>
|
| 339 |
-
</div>
|
| 340 |
-
<div class="stat-card amber">
|
| 341 |
-
<div class="stat-label">Challenging</div>
|
| 342 |
-
<div class="stat-value">{stats['challenging_count']}</div>
|
| 343 |
-
<div class="stat-sub">partial prereqs</div>
|
| 344 |
-
</div>
|
| 345 |
-
<div class="stat-card red">
|
| 346 |
-
<div class="stat-label">Violation rate</div>
|
| 347 |
-
<div class="stat-value">{vrate_pct}<span style="font-size:0.9rem;color:#4a5568;">%</span></div>
|
| 348 |
-
<div class="stat-sub">blocked topics</div>
|
| 349 |
-
</div>
|
| 350 |
-
</div>
|
| 351 |
-
""", unsafe_allow_html=True)
|
| 352 |
-
|
| 353 |
-
if results["approved"]:
|
| 354 |
-
st.markdown('<div class="section-label">✅ Approved Recommendations</div>', unsafe_allow_html=True)
|
| 355 |
-
for i, rec in enumerate(results["approved"]):
|
| 356 |
-
score_pct = int(rec["score"] * 100)
|
| 357 |
-
st.markdown(f"""
|
| 358 |
-
<div class="rec-card approved">
|
| 359 |
-
<div class="rec-title">{i+1}. {rec['topic_label']}</div>
|
| 360 |
-
<div class="rec-meta">score: {rec['score']:.3f} · mastery: {int(rec['mastery']*100)}% · unlocks: {rec['downstream_count']}</div>
|
| 361 |
-
<div class="rec-reason">{rec['reasoning']}</div>
|
| 362 |
-
<div class="score-bar-wrap"><div class="score-bar" style="width:{score_pct}%;--bar-color:#22c55e;"></div></div>
|
| 363 |
-
</div>
|
| 364 |
-
""", unsafe_allow_html=True)
|
| 365 |
-
else:
|
| 366 |
-
st.info("No approved topics — lower the mastery threshold or set some mastery levels.")
|
| 367 |
-
|
| 368 |
-
if results["challenging"]:
|
| 369 |
-
st.markdown('<div class="section-label">⚠️ Challenging</div>', unsafe_allow_html=True)
|
| 370 |
-
for rec in results["challenging"]:
|
| 371 |
-
score_pct = int(rec["score"] * 100)
|
| 372 |
-
unmet = ", ".join(rec["unmet_prerequisites"]) or "—"
|
| 373 |
-
st.markdown(f"""
|
| 374 |
-
<div class="rec-card challenging">
|
| 375 |
-
<div class="rec-title">{rec['topic_label']}</div>
|
| 376 |
-
<div class="rec-meta">score: {rec['score']:.3f} · strengthen: {unmet}</div>
|
| 377 |
-
<div class="rec-reason">{rec['reasoning']}</div>
|
| 378 |
-
<div class="score-bar-wrap"><div class="score-bar" style="width:{score_pct}%;--bar-color:#f59e0b;"></div></div>
|
| 379 |
-
</div>
|
| 380 |
-
""", unsafe_allow_html=True)
|
| 381 |
-
|
| 382 |
-
if results["vetoed"]:
|
| 383 |
-
with st.expander(f"❌ Vetoed topics ({stats['vetoed_count']} total — prerequisite check failed)"):
|
| 384 |
-
for rec in results["vetoed"]:
|
| 385 |
-
unmet = ", ".join(rec["unmet_prerequisites"]) or "—"
|
| 386 |
-
st.markdown(f"""
|
| 387 |
-
<div class="rec-card vetoed">
|
| 388 |
-
<div class="rec-title">{rec['topic_label']}</div>
|
| 389 |
-
<div class="rec-meta">blocked by: {unmet}</div>
|
| 390 |
-
</div>
|
| 391 |
-
""", unsafe_allow_html=True)
|
| 392 |
-
else:
|
| 393 |
-
st.markdown("""
|
| 394 |
-
<div style="height:280px;display:flex;align-items:center;justify-content:center;
|
| 395 |
-
border:1px dashed #1e2a40;border-radius:4px;color:#2a3a50;">
|
| 396 |
-
<div style="text-align:center;">
|
| 397 |
-
<div style="font-size:2rem;margin-bottom:0.5rem;">⚡</div>
|
| 398 |
-
<div style="font-family:'DM Mono',monospace;font-size:0.7rem;letter-spacing:0.1em;">
|
| 399 |
-
SET MASTERY LEVELS · THEN GENERATE
|
| 400 |
-
</div>
|
| 401 |
-
</div>
|
| 402 |
-
</div>
|
| 403 |
-
""", unsafe_allow_html=True)
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
# ══════════════════════════════════════════════════════════════════════════════
|
| 407 |
-
# TAB 2 — WHAT-IF SIMULATOR
|
| 408 |
-
# ══════════════════════════════════════════════════════════════════════════════
|
| 409 |
-
with tab2:
|
| 410 |
-
st.markdown('<div class="section-label">Prerequisite Impact Simulator</div>', unsafe_allow_html=True)
|
| 411 |
-
st.markdown('<p style="font-size:0.8rem;color:#8899aa;">Select any topic to see what it unlocks and what currently blocks it.</p>', unsafe_allow_html=True)
|
| 412 |
-
|
| 413 |
-
node_options = {curriculum.label(n): n for n in curriculum.nodes}
|
| 414 |
-
selected_label = st.selectbox("Select topic", list(node_options.keys()))
|
| 415 |
-
selected_id = node_options[selected_label]
|
| 416 |
-
wi = pipeline.what_if(selected_id)
|
| 417 |
-
|
| 418 |
-
col_a, col_b = st.columns(2, gap="large")
|
| 419 |
-
|
| 420 |
-
with col_a:
|
| 421 |
-
st.markdown('<div class="section-label">🔓 What This Unlocks</div>', unsafe_allow_html=True)
|
| 422 |
-
if wi["direct_unlocks"]:
|
| 423 |
-
st.markdown("**Directly unlocks:**")
|
| 424 |
-
st.markdown("".join(f'<span class="unlock-chip">{u["label"]}</span>' for u in wi["direct_unlocks"]), unsafe_allow_html=True)
|
| 425 |
-
else:
|
| 426 |
-
st.markdown('<span style="color:#4a5568;font-size:0.8rem;">Leaf node — no further topics.</span>', unsafe_allow_html=True)
|
| 427 |
-
|
| 428 |
-
if wi["all_unlocks"]:
|
| 429 |
-
st.markdown(f"**All downstream ({wi['total_unlocked']}):**")
|
| 430 |
-
st.markdown("".join(f'<span class="unlock-chip">{u["label"]}</span>' for u in wi["all_unlocks"]), unsafe_allow_html=True)
|
| 431 |
-
|
| 432 |
-
st.markdown(f"""
|
| 433 |
-
<div class="stat-card blue" style="margin-top:1rem;max-width:180px;">
|
| 434 |
-
<div class="stat-label">Total Unlocked</div>
|
| 435 |
-
<div class="stat-value">{wi['total_unlocked']}</div>
|
| 436 |
-
</div>
|
| 437 |
-
""", unsafe_allow_html=True)
|
| 438 |
-
|
| 439 |
-
with col_b:
|
| 440 |
-
st.markdown('<div class="section-label">🔒 What Blocks This</div>', unsafe_allow_html=True)
|
| 441 |
-
if wi["blocked_by"]:
|
| 442 |
-
st.markdown("**Prerequisites:**")
|
| 443 |
-
st.markdown("".join(f'<span class="blocked-chip">{b["label"]}</span>' for b in wi["blocked_by"]), unsafe_allow_html=True)
|
| 444 |
-
else:
|
| 445 |
-
st.markdown('<span style="color:#22c55e;font-size:0.8rem;font-family:\'DM Mono\',monospace;">Root topic — no prerequisites.</span>', unsafe_allow_html=True)
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
# ══════════════════════════════════════════════════════════════════════════════
|
| 449 |
-
# TAB 3 — CURRICULUM MAP
|
| 450 |
-
# ══════════════════════════════════════════════════════════════════════════════
|
| 451 |
-
with tab3:
|
| 452 |
-
st.markdown('<div class="section-label">Curriculum Knowledge Graph</div>', unsafe_allow_html=True)
|
| 453 |
-
|
| 454 |
-
col_info, col_table = st.columns([1, 2], gap="large")
|
| 455 |
-
|
| 456 |
-
with col_info:
|
| 457 |
-
roots = [n for n in curriculum.nodes if not curriculum.prerequisites(n)]
|
| 458 |
-
leaves = [n for n in curriculum.nodes if not curriculum.successors(n)]
|
| 459 |
-
|
| 460 |
-
st.markdown(f"""
|
| 461 |
-
<div class="stat-card blue" style="margin-bottom:0.75rem;">
|
| 462 |
-
<div class="stat-label">Domain</div>
|
| 463 |
-
<div style="font-size:0.85rem;font-weight:700;color:#e8edf5;">{curriculum.domain}</div>
|
| 464 |
-
</div>
|
| 465 |
-
<div class="stat-card green" style="margin-bottom:0.75rem;">
|
| 466 |
-
<div class="stat-label">Topics</div><div class="stat-value">{curriculum.num_nodes}</div>
|
| 467 |
-
</div>
|
| 468 |
-
<div class="stat-card amber">
|
| 469 |
-
<div class="stat-label">Prerequisite Edges</div><div class="stat-value">{curriculum.num_edges}</div>
|
| 470 |
-
</div>
|
| 471 |
-
""", unsafe_allow_html=True)
|
| 472 |
-
|
| 473 |
-
st.markdown('<div class="section-label">Root Topics</div>', unsafe_allow_html=True)
|
| 474 |
-
st.markdown("".join(f'<span class="unlock-chip">{curriculum.label(r)}</span>' for r in roots), unsafe_allow_html=True)
|
| 475 |
-
|
| 476 |
-
st.markdown('<div class="section-label">Leaf Topics</div>', unsafe_allow_html=True)
|
| 477 |
-
st.markdown("".join(f'<span class="blocked-chip">{curriculum.label(l)}</span>' for l in leaves), unsafe_allow_html=True)
|
| 478 |
-
|
| 479 |
-
with col_table:
|
| 480 |
-
import pandas as pd
|
| 481 |
-
st.markdown('<div class="section-label">All Topics</div>', unsafe_allow_html=True)
|
| 482 |
-
rows = []
|
| 483 |
-
for node in curriculum.nodes:
|
| 484 |
-
rows.append({
|
| 485 |
-
"Topic": curriculum.label(node),
|
| 486 |
-
"Level": curriculum.level(node),
|
| 487 |
-
"Prerequisites": len(curriculum.prerequisites(node)),
|
| 488 |
-
"Unlocks (direct)": len(curriculum.successors(node)),
|
| 489 |
-
"Total Downstream": len(curriculum.descendants(node)),
|
| 490 |
-
})
|
| 491 |
-
df = pd.DataFrame(rows).sort_values("Total Downstream", ascending=False)
|
| 492 |
-
st.dataframe(df, use_container_width=True, height=480, hide_index=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
import json
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import networkx as nx
|
| 7 |
+
import numpy as np
|
| 8 |
+
from huggingface_hub import hf_hub_download
|
| 9 |
+
from typing import Dict, List, Optional, Tuple
|
| 10 |
+
|
| 11 |
+
st.set_page_config(page_title='Logic Engine', page_icon='🧠', layout='wide')
|
| 12 |
+
|
| 13 |
+
HF_REPO = 'Clementio/PLRS'
|
| 14 |
+
|
| 15 |
+
@st.cache_resource
|
| 16 |
+
def load_model():
|
| 17 |
+
config_path = hf_hub_download(repo_id=HF_REPO, filename='config.json')
|
| 18 |
+
with open(config_path) as f:
|
| 19 |
+
config = json.load(f)
|
| 20 |
+
model_path = hf_hub_download(repo_id=HF_REPO, filename='sakt_model.pt')
|
| 21 |
+
class SAKT(nn.Module):
|
| 22 |
+
def __init__(self, num_skills, embed_dim, num_heads, num_layers, max_seq_len, dropout):
|
| 23 |
+
super(SAKT, self).__init__()
|
| 24 |
+
self.num_skills = num_skills
|
| 25 |
+
self.interaction_embed = nn.Embedding(num_skills * 2 + 1, embed_dim, padding_idx=0)
|
| 26 |
+
self.skill_embed = nn.Embedding(num_skills + 1, embed_dim, padding_idx=0)
|
| 27 |
+
self.pos_embed = nn.Embedding(max_seq_len + 1, embed_dim)
|
| 28 |
+
encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads, dropout=dropout, batch_first=True, dim_feedforward=embed_dim * 4, norm_first=True)
|
| 29 |
+
self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers, enable_nested_tensor=False)
|
| 30 |
+
self.dropout = nn.Dropout(dropout)
|
| 31 |
+
self.output = nn.Linear(embed_dim, 1)
|
| 32 |
+
def forward(self, interactions, target_skills, mask, return_attention=False):
|
| 33 |
+
batch_size, seq_len = interactions.shape
|
| 34 |
+
positions = torch.arange(seq_len, device=interactions.device).unsqueeze(0).expand(batch_size, -1)
|
| 35 |
+
x = self.interaction_embed(interactions)
|
| 36 |
+
x = x + self.pos_embed(positions)
|
| 37 |
+
x = x * mask.unsqueeze(-1).float()
|
| 38 |
+
x = self.dropout(x)
|
| 39 |
+
causal_mask = torch.triu(torch.full((seq_len, seq_len), float('-inf')), diagonal=1)
|
| 40 |
+
x = self.transformer(x, mask=causal_mask, is_causal=False)
|
| 41 |
+
x = x * mask.unsqueeze(-1).float()
|
| 42 |
+
x = x + self.skill_embed(target_skills)
|
| 43 |
+
return self.output(x).squeeze(-1)
|
| 44 |
+
device = torch.device('cpu')
|
| 45 |
+
model = SAKT(num_skills=config['num_skills'], embed_dim=config['embed_dim'], num_heads=config['num_heads'], num_layers=config['num_layers'], max_seq_len=config['max_seq_len'], dropout=config['dropout'])
|
| 46 |
+
model.load_state_dict(torch.load(model_path, map_location=device))
|
| 47 |
+
model.eval()
|
| 48 |
+
return model, config, device
|
| 49 |
+
|
| 50 |
+
@st.cache_resource
|
| 51 |
+
def load_knowledge_maps():
|
| 52 |
+
def load_dag(path):
|
| 53 |
+
with open(path) as f:
|
| 54 |
+
data = json.load(f)
|
| 55 |
+
G = nx.DiGraph()
|
| 56 |
+
for node in data['nodes']:
|
| 57 |
+
G.add_node(node['id'], label=node['label'], level=node['level'], term=node['term'])
|
| 58 |
+
for edge in data['edges']:
|
| 59 |
+
G.add_edge(edge['from'], edge['to'])
|
| 60 |
+
return G
|
| 61 |
+
return load_dag('knowledge_maps/math_dag.json'), load_dag('knowledge_maps/cs_dag.json')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
@st.cache_data
|
| 64 |
def load_skill_encoder():
|
| 65 |
+
return pd.read_csv('data/skill_encoder.csv')
|
| 66 |
+
|
| 67 |
+
class MasteryVector:
|
| 68 |
+
def __init__(self, graph, threshold=0.70):
|
| 69 |
+
self.graph = graph
|
| 70 |
+
self.threshold = threshold
|
| 71 |
+
self.mastery = {node: 0.0 for node in graph.nodes}
|
| 72 |
+
def update(self, topic_id, probability):
|
| 73 |
+
if topic_id in self.mastery: self.mastery[topic_id] = probability
|
| 74 |
+
def is_mastered(self, topic_id):
|
| 75 |
+
return self.mastery.get(topic_id, 0.0) >= self.threshold
|
| 76 |
+
def get_mastery(self, topic_id):
|
| 77 |
+
return self.mastery.get(topic_id, 0.0)
|
| 78 |
+
def get_mastery_summary(self):
|
| 79 |
+
mastered = [t for t in self.mastery if self.is_mastered(t)]
|
| 80 |
+
return {'total_topics': len(self.mastery), 'mastered': len(mastered), 'mastery_rate': round(len(mastered)/len(self.mastery), 3), 'mastered_topics': mastered}
|
| 81 |
+
|
| 82 |
+
class DAGConstraintLayer:
|
| 83 |
+
def __init__(self, graph, threshold=0.70, soft_threshold=0.50):
|
| 84 |
+
self.graph = graph
|
| 85 |
+
self.threshold = threshold
|
| 86 |
+
self.soft_threshold = soft_threshold # below full threshold but above this = challenging
|
| 87 |
+
def validate(self, topic_id, mastery_vector):
|
| 88 |
+
if topic_id not in self.graph.nodes: return 'vetoed', 'Topic not found.'
|
| 89 |
+
prerequisites = list(self.graph.predecessors(topic_id))
|
| 90 |
+
label = self.graph.nodes[topic_id].get('label', topic_id)
|
| 91 |
+
if not prerequisites: return 'approved', f'✅ Foundational topic — no prerequisites.'
|
| 92 |
+
hard_fails = []
|
| 93 |
+
soft_fails = []
|
| 94 |
+
for p in prerequisites:
|
| 95 |
+
m = mastery_vector.get_mastery(p)
|
| 96 |
+
plabel = self.graph.nodes[p].get('label', p)
|
| 97 |
+
if m < self.soft_threshold:
|
| 98 |
+
hard_fails.append((plabel, m))
|
| 99 |
+
elif m < self.threshold:
|
| 100 |
+
soft_fails.append((plabel, m))
|
| 101 |
+
if hard_fails:
|
| 102 |
+
gaps = ', '.join([f"{l} ({m:.0%} mastered, need {self.threshold:.0%})" for l,m in hard_fails])
|
| 103 |
+
return 'vetoed', f'❌ Prerequisites not met: {gaps}'
|
| 104 |
+
elif soft_fails:
|
| 105 |
+
gaps = ', '.join([f"{l} ({m:.0%} mastered, need {self.threshold:.0%})" for l,m in soft_fails])
|
| 106 |
+
return 'challenging', f'⚠️ Challenging — prerequisites nearly met: {gaps}. Proceed with caution.'
|
| 107 |
+
else:
|
| 108 |
+
prereq_labels = [self.graph.nodes[p].get('label',p) for p in prerequisites]
|
| 109 |
+
return 'approved', f'✅ Prerequisites mastered: {", ".join(prereq_labels)}'
|
| 110 |
+
|
| 111 |
+
class RankingFunction:
|
| 112 |
+
def __init__(self, graph, threshold=0.70, w_gap=0.40, w_ready=0.35, w_downstream=0.25):
|
| 113 |
+
self.graph=graph; self.threshold=threshold; self.w_gap=w_gap; self.w_ready=w_ready; self.w_downstream=w_downstream
|
| 114 |
+
scores = {n: len(nx.descendants(graph, n)) for n in graph.nodes}
|
| 115 |
+
mx = max(scores.values()) if scores else 1
|
| 116 |
+
self._downstream = {n: s/mx for n,s in scores.items()}
|
| 117 |
+
def score(self, topic_id, mastery_vector):
|
| 118 |
+
current = mastery_vector.get_mastery(topic_id)
|
| 119 |
+
gap = min(max(0.0, self.threshold-current)/self.threshold, 1.0)
|
| 120 |
+
prereqs = list(self.graph.predecessors(topic_id))
|
| 121 |
+
readiness = 1.0 if not prereqs else sum(1 for p in prereqs if mastery_vector.is_mastered(p))/len(prereqs)
|
| 122 |
+
downstream = self._downstream.get(topic_id, 0.0)
|
| 123 |
+
# Near-mastery boost: topics the student has already started
|
| 124 |
+
# rank higher than untouched topics with the same gap score
|
| 125 |
+
near_mastery_boost = 0.0
|
| 126 |
+
if 0.10 <= current < self.threshold:
|
| 127 |
+
near_mastery_boost = 0.15 * (current / self.threshold)
|
| 128 |
+
return round(self.w_gap*gap + self.w_ready*readiness + self.w_downstream*downstream + near_mastery_boost, 3)
|
| 129 |
+
|
| 130 |
+
class LearningRecommendationPipeline:
|
| 131 |
+
def __init__(self, graph, threshold=0.70, soft_threshold=0.50, top_n=5):
|
| 132 |
+
self.graph=graph
|
| 133 |
+
self.constraint=DAGConstraintLayer(graph, threshold, soft_threshold)
|
| 134 |
+
self.ranker=RankingFunction(graph, threshold)
|
| 135 |
+
self.top_n=top_n
|
| 136 |
+
def run(self, mastery_vector):
|
| 137 |
+
approved, challenging, vetoed = [], [], []
|
| 138 |
+
for topic_id in self.graph.nodes:
|
| 139 |
+
status, reasoning = self.constraint.validate(topic_id, mastery_vector)
|
| 140 |
+
entry = {'topic_id': topic_id, 'topic_label': self.graph.nodes[topic_id].get('label', topic_id), 'mastery': round(mastery_vector.get_mastery(topic_id),3), 'reasoning': reasoning, 'status': status}
|
| 141 |
+
if status == 'approved' and not mastery_vector.is_mastered(topic_id):
|
| 142 |
+
entry['score'] = self.ranker.score(topic_id, mastery_vector)
|
| 143 |
+
approved.append(entry)
|
| 144 |
+
elif status == 'challenging' and not mastery_vector.is_mastered(topic_id):
|
| 145 |
+
entry['score'] = self.ranker.score(topic_id, mastery_vector) * 0.8 # slight penalty
|
| 146 |
+
challenging.append(entry)
|
| 147 |
+
elif status == 'vetoed':
|
| 148 |
+
vetoed.append(entry)
|
| 149 |
+
approved.sort(key=lambda x: x['score'], reverse=True)
|
| 150 |
+
challenging.sort(key=lambda x: x['score'], reverse=True)
|
| 151 |
+
return {'top_recommendations': approved[:self.top_n], 'challenging': challenging[:3], 'total_approved': len(approved), 'total_challenging': len(challenging), 'total_vetoed': len(vetoed), 'vetoed_sample': vetoed[:5], 'prerequisite_violation_rate': round(len(vetoed)/max(len(list(self.graph.nodes)),1),3)}
|
| 152 |
+
|
| 153 |
+
ACTIVITY_TO_MATH = {'oucontent':'algebraic_expressions','forumng':'statistics_basic','homepage':'whole_numbers','subpage':'plane_shapes','resource':'indices','url':'number_bases','ouwiki':'proportion_variation','glossary':'algebraic_factorization','quiz':'quadratic_equations'}
|
| 154 |
+
ACTIVITY_TO_CS = {'oucontent':'programming_concepts','forumng':'ethics_technology','homepage':'computer_basics','subpage':'html_basics','resource':'networking_fundamentals','url':'internet_basics','ouwiki':'cloud_basics','glossary':'intro_databases','quiz':'python_basics'}
|
| 155 |
+
|
| 156 |
+
def run_sakt_inference(model, config, skill_seq, correct_seq, device):
    """Run a SAKT forward pass over one student's interaction history.

    Returns {skill_id: predicted P(correct)} for every non-padding
    prediction position. When a skill occurs multiple times, the later
    (more recent) probability overwrites the earlier one.
    """
    max_len = config['max_seq_len']
    n_skills = config['num_skills']
    # Keep only the most recent window the model supports.
    if len(skill_seq) > max_len:
        skill_seq = skill_seq[-max_len:]
        correct_seq = correct_seq[-max_len:]
    # Encode each past (skill, correctness) pair as one interaction id;
    # the model predicts the *next* skill, hence the off-by-one slicing.
    interactions = [s + c * n_skills for s, c in zip(skill_seq[:-1], correct_seq[:-1])]
    target_skills = skill_seq[1:]
    seq_len = len(interactions)
    pad_len = max_len - seq_len
    # Left-pad so real interactions sit at the end of the window.
    interactions = [0] * pad_len + interactions
    target_skills = [0] * pad_len + target_skills
    mask = [False] * pad_len + [True] * seq_len
    with torch.no_grad():
        logits = model(
            torch.LongTensor([interactions]).to(device),
            torch.LongTensor([target_skills]).to(device),
            torch.BoolTensor([mask]).to(device),
        )
        probs = torch.sigmoid(logits).squeeze(0)
    real_probs = probs[torch.BoolTensor(mask)].cpu().numpy()
    real_skills = target_skills[pad_len:]
    return {int(skill): float(p) for skill, p in zip(real_skills, real_probs)}
|
| 169 |
+
|
| 170 |
+
def build_mastery_vector(skill_probs, graph, skill_encoder_df, domain, threshold, soft_threshold):
    """Translate per-skill SAKT probabilities into a topic-level MasteryVector.

    Each skill id is looked up in the encoder table, mapped to a
    curriculum topic via its activity type, and when multiple skills
    land on the same topic the highest probability wins.
    `soft_threshold` is accepted for interface symmetry but unused here.
    """
    vector = MasteryVector(graph, threshold)
    mapping = ACTIVITY_TO_MATH if domain == 'math' else ACTIVITY_TO_CS
    best_per_topic = {}
    for skill_id, prob in skill_probs.items():
        match = skill_encoder_df[skill_encoder_df['skill_id'] == skill_id]
        if match.empty:
            continue
        activity = match['activity_type'].values[0] if 'activity_type' in match.columns else None
        topic = mapping.get(activity) if activity else None
        if topic:
            # Keep the strongest evidence seen for this topic.
            best_per_topic[topic] = max(best_per_topic.get(topic, 0.0), prob)
    for topic, score in best_per_topic.items():
        vector.update(topic, score)
    return vector
|
| 181 |
+
|
| 182 |
+
def what_if_analysis(topic_id, graph):
    """Summarise what mastering `topic_id` unlocks and what blocks it.

    Returns labels for the topic's direct successors, all transitive
    descendants, and its direct prerequisites, plus the total count of
    transitively unlocked topics.
    """
    def labels_for(node_ids):
        # Fall back to the node id when a human-readable label is absent.
        return [graph.nodes[n].get('label', n) for n in node_ids]

    downstream = list(nx.descendants(graph, topic_id))
    return {
        'direct_unlocks': labels_for(graph.successors(topic_id)),
        'all_unlocks': labels_for(downstream),
        'blocked_by': labels_for(graph.predecessors(topic_id)),
        'total_unlocked': len(downstream),
    }
|
| 190 |
+
|
| 191 |
+
def cascade_mastery(mastery_vector, graph):
    """Propagate mastery evidence *upward* through the prerequisite DAG.

    If a student demonstrates reasonable mastery of a topic, its
    prerequisites are almost certainly known too (e.g. scoring 80% on
    Modular Arithmetic implies knowing Whole Numbers). Each prerequisite
    is raised to at least 85% of the descendant's mastery, capped at
    0.95, and the sweep repeats until a fixed point is reached.
    """
    made_progress = True
    while made_progress:
        made_progress = False
        for node in graph.nodes:
            evidence = mastery_vector.get_mastery(node)
            # Too weak a signal to infer anything about prerequisites.
            if evidence < 0.40:
                continue
            # Inferred prerequisite mastery: 85% of the descendant's, capped.
            inferred = min(evidence * 0.85, 0.95)
            for prereq in graph.predecessors(node):
                if inferred > mastery_vector.get_mastery(prereq):
                    mastery_vector.update(prereq, inferred)
                    made_progress = True
    return mastery_vector
|
| 214 |
+
|
| 215 |
+
# NOTE(review): this is an exact duplicate of the cascade_mastery defined
# immediately above — this later definition shadows the earlier one at
# import time. One of the two copies should be deleted.
def cascade_mastery(mastery_vector, graph):
    """
    If a student has high mastery on a topic, infer that their
    prerequisites are also likely mastered (propagate upward).
    A student who scores 80% on Modular Arithmetic almost certainly
    knows Whole Numbers — cascade fills these realistic gaps.
    """
    # Fixed-point loop: keep sweeping the graph until no update is made.
    changed = True
    while changed:
        changed = False
        for node in graph.nodes:
            node_mastery = mastery_vector.get_mastery(node)
            # Below 0.40 the evidence is too weak to infer anything.
            if node_mastery < 0.40:
                continue
            # For each prerequisite of this node
            for prereq in graph.predecessors(node):
                prereq_mastery = mastery_vector.get_mastery(prereq)
                # Infer prerequisite mastery as at least 85% of descendant mastery
                inferred = min(node_mastery * 0.85, 0.95)
                if inferred > prereq_mastery:
                    mastery_vector.update(prereq, inferred)
                    changed = True
    return mastery_vector
|
| 238 |
+
|
| 239 |
+
def get_attention_weights(model, config, skill_seq, correct_seq, device):
    """Run a forward pass and return the tail of the prediction sequence.

    Despite its name, this helper does not extract attention weights —
    the original hook machinery was declared but never registered, so it
    has been removed along with an unused re-computation of the
    positional embeddings. Behavior of the return value is unchanged:
    the last 10 target skills, their predicted probabilities, and the
    effective (unpadded) sequence length.

    NOTE(review): `device` is accepted but unused — all tensors stay on
    CPU and `.numpy()` is called without `.cpu()`, so this assumes a
    CPU-resident model. Confirm before running on GPU.
    """
    max_len = config['max_seq_len']
    n_skills = config['num_skills']
    # Keep only the most recent window the model supports.
    if len(skill_seq) > max_len:
        skill_seq = skill_seq[-max_len:]
        correct_seq = correct_seq[-max_len:]
    # Encode past (skill, correctness) pairs; the model predicts the next skill.
    interactions = [s + c * n_skills for s, c in zip(skill_seq[:-1], correct_seq[:-1])]
    target_skills = skill_seq[1:]
    seq_len = len(interactions)
    pad_len = max_len - seq_len
    # Left-pad so real interactions sit at the end of the window.
    interactions = [0] * pad_len + interactions
    target_skills = [0] * pad_len + target_skills
    mask_list = [False] * pad_len + [True] * seq_len
    interactions_t = torch.LongTensor([interactions])
    target_t = torch.LongTensor([target_skills])
    mask_t = torch.BoolTensor([mask_list])
    with torch.no_grad():
        real_mask = mask_t.squeeze(0)
        real_skills = target_skills[pad_len:]
        real_probs = torch.sigmoid(model(interactions_t, target_t, mask_t)).squeeze(0)[real_mask].numpy()
    return real_skills[-10:], real_probs[-10:], seq_len
|
| 259 |
+
|
| 260 |
+
def main():
    """Streamlit entry point for the Logic Engine recommender UI.

    Loads the model, knowledge maps and skill encoder once, builds the
    sidebar configuration, then renders four tabs: recommendations,
    what-if simulator, knowledge map, and diagnostics.
    """
    # Load model artifacts and curriculum DAGs (helpers defined elsewhere in this file).
    model, config, device = load_model()
    math_graph, cs_graph = load_knowledge_maps()
    skill_encoder = load_skill_encoder()
    st.title('🧠 Logic Engine')
    st.subheader('Domain-Agnostic Constraint-Aware Learning Recommender')
    st.markdown('---')
    # --- Sidebar configuration ---
    st.sidebar.title('⚙️ Configuration')
    domain = st.sidebar.selectbox('Select Domain', ['Mathematics', 'CS Fundamentals'])
    threshold = st.sidebar.slider('Mastery Threshold', 0.50, 0.90, 0.70, 0.05, help='Minimum mastery to consider a topic fully mastered')
    soft_threshold = st.sidebar.slider('Challenging Threshold', 0.30, 0.70, 0.50, 0.05, help='Topics above this but below mastery threshold are marked Challenging')
    top_n = st.sidebar.slider('Top N Recommendations', 3, 10, 5)
    # Select the DAG matching the chosen domain.
    graph = math_graph if domain=='Mathematics' else cs_graph
    domain_key = 'math' if domain=='Mathematics' else 'cs'
    pipeline = LearningRecommendationPipeline(graph, threshold, soft_threshold, top_n)
    st.sidebar.markdown('---')
    st.sidebar.markdown('**About**')
    st.sidebar.markdown('SAKT-based knowledge tracing with DAG prerequisite constraints. Three-tier recommendations: ✅ Approved, ⚠️ Challenging, ❌ Vetoed.')
    tab1, tab2, tab3, tab4 = st.tabs(['🎯 Recommendations','🔍 What-If Simulator','🗺️ Knowledge Map','📊 Diagnostics'])

    # --- Tab 1: learner profile + recommendations ---
    with tab1:
        st.header('Learner Profile')
        mode = st.radio('Input Mode', ['Manual Mastery Input','Simulate Student Sequence'], horizontal=True)
        mastery_vector = MasteryVector(graph, threshold)
        if mode=='Manual Mastery Input':
            # One slider per topic, laid out in two columns.
            st.markdown('Set your current mastery level for each topic:')
            cols=st.columns(2); nodes=list(graph.nodes)
            for i,node in enumerate(nodes):
                label=graph.nodes[node].get('label',node); level=graph.nodes[node].get('level','')
                val=cols[i%2].slider(f'{label} ({level})',0.0,1.0,0.0,0.05,key=f'mastery_{node}')
                mastery_vector.update(node,val)
        else:
            # Simulated learner: seeded random beta-distributed mastery,
            # scaled by the chosen sequence length.
            seq_length=st.slider('Sequence Length',10,200,50)
            seed=st.number_input('Student Seed',1,1000,42,1)
            np.random.seed(int(seed))
            topic_nodes = list(graph.nodes)
            n_topics = len(topic_nodes)
            raw_scores = np.random.beta(1.5, 3.0, size=n_topics)
            # Longer sequences imply more evidence, hence higher mastery ceiling.
            scale = min(seq_length / 200.0 * 1.4, 1.0)
            scores = np.clip(raw_scores * scale, 0.0, 1.0)
            for topic_id, score in zip(topic_nodes, scores):
                mastery_vector.update(topic_id, float(score))
            mastery_df = pd.DataFrame({
                'Topic': [graph.nodes[t].get('label', t)[:25] for t in topic_nodes],
                'Mastery': [round(float(s), 3) for s in scores]
            }).sort_values('Mastery', ascending=False).head(10)
            st.markdown('**📈 Simulated Learner Mastery Signal (top 10 topics):**')
            st.bar_chart(mastery_df.set_index('Topic'))
            # Cascade mastery upward through DAG
            mastery_vector = cascade_mastery(mastery_vector, graph)
            n_mastered = sum(1 for t in topic_nodes if mastery_vector.is_mastered(t))
            st.success(f'Learner simulation complete — {n_mastered}/{n_topics} topics above mastery threshold')
        if st.button('🚀 Generate Recommendations', type='primary'):
            output=pipeline.run(mastery_vector)
            summary=mastery_vector.get_mastery_summary()
            # Headline metrics row.
            col1,col2,col3,col4,col5=st.columns(5)
            col1.metric('Topics Mastered',f"{summary['mastered']} / {summary['total_topics']}")
            col2.metric('Mastery Rate',f"{summary['mastery_rate']:.1%}")
            col3.metric('✅ Approved',output['total_approved'])
            col4.metric('⚠️ Challenging',output['total_challenging'])
            col5.metric('Violation Rate',f"{output['prerequisite_violation_rate']:.1%}")
            st.markdown('---')
            st.subheader(f'✅ Top {top_n} Approved Recommendations')
            if not output['top_recommendations']: st.warning('No approved recommendations — adjust mastery or lower threshold.')
            else:
                # Expand the top three recommendations by default.
                for i,rec in enumerate(output['top_recommendations'],1):
                    with st.expander(f"{i}. {rec['topic_label']} — Score: {rec['score']} | Mastery: {rec['mastery']:.1%}", expanded=(i<=3)):
                        st.markdown(f"**Reasoning:** {rec['reasoning']}")
                        st.progress(rec['mastery'])
            if output['challenging']:
                st.markdown('---')
                st.subheader('⚠️ Challenging Topics (proceed with caution)')
                for rec in output['challenging']:
                    with st.expander(f"{rec['topic_label']} | Mastery: {rec['mastery']:.1%}"):
                        st.markdown(f"**Reasoning:** {rec['reasoning']}")
                        st.progress(rec['mastery'])
            if output['vetoed_sample']:
                st.markdown('---'); st.subheader('❌ Sample Vetoed Topics')
                for rec in output['vetoed_sample']:
                    with st.expander(f"✗ {rec['topic_label']}"):
                        st.markdown(f"**Reason:** {rec['reasoning']}")

    # --- Tab 2: prerequisite what-if simulator ---
    with tab2:
        st.header('🔍 What-If Prerequisite Simulator')
        st.markdown('Explore how mastering a topic unlocks future learning paths — or what is blocking you from starting it.')
        nodes_list = list(graph.nodes)
        labels_list = [graph.nodes[n].get('label',n) for n in nodes_list]
        selected_label = st.selectbox('Select a topic to analyse:', labels_list)
        # Map the chosen label back to its node id.
        selected_node = nodes_list[labels_list.index(selected_label)]
        if st.button('🔍 Analyse Topic', type='primary'):
            result = what_if_analysis(selected_node, graph)
            col1, col2 = st.columns(2)
            with col1:
                st.subheader('🔓 If you master this topic...')
                if result['direct_unlocks']:
                    st.markdown(f"**Directly unlocks {len(result['direct_unlocks'])} topic(s):**")
                    for t in result['direct_unlocks']: st.markdown(f' → {t}')
                else:
                    st.info('This is a terminal topic — it does not unlock further topics in this map.')
                if result['all_unlocks']:
                    st.markdown(f"**Total topics eventually unlocked: {result['total_unlocked']}**")
            with col2:
                st.subheader('🔒 To start this topic you need...')
                if result['blocked_by']:
                    st.markdown('**Prerequisites required:**')
                    for t in result['blocked_by']: st.markdown(f' ✓ {t}')
                else:
                    st.success('This is a foundational topic — no prerequisites needed. You can start it now!')
            if result['all_unlocks']:
                st.markdown('---')
                st.markdown('**Full learning path unlocked:**')
                # Show at most 8 downstream topics, with an ellipsis if truncated.
                st.markdown(' → '.join([selected_label] + result['all_unlocks'][:8]) + ('...' if len(result['all_unlocks'])>8 else ''))

    # --- Tab 3: knowledge map table ---
    with tab3:
        st.header(f'{domain} Knowledge Map')
        st.markdown(f"**{graph.number_of_nodes()} topics** | **{graph.number_of_edges()} prerequisite relationships**")
        rows=[]
        for node in graph.nodes:
            label=graph.nodes[node].get('label',node); level=graph.nodes[node].get('level',''); term=graph.nodes[node].get('term','')
            prereqs=[graph.nodes[p].get('label',p) for p in graph.predecessors(node)]
            rows.append({'Topic':label,'Level':level,'Term':term,'Prerequisites':', '.join(prereqs) if prereqs else 'None (Foundational)'})
        st.dataframe(pd.DataFrame(rows), use_container_width=True, hide_index=True)
        longest=nx.dag_longest_path(graph)
        st.markdown('**Longest prerequisite chain:**')
        st.markdown(' → '.join([graph.nodes[n].get('label',n) for n in longest]))

    # --- Tab 4: diagnostics ---
    with tab4:
        st.header('System Diagnostics')
        col1,col2=st.columns(2)
        with col1: st.subheader('Model Configuration'); st.json(config)
        with col2:
            st.subheader('DAG Statistics')
            st.json({'domain':domain,'nodes':graph.number_of_nodes(),'edges':graph.number_of_edges(),'is_valid_dag':nx.is_directed_acyclic_graph(graph),'longest_path':len(nx.dag_longest_path(graph))})
        st.subheader('Constraint Layer')
        st.markdown(f'**Mastery threshold:** {threshold:.0%} — topics above this are considered mastered')
        st.markdown(f'**Challenging threshold:** {soft_threshold:.0%} — topics between this and mastery threshold are marked ⚠️ Challenging')
        st.markdown('**Hard veto:** topics with prerequisites below challenging threshold are fully blocked')
        st.subheader('Domain Switching')
        dcol1,dcol2=st.columns(2)
        with dcol1: st.metric('Math DAG',f'{math_graph.number_of_nodes()} topics')
        with dcol2: st.metric('CS DAG',f'{cs_graph.number_of_nodes()} topics')
| 401 |
+
|
| 402 |
+
# Script entry point — only run the UI when executed directly, not on import.
if __name__ == '__main__':
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data/skill_encoder.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/skill_encoder_real.csv
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
assessment_type,code_module,skill_id
|
| 2 |
+
Exam,AAA,1
|
| 3 |
+
TMA,AAA,2
|
| 4 |
+
CMA,BBB,3
|
| 5 |
+
Exam,BBB,4
|
| 6 |
+
TMA,BBB,5
|
| 7 |
+
CMA,CCC,6
|
| 8 |
+
Exam,CCC,7
|
| 9 |
+
TMA,CCC,8
|
| 10 |
+
CMA,DDD,9
|
| 11 |
+
Exam,DDD,10
|
| 12 |
+
TMA,DDD,11
|
| 13 |
+
Exam,EEE,12
|
| 14 |
+
TMA,EEE,13
|
| 15 |
+
CMA,FFF,14
|
| 16 |
+
Exam,FFF,15
|
| 17 |
+
TMA,FFF,16
|
| 18 |
+
CMA,GGG,17
|
| 19 |
+
Exam,GGG,18
|
| 20 |
+
TMA,GGG,19
|
data/skill_encoder_v2.csv
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
index.html
DELETED
|
@@ -1,609 +0,0 @@
|
|
| 1 |
-
<!DOCTYPE html>
|
| 2 |
-
<html lang="en">
|
| 3 |
-
<head>
|
| 4 |
-
<meta charset="UTF-8" />
|
| 5 |
-
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 6 |
-
<title>PLRS — Personalized Learning Recommendation System</title>
|
| 7 |
-
<meta name="description" content="Constraint-aware personalized learning recommendations. Plug in your curriculum, get intelligent recommendations out." />
|
| 8 |
-
<link rel="preconnect" href="https://fonts.googleapis.com" />
|
| 9 |
-
<link href="https://fonts.googleapis.com/css2?family=DM+Mono:ital,wght@0,300;0,400;0,500;1,300&family=Syne:wght@400;600;700;800&display=swap" rel="stylesheet" />
|
| 10 |
-
|
| 11 |
-
<style>
|
| 12 |
-
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
| 13 |
-
|
| 14 |
-
:root {
|
| 15 |
-
--bg: #080c18;
|
| 16 |
-
--bg2: #0d1221;
|
| 17 |
-
--bg3: #131a2e;
|
| 18 |
-
--border: #1e2a40;
|
| 19 |
-
--border2: #1e3a5f;
|
| 20 |
-
--text: #c8d0e0;
|
| 21 |
-
--text-dim: #4a5568;
|
| 22 |
-
--text-hi: #e8edf5;
|
| 23 |
-
--blue: #3d8bcd;
|
| 24 |
-
--green: #22c55e;
|
| 25 |
-
--amber: #f59e0b;
|
| 26 |
-
--red: #ef4444;
|
| 27 |
-
--mono: 'DM Mono', monospace;
|
| 28 |
-
--sans: 'Syne', sans-serif;
|
| 29 |
-
}
|
| 30 |
-
|
| 31 |
-
html { scroll-behavior: smooth; }
|
| 32 |
-
|
| 33 |
-
body {
|
| 34 |
-
background: var(--bg);
|
| 35 |
-
color: var(--text);
|
| 36 |
-
font-family: var(--sans);
|
| 37 |
-
line-height: 1.6;
|
| 38 |
-
overflow-x: hidden;
|
| 39 |
-
}
|
| 40 |
-
|
| 41 |
-
/* ── Noise overlay ── */
|
| 42 |
-
body::before {
|
| 43 |
-
content: '';
|
| 44 |
-
position: fixed; inset: 0;
|
| 45 |
-
background-image: url("data:image/svg+xml,%3Csvg viewBox='0 0 256 256' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.9' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)' opacity='0.03'/%3E%3C/svg%3E");
|
| 46 |
-
pointer-events: none;
|
| 47 |
-
z-index: 0;
|
| 48 |
-
opacity: 0.4;
|
| 49 |
-
}
|
| 50 |
-
|
| 51 |
-
/* ── Nav ── */
|
| 52 |
-
nav {
|
| 53 |
-
position: fixed; top: 0; left: 0; right: 0;
|
| 54 |
-
display: flex; align-items: center; justify-content: space-between;
|
| 55 |
-
padding: 1rem 2.5rem;
|
| 56 |
-
background: rgba(8, 12, 24, 0.85);
|
| 57 |
-
backdrop-filter: blur(12px);
|
| 58 |
-
border-bottom: 1px solid var(--border);
|
| 59 |
-
z-index: 100;
|
| 60 |
-
}
|
| 61 |
-
.nav-logo {
|
| 62 |
-
font-weight: 800; font-size: 1.1rem; color: var(--text-hi);
|
| 63 |
-
letter-spacing: -0.02em; text-decoration: none;
|
| 64 |
-
}
|
| 65 |
-
.nav-logo span { color: var(--blue); }
|
| 66 |
-
.nav-links { display: flex; gap: 2rem; align-items: center; }
|
| 67 |
-
.nav-links a {
|
| 68 |
-
font-family: var(--mono); font-size: 0.7rem; letter-spacing: 0.1em;
|
| 69 |
-
color: var(--text-dim); text-decoration: none; text-transform: uppercase;
|
| 70 |
-
transition: color 0.2s;
|
| 71 |
-
}
|
| 72 |
-
.nav-links a:hover { color: var(--blue); }
|
| 73 |
-
.btn {
|
| 74 |
-
display: inline-flex; align-items: center; gap: 0.5rem;
|
| 75 |
-
padding: 0.5rem 1.1rem; border-radius: 3px; font-family: var(--mono);
|
| 76 |
-
font-size: 0.7rem; letter-spacing: 0.08em; text-decoration: none;
|
| 77 |
-
transition: all 0.2s; cursor: pointer; border: none;
|
| 78 |
-
}
|
| 79 |
-
.btn-primary {
|
| 80 |
-
background: var(--blue); color: #fff;
|
| 81 |
-
}
|
| 82 |
-
.btn-primary:hover { background: #4d9bdd; }
|
| 83 |
-
.btn-outline {
|
| 84 |
-
background: transparent; color: var(--blue);
|
| 85 |
-
border: 1px solid var(--border2);
|
| 86 |
-
}
|
| 87 |
-
.btn-outline:hover { border-color: var(--blue); background: rgba(61,139,205,0.07); }
|
| 88 |
-
|
| 89 |
-
/* ── Hero ── */
|
| 90 |
-
.hero {
|
| 91 |
-
min-height: 100vh;
|
| 92 |
-
display: flex; flex-direction: column; justify-content: center;
|
| 93 |
-
padding: 8rem 2.5rem 5rem;
|
| 94 |
-
max-width: 1100px; margin: 0 auto;
|
| 95 |
-
position: relative;
|
| 96 |
-
}
|
| 97 |
-
.hero-eyebrow {
|
| 98 |
-
font-family: var(--mono); font-size: 0.7rem; letter-spacing: 0.18em;
|
| 99 |
-
color: var(--blue); text-transform: uppercase; margin-bottom: 1.5rem;
|
| 100 |
-
display: flex; align-items: center; gap: 0.75rem;
|
| 101 |
-
}
|
| 102 |
-
.hero-eyebrow::before {
|
| 103 |
-
content: ''; display: block; width: 2rem; height: 1px; background: var(--blue);
|
| 104 |
-
}
|
| 105 |
-
.hero h1 {
|
| 106 |
-
font-size: clamp(2.8rem, 6vw, 5rem);
|
| 107 |
-
font-weight: 800; line-height: 1.05;
|
| 108 |
-
letter-spacing: -0.03em; color: var(--text-hi);
|
| 109 |
-
margin-bottom: 1.5rem;
|
| 110 |
-
}
|
| 111 |
-
.hero h1 em {
|
| 112 |
-
font-style: normal; color: var(--blue);
|
| 113 |
-
}
|
| 114 |
-
.hero-sub {
|
| 115 |
-
font-size: 1.1rem; color: var(--text-dim);
|
| 116 |
-
max-width: 560px; margin-bottom: 2.5rem;
|
| 117 |
-
line-height: 1.7;
|
| 118 |
-
}
|
| 119 |
-
.hero-ctas { display: flex; gap: 0.75rem; flex-wrap: wrap; margin-bottom: 4rem; }
|
| 120 |
-
.btn-hero {
|
| 121 |
-
padding: 0.75rem 1.5rem; font-size: 0.8rem;
|
| 122 |
-
}
|
| 123 |
-
|
| 124 |
-
/* ── Stat strip ── */
|
| 125 |
-
.stat-strip {
|
| 126 |
-
display: flex; gap: 2.5rem; flex-wrap: wrap;
|
| 127 |
-
border-top: 1px solid var(--border);
|
| 128 |
-
padding-top: 2rem;
|
| 129 |
-
}
|
| 130 |
-
.stat-item {}
|
| 131 |
-
.stat-num {
|
| 132 |
-
font-size: 2rem; font-weight: 800; color: var(--text-hi);
|
| 133 |
-
line-height: 1;
|
| 134 |
-
}
|
| 135 |
-
.stat-num span { color: var(--green); }
|
| 136 |
-
.stat-label {
|
| 137 |
-
font-family: var(--mono); font-size: 0.65rem; letter-spacing: 0.1em;
|
| 138 |
-
color: var(--text-dim); text-transform: uppercase; margin-top: 0.2rem;
|
| 139 |
-
}
|
| 140 |
-
|
| 141 |
-
/* ── Grid background decoration ── */
|
| 142 |
-
.hero-grid {
|
| 143 |
-
position: absolute; top: 0; right: -5%; bottom: 0; width: 50%;
|
| 144 |
-
background-image:
|
| 145 |
-
linear-gradient(var(--border) 1px, transparent 1px),
|
| 146 |
-
linear-gradient(90deg, var(--border) 1px, transparent 1px);
|
| 147 |
-
background-size: 40px 40px;
|
| 148 |
-
mask-image: linear-gradient(to left, rgba(0,0,0,0.15), transparent 70%);
|
| 149 |
-
pointer-events: none;
|
| 150 |
-
}
|
| 151 |
-
|
| 152 |
-
/* ── Section ── */
|
| 153 |
-
section {
|
| 154 |
-
max-width: 1100px; margin: 0 auto;
|
| 155 |
-
padding: 5rem 2.5rem;
|
| 156 |
-
}
|
| 157 |
-
.section-label {
|
| 158 |
-
font-family: var(--mono); font-size: 0.65rem; letter-spacing: 0.18em;
|
| 159 |
-
color: var(--blue); text-transform: uppercase;
|
| 160 |
-
display: flex; align-items: center; gap: 0.75rem;
|
| 161 |
-
margin-bottom: 1rem;
|
| 162 |
-
}
|
| 163 |
-
.section-label::before {
|
| 164 |
-
content: ''; display: block; width: 1.5rem; height: 1px; background: var(--blue);
|
| 165 |
-
}
|
| 166 |
-
.section-title {
|
| 167 |
-
font-size: clamp(1.8rem, 3.5vw, 2.5rem);
|
| 168 |
-
font-weight: 800; letter-spacing: -0.02em; color: var(--text-hi);
|
| 169 |
-
margin-bottom: 1rem;
|
| 170 |
-
}
|
| 171 |
-
.section-body {
|
| 172 |
-
color: var(--text-dim); font-size: 0.95rem; max-width: 600px;
|
| 173 |
-
line-height: 1.8; margin-bottom: 2.5rem;
|
| 174 |
-
}
|
| 175 |
-
|
| 176 |
-
/* ── Architecture flow ── */
|
| 177 |
-
.arch-flow {
|
| 178 |
-
display: flex; align-items: center; flex-wrap: wrap;
|
| 179 |
-
gap: 0; margin: 2.5rem 0;
|
| 180 |
-
}
|
| 181 |
-
.arch-node {
|
| 182 |
-
background: var(--bg2); border: 1px solid var(--border);
|
| 183 |
-
border-radius: 4px; padding: 0.7rem 1rem;
|
| 184 |
-
font-family: var(--mono); font-size: 0.72rem; color: var(--text);
|
| 185 |
-
letter-spacing: 0.04em; position: relative;
|
| 186 |
-
}
|
| 187 |
-
.arch-node.highlight { border-color: var(--blue); color: var(--blue); }
|
| 188 |
-
.arch-arrow {
|
| 189 |
-
font-family: var(--mono); color: var(--border2); padding: 0 0.4rem;
|
| 190 |
-
font-size: 0.9rem;
|
| 191 |
-
}
|
| 192 |
-
|
| 193 |
-
/* ── Three-tier cards ── */
|
| 194 |
-
.tier-grid { display: grid; grid-template-columns: repeat(3, 1fr); gap: 1rem; margin-top: 2rem; }
|
| 195 |
-
.tier-card {
|
| 196 |
-
background: var(--bg2); border: 1px solid var(--border);
|
| 197 |
-
border-radius: 4px; padding: 1.5rem;
|
| 198 |
-
position: relative; overflow: hidden;
|
| 199 |
-
}
|
| 200 |
-
.tier-card::before {
|
| 201 |
-
content: ''; position: absolute; top: 0; left: 0; right: 0; height: 2px;
|
| 202 |
-
background: var(--accent);
|
| 203 |
-
}
|
| 204 |
-
.tier-card.green { --accent: var(--green); }
|
| 205 |
-
.tier-card.amber { --accent: var(--amber); }
|
| 206 |
-
.tier-card.red { --accent: var(--red); }
|
| 207 |
-
.tier-icon { font-size: 1.5rem; margin-bottom: 0.75rem; }
|
| 208 |
-
.tier-name {
|
| 209 |
-
font-weight: 700; font-size: 1rem; color: var(--text-hi);
|
| 210 |
-
margin-bottom: 0.35rem;
|
| 211 |
-
}
|
| 212 |
-
.tier-desc { font-size: 0.8rem; color: var(--text-dim); line-height: 1.6; }
|
| 213 |
-
|
| 214 |
-
/* ── Results table ── */
|
| 215 |
-
.results-table {
|
| 216 |
-
width: 100%; border-collapse: collapse;
|
| 217 |
-
font-family: var(--mono); font-size: 0.78rem;
|
| 218 |
-
margin-top: 2rem;
|
| 219 |
-
}
|
| 220 |
-
.results-table th {
|
| 221 |
-
text-align: left; padding: 0.6rem 1rem;
|
| 222 |
-
color: var(--text-dim); letter-spacing: 0.1em; text-transform: uppercase;
|
| 223 |
-
font-size: 0.65rem; border-bottom: 1px solid var(--border);
|
| 224 |
-
}
|
| 225 |
-
.results-table td {
|
| 226 |
-
padding: 0.75rem 1rem; border-bottom: 1px solid var(--border);
|
| 227 |
-
color: var(--text);
|
| 228 |
-
}
|
| 229 |
-
.results-table tr:last-child td { border-bottom: none; }
|
| 230 |
-
.results-table tr.highlight-row td { color: var(--text-hi); }
|
| 231 |
-
.badge-green {
|
| 232 |
-
background: rgba(34,197,94,0.1); color: var(--green);
|
| 233 |
-
border: 1px solid rgba(34,197,94,0.3);
|
| 234 |
-
padding: 1px 7px; border-radius: 2px; font-size: 0.65rem;
|
| 235 |
-
}
|
| 236 |
-
.badge-red {
|
| 237 |
-
background: rgba(239,68,68,0.1); color: var(--red);
|
| 238 |
-
border: 1px solid rgba(239,68,68,0.3);
|
| 239 |
-
padding: 1px 7px; border-radius: 2px; font-size: 0.65rem;
|
| 240 |
-
}
|
| 241 |
-
|
| 242 |
-
/* ── Code block ── */
|
| 243 |
-
.code-wrap {
|
| 244 |
-
background: var(--bg2); border: 1px solid var(--border);
|
| 245 |
-
border-radius: 4px; overflow: hidden; margin-top: 2rem;
|
| 246 |
-
}
|
| 247 |
-
.code-header {
|
| 248 |
-
display: flex; align-items: center; justify-content: space-between;
|
| 249 |
-
padding: 0.6rem 1rem; border-bottom: 1px solid var(--border);
|
| 250 |
-
background: var(--bg3);
|
| 251 |
-
}
|
| 252 |
-
.code-dots { display: flex; gap: 5px; }
|
| 253 |
-
.code-dots span {
|
| 254 |
-
width: 10px; height: 10px; border-radius: 50%;
|
| 255 |
-
background: var(--border2);
|
| 256 |
-
}
|
| 257 |
-
.code-lang {
|
| 258 |
-
font-family: var(--mono); font-size: 0.62rem;
|
| 259 |
-
color: var(--text-dim); letter-spacing: 0.1em;
|
| 260 |
-
}
|
| 261 |
-
pre {
|
| 262 |
-
padding: 1.5rem;
|
| 263 |
-
font-family: var(--mono); font-size: 0.78rem;
|
| 264 |
-
line-height: 1.7; color: var(--text);
|
| 265 |
-
overflow-x: auto;
|
| 266 |
-
}
|
| 267 |
-
.cm { color: #4a5568; } /* comment */
|
| 268 |
-
.ck { color: #3d8bcd; } /* keyword */
|
| 269 |
-
.cs { color: #22c55e; } /* string */
|
| 270 |
-
.cn { color: #f59e0b; } /* number / name */
|
| 271 |
-
.cf { color: #c084fc; } /* function */
|
| 272 |
-
|
| 273 |
-
/* ── Feature grid ── */
|
| 274 |
-
.feature-grid { display: grid; grid-template-columns: repeat(2, 1fr); gap: 1px; background: var(--border); margin-top: 2rem; border: 1px solid var(--border); border-radius: 4px; overflow: hidden; }
|
| 275 |
-
.feature-cell {
|
| 276 |
-
background: var(--bg); padding: 1.5rem;
|
| 277 |
-
}
|
| 278 |
-
.feature-icon { font-size: 1.2rem; margin-bottom: 0.75rem; }
|
| 279 |
-
.feature-title { font-weight: 700; color: var(--text-hi); margin-bottom: 0.35rem; font-size: 0.9rem; }
|
| 280 |
-
.feature-desc { font-size: 0.78rem; color: var(--text-dim); line-height: 1.6; }
|
| 281 |
-
|
| 282 |
-
/* ── CTA section ── */
|
| 283 |
-
.cta-section {
|
| 284 |
-
background: var(--bg2);
|
| 285 |
-
border-top: 1px solid var(--border);
|
| 286 |
-
border-bottom: 1px solid var(--border);
|
| 287 |
-
padding: 5rem 2.5rem;
|
| 288 |
-
text-align: center;
|
| 289 |
-
}
|
| 290 |
-
.cta-inner { max-width: 600px; margin: 0 auto; }
|
| 291 |
-
.cta-title { font-size: 2.2rem; font-weight: 800; letter-spacing: -0.02em; color: var(--text-hi); margin-bottom: 1rem; }
|
| 292 |
-
.cta-sub { color: var(--text-dim); margin-bottom: 2rem; line-height: 1.7; }
|
| 293 |
-
.cta-btns { display: flex; gap: 0.75rem; justify-content: center; flex-wrap: wrap; }
|
| 294 |
-
|
| 295 |
-
/* ── Footer ── */
|
| 296 |
-
footer {
|
| 297 |
-
border-top: 1px solid var(--border);
|
| 298 |
-
padding: 2rem 2.5rem;
|
| 299 |
-
display: flex; justify-content: space-between; align-items: center;
|
| 300 |
-
flex-wrap: wrap; gap: 1rem;
|
| 301 |
-
max-width: 100%;
|
| 302 |
-
}
|
| 303 |
-
.footer-left { font-family: var(--mono); font-size: 0.65rem; color: var(--text-dim); }
|
| 304 |
-
.footer-links { display: flex; gap: 1.5rem; }
|
| 305 |
-
.footer-links a { font-family: var(--mono); font-size: 0.65rem; color: var(--text-dim); text-decoration: none; }
|
| 306 |
-
.footer-links a:hover { color: var(--blue); }
|
| 307 |
-
|
| 308 |
-
/* ── Animations ── */
|
| 309 |
-
@keyframes fadeUp {
|
| 310 |
-
from { opacity: 0; transform: translateY(20px); }
|
| 311 |
-
to { opacity: 1; transform: translateY(0); }
|
| 312 |
-
}
|
| 313 |
-
.hero-eyebrow { animation: fadeUp 0.5s ease 0.1s both; }
|
| 314 |
-
.hero h1 { animation: fadeUp 0.5s ease 0.2s both; }
|
| 315 |
-
.hero-sub { animation: fadeUp 0.5s ease 0.3s both; }
|
| 316 |
-
.hero-ctas { animation: fadeUp 0.5s ease 0.4s both; }
|
| 317 |
-
.stat-strip { animation: fadeUp 0.5s ease 0.5s both; }
|
| 318 |
-
|
| 319 |
-
/* ── Responsive ── */
|
| 320 |
-
@media (max-width: 768px) {
|
| 321 |
-
nav { padding: 0.75rem 1.25rem; }
|
| 322 |
-
.nav-links .btn { display: none; }
|
| 323 |
-
.hero { padding: 7rem 1.25rem 4rem; }
|
| 324 |
-
.tier-grid { grid-template-columns: 1fr; }
|
| 325 |
-
.feature-grid { grid-template-columns: 1fr; }
|
| 326 |
-
section { padding: 3rem 1.25rem; }
|
| 327 |
-
.arch-flow { gap: 0.25rem; }
|
| 328 |
-
}
|
| 329 |
-
</style>
|
| 330 |
-
</head>
|
| 331 |
-
<body>
|
| 332 |
-
|
| 333 |
-
<!-- ── Nav ── -->
|
| 334 |
-
<nav>
|
| 335 |
-
<a href="#" class="nav-logo">PL<span>RS</span></a>
|
| 336 |
-
<div class="nav-links">
|
| 337 |
-
<a href="#how-it-works">How it works</a>
|
| 338 |
-
<a href="#results">Results</a>
|
| 339 |
-
<a href="#quickstart">Quickstart</a>
|
| 340 |
-
<a href="https://github.com/clementina-tom/plrs" target="_blank">GitHub</a>
|
| 341 |
-
<a href="https://huggingface.co/spaces/Clementio/PLRS" class="btn btn-primary btn-hero" target="_blank">Live Demo →</a>
|
| 342 |
-
</div>
|
| 343 |
-
</nav>
|
| 344 |
-
|
| 345 |
-
<!-- ── Hero ── -->
|
| 346 |
-
<div class="hero">
|
| 347 |
-
<div class="hero-grid"></div>
|
| 348 |
-
|
| 349 |
-
<div class="hero-eyebrow">Knowledge Tracing · Constraint-Aware · Open Source</div>
|
| 350 |
-
|
| 351 |
-
<h1>Recommendations that<br/><em>respect</em> how learning works.</h1>
|
| 352 |
-
|
| 353 |
-
<p class="hero-sub">
|
| 354 |
-
PLRS combines Self-Attentive Knowledge Tracing with a DAG prerequisite constraint layer
|
| 355 |
-
to generate personalized learning recommendations that are pedagogically sound —
|
| 356 |
-
not just statistically optimal.
|
| 357 |
-
</p>
|
| 358 |
-
|
| 359 |
-
<div class="hero-ctas">
|
| 360 |
-
<a href="https://huggingface.co/spaces/Clementio/PLRS" target="_blank" class="btn btn-primary btn-hero">
|
| 361 |
-
Try the live demo
|
| 362 |
-
</a>
|
| 363 |
-
<a href="https://github.com/clementina-tom/plrs" target="_blank" class="btn btn-outline btn-hero">
|
| 364 |
-
View on GitHub
|
| 365 |
-
</a>
|
| 366 |
-
<a href="#quickstart" class="btn btn-outline btn-hero">
|
| 367 |
-
Quickstart
|
| 368 |
-
</a>
|
| 369 |
-
</div>
|
| 370 |
-
|
| 371 |
-
<div class="stat-strip">
|
| 372 |
-
<div class="stat-item">
|
| 373 |
-
<div class="stat-num"><span>0.0</span>%</div>
|
| 374 |
-
<div class="stat-label">Prerequisite violation rate</div>
|
| 375 |
-
</div>
|
| 376 |
-
<div class="stat-item">
|
| 377 |
-
<div class="stat-num">0.7692</div>
|
| 378 |
-
<div class="stat-label">SAKT Val AUC (OULAD)</div>
|
| 379 |
-
</div>
|
| 380 |
-
<div class="stat-item">
|
| 381 |
-
<div class="stat-num">69</div>
|
| 382 |
-
<div class="stat-label">Curriculum topics (2 domains)</div>
|
| 383 |
-
</div>
|
| 384 |
-
<div class="stat-item">
|
| 385 |
-
<div class="stat-num">52</div>
|
| 386 |
-
<div class="stat-label">Tests passing</div>
|
| 387 |
-
</div>
|
| 388 |
-
</div>
|
| 389 |
-
</div>
|
| 390 |
-
|
| 391 |
-
<!-- ── How it works ── -->
|
| 392 |
-
<section id="how-it-works">
|
| 393 |
-
<div class="section-label">Architecture</div>
|
| 394 |
-
<h2 class="section-title">Three layers. One guarantee.</h2>
|
| 395 |
-
<p class="section-body">
|
| 396 |
-
Standard recommendation systems optimise for engagement or accuracy —
|
| 397 |
-
they will happily recommend Calculus to a student who hasn't mastered Algebra.
|
| 398 |
-
PLRS adds a constraint layer that makes this <em>structurally impossible</em>.
|
| 399 |
-
</p>
|
| 400 |
-
|
| 401 |
-
<div class="arch-flow">
|
| 402 |
-
<div class="arch-node">Student History</div>
|
| 403 |
-
<div class="arch-arrow">→</div>
|
| 404 |
-
<div class="arch-node highlight">SAKT Model</div>
|
| 405 |
-
<div class="arch-arrow">→</div>
|
| 406 |
-
<div class="arch-node">Mastery Vector</div>
|
| 407 |
-
<div class="arch-arrow">→</div>
|
| 408 |
-
<div class="arch-node highlight">DAG Constraints</div>
|
| 409 |
-
<div class="arch-arrow">→</div>
|
| 410 |
-
<div class="arch-node">Multi-Objective Ranker</div>
|
| 411 |
-
<div class="arch-arrow">→</div>
|
| 412 |
-
<div class="arch-node highlight">Recommendations</div>
|
| 413 |
-
</div>
|
| 414 |
-
|
| 415 |
-
<div class="tier-grid">
|
| 416 |
-
<div class="tier-card green">
|
| 417 |
-
<div class="tier-icon">✅</div>
|
| 418 |
-
<div class="tier-name">Approved</div>
|
| 419 |
-
<div class="tier-desc">All prerequisites met above the mastery threshold. Student is ready to learn this topic now.</div>
|
| 420 |
-
</div>
|
| 421 |
-
<div class="tier-card amber">
|
| 422 |
-
<div class="tier-icon">⚠️</div>
|
| 423 |
-
<div class="tier-name">Challenging</div>
|
| 424 |
-
<div class="tier-desc">Prerequisites partially met — above the soft threshold but below full mastery. Proceed with awareness.</div>
|
| 425 |
-
</div>
|
| 426 |
-
<div class="tier-card red">
|
| 427 |
-
<div class="tier-icon">❌</div>
|
| 428 |
-
<div class="tier-name">Vetoed</div>
|
| 429 |
-
<div class="tier-desc">One or more prerequisites not met. Structurally blocked until foundations are solid.</div>
|
| 430 |
-
</div>
|
| 431 |
-
</div>
|
| 432 |
-
</section>
|
| 433 |
-
|
| 434 |
-
<!-- ── Results ── -->
|
| 435 |
-
<section id="results" style="border-top: 1px solid var(--border);">
|
| 436 |
-
<div class="section-label">Evaluation</div>
|
| 437 |
-
<h2 class="section-title">0% violation rate. Not a tuning choice.</h2>
|
| 438 |
-
<p class="section-body">
|
| 439 |
-
Evaluated on the Open University Learning Analytics Dataset (OULAD) with
|
| 440 |
-
Nigerian secondary school curriculum knowledge maps. The 0% violation rate
|
| 441 |
-
is a structural guarantee from the DAG constraint layer — not a hyperparameter.
|
| 442 |
-
</p>
|
| 443 |
-
|
| 444 |
-
<table class="results-table">
|
| 445 |
-
<thead>
|
| 446 |
-
<tr>
|
| 447 |
-
<th>Model</th>
|
| 448 |
-
<th>Val AUC</th>
|
| 449 |
-
<th>Prerequisite Violation Rate</th>
|
| 450 |
-
<th>Coverage</th>
|
| 451 |
-
</tr>
|
| 452 |
-
</thead>
|
| 453 |
-
<tbody>
|
| 454 |
-
<tr class="highlight-row">
|
| 455 |
-
<td><strong>PLRS (SAKT + DAG)</strong></td>
|
| 456 |
-
<td><strong>0.7692</strong></td>
|
| 457 |
-
<td><span class="badge-green">0.0%</span></td>
|
| 458 |
-
<td>Full curriculum</td>
|
| 459 |
-
</tr>
|
| 460 |
-
<tr>
|
| 461 |
-
<td>Collaborative Filtering</td>
|
| 462 |
-
<td>—</td>
|
| 463 |
-
<td><span class="badge-red">81.3%</span></td>
|
| 464 |
-
<td>Partial</td>
|
| 465 |
-
</tr>
|
| 466 |
-
<tr>
|
| 467 |
-
<td>Matrix Factorization</td>
|
| 468 |
-
<td>—</td>
|
| 469 |
-
<td><span class="badge-red">83.7%</span></td>
|
| 470 |
-
<td>Partial</td>
|
| 471 |
-
</tr>
|
| 472 |
-
<tr>
|
| 473 |
-
<td>BKT (baseline)</td>
|
| 474 |
-
<td>~0.67</td>
|
| 475 |
-
<td><span class="badge-red">No constraint layer</span></td>
|
| 476 |
-
<td>Partial</td>
|
| 477 |
-
</tr>
|
| 478 |
-
</tbody>
|
| 479 |
-
</table>
|
| 480 |
-
</section>
|
| 481 |
-
|
| 482 |
-
<!-- ── Quickstart ── -->
|
| 483 |
-
<section id="quickstart" style="border-top: 1px solid var(--border);">
|
| 484 |
-
<div class="section-label">Quickstart</div>
|
| 485 |
-
<h2 class="section-title">Plug in your curriculum.</h2>
|
| 486 |
-
<p class="section-body">
|
| 487 |
-
PLRS is curriculum-agnostic. Define your knowledge graph in a simple JSON format
|
| 488 |
-
and get recommendations immediately. No retraining required for new domains.
|
| 489 |
-
</p>
|
| 490 |
-
|
| 491 |
-
<div class="code-wrap">
|
| 492 |
-
<div class="code-header">
|
| 493 |
-
<div class="code-dots"><span></span><span></span><span></span></div>
|
| 494 |
-
<div class="code-lang">PYTHON</div>
|
| 495 |
-
</div>
|
| 496 |
-
<pre><span class="ck">from</span> plrs <span class="ck">import</span> PLRSPipeline
|
| 497 |
-
<span class="ck">from</span> plrs.curriculum <span class="ck">import</span> load_dag
|
| 498 |
-
|
| 499 |
-
<span class="cm"># Load your curriculum (JSON knowledge graph)</span>
|
| 500 |
-
curriculum = <span class="cf">load_dag</span>(<span class="cs">"math_dag.json"</span>)
|
| 501 |
-
|
| 502 |
-
<span class="cm"># Create pipeline — no model needed for mastery-dict mode</span>
|
| 503 |
-
pipeline = <span class="cf">PLRSPipeline</span>(curriculum)
|
| 504 |
-
|
| 505 |
-
<span class="cm"># Get recommendations from student mastery scores</span>
|
| 506 |
-
results = pipeline.<span class="cf">recommend_from_mastery</span>({
|
| 507 |
-
<span class="cs">"whole_numbers"</span>: <span class="cn">0.90</span>,
|
| 508 |
-
<span class="cs">"algebraic_expressions"</span>: <span class="cn">0.75</span>,
|
| 509 |
-
<span class="cs">"quadratic_equations"</span>: <span class="cn">0.40</span>,
|
| 510 |
-
})
|
| 511 |
-
|
| 512 |
-
<span class="ck">for</span> rec <span class="ck">in</span> results[<span class="cs">"approved"</span>]:
|
| 513 |
-
<span class="cf">print</span>(<span class="cs">f"✅ {rec['topic_label']} (score={rec['score']})"</span>)
|
| 514 |
-
<span class="cf">print</span>(<span class="cs">f" {rec['reasoning']}"</span>)
|
| 515 |
-
|
| 516 |
-
<span class="cm"># What-if: what does mastering this topic unlock?</span>
|
| 517 |
-
wi = pipeline.<span class="cf">what_if</span>(<span class="cs">"algebraic_expressions"</span>)
|
| 518 |
-
<span class="cf">print</span>(<span class="cs">f"Unlocks {wi['total_unlocked']} downstream topics"</span>)</pre>
|
| 519 |
-
</div>
|
| 520 |
-
|
| 521 |
-
<div class="code-wrap" style="margin-top: 1rem;">
|
| 522 |
-
<div class="code-header">
|
| 523 |
-
<div class="code-dots"><span></span><span></span><span></span></div>
|
| 524 |
-
<div class="code-lang">REST API</div>
|
| 525 |
-
</div>
|
| 526 |
-
<pre><span class="cm"># Start the server</span>
|
| 527 |
-
$ python scripts/serve.py
|
| 528 |
-
<span class="cm"># → http://127.0.0.1:8000/docs</span>
|
| 529 |
-
|
| 530 |
-
<span class="cm"># Get recommendations</span>
|
| 531 |
-
$ curl -X POST http://localhost:<span class="cn">8000</span>/recommend \
|
| 532 |
-
-H <span class="cs">"Content-Type: application/json"</span> \
|
| 533 |
-
-d <span class="cs">'{"domain":"math","mastery_scores":{"whole_numbers":0.9}}'</span></pre>
|
| 534 |
-
</div>
|
| 535 |
-
</section>
|
| 536 |
-
|
| 537 |
-
<!-- ── Features ── -->
|
| 538 |
-
<section style="border-top: 1px solid var(--border);">
|
| 539 |
-
<div class="section-label">Features</div>
|
| 540 |
-
<h2 class="section-title">Built for real deployment.</h2>
|
| 541 |
-
|
| 542 |
-
<div class="feature-grid">
|
| 543 |
-
<div class="feature-cell">
|
| 544 |
-
<div class="feature-icon">🔌</div>
|
| 545 |
-
<div class="feature-title">Curriculum-agnostic</div>
|
| 546 |
-
<div class="feature-desc">Define any knowledge graph in a simple JSON format. Ships with Nigerian secondary school Maths and CS Fundamentals (NERDC JSS3–SS2).</div>
|
| 547 |
-
</div>
|
| 548 |
-
<div class="feature-cell">
|
| 549 |
-
<div class="feature-icon">⚡</div>
|
| 550 |
-
<div class="feature-title">FastAPI REST backend</div>
|
| 551 |
-
<div class="feature-desc">Production-ready API with <code>/recommend</code>, <code>/what-if</code>, and <code>/curriculum</code> endpoints. Auto-generated OpenAPI docs.</div>
|
| 552 |
-
</div>
|
| 553 |
-
<div class="feature-cell">
|
| 554 |
-
<div class="feature-icon">🧠</div>
|
| 555 |
-
<div class="feature-title">SAKT + Forgetting Curve</div>
|
| 556 |
-
<div class="feature-desc">Self-Attentive Knowledge Tracing with optional Ebbinghaus decay attention — older interactions contribute less to current mastery estimates.</div>
|
| 557 |
-
</div>
|
| 558 |
-
<div class="feature-cell">
|
| 559 |
-
<div class="feature-icon">🔍</div>
|
| 560 |
-
<div class="feature-title">What-If Simulator</div>
|
| 561 |
-
<div class="feature-desc">"If I master Trigonometry now, what unlocks?" — live DAG traversal shows direct and transitive downstream topics.</div>
|
| 562 |
-
</div>
|
| 563 |
-
<div class="feature-cell">
|
| 564 |
-
<div class="feature-icon">📦</div>
|
| 565 |
-
<div class="feature-title">PyPI-ready package</div>
|
| 566 |
-
<div class="feature-desc"><code>pip install plrs</code> — modular architecture with clean public API. Full type annotations throughout.</div>
|
| 567 |
-
</div>
|
| 568 |
-
<div class="feature-cell">
|
| 569 |
-
<div class="feature-icon">🧪</div>
|
| 570 |
-
<div class="feature-title">52 tests, CI on 3 Python versions</div>
|
| 571 |
-
<div class="feature-desc">Unit tests, API integration tests, and evaluator tests. GitHub Actions runs on Python 3.10, 3.11, and 3.12.</div>
|
| 572 |
-
</div>
|
| 573 |
-
</div>
|
| 574 |
-
</section>
|
| 575 |
-
|
| 576 |
-
<!-- ── CTA ── -->
|
| 577 |
-
<div class="cta-section">
|
| 578 |
-
<div class="cta-inner">
|
| 579 |
-
<div class="cta-title">Try it now — no setup required.</div>
|
| 580 |
-
<p class="cta-sub">
|
| 581 |
-
The live demo runs the full pipeline in your browser.
|
| 582 |
-
Adjust mastery sliders, simulate student sequences, explore the curriculum graph.
|
| 583 |
-
</p>
|
| 584 |
-
<div class="cta-btns">
|
| 585 |
-
<a href="https://huggingface.co/spaces/Clementio/PLRS" target="_blank" class="btn btn-primary btn-hero">
|
| 586 |
-
Open live demo →
|
| 587 |
-
</a>
|
| 588 |
-
<a href="https://github.com/clementina-tom/plrs" target="_blank" class="btn btn-outline btn-hero">
|
| 589 |
-
Star on GitHub
|
| 590 |
-
</a>
|
| 591 |
-
</div>
|
| 592 |
-
</div>
|
| 593 |
-
</div>
|
| 594 |
-
|
| 595 |
-
<!-- ── Footer ── */
|
| 596 |
-
<footer>
|
| 597 |
-
<div class="footer-left">
|
| 598 |
-
PLRS — Personalized Learning Recommendation System<br/>
|
| 599 |
-
MIT License · Built by <a href="https://github.com/clementina-tom" style="color:var(--blue);text-decoration:none;">Clementina Tom</a>
|
| 600 |
-
</div>
|
| 601 |
-
<div class="footer-links">
|
| 602 |
-
<a href="https://github.com/clementina-tom/plrs" target="_blank">GitHub</a>
|
| 603 |
-
<a href="https://huggingface.co/spaces/Clementio/PLRS" target="_blank">HuggingFace</a>
|
| 604 |
-
<a href="https://huggingface.co/spaces/Clementio/PLRS" target="_blank">Live Demo</a>
|
| 605 |
-
</div>
|
| 606 |
-
</footer>
|
| 607 |
-
|
| 608 |
-
</body>
|
| 609 |
-
</html>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{data/knowledge_maps → knowledge_maps}/cs_dag.json
RENAMED
|
File without changes
|
{data/knowledge_maps → knowledge_maps}/math_dag.json
RENAMED
|
File without changes
|
plrs/__init__.py
DELETED
|
@@ -1,30 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
PLRS — Personalized Learning Recommendation System
|
| 3 |
-
====================================================
|
| 4 |
-
Constraint-aware personalized learning recommendations.
|
| 5 |
-
Plug in your curriculum DAG, get intelligent recommendations out.
|
| 6 |
-
|
| 7 |
-
Quick start:
|
| 8 |
-
from plrs import PLRSPipeline
|
| 9 |
-
from plrs.curriculum import load_dag
|
| 10 |
-
|
| 11 |
-
graph = load_dag("my_curriculum.json")
|
| 12 |
-
pipeline = PLRSPipeline(graph)
|
| 13 |
-
results = pipeline.recommend(student_history)
|
| 14 |
-
"""
|
| 15 |
-
|
| 16 |
-
from plrs.pipeline import PLRSPipeline
|
| 17 |
-
from plrs.model.sakt import SAKTModel
|
| 18 |
-
from plrs.constraints.dag import DAGConstraintLayer
|
| 19 |
-
from plrs.ranking.ranker import MultiObjectiveRanker
|
| 20 |
-
from plrs.curriculum.loader import load_dag, CurriculumGraph
|
| 21 |
-
|
| 22 |
-
__version__ = "0.1.0"
|
| 23 |
-
__all__ = [
|
| 24 |
-
"PLRSPipeline",
|
| 25 |
-
"SAKTModel",
|
| 26 |
-
"DAGConstraintLayer",
|
| 27 |
-
"MultiObjectiveRanker",
|
| 28 |
-
"load_dag",
|
| 29 |
-
"CurriculumGraph",
|
| 30 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
plrs/constraints/__init__.py
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
from plrs.constraints.dag import DAGConstraintLayer, MasteryVector, ConstraintResult
|
| 2 |
-
|
| 3 |
-
__all__ = ["DAGConstraintLayer", "MasteryVector", "ConstraintResult"]
|
|
|
|
|
|
|
|
|
|
|
|
plrs/constraints/dag.py
DELETED
|
@@ -1,201 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
plrs.constraints.dag
|
| 3 |
-
====================
|
| 4 |
-
DAG-based prerequisite constraint layer.
|
| 5 |
-
|
| 6 |
-
Three-tier classification:
|
| 7 |
-
- approved : prerequisites met, topic is ready
|
| 8 |
-
- challenging : prerequisites partially met (above soft threshold)
|
| 9 |
-
- vetoed : prerequisites not met, topic is blocked
|
| 10 |
-
"""
|
| 11 |
-
|
| 12 |
-
from __future__ import annotations
|
| 13 |
-
|
| 14 |
-
from dataclasses import dataclass, field
|
| 15 |
-
from typing import Literal
|
| 16 |
-
|
| 17 |
-
from plrs.curriculum.loader import CurriculumGraph
|
| 18 |
-
|
| 19 |
-
Status = Literal["approved", "challenging", "vetoed"]
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
class MasteryVector:
|
| 23 |
-
"""
|
| 24 |
-
Holds a student's estimated mastery probability per topic.
|
| 25 |
-
|
| 26 |
-
Parameters
|
| 27 |
-
----------
|
| 28 |
-
curriculum : CurriculumGraph
|
| 29 |
-
threshold : float
|
| 30 |
-
Mastery threshold — above this, a topic is considered mastered (default 0.70).
|
| 31 |
-
soft_threshold : float
|
| 32 |
-
Soft threshold — above this but below threshold, a topic is "challenging" (default 0.50).
|
| 33 |
-
"""
|
| 34 |
-
|
| 35 |
-
def __init__(
|
| 36 |
-
self,
|
| 37 |
-
curriculum: CurriculumGraph,
|
| 38 |
-
threshold: float = 0.70,
|
| 39 |
-
soft_threshold: float = 0.50,
|
| 40 |
-
) -> None:
|
| 41 |
-
self.curriculum = curriculum
|
| 42 |
-
self.threshold = threshold
|
| 43 |
-
self.soft_threshold = soft_threshold
|
| 44 |
-
self._mastery: dict[str, float] = {node: 0.0 for node in curriculum.nodes}
|
| 45 |
-
|
| 46 |
-
# ------------------------------------------------------------------ #
|
| 47 |
-
# Mutations #
|
| 48 |
-
# ------------------------------------------------------------------ #
|
| 49 |
-
|
| 50 |
-
def update(self, topic_id: str, probability: float) -> None:
|
| 51 |
-
"""Set mastery probability for a topic (clamped to [0, 1])."""
|
| 52 |
-
if topic_id in self._mastery:
|
| 53 |
-
self._mastery[topic_id] = max(0.0, min(1.0, probability))
|
| 54 |
-
|
| 55 |
-
def update_batch(self, updates: dict[str, float]) -> None:
|
| 56 |
-
"""Update multiple topics at once."""
|
| 57 |
-
for topic_id, prob in updates.items():
|
| 58 |
-
self.update(topic_id, prob)
|
| 59 |
-
|
| 60 |
-
def cascade_up(self) -> None:
|
| 61 |
-
"""
|
| 62 |
-
Propagate mastery scores upward through the DAG.
|
| 63 |
-
|
| 64 |
-
If a student has high mastery on a topic, infer that their
|
| 65 |
-
prerequisites are also likely mastered.
|
| 66 |
-
"""
|
| 67 |
-
changed = True
|
| 68 |
-
while changed:
|
| 69 |
-
changed = False
|
| 70 |
-
for node in self.curriculum.nodes:
|
| 71 |
-
node_mastery = self.get(node)
|
| 72 |
-
if node_mastery < 0.40:
|
| 73 |
-
continue
|
| 74 |
-
# For each prerequisite of this node
|
| 75 |
-
for prereq in self.curriculum.prerequisites(node):
|
| 76 |
-
prereq_mastery = self.get(prereq)
|
| 77 |
-
# Infer prerequisite mastery as at least 85% of descendant mastery
|
| 78 |
-
inferred = min(node_mastery * 0.85, 0.95)
|
| 79 |
-
if inferred > prereq_mastery:
|
| 80 |
-
self.update(prereq, inferred)
|
| 81 |
-
changed = True
|
| 82 |
-
|
| 83 |
-
# ------------------------------------------------------------------ #
|
| 84 |
-
# Queries #
|
| 85 |
-
# ------------------------------------------------------------------ #
|
| 86 |
-
|
| 87 |
-
def get(self, topic_id: str) -> float:
|
| 88 |
-
return self._mastery.get(topic_id, 0.0)
|
| 89 |
-
|
| 90 |
-
def is_mastered(self, topic_id: str) -> bool:
|
| 91 |
-
return self.get(topic_id) >= self.threshold
|
| 92 |
-
|
| 93 |
-
def is_partial(self, topic_id: str) -> bool:
|
| 94 |
-
"""Between soft_threshold and threshold — partially mastered."""
|
| 95 |
-
v = self.get(topic_id)
|
| 96 |
-
return self.soft_threshold <= v < self.threshold
|
| 97 |
-
|
| 98 |
-
def summary(self) -> dict:
|
| 99 |
-
mastered = [t for t in self._mastery if self.is_mastered(t)]
|
| 100 |
-
partial = [t for t in self._mastery if self.is_partial(t)]
|
| 101 |
-
return {
|
| 102 |
-
"total_topics": len(self._mastery),
|
| 103 |
-
"mastered": len(mastered),
|
| 104 |
-
"partial": len(partial),
|
| 105 |
-
"not_started": len(self._mastery) - len(mastered) - len(partial),
|
| 106 |
-
"mastery_rate": round(len(mastered) / max(len(self._mastery), 1), 3),
|
| 107 |
-
"mastered_topics": mastered,
|
| 108 |
-
}
|
| 109 |
-
|
| 110 |
-
def to_dict(self) -> dict[str, float]:
|
| 111 |
-
return dict(self._mastery)
|
| 112 |
-
|
| 113 |
-
def __repr__(self) -> str:
|
| 114 |
-
s = self.summary()
|
| 115 |
-
return (
|
| 116 |
-
f"MasteryVector(mastered={s['mastered']}/{s['total_topics']}, "
|
| 117 |
-
f"rate={s['mastery_rate']:.1%})"
|
| 118 |
-
)
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
@dataclass
|
| 122 |
-
class ConstraintResult:
|
| 123 |
-
topic_id: str
|
| 124 |
-
topic_label: str
|
| 125 |
-
status: Status
|
| 126 |
-
mastery: float
|
| 127 |
-
reasoning: str
|
| 128 |
-
score: float = 0.0
|
| 129 |
-
prerequisites: list[str] = field(default_factory=list)
|
| 130 |
-
unmet_prerequisites: list[str] = field(default_factory=list)
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
class DAGConstraintLayer:
|
| 134 |
-
"""
|
| 135 |
-
Validates topic recommendations against curriculum prerequisite structure.
|
| 136 |
-
|
| 137 |
-
Uses three-tier soft constraint logic:
|
| 138 |
-
- mastery >= threshold on ALL prerequisites → approved
|
| 139 |
-
- mastery >= soft_threshold on ALL prereqs → challenging
|
| 140 |
-
- any prerequisite below soft_threshold → vetoed
|
| 141 |
-
"""
|
| 142 |
-
|
| 143 |
-
def __init__(self, curriculum: CurriculumGraph) -> None:
|
| 144 |
-
self.curriculum = curriculum
|
| 145 |
-
|
| 146 |
-
def validate(
|
| 147 |
-
self,
|
| 148 |
-
topic_id: str,
|
| 149 |
-
mastery: MasteryVector,
|
| 150 |
-
) -> ConstraintResult:
|
| 151 |
-
label = self.curriculum.label(topic_id)
|
| 152 |
-
prereqs = self.curriculum.prerequisites(topic_id)
|
| 153 |
-
topic_mastery = mastery.get(topic_id)
|
| 154 |
-
|
| 155 |
-
if not prereqs:
|
| 156 |
-
return ConstraintResult(
|
| 157 |
-
topic_id=topic_id,
|
| 158 |
-
topic_label=label,
|
| 159 |
-
status="approved",
|
| 160 |
-
mastery=topic_mastery,
|
| 161 |
-
reasoning="No prerequisites required.",
|
| 162 |
-
prerequisites=[],
|
| 163 |
-
unmet_prerequisites=[],
|
| 164 |
-
)
|
| 165 |
-
|
| 166 |
-
prereq_labels = [self.curriculum.label(p) for p in prereqs]
|
| 167 |
-
unmet_hard = [p for p in prereqs if not mastery.is_mastered(p)]
|
| 168 |
-
unmet_soft = [p for p in prereqs if mastery.get(p) < mastery.soft_threshold]
|
| 169 |
-
|
| 170 |
-
if not unmet_soft:
|
| 171 |
-
# All prereqs above soft threshold — at least challenging
|
| 172 |
-
if not unmet_hard:
|
| 173 |
-
status = "approved"
|
| 174 |
-
reasoning = f"All {len(prereqs)} prerequisite(s) met."
|
| 175 |
-
else:
|
| 176 |
-
status = "challenging"
|
| 177 |
-
unmet_labels = [self.curriculum.label(p) for p in unmet_hard]
|
| 178 |
-
reasoning = (
|
| 179 |
-
f"Prerequisite(s) partially met. "
|
| 180 |
-
f"Strengthen: {', '.join(unmet_labels)}."
|
| 181 |
-
)
|
| 182 |
-
else:
|
| 183 |
-
status = "vetoed"
|
| 184 |
-
unmet_labels = [self.curriculum.label(p) for p in unmet_soft]
|
| 185 |
-
reasoning = (
|
| 186 |
-
f"Blocked. Master first: {', '.join(unmet_labels)}."
|
| 187 |
-
)
|
| 188 |
-
|
| 189 |
-
return ConstraintResult(
|
| 190 |
-
topic_id=topic_id,
|
| 191 |
-
topic_label=label,
|
| 192 |
-
status=status,
|
| 193 |
-
mastery=topic_mastery,
|
| 194 |
-
reasoning=reasoning,
|
| 195 |
-
prerequisites=prereq_labels,
|
| 196 |
-
unmet_prerequisites=[self.curriculum.label(p) for p in (unmet_hard if status == "challenging" else unmet_soft)],
|
| 197 |
-
)
|
| 198 |
-
|
| 199 |
-
def validate_all(self, mastery: MasteryVector) -> list[ConstraintResult]:
|
| 200 |
-
"""Validate every topic in the curriculum."""
|
| 201 |
-
return [self.validate(node, mastery) for node in self.curriculum.nodes]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
plrs/curriculum/__init__.py
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
from plrs.curriculum.loader import load_dag, CurriculumGraph
|
| 2 |
-
|
| 3 |
-
__all__ = ["load_dag", "CurriculumGraph"]
|
|
|
|
|
|
|
|
|
|
|
|
plrs/curriculum/loader.py
DELETED
|
@@ -1,144 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
plrs.curriculum.loader
|
| 3 |
-
======================
|
| 4 |
-
Load and validate curriculum knowledge graphs from JSON.
|
| 5 |
-
|
| 6 |
-
The JSON schema is deliberately simple so educators can author their own:
|
| 7 |
-
|
| 8 |
-
{
|
| 9 |
-
"domain": "Mathematics",
|
| 10 |
-
"nodes": [
|
| 11 |
-
{"id": "algebra_basics", "label": "Algebra Basics", "level": "JSS3"},
|
| 12 |
-
{"id": "quadratic_equations", "label": "Quadratic Equations", "level": "SS1"}
|
| 13 |
-
],
|
| 14 |
-
"edges": [
|
| 15 |
-
{"from": "algebra_basics", "to": "quadratic_equations"}
|
| 16 |
-
]
|
| 17 |
-
}
|
| 18 |
-
"""
|
| 19 |
-
|
| 20 |
-
from __future__ import annotations
|
| 21 |
-
|
| 22 |
-
import json
|
| 23 |
-
from dataclasses import dataclass, field
|
| 24 |
-
from pathlib import Path
|
| 25 |
-
from typing import Any
|
| 26 |
-
|
| 27 |
-
import networkx as nx
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
@dataclass
|
| 31 |
-
class CurriculumGraph:
|
| 32 |
-
"""Thin wrapper around a NetworkX DiGraph with domain metadata."""
|
| 33 |
-
|
| 34 |
-
domain: str
|
| 35 |
-
graph: nx.DiGraph
|
| 36 |
-
meta: dict[str, Any] = field(default_factory=dict)
|
| 37 |
-
|
| 38 |
-
# ------------------------------------------------------------------ #
|
| 39 |
-
# Properties #
|
| 40 |
-
# ------------------------------------------------------------------ #
|
| 41 |
-
|
| 42 |
-
@property
|
| 43 |
-
def nodes(self) -> list[str]:
|
| 44 |
-
return list(self.graph.nodes)
|
| 45 |
-
|
| 46 |
-
@property
|
| 47 |
-
def num_nodes(self) -> int:
|
| 48 |
-
return self.graph.number_of_nodes()
|
| 49 |
-
|
| 50 |
-
@property
|
| 51 |
-
def num_edges(self) -> int:
|
| 52 |
-
return self.graph.number_of_edges()
|
| 53 |
-
|
| 54 |
-
def label(self, node_id: str) -> str:
|
| 55 |
-
return self.graph.nodes[node_id].get("label", node_id)
|
| 56 |
-
|
| 57 |
-
def level(self, node_id: str) -> str:
|
| 58 |
-
return self.graph.nodes[node_id].get("level", "")
|
| 59 |
-
|
| 60 |
-
def prerequisites(self, node_id: str) -> list[str]:
|
| 61 |
-
return list(self.graph.predecessors(node_id))
|
| 62 |
-
|
| 63 |
-
def successors(self, node_id: str) -> list[str]:
|
| 64 |
-
return list(self.graph.successors(node_id))
|
| 65 |
-
|
| 66 |
-
def descendants(self, node_id: str) -> list[str]:
|
| 67 |
-
return list(nx.descendants(self.graph, node_id))
|
| 68 |
-
|
| 69 |
-
def validate(self) -> list[str]:
|
| 70 |
-
"""Return a list of validation warnings (empty = all good)."""
|
| 71 |
-
warnings: list[str] = []
|
| 72 |
-
if not nx.is_directed_acyclic_graph(self.graph):
|
| 73 |
-
warnings.append("Graph contains cycles — prerequisite checking will be unreliable.")
|
| 74 |
-
isolates = list(nx.isolates(self.graph))
|
| 75 |
-
if isolates:
|
| 76 |
-
warnings.append(f"{len(isolates)} isolated nodes (no edges): {isolates[:5]}")
|
| 77 |
-
return warnings
|
| 78 |
-
|
| 79 |
-
def __repr__(self) -> str:
|
| 80 |
-
return (
|
| 81 |
-
f"CurriculumGraph(domain={self.domain!r}, "
|
| 82 |
-
f"nodes={self.num_nodes}, edges={self.num_edges})"
|
| 83 |
-
)
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
def load_dag(path: str | Path) -> CurriculumGraph:
|
| 87 |
-
"""
|
| 88 |
-
Load a curriculum DAG from a JSON file.
|
| 89 |
-
|
| 90 |
-
Parameters
|
| 91 |
-
----------
|
| 92 |
-
path : str or Path
|
| 93 |
-
Path to the curriculum JSON file.
|
| 94 |
-
|
| 95 |
-
Returns
|
| 96 |
-
-------
|
| 97 |
-
CurriculumGraph
|
| 98 |
-
|
| 99 |
-
Raises
|
| 100 |
-
------
|
| 101 |
-
FileNotFoundError
|
| 102 |
-
If the file does not exist.
|
| 103 |
-
ValueError
|
| 104 |
-
If the JSON schema is invalid.
|
| 105 |
-
"""
|
| 106 |
-
path = Path(path)
|
| 107 |
-
if not path.exists():
|
| 108 |
-
raise FileNotFoundError(f"Curriculum file not found: {path}")
|
| 109 |
-
|
| 110 |
-
with open(path) as f:
|
| 111 |
-
data = json.load(f)
|
| 112 |
-
|
| 113 |
-
_validate_schema(data, path)
|
| 114 |
-
|
| 115 |
-
domain = data.get("domain", path.stem)
|
| 116 |
-
meta = {k: v for k, v in data.items() if k not in ("nodes", "edges", "domain")}
|
| 117 |
-
|
| 118 |
-
G = nx.DiGraph()
|
| 119 |
-
for node in data["nodes"]:
|
| 120 |
-
G.add_node(node["id"], **{k: v for k, v in node.items() if k != "id"})
|
| 121 |
-
for edge in data["edges"]:
|
| 122 |
-
G.add_edge(edge["from"], edge["to"])
|
| 123 |
-
|
| 124 |
-
curriculum = CurriculumGraph(domain=domain, graph=G, meta=meta)
|
| 125 |
-
|
| 126 |
-
warnings = curriculum.validate()
|
| 127 |
-
for w in warnings:
|
| 128 |
-
import warnings as _w
|
| 129 |
-
_w.warn(f"[PLRS] {w}", stacklevel=2)
|
| 130 |
-
|
| 131 |
-
return curriculum
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
def _validate_schema(data: dict, path: Path) -> None:
|
| 135 |
-
if "nodes" not in data:
|
| 136 |
-
raise ValueError(f"{path}: Missing required key 'nodes'")
|
| 137 |
-
if "edges" not in data:
|
| 138 |
-
raise ValueError(f"{path}: Missing required key 'edges'")
|
| 139 |
-
for i, node in enumerate(data["nodes"]):
|
| 140 |
-
if "id" not in node:
|
| 141 |
-
raise ValueError(f"{path}: Node at index {i} missing required key 'id'")
|
| 142 |
-
for i, edge in enumerate(data["edges"]):
|
| 143 |
-
if "from" not in edge or "to" not in edge:
|
| 144 |
-
raise ValueError(f"{path}: Edge at index {i} missing 'from' or 'to'")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
plrs/model/__init__.py
DELETED
|
@@ -1,5 +0,0 @@
|
|
| 1 |
-
from plrs.model.sakt import SAKTModel
|
| 2 |
-
from plrs.model.sakt_decay import SAKTWithDecay
|
| 3 |
-
from plrs.model.trainer import SAKTTrainer, TrainerConfig, load_sequences_from_csv
|
| 4 |
-
|
| 5 |
-
__all__ = ["SAKTModel", "SAKTWithDecay", "SAKTTrainer", "TrainerConfig", "load_sequences_from_csv"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
plrs/model/evaluator.py
DELETED
|
@@ -1,374 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
plrs.model.evaluator
|
| 3 |
-
====================
|
| 4 |
-
Evaluation suite for PLRS.
|
| 5 |
-
|
| 6 |
-
Metrics:
|
| 7 |
-
- Knowledge Tracing: AUC-ROC, Accuracy, Binary Cross-Entropy
|
| 8 |
-
- Recommendation: Prerequisite Violation Rate, Coverage, Diversity
|
| 9 |
-
- Baselines: Random, Popularity, BKT (Bayesian Knowledge Tracing)
|
| 10 |
-
|
| 11 |
-
Usage:
|
| 12 |
-
from plrs.model.evaluator import PLRSEvaluator
|
| 13 |
-
evaluator = PLRSEvaluator(pipeline, curriculum)
|
| 14 |
-
report = evaluator.evaluate(test_sequences, skill_to_topic)
|
| 15 |
-
"""
|
| 16 |
-
|
| 17 |
-
from __future__ import annotations
|
| 18 |
-
|
| 19 |
-
import time
|
| 20 |
-
from dataclasses import dataclass, field
|
| 21 |
-
from typing import Any
|
| 22 |
-
|
| 23 |
-
import numpy as np
|
| 24 |
-
|
| 25 |
-
try:
|
| 26 |
-
from sklearn.metrics import roc_auc_score, accuracy_score, log_loss
|
| 27 |
-
HAS_SKLEARN = True
|
| 28 |
-
except ImportError:
|
| 29 |
-
HAS_SKLEARN = False
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
# ── Baseline models ───────────────────────────────────────────────────────────
|
| 33 |
-
|
| 34 |
-
class RandomBaseline:
|
| 35 |
-
"""Predicts 0.5 for every interaction."""
|
| 36 |
-
def predict(self, skill_seq, correct_seq):
|
| 37 |
-
return {i: 0.5 for i in range(len(skill_seq))}
|
| 38 |
-
|
| 39 |
-
def recommend(self, curriculum, n=5):
|
| 40 |
-
import random
|
| 41 |
-
return random.sample(curriculum.nodes, min(n, len(curriculum.nodes)))
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
class PopularityBaseline:
|
| 45 |
-
"""Recommends the most-seen skills; predicts by global correctness rate."""
|
| 46 |
-
|
| 47 |
-
def __init__(self):
|
| 48 |
-
self.skill_correct: dict[int, list[float]] = {}
|
| 49 |
-
self.topic_count: dict[str, int] = {}
|
| 50 |
-
|
| 51 |
-
def fit(self, sequences, skill_to_topic=None):
|
| 52 |
-
for skill_seq, correct_seq in sequences:
|
| 53 |
-
for skill, correct in zip(skill_seq, correct_seq):
|
| 54 |
-
self.skill_correct.setdefault(skill, []).append(float(correct))
|
| 55 |
-
if skill_to_topic:
|
| 56 |
-
topic = skill_to_topic.get(skill)
|
| 57 |
-
if topic:
|
| 58 |
-
self.topic_count[topic] = self.topic_count.get(topic, 0) + 1
|
| 59 |
-
|
| 60 |
-
def predict_prob(self, skill_id: int) -> float:
|
| 61 |
-
history = self.skill_correct.get(skill_id, [])
|
| 62 |
-
return float(np.mean(history)) if history else 0.5
|
| 63 |
-
|
| 64 |
-
def recommend(self, curriculum, n=5):
|
| 65 |
-
if not self.topic_count:
|
| 66 |
-
return curriculum.nodes[:n]
|
| 67 |
-
sorted_topics = sorted(self.topic_count, key=self.topic_count.get, reverse=True)
|
| 68 |
-
return [t for t in sorted_topics if t in curriculum.nodes][:n]
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
class BKTBaseline:
|
| 72 |
-
"""
|
| 73 |
-
Bayesian Knowledge Tracing (per-skill).
|
| 74 |
-
Simple 4-parameter model: p_init, p_transit, p_slip, p_guess.
|
| 75 |
-
"""
|
| 76 |
-
|
| 77 |
-
def __init__(self, p_init=0.3, p_transit=0.1, p_slip=0.1, p_guess=0.2):
|
| 78 |
-
self.p_init = p_init
|
| 79 |
-
self.p_transit = p_transit
|
| 80 |
-
self.p_slip = p_slip
|
| 81 |
-
self.p_guess = p_guess
|
| 82 |
-
self._mastery: dict[int, float] = {}
|
| 83 |
-
|
| 84 |
-
def _update(self, skill: int, correct: int) -> float:
|
| 85 |
-
p = self._mastery.get(skill, self.p_init)
|
| 86 |
-
# Bayes update
|
| 87 |
-
if correct:
|
| 88 |
-
num = p * (1 - self.p_slip)
|
| 89 |
-
den = num + (1 - p) * self.p_guess
|
| 90 |
-
else:
|
| 91 |
-
num = p * self.p_slip
|
| 92 |
-
den = num + (1 - p) * (1 - self.p_guess)
|
| 93 |
-
p_post = num / max(den, 1e-9)
|
| 94 |
-
# Learning
|
| 95 |
-
p_post = p_post + (1 - p_post) * self.p_transit
|
| 96 |
-
self._mastery[skill] = p_post
|
| 97 |
-
return p_post
|
| 98 |
-
|
| 99 |
-
def predict_sequence(self, skill_seq: list[int], correct_seq: list[int]) -> list[float]:
|
| 100 |
-
self._mastery = {}
|
| 101 |
-
probs = []
|
| 102 |
-
for skill, correct in zip(skill_seq[:-1], correct_seq[:-1]):
|
| 103 |
-
self._update(skill, correct)
|
| 104 |
-
next_skill = skill_seq[len(probs) + 1]
|
| 105 |
-
probs.append(self._mastery.get(next_skill, self.p_init))
|
| 106 |
-
return probs
|
| 107 |
-
|
| 108 |
-
def get_mastery(self) -> dict[int, float]:
|
| 109 |
-
return dict(self._mastery)
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
# ── Result dataclasses ────────────────────────────────────────────────────────
|
| 113 |
-
|
| 114 |
-
@dataclass
|
| 115 |
-
class KTMetrics:
|
| 116 |
-
"""Knowledge tracing evaluation metrics."""
|
| 117 |
-
model_name: str
|
| 118 |
-
auc: float
|
| 119 |
-
accuracy: float
|
| 120 |
-
log_loss: float
|
| 121 |
-
n_samples: int
|
| 122 |
-
elapsed_s: float
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
@dataclass
|
| 126 |
-
class RecommendMetrics:
|
| 127 |
-
"""Recommendation quality metrics."""
|
| 128 |
-
violation_rate: float # fraction of recommendations that violate prerequisites
|
| 129 |
-
coverage: float # fraction of curriculum covered by recommendations
|
| 130 |
-
avg_downstream: float # avg topics unlocked by recommendations
|
| 131 |
-
mastery_rate: float # avg student mastery in test set
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
@dataclass
|
| 135 |
-
class EvaluationReport:
|
| 136 |
-
"""Full evaluation report."""
|
| 137 |
-
kt_metrics: list[KTMetrics]
|
| 138 |
-
rec_metrics: RecommendMetrics | None
|
| 139 |
-
config: dict[str, Any]
|
| 140 |
-
timestamp: str
|
| 141 |
-
|
| 142 |
-
def print(self) -> None:
|
| 143 |
-
print("\n" + "=" * 62)
|
| 144 |
-
print(" PLRS EVALUATION REPORT")
|
| 145 |
-
print("=" * 62)
|
| 146 |
-
|
| 147 |
-
print(f"\n{'Model':<22} {'AUC':>8} {'Accuracy':>10} {'Log Loss':>10} {'Samples':>8}")
|
| 148 |
-
print("-" * 62)
|
| 149 |
-
for m in self.kt_metrics:
|
| 150 |
-
print(f"{m.model_name:<22} {m.auc:>8.4f} {m.accuracy:>10.4f} {m.log_loss:>10.4f} {m.n_samples:>8,}")
|
| 151 |
-
|
| 152 |
-
if self.rec_metrics:
|
| 153 |
-
r = self.rec_metrics
|
| 154 |
-
print(f"\n{'Recommendation Metrics':}")
|
| 155 |
-
print(f" Prerequisite violation rate : {r.violation_rate:.1%}")
|
| 156 |
-
print(f" Curriculum coverage : {r.coverage:.1%}")
|
| 157 |
-
print(f" Avg downstream unlocked : {r.avg_downstream:.1f}")
|
| 158 |
-
print(f" Avg student mastery rate : {r.mastery_rate:.1%}")
|
| 159 |
-
|
| 160 |
-
print("=" * 62 + "\n")
|
| 161 |
-
|
| 162 |
-
def to_dict(self) -> dict:
|
| 163 |
-
return {
|
| 164 |
-
"kt_metrics": [
|
| 165 |
-
{
|
| 166 |
-
"model": m.model_name,
|
| 167 |
-
"auc": round(m.auc, 6),
|
| 168 |
-
"accuracy": round(m.accuracy, 6),
|
| 169 |
-
"log_loss": round(m.log_loss, 6),
|
| 170 |
-
"n_samples": m.n_samples,
|
| 171 |
-
"elapsed_s": round(m.elapsed_s, 3),
|
| 172 |
-
}
|
| 173 |
-
for m in self.kt_metrics
|
| 174 |
-
],
|
| 175 |
-
"rec_metrics": {
|
| 176 |
-
"violation_rate": round(self.rec_metrics.violation_rate, 6),
|
| 177 |
-
"coverage": round(self.rec_metrics.coverage, 6),
|
| 178 |
-
"avg_downstream": round(self.rec_metrics.avg_downstream, 3),
|
| 179 |
-
"mastery_rate": round(self.rec_metrics.mastery_rate, 6),
|
| 180 |
-
} if self.rec_metrics else None,
|
| 181 |
-
"config": self.config,
|
| 182 |
-
"timestamp": self.timestamp,
|
| 183 |
-
}
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
# ── Main evaluator ────────────────────────────────────────────────────────────
|
| 187 |
-
|
| 188 |
-
class PLRSEvaluator:
|
| 189 |
-
"""
|
| 190 |
-
Evaluate PLRS against baselines on held-out student sequences.
|
| 191 |
-
|
| 192 |
-
Parameters
|
| 193 |
-
----------
|
| 194 |
-
pipeline : PLRSPipeline
|
| 195 |
-
A loaded pipeline (with or without SAKT model).
|
| 196 |
-
"""
|
| 197 |
-
|
| 198 |
-
def __init__(self, pipeline) -> None:
|
| 199 |
-
self.pipeline = pipeline
|
| 200 |
-
self.curriculum = pipeline.curriculum
|
| 201 |
-
|
| 202 |
-
def evaluate(
|
| 203 |
-
self,
|
| 204 |
-
test_sequences: list[tuple[list[int], list[int]]],
|
| 205 |
-
skill_to_topic: dict[int, str] | None = None,
|
| 206 |
-
train_sequences: list[tuple[list[int], list[int]]] | None = None,
|
| 207 |
-
include_baselines: bool = True,
|
| 208 |
-
) -> EvaluationReport:
|
| 209 |
-
"""
|
| 210 |
-
Run full evaluation.
|
| 211 |
-
|
| 212 |
-
Parameters
|
| 213 |
-
----------
|
| 214 |
-
test_sequences : list of (skill_seq, correct_seq)
|
| 215 |
-
skill_to_topic : dict mapping skill_id → curriculum topic_id
|
| 216 |
-
train_sequences : used to fit popularity baseline
|
| 217 |
-
include_baselines : whether to evaluate BKT and popularity baselines
|
| 218 |
-
|
| 219 |
-
Returns
|
| 220 |
-
-------
|
| 221 |
-
EvaluationReport
|
| 222 |
-
"""
|
| 223 |
-
import datetime
|
| 224 |
-
|
| 225 |
-
kt_metrics: list[KTMetrics] = []
|
| 226 |
-
|
| 227 |
-
# ── SAKT evaluation ──────────────────────────────────────────
|
| 228 |
-
if self.pipeline._model is not None:
|
| 229 |
-
kt_metrics.append(
|
| 230 |
-
self._eval_sakt(test_sequences)
|
| 231 |
-
)
|
| 232 |
-
|
| 233 |
-
# ── Baselines ────────────────────────────────────────────────
|
| 234 |
-
if include_baselines:
|
| 235 |
-
kt_metrics.append(self._eval_random(test_sequences))
|
| 236 |
-
kt_metrics.append(self._eval_bkt(test_sequences))
|
| 237 |
-
|
| 238 |
-
pop = PopularityBaseline()
|
| 239 |
-
pop.fit(train_sequences or test_sequences, skill_to_topic)
|
| 240 |
-
kt_metrics.append(self._eval_popularity(test_sequences, pop))
|
| 241 |
-
|
| 242 |
-
# ── Recommendation metrics ───────────────────────────────────
|
| 243 |
-
rec_metrics = self._eval_recommendations(test_sequences, skill_to_topic)
|
| 244 |
-
|
| 245 |
-
return EvaluationReport(
|
| 246 |
-
kt_metrics=kt_metrics,
|
| 247 |
-
rec_metrics=rec_metrics,
|
| 248 |
-
config={
|
| 249 |
-
"threshold": self.pipeline.threshold,
|
| 250 |
-
"soft_threshold": self.pipeline.soft_threshold,
|
| 251 |
-
"top_n": self.pipeline.top_n,
|
| 252 |
-
"n_test_students": len(test_sequences),
|
| 253 |
-
},
|
| 254 |
-
timestamp=datetime.datetime.now().isoformat(),
|
| 255 |
-
)
|
| 256 |
-
|
| 257 |
-
# ── KT evaluation helpers ─────────────────────────────────────────────────
|
| 258 |
-
|
| 259 |
-
def _eval_sakt(self, sequences) -> KTMetrics:
|
| 260 |
-
t0 = time.time()
|
| 261 |
-
all_probs, all_labels = [], []
|
| 262 |
-
|
| 263 |
-
for skill_seq, correct_seq in sequences:
|
| 264 |
-
if len(skill_seq) < 2:
|
| 265 |
-
continue
|
| 266 |
-
probs = self.pipeline._model.predict_mastery(skill_seq, correct_seq)
|
| 267 |
-
for skill_id, prob in probs.items():
|
| 268 |
-
if skill_id < len(correct_seq):
|
| 269 |
-
all_probs.append(prob)
|
| 270 |
-
all_labels.append(float(correct_seq[skill_id]))
|
| 271 |
-
|
| 272 |
-
return self._compute_kt_metrics("SAKT", all_probs, all_labels, time.time() - t0)
|
| 273 |
-
|
| 274 |
-
def _eval_random(self, sequences) -> KTMetrics:
|
| 275 |
-
t0 = time.time()
|
| 276 |
-
all_probs, all_labels = [], []
|
| 277 |
-
for skill_seq, correct_seq in sequences:
|
| 278 |
-
for correct in correct_seq[1:]:
|
| 279 |
-
all_probs.append(0.5)
|
| 280 |
-
all_labels.append(float(correct))
|
| 281 |
-
return self._compute_kt_metrics("Random (baseline)", all_probs, all_labels, time.time() - t0)
|
| 282 |
-
|
| 283 |
-
def _eval_bkt(self, sequences) -> KTMetrics:
|
| 284 |
-
t0 = time.time()
|
| 285 |
-
all_probs, all_labels = [], []
|
| 286 |
-
bkt = BKTBaseline()
|
| 287 |
-
for skill_seq, correct_seq in sequences:
|
| 288 |
-
if len(skill_seq) < 2:
|
| 289 |
-
continue
|
| 290 |
-
probs = bkt.predict_sequence(skill_seq, correct_seq)
|
| 291 |
-
labels = [float(c) for c in correct_seq[1:len(probs) + 1]]
|
| 292 |
-
all_probs.extend(probs)
|
| 293 |
-
all_labels.extend(labels)
|
| 294 |
-
return self._compute_kt_metrics("BKT (baseline)", all_probs, all_labels, time.time() - t0)
|
| 295 |
-
|
| 296 |
-
def _eval_popularity(self, sequences, pop: PopularityBaseline) -> KTMetrics:
|
| 297 |
-
t0 = time.time()
|
| 298 |
-
all_probs, all_labels = [], []
|
| 299 |
-
for skill_seq, correct_seq in sequences:
|
| 300 |
-
for skill, correct in zip(skill_seq[1:], correct_seq[1:]):
|
| 301 |
-
all_probs.append(pop.predict_prob(skill))
|
| 302 |
-
all_labels.append(float(correct))
|
| 303 |
-
return self._compute_kt_metrics("Popularity (baseline)", all_probs, all_labels, time.time() - t0)
|
| 304 |
-
|
| 305 |
-
@staticmethod
|
| 306 |
-
def _compute_kt_metrics(name, probs, labels, elapsed) -> KTMetrics:
|
| 307 |
-
probs_arr = np.nan_to_num(np.array(probs), nan=0.5)
|
| 308 |
-
labels_arr = np.nan_to_num(np.array(labels), nan=0.0)
|
| 309 |
-
n = len(probs_arr)
|
| 310 |
-
|
| 311 |
-
if HAS_SKLEARN and n > 0 and len(np.unique(labels_arr)) > 1:
|
| 312 |
-
auc = float(roc_auc_score(labels_arr, probs_arr))
|
| 313 |
-
acc = float(accuracy_score(labels_arr, (probs_arr >= 0.5).astype(int)))
|
| 314 |
-
loss = float(log_loss(labels_arr, np.clip(probs_arr, 1e-7, 1 - 1e-7)))
|
| 315 |
-
else:
|
| 316 |
-
auc = 0.5
|
| 317 |
-
acc = float(((probs_arr >= 0.5) == labels_arr).mean()) if n > 0 else 0.0
|
| 318 |
-
loss = float(-np.mean(
|
| 319 |
-
labels_arr * np.log(probs_arr + 1e-7) +
|
| 320 |
-
(1 - labels_arr) * np.log(1 - probs_arr + 1e-7)
|
| 321 |
-
)) if n > 0 else 0.0
|
| 322 |
-
|
| 323 |
-
return KTMetrics(
|
| 324 |
-
model_name=name, auc=auc, accuracy=acc,
|
| 325 |
-
log_loss=loss, n_samples=n, elapsed_s=elapsed,
|
| 326 |
-
)
|
| 327 |
-
|
| 328 |
-
# ── Recommendation evaluation ─────────────────────────────────────────────
|
| 329 |
-
|
| 330 |
-
def _eval_recommendations(
|
| 331 |
-
self,
|
| 332 |
-
sequences,
|
| 333 |
-
skill_to_topic,
|
| 334 |
-
) -> RecommendMetrics:
|
| 335 |
-
violation_rates, coverages, downstreams, mastery_rates = [], [], [], []
|
| 336 |
-
|
| 337 |
-
for skill_seq, correct_seq in sequences:
|
| 338 |
-
# Build mastery from sequence
|
| 339 |
-
if skill_to_topic:
|
| 340 |
-
topic_scores: dict[str, float] = {}
|
| 341 |
-
for skill, correct in zip(skill_seq, correct_seq):
|
| 342 |
-
topic = skill_to_topic.get(skill)
|
| 343 |
-
if topic and topic in self.curriculum.nodes:
|
| 344 |
-
topic_scores[topic] = max(topic_scores.get(topic, 0.0), float(correct))
|
| 345 |
-
mastery_scores = {n: 0.0 for n in self.curriculum.nodes}
|
| 346 |
-
mastery_scores.update(topic_scores)
|
| 347 |
-
else:
|
| 348 |
-
mastery_scores = {n: 0.0 for n in self.curriculum.nodes}
|
| 349 |
-
|
| 350 |
-
results = self.pipeline.recommend_from_mastery(mastery_scores)
|
| 351 |
-
stats = results["stats"]
|
| 352 |
-
summary = results["mastery_summary"]
|
| 353 |
-
|
| 354 |
-
violation_rates.append(stats["prerequisite_violation_rate"])
|
| 355 |
-
mastery_rates.append(summary["mastery_rate"])
|
| 356 |
-
|
| 357 |
-
# Coverage: fraction of curriculum represented in approved+challenging
|
| 358 |
-
rec_topics = set(
|
| 359 |
-
r["topic_id"] for r in results["approved"] + results["challenging"]
|
| 360 |
-
)
|
| 361 |
-
coverages.append(len(rec_topics) / max(self.curriculum.num_nodes, 1))
|
| 362 |
-
|
| 363 |
-
# Avg downstream unlock value
|
| 364 |
-
if results["approved"]:
|
| 365 |
-
downstreams.append(
|
| 366 |
-
np.mean([r["downstream_count"] for r in results["approved"]])
|
| 367 |
-
)
|
| 368 |
-
|
| 369 |
-
return RecommendMetrics(
|
| 370 |
-
violation_rate=float(np.mean(violation_rates)) if violation_rates else 0.0,
|
| 371 |
-
coverage=float(np.mean(coverages)) if coverages else 0.0,
|
| 372 |
-
avg_downstream=float(np.mean(downstreams)) if downstreams else 0.0,
|
| 373 |
-
mastery_rate=float(np.mean(mastery_rates)) if mastery_rates else 0.0,
|
| 374 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
plrs/model/model_loader.py
DELETED
|
@@ -1,116 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
HF Space model loader — updated for SAKTWithDecay (v0.2.0 weights).
|
| 3 |
-
|
| 4 |
-
Drop this file into your HF Space as `model_loader.py` and call
|
| 5 |
-
`load_model_from_hub()` in app.py instead of the old loading logic.
|
| 6 |
-
|
| 7 |
-
The v0.2.0 weights (sakt_decay_best.pt) are saved with our new format:
|
| 8 |
-
{
|
| 9 |
-
"state_dict": {...},
|
| 10 |
-
"model_type": "SAKTWithDecay",
|
| 11 |
-
"config": {"num_skills": 20, "embed_dim": 64, ...}
|
| 12 |
-
}
|
| 13 |
-
|
| 14 |
-
Falls back gracefully to mastery-dict mode if weights can't be loaded.
|
| 15 |
-
"""
|
| 16 |
-
|
| 17 |
-
from __future__ import annotations
|
| 18 |
-
|
| 19 |
-
import json
|
| 20 |
-
from pathlib import Path
|
| 21 |
-
|
| 22 |
-
import torch
|
| 23 |
-
|
| 24 |
-
HF_REPO = "Clementio/PLRS"
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
def load_model_from_hub(device: str = "cpu"):
|
| 28 |
-
"""
|
| 29 |
-
Load SAKT model weights from HuggingFace Hub.
|
| 30 |
-
|
| 31 |
-
Tries files in priority order:
|
| 32 |
-
1. sakt_decay_best.pt (v0.2.0 — decay attention)
|
| 33 |
-
2. sakt_vanilla_best.pt (v0.2.0 — vanilla transformer)
|
| 34 |
-
3. sakt_model.pt (v0.1.0 — synthetic baseline)
|
| 35 |
-
|
| 36 |
-
Returns (model, model_type_str) or (None, "unavailable").
|
| 37 |
-
"""
|
| 38 |
-
try:
|
| 39 |
-
from huggingface_hub import hf_hub_download
|
| 40 |
-
except ImportError:
|
| 41 |
-
return None, "huggingface_hub not installed"
|
| 42 |
-
|
| 43 |
-
for filename, model_type in [
|
| 44 |
-
("models/sakt_decay_best.pt", "SAKTWithDecay"),
|
| 45 |
-
("models/sakt_vanilla_best.pt", "SAKTModel"),
|
| 46 |
-
("models/sakt_model.pt", "SAKTModel"),
|
| 47 |
-
]:
|
| 48 |
-
try:
|
| 49 |
-
path = hf_hub_download(repo_id=HF_REPO, filename=filename)
|
| 50 |
-
model = _load_weights(path, model_type, device)
|
| 51 |
-
if model is not None:
|
| 52 |
-
return model, model_type
|
| 53 |
-
except Exception:
|
| 54 |
-
continue
|
| 55 |
-
|
| 56 |
-
return None, "unavailable"
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
def _load_weights(path: str, preferred_type: str, device: str):
|
| 60 |
-
"""Load model weights from a .pt file, handling both old and new formats."""
|
| 61 |
-
try:
|
| 62 |
-
payload = torch.load(path, map_location=device, weights_only=False)
|
| 63 |
-
except Exception:
|
| 64 |
-
return None
|
| 65 |
-
|
| 66 |
-
# ── New format (v0.2.0): {"state_dict": ..., "model_type": ..., "config": ...}
|
| 67 |
-
if isinstance(payload, dict) and "state_dict" in payload:
|
| 68 |
-
cfg = payload.get("config", {})
|
| 69 |
-
model_type = payload.get("model_type", preferred_type)
|
| 70 |
-
|
| 71 |
-
if model_type == "SAKTWithDecay":
|
| 72 |
-
from plrs.model.sakt_decay import SAKTWithDecay
|
| 73 |
-
model = SAKTWithDecay(
|
| 74 |
-
num_skills=cfg.get("num_skills", 5737),
|
| 75 |
-
embed_dim=cfg.get("embed_dim", 64),
|
| 76 |
-
num_heads=cfg.get("num_heads", 8),
|
| 77 |
-
dropout=cfg.get("dropout", 0.2),
|
| 78 |
-
max_seq_len=cfg.get("max_seq_len", 100),
|
| 79 |
-
decay_init=cfg.get("decay_init", 1.0),
|
| 80 |
-
)
|
| 81 |
-
else:
|
| 82 |
-
from plrs.model.sakt import SAKTModel
|
| 83 |
-
model = SAKTModel(
|
| 84 |
-
num_skills=cfg.get("num_skills", 5737),
|
| 85 |
-
embed_dim=cfg.get("embed_dim", 64),
|
| 86 |
-
num_heads=cfg.get("num_heads", 8),
|
| 87 |
-
dropout=cfg.get("dropout", 0.2),
|
| 88 |
-
max_seq_len=cfg.get("max_seq_len", 100),
|
| 89 |
-
)
|
| 90 |
-
|
| 91 |
-
try:
|
| 92 |
-
model.load_state_dict(payload["state_dict"], strict=False)
|
| 93 |
-
model.eval()
|
| 94 |
-
model.to(device)
|
| 95 |
-
return model
|
| 96 |
-
except Exception:
|
| 97 |
-
return None
|
| 98 |
-
|
| 99 |
-
# ── Old format (v0.1.0 FYP): raw state_dict + separate config.json
|
| 100 |
-
try:
|
| 101 |
-
config_path = Path(path).parent / "config.json"
|
| 102 |
-
if config_path.exists():
|
| 103 |
-
config = json.loads(config_path.read_text())
|
| 104 |
-
else:
|
| 105 |
-
config = {"num_skills": 5736, "embed_dim": 64}
|
| 106 |
-
|
| 107 |
-
from plrs.model.sakt import SAKTModel
|
| 108 |
-
model = SAKTModel(
|
| 109 |
-
num_skills=config.get("num_skills", 5736),
|
| 110 |
-
embed_dim=config.get("embed_dim", 64),
|
| 111 |
-
)
|
| 112 |
-
model.load_state_dict(payload, strict=False)
|
| 113 |
-
model.eval()
|
| 114 |
-
return model
|
| 115 |
-
except Exception:
|
| 116 |
-
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
plrs/model/sakt.py
DELETED
|
@@ -1,219 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
plrs.model.sakt
|
| 3 |
-
===============
|
| 4 |
-
Self-Attentive Knowledge Tracing (SAKT) model.
|
| 5 |
-
|
| 6 |
-
Architecture: transformer-style attention over student interaction sequences.
|
| 7 |
-
Each interaction is encoded as (skill_id + correctness * n_skills).
|
| 8 |
-
|
| 9 |
-
Reference: Pandey & Karypis, 2019 — "A Self-Attentive model for Knowledge Tracing"
|
| 10 |
-
"""
|
| 11 |
-
|
| 12 |
-
from __future__ import annotations
|
| 13 |
-
|
| 14 |
-
from pathlib import Path
|
| 15 |
-
from typing import Any
|
| 16 |
-
|
| 17 |
-
import torch
|
| 18 |
-
import torch.nn as nn
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
class SAKTModel(nn.Module):
|
| 22 |
-
"""
|
| 23 |
-
SAKT: Self-Attentive Knowledge Tracing.
|
| 24 |
-
|
| 25 |
-
Parameters
|
| 26 |
-
----------
|
| 27 |
-
num_skills : int
|
| 28 |
-
Total number of unique skills in the dataset.
|
| 29 |
-
embed_dim : int
|
| 30 |
-
Embedding dimension for interactions and positions.
|
| 31 |
-
num_heads : int
|
| 32 |
-
Number of attention heads.
|
| 33 |
-
dropout : float
|
| 34 |
-
Dropout rate.
|
| 35 |
-
max_seq_len : int
|
| 36 |
-
Maximum interaction sequence length.
|
| 37 |
-
"""
|
| 38 |
-
|
| 39 |
-
def __init__(
|
| 40 |
-
self,
|
| 41 |
-
num_skills: int,
|
| 42 |
-
embed_dim: int = 64,
|
| 43 |
-
num_heads: int = 8,
|
| 44 |
-
dropout: float = 0.2,
|
| 45 |
-
max_seq_len: int = 100,
|
| 46 |
-
) -> None:
|
| 47 |
-
super().__init__()
|
| 48 |
-
self.num_skills = num_skills
|
| 49 |
-
self.embed_dim = embed_dim
|
| 50 |
-
self.max_seq_len = max_seq_len
|
| 51 |
-
|
| 52 |
-
# Interaction embedding: (skill, correct) → dense vector
|
| 53 |
-
self.interaction_embed = nn.Embedding(2 * num_skills + 2, embed_dim, padding_idx=0) # +2: shift+1 means max index = 2*n+1
|
| 54 |
-
# Positional embedding
|
| 55 |
-
self.pos_embed = nn.Embedding(max_seq_len, embed_dim)
|
| 56 |
-
|
| 57 |
-
# Self-attention layer
|
| 58 |
-
self.self_attn = nn.MultiheadAttention(
|
| 59 |
-
embed_dim=embed_dim,
|
| 60 |
-
num_heads=num_heads,
|
| 61 |
-
dropout=dropout,
|
| 62 |
-
batch_first=True,
|
| 63 |
-
)
|
| 64 |
-
|
| 65 |
-
self.layer_norm1 = nn.LayerNorm(embed_dim)
|
| 66 |
-
self.layer_norm2 = nn.LayerNorm(embed_dim)
|
| 67 |
-
|
| 68 |
-
self.ffn = nn.Sequential(
|
| 69 |
-
nn.Linear(embed_dim, embed_dim * 2),
|
| 70 |
-
nn.ReLU(),
|
| 71 |
-
nn.Dropout(dropout),
|
| 72 |
-
nn.Linear(embed_dim * 2, embed_dim),
|
| 73 |
-
)
|
| 74 |
-
|
| 75 |
-
# Skill query embedding for target prediction
|
| 76 |
-
self.skill_embed = nn.Embedding(num_skills + 1, embed_dim, padding_idx=0)
|
| 77 |
-
|
| 78 |
-
self.output_layer = nn.Linear(embed_dim * 2, 1)
|
| 79 |
-
self.dropout = nn.Dropout(dropout)
|
| 80 |
-
|
| 81 |
-
def forward(
|
| 82 |
-
self,
|
| 83 |
-
interactions: torch.Tensor, # (batch, seq_len)
|
| 84 |
-
target_skills: torch.Tensor, # (batch, seq_len)
|
| 85 |
-
mask: torch.Tensor, # (batch, seq_len) bool — True = real token
|
| 86 |
-
) -> torch.Tensor:
|
| 87 |
-
"""
|
| 88 |
-
Forward pass.
|
| 89 |
-
|
| 90 |
-
Returns
|
| 91 |
-
-------
|
| 92 |
-
torch.Tensor of shape (batch, seq_len) — logits per position.
|
| 93 |
-
"""
|
| 94 |
-
batch_size, seq_len = interactions.shape
|
| 95 |
-
positions = torch.arange(seq_len, device=interactions.device).unsqueeze(0)
|
| 96 |
-
|
| 97 |
-
x = self.interaction_embed(interactions) + self.pos_embed(positions)
|
| 98 |
-
x = self.dropout(x)
|
| 99 |
-
|
| 100 |
-
# Causal mask — bool upper-triangular (MHA handles conversion internally)
|
| 101 |
-
causal_mask = torch.triu(
|
| 102 |
-
torch.ones(seq_len, seq_len, device=x.device, dtype=torch.bool),
|
| 103 |
-
diagonal=1,
|
| 104 |
-
)
|
| 105 |
-
|
| 106 |
-
# Key padding mask: True = ignore (PyTorch MHA convention)
|
| 107 |
-
key_padding_mask = ~mask # (batch, seq_len) bool
|
| 108 |
-
|
| 109 |
-
x_attn, _ = self.self_attn(
|
| 110 |
-
query=x,
|
| 111 |
-
key=x,
|
| 112 |
-
value=x,
|
| 113 |
-
attn_mask=causal_mask,
|
| 114 |
-
key_padding_mask=key_padding_mask,
|
| 115 |
-
)
|
| 116 |
-
# Replace any NaN in attention output (from fully-masked rows) with 0
|
| 117 |
-
x_attn = torch.nan_to_num(x_attn, nan=0.0)
|
| 118 |
-
x = self.layer_norm1(x + x_attn)
|
| 119 |
-
x = self.layer_norm2(x + self.ffn(x))
|
| 120 |
-
|
| 121 |
-
# Concatenate with target skill embedding for final prediction
|
| 122 |
-
skill_x = self.skill_embed(target_skills)
|
| 123 |
-
out = self.output_layer(torch.cat([x, skill_x], dim=-1)).squeeze(-1)
|
| 124 |
-
|
| 125 |
-
return out # (batch, seq_len) logits
|
| 126 |
-
|
| 127 |
-
# ------------------------------------------------------------------ #
|
| 128 |
-
# Inference helpers #
|
| 129 |
-
# ------------------------------------------------------------------ #
|
| 130 |
-
|
| 131 |
-
@torch.no_grad()
|
| 132 |
-
def predict_mastery(
|
| 133 |
-
self,
|
| 134 |
-
skill_seq: list[int],
|
| 135 |
-
correct_seq: list[int],
|
| 136 |
-
device: torch.device | str = "cpu",
|
| 137 |
-
) -> dict[int, float]:
|
| 138 |
-
"""
|
| 139 |
-
Run inference on a student's interaction history.
|
| 140 |
-
|
| 141 |
-
Parameters
|
| 142 |
-
----------
|
| 143 |
-
skill_seq : list[int]
|
| 144 |
-
Sequence of skill IDs the student interacted with.
|
| 145 |
-
correct_seq : list[int]
|
| 146 |
-
Corresponding correctness (1 = correct, 0 = incorrect).
|
| 147 |
-
device : str or torch.device
|
| 148 |
-
|
| 149 |
-
Returns
|
| 150 |
-
-------
|
| 151 |
-
dict[int, float]
|
| 152 |
-
Mapping from skill_id → predicted mastery probability.
|
| 153 |
-
"""
|
| 154 |
-
if len(skill_seq) < 2:
|
| 155 |
-
return {}
|
| 156 |
-
|
| 157 |
-
if len(skill_seq) > self.max_seq_len:
|
| 158 |
-
skill_seq = skill_seq[-self.max_seq_len:]
|
| 159 |
-
correct_seq = correct_seq[-self.max_seq_len:]
|
| 160 |
-
|
| 161 |
-
interactions = [s + c * self.num_skills + 1 for s, c in zip(skill_seq[:-1], correct_seq[:-1])] # +1: reserve 0 for padding
|
| 162 |
-
target_skills = skill_seq[1:]
|
| 163 |
-
|
| 164 |
-
seq_len = len(interactions)
|
| 165 |
-
pad_len = self.max_seq_len - seq_len
|
| 166 |
-
|
| 167 |
-
interactions_padded = [0] * pad_len + interactions
|
| 168 |
-
target_padded = [0] * pad_len + target_skills
|
| 169 |
-
mask = [False] * pad_len + [True] * seq_len
|
| 170 |
-
|
| 171 |
-
interactions_t = torch.LongTensor([interactions_padded]).to(device)
|
| 172 |
-
target_t = torch.LongTensor([target_padded]).to(device)
|
| 173 |
-
mask_t = torch.BoolTensor([mask]).to(device)
|
| 174 |
-
|
| 175 |
-
self.eval()
|
| 176 |
-
self.to(device)
|
| 177 |
-
|
| 178 |
-
logits = self(interactions_t, target_t, mask_t)
|
| 179 |
-
probs = torch.sigmoid(logits).squeeze(0)
|
| 180 |
-
|
| 181 |
-
real_probs = probs[torch.BoolTensor(mask)].cpu().numpy()
|
| 182 |
-
mastery = {
|
| 183 |
-
int(skill_id): float(prob)
|
| 184 |
-
for skill_id, prob in zip(target_skills, real_probs)
|
| 185 |
-
}
|
| 186 |
-
return mastery
|
| 187 |
-
|
| 188 |
-
# ------------------------------------------------------------------ #
|
| 189 |
-
# Serialisation #
|
| 190 |
-
# ------------------------------------------------------------------ #
|
| 191 |
-
|
| 192 |
-
def save(self, path: str | Path, config: dict[str, Any] | None = None) -> None:
|
| 193 |
-
"""Save model weights and config to a .pt file."""
|
| 194 |
-
payload = {
|
| 195 |
-
"state_dict": self.state_dict(),
|
| 196 |
-
"config": config or {
|
| 197 |
-
"num_skills": self.num_skills,
|
| 198 |
-
"embed_dim": self.embed_dim,
|
| 199 |
-
"max_seq_len": self.max_seq_len,
|
| 200 |
-
},
|
| 201 |
-
}
|
| 202 |
-
torch.save(payload, path)
|
| 203 |
-
|
| 204 |
-
@classmethod
|
| 205 |
-
def load(cls, path: str | Path, device: str | torch.device = "cpu") -> "SAKTModel":
|
| 206 |
-
"""Load a saved SAKT model."""
|
| 207 |
-
payload = torch.load(path, map_location=device, weights_only=False)
|
| 208 |
-
config = payload["config"]
|
| 209 |
-
model = cls(
|
| 210 |
-
num_skills=config["num_skills"],
|
| 211 |
-
embed_dim=config.get("embed_dim", 64),
|
| 212 |
-
num_heads=config.get("num_heads", 8),
|
| 213 |
-
dropout=config.get("dropout", 0.2),
|
| 214 |
-
max_seq_len=config.get("max_seq_len", 100),
|
| 215 |
-
)
|
| 216 |
-
model.load_state_dict(payload["state_dict"])
|
| 217 |
-
model.to(device)
|
| 218 |
-
model.eval()
|
| 219 |
-
return model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
plrs/model/sakt_decay.py
DELETED
|
@@ -1,253 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
plrs.model.sakt_decay
|
| 3 |
-
=====================
|
| 4 |
-
SAKT with Ebbinghaus Forgetting Curve Decay.
|
| 5 |
-
|
| 6 |
-
Extends the base SAKT model by applying exponential temporal decay to
|
| 7 |
-
attention weights, reflecting that older interactions contribute less to
|
| 8 |
-
current mastery estimates.
|
| 9 |
-
|
| 10 |
-
The decay function follows the Ebbinghaus retention curve:
|
| 11 |
-
R(t) = exp(-t / decay_rate)
|
| 12 |
-
|
| 13 |
-
Where t is the time gap between interaction j and the current position i,
|
| 14 |
-
measured in interaction steps (or elapsed time if timestamps are available).
|
| 15 |
-
|
| 16 |
-
This typically improves val AUC by 0.01–0.02 over vanilla SAKT.
|
| 17 |
-
"""
|
| 18 |
-
|
| 19 |
-
from __future__ import annotations
|
| 20 |
-
|
| 21 |
-
import math
|
| 22 |
-
from pathlib import Path
|
| 23 |
-
from typing import Any
|
| 24 |
-
|
| 25 |
-
import torch
|
| 26 |
-
import torch.nn as nn
|
| 27 |
-
import torch.nn.functional as F
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
class DecayAttention(nn.Module):
|
| 31 |
-
"""
|
| 32 |
-
Multi-head attention with Ebbinghaus forgetting curve decay.
|
| 33 |
-
|
| 34 |
-
Applies position-based temporal decay to attention logits before softmax:
|
| 35 |
-
attention_logits[i, j] -= decay_rate_learned * log(1 + |i - j|)
|
| 36 |
-
|
| 37 |
-
The decay rate is a learned scalar per head, initialised from a prior.
|
| 38 |
-
"""
|
| 39 |
-
|
| 40 |
-
def __init__(
|
| 41 |
-
self,
|
| 42 |
-
embed_dim: int,
|
| 43 |
-
num_heads: int,
|
| 44 |
-
dropout: float = 0.2,
|
| 45 |
-
decay_init: float = 1.0,
|
| 46 |
-
) -> None:
|
| 47 |
-
super().__init__()
|
| 48 |
-
self.embed_dim = embed_dim
|
| 49 |
-
self.num_heads = num_heads
|
| 50 |
-
self.head_dim = embed_dim // num_heads
|
| 51 |
-
assert embed_dim % num_heads == 0, "embed_dim must be divisible by num_heads"
|
| 52 |
-
|
| 53 |
-
self.q_proj = nn.Linear(embed_dim, embed_dim)
|
| 54 |
-
self.k_proj = nn.Linear(embed_dim, embed_dim)
|
| 55 |
-
self.v_proj = nn.Linear(embed_dim, embed_dim)
|
| 56 |
-
self.out_proj = nn.Linear(embed_dim, embed_dim)
|
| 57 |
-
self.dropout = nn.Dropout(dropout)
|
| 58 |
-
|
| 59 |
-
# Learned decay rate per head — initialised to decay_init
|
| 60 |
-
# Constrained positive via softplus during forward
|
| 61 |
-
self.decay_logit = nn.Parameter(
|
| 62 |
-
torch.full((num_heads,), math.log(math.exp(decay_init) - 1))
|
| 63 |
-
)
|
| 64 |
-
|
| 65 |
-
def forward(
|
| 66 |
-
self,
|
| 67 |
-
x: torch.Tensor, # (batch, seq_len, embed_dim)
|
| 68 |
-
causal_mask: torch.Tensor, # (seq_len, seq_len) bool — True = block
|
| 69 |
-
key_padding_mask: torch.Tensor, # (batch, seq_len) bool — True = pad
|
| 70 |
-
) -> torch.Tensor:
|
| 71 |
-
B, L, D = x.shape
|
| 72 |
-
H, Hd = self.num_heads, self.head_dim
|
| 73 |
-
|
| 74 |
-
Q = self.q_proj(x).view(B, L, H, Hd).transpose(1, 2) # (B, H, L, Hd)
|
| 75 |
-
K = self.k_proj(x).view(B, L, H, Hd).transpose(1, 2)
|
| 76 |
-
V = self.v_proj(x).view(B, L, H, Hd).transpose(1, 2)
|
| 77 |
-
|
| 78 |
-
# Scaled dot-product attention scores
|
| 79 |
-
scale = math.sqrt(self.head_dim)
|
| 80 |
-
scores = torch.matmul(Q, K.transpose(-2, -1)) / scale # (B, H, L, L)
|
| 81 |
-
|
| 82 |
-
# ── Ebbinghaus decay ──────────────────────────────────────── #
|
| 83 |
-
# Build temporal distance matrix: dist[i, j] = |i - j|
|
| 84 |
-
positions = torch.arange(L, device=x.device)
|
| 85 |
-
dist = (positions.unsqueeze(0) - positions.unsqueeze(1)).abs().float() # (L, L)
|
| 86 |
-
|
| 87 |
-
# decay = softplus(decay_logit) ensures strictly positive rates
|
| 88 |
-
decay_rate = F.softplus(self.decay_logit) # (H,)
|
| 89 |
-
|
| 90 |
-
# Decay penalty: rate_h * log(1 + dist) — shape (H, L, L)
|
| 91 |
-
decay_penalty = decay_rate.view(H, 1, 1) * torch.log1p(dist).unsqueeze(0)
|
| 92 |
-
scores = scores - decay_penalty.unsqueeze(0) # broadcast over batch
|
| 93 |
-
# ─────────────────────────────────────────────────────────── #
|
| 94 |
-
|
| 95 |
-
# Apply causal mask
|
| 96 |
-
scores = scores.masked_fill(causal_mask.unsqueeze(0).unsqueeze(0), -1e9)
|
| 97 |
-
|
| 98 |
-
# Apply padding mask
|
| 99 |
-
if key_padding_mask is not None:
|
| 100 |
-
scores = scores.masked_fill(
|
| 101 |
-
key_padding_mask.unsqueeze(1).unsqueeze(2), -1e9
|
| 102 |
-
)
|
| 103 |
-
|
| 104 |
-
attn = F.softmax(scores, dim=-1)
|
| 105 |
-
attn = self.dropout(attn)
|
| 106 |
-
|
| 107 |
-
out = torch.matmul(attn, V) # (B, H, L, Hd)
|
| 108 |
-
out = out.transpose(1, 2).contiguous().view(B, L, D) # (B, L, D)
|
| 109 |
-
return self.out_proj(out)
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
class SAKTWithDecay(nn.Module):
|
| 113 |
-
"""
|
| 114 |
-
SAKT + Ebbinghaus Forgetting Curve Decay.
|
| 115 |
-
|
| 116 |
-
Drop-in replacement for SAKTModel with improved AUC through
|
| 117 |
-
temporal decay attention. All other architecture details are identical.
|
| 118 |
-
|
| 119 |
-
Parameters
|
| 120 |
-
----------
|
| 121 |
-
num_skills : int
|
| 122 |
-
embed_dim : int
|
| 123 |
-
num_heads : int
|
| 124 |
-
dropout : float
|
| 125 |
-
max_seq_len : int
|
| 126 |
-
decay_init : float
|
| 127 |
-
Initial decay rate (higher = faster forgetting). Default 1.0.
|
| 128 |
-
"""
|
| 129 |
-
|
| 130 |
-
def __init__(
|
| 131 |
-
self,
|
| 132 |
-
num_skills: int,
|
| 133 |
-
embed_dim: int = 64,
|
| 134 |
-
num_heads: int = 8,
|
| 135 |
-
dropout: float = 0.2,
|
| 136 |
-
max_seq_len: int = 100,
|
| 137 |
-
decay_init: float = 1.0,
|
| 138 |
-
) -> None:
|
| 139 |
-
super().__init__()
|
| 140 |
-
self.num_skills = num_skills
|
| 141 |
-
self.embed_dim = embed_dim
|
| 142 |
-
self.max_seq_len = max_seq_len
|
| 143 |
-
|
| 144 |
-
self.interaction_embed = nn.Embedding(2 * num_skills + 2, embed_dim, padding_idx=0) # +2: shift+1 means max index = 2*n+1
|
| 145 |
-
self.pos_embed = nn.Embedding(max_seq_len, embed_dim)
|
| 146 |
-
|
| 147 |
-
# Decay-aware attention replaces nn.MultiheadAttention
|
| 148 |
-
self.decay_attn = DecayAttention(embed_dim, num_heads, dropout, decay_init)
|
| 149 |
-
|
| 150 |
-
self.layer_norm1 = nn.LayerNorm(embed_dim)
|
| 151 |
-
self.layer_norm2 = nn.LayerNorm(embed_dim)
|
| 152 |
-
self.ffn = nn.Sequential(
|
| 153 |
-
nn.Linear(embed_dim, embed_dim * 2),
|
| 154 |
-
nn.ReLU(),
|
| 155 |
-
nn.Dropout(dropout),
|
| 156 |
-
nn.Linear(embed_dim * 2, embed_dim),
|
| 157 |
-
)
|
| 158 |
-
|
| 159 |
-
self.skill_embed = nn.Embedding(num_skills + 1, embed_dim, padding_idx=0)
|
| 160 |
-
self.output_layer = nn.Linear(embed_dim * 2, 1)
|
| 161 |
-
self.dropout = nn.Dropout(dropout)
|
| 162 |
-
|
| 163 |
-
def forward(
|
| 164 |
-
self,
|
| 165 |
-
interactions: torch.Tensor,
|
| 166 |
-
target_skills: torch.Tensor,
|
| 167 |
-
mask: torch.Tensor,
|
| 168 |
-
) -> torch.Tensor:
|
| 169 |
-
B, L = interactions.shape
|
| 170 |
-
positions = torch.arange(L, device=interactions.device).unsqueeze(0)
|
| 171 |
-
|
| 172 |
-
x = self.interaction_embed(interactions) + self.pos_embed(positions)
|
| 173 |
-
x = self.dropout(x)
|
| 174 |
-
|
| 175 |
-
causal_mask = torch.triu(
|
| 176 |
-
torch.ones(L, L, device=x.device, dtype=torch.bool), diagonal=1
|
| 177 |
-
)
|
| 178 |
-
key_padding_mask = ~mask # True = ignore
|
| 179 |
-
|
| 180 |
-
x_attn = self.decay_attn(x, causal_mask, key_padding_mask)
|
| 181 |
-
x = self.layer_norm1(x + x_attn)
|
| 182 |
-
x = self.layer_norm2(x + self.ffn(x))
|
| 183 |
-
|
| 184 |
-
skill_x = self.skill_embed(target_skills)
|
| 185 |
-
out = self.output_layer(torch.cat([x, skill_x], dim=-1)).squeeze(-1)
|
| 186 |
-
return out
|
| 187 |
-
|
| 188 |
-
@torch.no_grad()
|
| 189 |
-
def predict_mastery(
|
| 190 |
-
self,
|
| 191 |
-
skill_seq: list[int],
|
| 192 |
-
correct_seq: list[int],
|
| 193 |
-
device: torch.device | str = "cpu",
|
| 194 |
-
) -> dict[int, float]:
|
| 195 |
-
"""Same interface as SAKTModel.predict_mastery."""
|
| 196 |
-
if len(skill_seq) < 2:
|
| 197 |
-
return {}
|
| 198 |
-
|
| 199 |
-
if len(skill_seq) > self.max_seq_len:
|
| 200 |
-
skill_seq = skill_seq[-self.max_seq_len:]
|
| 201 |
-
correct_seq = correct_seq[-self.max_seq_len:]
|
| 202 |
-
|
| 203 |
-
interactions = [s + c * self.num_skills + 1 for s, c in zip(skill_seq[:-1], correct_seq[:-1])] # +1: reserve 0 for padding
|
| 204 |
-
target_skills = skill_seq[1:]
|
| 205 |
-
seq_len = len(interactions)
|
| 206 |
-
pad_len = self.max_seq_len - seq_len
|
| 207 |
-
|
| 208 |
-
interactions_padded = [0] * pad_len + interactions
|
| 209 |
-
target_padded = [0] * pad_len + target_skills
|
| 210 |
-
mask_list = [False] * pad_len + [True] * seq_len
|
| 211 |
-
|
| 212 |
-
self.eval()
|
| 213 |
-
self.to(device)
|
| 214 |
-
|
| 215 |
-
logits = self(
|
| 216 |
-
torch.LongTensor([interactions_padded]).to(device),
|
| 217 |
-
torch.LongTensor([target_padded]).to(device),
|
| 218 |
-
torch.BoolTensor([mask_list]).to(device),
|
| 219 |
-
)
|
| 220 |
-
probs = torch.sigmoid(logits).squeeze(0)
|
| 221 |
-
real_probs = probs[torch.BoolTensor(mask_list)].cpu().numpy()
|
| 222 |
-
|
| 223 |
-
return {int(sid): float(p) for sid, p in zip(target_skills, real_probs)}
|
| 224 |
-
|
| 225 |
-
def save(self, path: str | Path, config: dict[str, Any] | None = None) -> None:
|
| 226 |
-
payload = {
|
| 227 |
-
"state_dict": self.state_dict(),
|
| 228 |
-
"model_type": "SAKTWithDecay",
|
| 229 |
-
"config": config or {
|
| 230 |
-
"num_skills": self.num_skills,
|
| 231 |
-
"embed_dim": self.embed_dim,
|
| 232 |
-
"max_seq_len": self.max_seq_len,
|
| 233 |
-
"model_type": "SAKTWithDecay",
|
| 234 |
-
},
|
| 235 |
-
}
|
| 236 |
-
torch.save(payload, path)
|
| 237 |
-
|
| 238 |
-
@classmethod
|
| 239 |
-
def load(cls, path: str | Path, device: str | torch.device = "cpu") -> "SAKTWithDecay":
|
| 240 |
-
payload = torch.load(path, map_location=device, weights_only=False)
|
| 241 |
-
cfg = payload["config"]
|
| 242 |
-
model = cls(
|
| 243 |
-
num_skills=cfg["num_skills"],
|
| 244 |
-
embed_dim=cfg.get("embed_dim", 64),
|
| 245 |
-
num_heads=cfg.get("num_heads", 8),
|
| 246 |
-
dropout=cfg.get("dropout", 0.2),
|
| 247 |
-
max_seq_len=cfg.get("max_seq_len", 100),
|
| 248 |
-
decay_init=cfg.get("decay_init", 1.0),
|
| 249 |
-
)
|
| 250 |
-
model.load_state_dict(payload["state_dict"])
|
| 251 |
-
model.to(device)
|
| 252 |
-
model.eval()
|
| 253 |
-
return model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
plrs/model/trainer.py
DELETED
|
@@ -1,437 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
plrs.model.trainer
|
| 3 |
-
==================
|
| 4 |
-
Training loop for the SAKT knowledge tracing model.
|
| 5 |
-
|
| 6 |
-
Handles:
|
| 7 |
-
- Dataset preparation from raw interaction logs
|
| 8 |
-
- Train / validation split
|
| 9 |
-
- Training with early stopping
|
| 10 |
-
- Checkpoint saving (best val AUC)
|
| 11 |
-
- Metrics: AUC, accuracy, loss
|
| 12 |
-
|
| 13 |
-
Expected input format (CSV or DataFrame):
|
| 14 |
-
student_id | skill_id | correct | timestamp (optional)
|
| 15 |
-
"""
|
| 16 |
-
|
| 17 |
-
from __future__ import annotations
|
| 18 |
-
|
| 19 |
-
import time
|
| 20 |
-
from dataclasses import dataclass, field
|
| 21 |
-
from pathlib import Path
|
| 22 |
-
from typing import Iterator
|
| 23 |
-
|
| 24 |
-
import numpy as np
|
| 25 |
-
import torch
|
| 26 |
-
import torch.nn as nn
|
| 27 |
-
from torch.utils.data import DataLoader, Dataset
|
| 28 |
-
|
| 29 |
-
try:
|
| 30 |
-
from sklearn.metrics import roc_auc_score
|
| 31 |
-
HAS_SKLEARN = True
|
| 32 |
-
except ImportError:
|
| 33 |
-
HAS_SKLEARN = False
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
# ------------------------------------------------------------------ #
|
| 37 |
-
# Dataset #
|
| 38 |
-
# ------------------------------------------------------------------ #
|
| 39 |
-
|
| 40 |
-
class KTDataset(Dataset):
|
| 41 |
-
"""
|
| 42 |
-
Knowledge Tracing dataset.
|
| 43 |
-
|
| 44 |
-
Each sample is one student's full interaction sequence, windowed to
|
| 45 |
-
max_seq_len. Long sequences are split into multiple windows.
|
| 46 |
-
|
| 47 |
-
Parameters
|
| 48 |
-
----------
|
| 49 |
-
sequences : list of (skill_seq, correct_seq)
|
| 50 |
-
Each element is a tuple of parallel lists.
|
| 51 |
-
max_seq_len : int
|
| 52 |
-
n_skills : int
|
| 53 |
-
"""
|
| 54 |
-
|
| 55 |
-
def __init__(
|
| 56 |
-
self,
|
| 57 |
-
sequences: list[tuple[list[int], list[int]]],
|
| 58 |
-
max_seq_len: int = 100,
|
| 59 |
-
n_skills: int = 5736,
|
| 60 |
-
) -> None:
|
| 61 |
-
self.max_seq_len = max_seq_len
|
| 62 |
-
self.n_skills = n_skills
|
| 63 |
-
self.samples: list[tuple[list[int], list[int]]] = []
|
| 64 |
-
|
| 65 |
-
for skill_seq, correct_seq in sequences:
|
| 66 |
-
# Window long sequences
|
| 67 |
-
for start in range(0, max(1, len(skill_seq) - 1), max_seq_len // 2):
|
| 68 |
-
end = start + max_seq_len + 1
|
| 69 |
-
s = skill_seq[start:end]
|
| 70 |
-
c = correct_seq[start:end]
|
| 71 |
-
if len(s) >= 2:
|
| 72 |
-
self.samples.append((s, c))
|
| 73 |
-
|
| 74 |
-
def __len__(self) -> int:
|
| 75 |
-
return len(self.samples)
|
| 76 |
-
|
| 77 |
-
def __getitem__(self, idx: int) -> dict[str, torch.Tensor]:
|
| 78 |
-
skill_seq, correct_seq = self.samples[idx]
|
| 79 |
-
|
| 80 |
-
if len(skill_seq) > self.max_seq_len + 1:
|
| 81 |
-
skill_seq = skill_seq[-self.max_seq_len - 1:]
|
| 82 |
-
correct_seq = correct_seq[-self.max_seq_len - 1:]
|
| 83 |
-
|
| 84 |
-
interactions = [s + c * self.n_skills + 1 for s, c in zip(skill_seq[:-1], correct_seq[:-1])] # +1: reserve 0 for padding
|
| 85 |
-
target_skills = skill_seq[1:]
|
| 86 |
-
target_correct = correct_seq[1:]
|
| 87 |
-
|
| 88 |
-
seq_len = len(interactions)
|
| 89 |
-
pad_len = self.max_seq_len - seq_len
|
| 90 |
-
|
| 91 |
-
interactions_padded = [0] * pad_len + interactions
|
| 92 |
-
target_padded = [0] * pad_len + target_skills
|
| 93 |
-
correct_padded = [0] * pad_len + target_correct
|
| 94 |
-
mask = [False] * pad_len + [True] * seq_len
|
| 95 |
-
|
| 96 |
-
return {
|
| 97 |
-
"interactions": torch.LongTensor(interactions_padded),
|
| 98 |
-
"target_skills": torch.LongTensor(target_padded),
|
| 99 |
-
"target_correct": torch.FloatTensor(correct_padded),
|
| 100 |
-
"mask": torch.BoolTensor(mask),
|
| 101 |
-
}
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
def collate_fn(batch: list[dict]) -> dict[str, torch.Tensor]:
|
| 105 |
-
return {k: torch.stack([b[k] for b in batch]) for k in batch[0]}
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
# ------------------------------------------------------------------ #
|
| 109 |
-
# Trainer config #
|
| 110 |
-
# ------------------------------------------------------------------ #
|
| 111 |
-
|
| 112 |
-
@dataclass
|
| 113 |
-
class TrainerConfig:
|
| 114 |
-
# Model
|
| 115 |
-
num_skills: int = 5736
|
| 116 |
-
embed_dim: int = 64
|
| 117 |
-
num_heads: int = 8
|
| 118 |
-
dropout: float = 0.2
|
| 119 |
-
max_seq_len: int = 100
|
| 120 |
-
|
| 121 |
-
# Training
|
| 122 |
-
epochs: int = 50
|
| 123 |
-
batch_size: int = 64
|
| 124 |
-
lr: float = 1e-3
|
| 125 |
-
weight_decay: float = 1e-5
|
| 126 |
-
val_split: float = 0.1
|
| 127 |
-
|
| 128 |
-
# Early stopping
|
| 129 |
-
patience: int = 5
|
| 130 |
-
min_delta: float = 1e-4
|
| 131 |
-
|
| 132 |
-
# Output
|
| 133 |
-
output_dir: str = "checkpoints"
|
| 134 |
-
run_name: str = "sakt_run"
|
| 135 |
-
|
| 136 |
-
# Device
|
| 137 |
-
device: str = "auto" # "auto" | "cpu" | "cuda" | "mps"
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
# ------------------------------------------------------------------ #
|
| 141 |
-
# Trainer #
|
| 142 |
-
# ------------------------------------------------------------------ #
|
| 143 |
-
|
| 144 |
-
@dataclass
|
| 145 |
-
class EpochMetrics:
|
| 146 |
-
epoch: int
|
| 147 |
-
train_loss: float
|
| 148 |
-
val_loss: float
|
| 149 |
-
val_auc: float
|
| 150 |
-
val_acc: float
|
| 151 |
-
elapsed: float
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
class SAKTTrainer:
|
| 155 |
-
"""
|
| 156 |
-
Trainer for the SAKT knowledge tracing model.
|
| 157 |
-
|
| 158 |
-
Parameters
|
| 159 |
-
----------
|
| 160 |
-
config : TrainerConfig
|
| 161 |
-
"""
|
| 162 |
-
|
| 163 |
-
def __init__(self, config: TrainerConfig) -> None:
|
| 164 |
-
self.config = config
|
| 165 |
-
self.device = self._resolve_device(config.device)
|
| 166 |
-
self.output_dir = Path(config.output_dir)
|
| 167 |
-
self.output_dir.mkdir(parents=True, exist_ok=True)
|
| 168 |
-
|
| 169 |
-
# ---------------------------------------------------------------- #
|
| 170 |
-
# Public API #
|
| 171 |
-
# ---------------------------------------------------------------- #
|
| 172 |
-
|
| 173 |
-
def fit(
|
| 174 |
-
self,
|
| 175 |
-
sequences: list[tuple[list[int], list[int]]],
|
| 176 |
-
val_sequences: list[tuple[list[int], list[int]]] | None = None,
|
| 177 |
-
) -> list[EpochMetrics]:
|
| 178 |
-
"""
|
| 179 |
-
Train the SAKT model on interaction sequences.
|
| 180 |
-
|
| 181 |
-
Parameters
|
| 182 |
-
----------
|
| 183 |
-
sequences : list of (skill_seq, correct_seq)
|
| 184 |
-
Training data. Each element is a student's full history.
|
| 185 |
-
val_sequences : list of (skill_seq, correct_seq), optional
|
| 186 |
-
If None, val_split fraction of sequences is held out.
|
| 187 |
-
|
| 188 |
-
Returns
|
| 189 |
-
-------
|
| 190 |
-
list[EpochMetrics] — training history
|
| 191 |
-
"""
|
| 192 |
-
from plrs.model.sakt import SAKTModel
|
| 193 |
-
|
| 194 |
-
cfg = self.config
|
| 195 |
-
|
| 196 |
-
# Split if no explicit val set
|
| 197 |
-
if val_sequences is None:
|
| 198 |
-
n_val = max(1, int(len(sequences) * cfg.val_split))
|
| 199 |
-
idx = np.random.permutation(len(sequences))
|
| 200 |
-
val_sequences = [sequences[i] for i in idx[:n_val]]
|
| 201 |
-
train_sequences = [sequences[i] for i in idx[n_val:]]
|
| 202 |
-
else:
|
| 203 |
-
train_sequences = sequences
|
| 204 |
-
|
| 205 |
-
print(f"Training samples : {len(train_sequences)} students")
|
| 206 |
-
print(f"Validation samples: {len(val_sequences)} students")
|
| 207 |
-
print(f"Device: {self.device}")
|
| 208 |
-
|
| 209 |
-
train_ds = KTDataset(train_sequences, cfg.max_seq_len, cfg.num_skills)
|
| 210 |
-
val_ds = KTDataset(val_sequences, cfg.max_seq_len, cfg.num_skills)
|
| 211 |
-
|
| 212 |
-
train_loader = DataLoader(
|
| 213 |
-
train_ds, batch_size=cfg.batch_size, shuffle=True,
|
| 214 |
-
collate_fn=collate_fn, num_workers=0,
|
| 215 |
-
)
|
| 216 |
-
val_loader = DataLoader(
|
| 217 |
-
val_ds, batch_size=cfg.batch_size * 2, shuffle=False,
|
| 218 |
-
collate_fn=collate_fn, num_workers=0,
|
| 219 |
-
)
|
| 220 |
-
|
| 221 |
-
model = SAKTModel(
|
| 222 |
-
num_skills=cfg.num_skills,
|
| 223 |
-
embed_dim=cfg.embed_dim,
|
| 224 |
-
num_heads=cfg.num_heads,
|
| 225 |
-
dropout=cfg.dropout,
|
| 226 |
-
max_seq_len=cfg.max_seq_len,
|
| 227 |
-
).to(self.device)
|
| 228 |
-
|
| 229 |
-
print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")
|
| 230 |
-
|
| 231 |
-
optimizer = torch.optim.Adam(
|
| 232 |
-
model.parameters(), lr=cfg.lr, weight_decay=cfg.weight_decay
|
| 233 |
-
)
|
| 234 |
-
|
| 235 |
-
# Zero out NaN gradients that arise from softmax backward over fully-padded rows.
|
| 236 |
-
# This is a known issue with nn.MultiheadAttention + bool key_padding_mask.
|
| 237 |
-
# The hook is safe: it only zeroes truly NaN gradients, never valid ones.
|
| 238 |
-
def _zero_nan_grad(grad: torch.Tensor) -> torch.Tensor:
|
| 239 |
-
return torch.nan_to_num(grad, nan=0.0)
|
| 240 |
-
for p in model.parameters():
|
| 241 |
-
p.register_hook(_zero_nan_grad)
|
| 242 |
-
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
|
| 243 |
-
optimizer, mode="max", patience=2, factor=0.5
|
| 244 |
-
)
|
| 245 |
-
criterion = nn.BCEWithLogitsLoss()
|
| 246 |
-
|
| 247 |
-
history: list[EpochMetrics] = []
|
| 248 |
-
best_auc = 0.0
|
| 249 |
-
patience_counter = 0
|
| 250 |
-
best_path = self.output_dir / f"{cfg.run_name}_best.pt"
|
| 251 |
-
|
| 252 |
-
print(f"\n{'Epoch':>6} {'Train Loss':>11} {'Val Loss':>9} {'Val AUC':>9} {'Val Acc':>9} {'Time':>7}")
|
| 253 |
-
print("-" * 58)
|
| 254 |
-
|
| 255 |
-
for epoch in range(1, cfg.epochs + 1):
|
| 256 |
-
t0 = time.time()
|
| 257 |
-
|
| 258 |
-
train_loss = self._train_epoch(model, train_loader, optimizer, criterion)
|
| 259 |
-
val_loss, val_auc, val_acc = self._val_epoch(model, val_loader, criterion)
|
| 260 |
-
|
| 261 |
-
scheduler.step(val_auc)
|
| 262 |
-
elapsed = time.time() - t0
|
| 263 |
-
|
| 264 |
-
metrics = EpochMetrics(
|
| 265 |
-
epoch=epoch,
|
| 266 |
-
train_loss=train_loss,
|
| 267 |
-
val_loss=val_loss,
|
| 268 |
-
val_auc=val_auc,
|
| 269 |
-
val_acc=val_acc,
|
| 270 |
-
elapsed=elapsed,
|
| 271 |
-
)
|
| 272 |
-
history.append(metrics)
|
| 273 |
-
|
| 274 |
-
print(
|
| 275 |
-
f"{epoch:>6} {train_loss:>11.4f} {val_loss:>9.4f} "
|
| 276 |
-
f"{val_auc:>9.4f} {val_acc:>9.4f} {elapsed:>6.1f}s"
|
| 277 |
-
)
|
| 278 |
-
|
| 279 |
-
# Save best
|
| 280 |
-
if val_auc > best_auc + cfg.min_delta:
|
| 281 |
-
best_auc = val_auc
|
| 282 |
-
patience_counter = 0
|
| 283 |
-
model.save(best_path, config=self._model_config())
|
| 284 |
-
print(f" ✅ New best AUC: {best_auc:.4f} → saved to {best_path}")
|
| 285 |
-
else:
|
| 286 |
-
patience_counter += 1
|
| 287 |
-
if patience_counter >= cfg.patience:
|
| 288 |
-
print(f"\nEarly stopping at epoch {epoch} (patience={cfg.patience})")
|
| 289 |
-
break
|
| 290 |
-
|
| 291 |
-
print(f"\nTraining complete. Best val AUC: {best_auc:.4f}")
|
| 292 |
-
print(f"Best model: {best_path}")
|
| 293 |
-
return history
|
| 294 |
-
|
| 295 |
-
# ---------------------------------------------------------------- #
|
| 296 |
-
# Internal #
|
| 297 |
-
# ---------------------------------------------------------------- #
|
| 298 |
-
|
| 299 |
-
def _train_epoch(self, model, loader, optimizer, criterion) -> float:
|
| 300 |
-
model.train()
|
| 301 |
-
total_loss = 0.0
|
| 302 |
-
|
| 303 |
-
for batch in loader:
|
| 304 |
-
interactions = batch["interactions"].to(self.device)
|
| 305 |
-
target_skills = batch["target_skills"].to(self.device)
|
| 306 |
-
target_correct = batch["target_correct"].to(self.device)
|
| 307 |
-
mask = batch["mask"].to(self.device)
|
| 308 |
-
|
| 309 |
-
optimizer.zero_grad()
|
| 310 |
-
logits = model(interactions, target_skills, mask)
|
| 311 |
-
|
| 312 |
-
# Only compute loss on real (non-padded) positions
|
| 313 |
-
real_logits = logits[mask]
|
| 314 |
-
real_targets = target_correct[mask]
|
| 315 |
-
|
| 316 |
-
loss = criterion(real_logits, real_targets)
|
| 317 |
-
loss.backward()
|
| 318 |
-
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
|
| 319 |
-
optimizer.step()
|
| 320 |
-
|
| 321 |
-
total_loss += loss.item()
|
| 322 |
-
|
| 323 |
-
return total_loss / max(len(loader), 1)
|
| 324 |
-
|
| 325 |
-
@torch.no_grad()
|
| 326 |
-
def _val_epoch(self, model, loader, criterion) -> tuple[float, float, float]:
|
| 327 |
-
model.eval()
|
| 328 |
-
total_loss = 0.0
|
| 329 |
-
all_probs: list[float] = []
|
| 330 |
-
all_labels: list[float] = []
|
| 331 |
-
|
| 332 |
-
for batch in loader:
|
| 333 |
-
interactions = batch["interactions"].to(self.device)
|
| 334 |
-
target_skills = batch["target_skills"].to(self.device)
|
| 335 |
-
target_correct = batch["target_correct"].to(self.device)
|
| 336 |
-
mask = batch["mask"].to(self.device)
|
| 337 |
-
|
| 338 |
-
logits = model(interactions, target_skills, mask)
|
| 339 |
-
real_logits = logits[mask]
|
| 340 |
-
real_targets = target_correct[mask]
|
| 341 |
-
|
| 342 |
-
loss = criterion(real_logits, real_targets)
|
| 343 |
-
total_loss += loss.item()
|
| 344 |
-
|
| 345 |
-
probs = torch.sigmoid(real_logits).cpu().numpy()
|
| 346 |
-
labels = real_targets.cpu().numpy()
|
| 347 |
-
all_probs.extend(probs.tolist())
|
| 348 |
-
all_labels.extend(labels.tolist())
|
| 349 |
-
|
| 350 |
-
avg_loss = total_loss / max(len(loader), 1)
|
| 351 |
-
all_probs_arr = np.array(all_probs)
|
| 352 |
-
all_labels_arr = np.array(all_labels)
|
| 353 |
-
|
| 354 |
-
# Guard against NaN (can occur with very small val sets)
|
| 355 |
-
all_probs_arr = np.nan_to_num(all_probs_arr, nan=0.5)
|
| 356 |
-
all_labels_arr = np.nan_to_num(all_labels_arr, nan=0.0)
|
| 357 |
-
|
| 358 |
-
if HAS_SKLEARN and len(np.unique(all_labels_arr)) > 1:
|
| 359 |
-
auc = float(roc_auc_score(all_labels_arr, all_probs_arr))
|
| 360 |
-
else:
|
| 361 |
-
auc = 0.5 # fallback (single class or no sklearn)
|
| 362 |
-
|
| 363 |
-
acc = float(((all_probs_arr >= 0.5) == all_labels_arr).mean())
|
| 364 |
-
return avg_loss, auc, acc
|
| 365 |
-
|
| 366 |
-
def _model_config(self) -> dict:
|
| 367 |
-
cfg = self.config
|
| 368 |
-
return {
|
| 369 |
-
"num_skills": cfg.num_skills,
|
| 370 |
-
"embed_dim": cfg.embed_dim,
|
| 371 |
-
"num_heads": cfg.num_heads,
|
| 372 |
-
"dropout": cfg.dropout,
|
| 373 |
-
"max_seq_len": cfg.max_seq_len,
|
| 374 |
-
}
|
| 375 |
-
|
| 376 |
-
@staticmethod
|
| 377 |
-
def _resolve_device(device: str) -> torch.device:
|
| 378 |
-
if device == "auto":
|
| 379 |
-
if torch.cuda.is_available():
|
| 380 |
-
return torch.device("cuda")
|
| 381 |
-
if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
|
| 382 |
-
return torch.device("mps")
|
| 383 |
-
return torch.device("cpu")
|
| 384 |
-
return torch.device(device)
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
# ------------------------------------------------------------------ #
|
| 388 |
-
# Utilities #
|
| 389 |
-
# ------------------------------------------------------------------ #
|
| 390 |
-
|
| 391 |
-
def load_sequences_from_csv(
|
| 392 |
-
path: str | Path,
|
| 393 |
-
student_col: str = "student_id",
|
| 394 |
-
skill_col: str = "skill_id",
|
| 395 |
-
correct_col: str = "correct",
|
| 396 |
-
timestamp_col: str | None = "timestamp",
|
| 397 |
-
min_seq_len: int = 5,
|
| 398 |
-
) -> list[tuple[list[int], list[int]]]:
|
| 399 |
-
"""
|
| 400 |
-
Load student interaction sequences from a CSV file.
|
| 401 |
-
|
| 402 |
-
Parameters
|
| 403 |
-
----------
|
| 404 |
-
path : str or Path
|
| 405 |
-
CSV with columns: student_id, skill_id, correct, [timestamp]
|
| 406 |
-
student_col, skill_col, correct_col : str
|
| 407 |
-
Column names.
|
| 408 |
-
timestamp_col : str or None
|
| 409 |
-
If provided, sort interactions by this column within each student.
|
| 410 |
-
min_seq_len : int
|
| 411 |
-
Drop students with fewer than this many interactions.
|
| 412 |
-
|
| 413 |
-
Returns
|
| 414 |
-
-------
|
| 415 |
-
list of (skill_seq, correct_seq) tuples
|
| 416 |
-
"""
|
| 417 |
-
import pandas as pd
|
| 418 |
-
|
| 419 |
-
df = pd.read_csv(path)
|
| 420 |
-
|
| 421 |
-
required = [student_col, skill_col, correct_col]
|
| 422 |
-
missing = [c for c in required if c not in df.columns]
|
| 423 |
-
if missing:
|
| 424 |
-
raise ValueError(f"Missing columns in CSV: {missing}. Found: {df.columns.tolist()}")
|
| 425 |
-
|
| 426 |
-
if timestamp_col and timestamp_col in df.columns:
|
| 427 |
-
df = df.sort_values([student_col, timestamp_col])
|
| 428 |
-
|
| 429 |
-
sequences = []
|
| 430 |
-
for _, group in df.groupby(student_col):
|
| 431 |
-
skills = group[skill_col].astype(int).tolist()
|
| 432 |
-
corrects = group[correct_col].astype(int).tolist()
|
| 433 |
-
if len(skills) >= min_seq_len:
|
| 434 |
-
sequences.append((skills, corrects))
|
| 435 |
-
|
| 436 |
-
print(f"Loaded {len(sequences)} student sequences from {path}")
|
| 437 |
-
return sequences
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
plrs/pipeline.py
DELETED
|
@@ -1,236 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
plrs.pipeline
|
| 3 |
-
=============
|
| 4 |
-
PLRSPipeline: the main entry point.
|
| 5 |
-
|
| 6 |
-
Orchestrates SAKT inference → DAG constraint validation → multi-objective ranking.
|
| 7 |
-
|
| 8 |
-
Usage
|
| 9 |
-
-----
|
| 10 |
-
from plrs import PLRSPipeline
|
| 11 |
-
from plrs.curriculum import load_dag
|
| 12 |
-
|
| 13 |
-
curriculum = load_dag("math_dag.json")
|
| 14 |
-
pipeline = PLRSPipeline(curriculum, model_path="sakt_model.pt")
|
| 15 |
-
|
| 16 |
-
# From raw interaction history
|
| 17 |
-
results = pipeline.recommend_from_history(
|
| 18 |
-
skill_seq=[12, 45, 3, 78],
|
| 19 |
-
correct_seq=[1, 0, 1, 1],
|
| 20 |
-
)
|
| 21 |
-
|
| 22 |
-
# From pre-computed mastery dict
|
| 23 |
-
results = pipeline.recommend_from_mastery(
|
| 24 |
-
mastery_scores={"algebra_basics": 0.85, "quadratic_equations": 0.42}
|
| 25 |
-
)
|
| 26 |
-
"""
|
| 27 |
-
|
| 28 |
-
from __future__ import annotations
|
| 29 |
-
|
| 30 |
-
from pathlib import Path
|
| 31 |
-
from typing import Any
|
| 32 |
-
|
| 33 |
-
from plrs.constraints.dag import DAGConstraintLayer, MasteryVector
|
| 34 |
-
from plrs.curriculum.loader import CurriculumGraph
|
| 35 |
-
from plrs.ranking.ranker import MultiObjectiveRanker, RankedRecommendation
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
class PLRSPipeline:
|
| 39 |
-
"""
|
| 40 |
-
End-to-end PLRS recommendation pipeline.
|
| 41 |
-
|
| 42 |
-
Parameters
|
| 43 |
-
----------
|
| 44 |
-
curriculum : CurriculumGraph
|
| 45 |
-
model_path : str or Path, optional
|
| 46 |
-
Path to a trained SAKT .pt file. If None, only mastery-dict mode is available.
|
| 47 |
-
threshold : float
|
| 48 |
-
Mastery threshold (default 0.70).
|
| 49 |
-
soft_threshold : float
|
| 50 |
-
Soft constraint threshold (default 0.50).
|
| 51 |
-
top_n : int
|
| 52 |
-
Number of top approved recommendations (default 5).
|
| 53 |
-
w_gap, w_readiness, w_downstream : float
|
| 54 |
-
Ranker objective weights.
|
| 55 |
-
device : str
|
| 56 |
-
PyTorch device for model inference (default "cpu").
|
| 57 |
-
"""
|
| 58 |
-
|
| 59 |
-
def __init__(
|
| 60 |
-
self,
|
| 61 |
-
curriculum: CurriculumGraph,
|
| 62 |
-
model_path: str | Path | None = None,
|
| 63 |
-
threshold: float = 0.70,
|
| 64 |
-
soft_threshold: float = 0.50,
|
| 65 |
-
top_n: int = 5,
|
| 66 |
-
w_gap: float = 0.4,
|
| 67 |
-
w_readiness: float = 0.4,
|
| 68 |
-
w_downstream: float = 0.2,
|
| 69 |
-
device: str = "cpu",
|
| 70 |
-
) -> None:
|
| 71 |
-
self.curriculum = curriculum
|
| 72 |
-
self.threshold = threshold
|
| 73 |
-
self.soft_threshold = soft_threshold
|
| 74 |
-
self.top_n = top_n
|
| 75 |
-
self.device = device
|
| 76 |
-
|
| 77 |
-
self.constraint_layer = DAGConstraintLayer(curriculum)
|
| 78 |
-
self.ranker = MultiObjectiveRanker(
|
| 79 |
-
curriculum,
|
| 80 |
-
w_gap=w_gap,
|
| 81 |
-
w_readiness=w_readiness,
|
| 82 |
-
w_downstream=w_downstream,
|
| 83 |
-
)
|
| 84 |
-
|
| 85 |
-
self._model = None
|
| 86 |
-
if model_path is not None:
|
| 87 |
-
self._load_model(model_path)
|
| 88 |
-
|
| 89 |
-
# ------------------------------------------------------------------ #
|
| 90 |
-
# Public API #
|
| 91 |
-
# ------------------------------------------------------------------ #
|
| 92 |
-
|
| 93 |
-
def recommend_from_mastery(
|
| 94 |
-
self,
|
| 95 |
-
mastery_scores: dict[str, float],
|
| 96 |
-
cascade: bool = False,
|
| 97 |
-
) -> dict[str, Any]:
|
| 98 |
-
"""
|
| 99 |
-
Generate recommendations from a pre-computed mastery dict.
|
| 100 |
-
|
| 101 |
-
Parameters
|
| 102 |
-
----------
|
| 103 |
-
mastery_scores : dict[str, float]
|
| 104 |
-
Mapping from topic_id → mastery probability [0, 1].
|
| 105 |
-
cascade : bool
|
| 106 |
-
If True, propagate mastery upward through prerequisites.
|
| 107 |
-
|
| 108 |
-
Returns
|
| 109 |
-
-------
|
| 110 |
-
dict with keys: approved, challenging, vetoed, stats, mastery_summary
|
| 111 |
-
"""
|
| 112 |
-
mastery = self._build_mastery_vector(mastery_scores)
|
| 113 |
-
if cascade:
|
| 114 |
-
mastery.cascade_up()
|
| 115 |
-
return self._run(mastery)
|
| 116 |
-
|
| 117 |
-
def recommend_from_history(
|
| 118 |
-
self,
|
| 119 |
-
skill_seq: list[int],
|
| 120 |
-
correct_seq: list[int],
|
| 121 |
-
skill_to_topic: dict[int, str] | None = None,
|
| 122 |
-
cascade: bool = False,
|
| 123 |
-
) -> dict[str, Any]:
|
| 124 |
-
"""
|
| 125 |
-
Generate recommendations from raw student interaction history.
|
| 126 |
-
|
| 127 |
-
Requires a loaded SAKT model (pass model_path to __init__).
|
| 128 |
-
|
| 129 |
-
Parameters
|
| 130 |
-
----------
|
| 131 |
-
skill_seq : list[int]
|
| 132 |
-
Sequence of skill IDs from the student's history.
|
| 133 |
-
correct_seq : list[int]
|
| 134 |
-
Corresponding correctness flags (1/0).
|
| 135 |
-
skill_to_topic : dict[int, str], optional
|
| 136 |
-
Mapping from SAKT skill_id → curriculum topic_id.
|
| 137 |
-
Required to map model output back to DAG nodes.
|
| 138 |
-
cascade : bool
|
| 139 |
-
If True, propagate mastery upward through prerequisites.
|
| 140 |
-
|
| 141 |
-
Returns
|
| 142 |
-
-------
|
| 143 |
-
dict with keys: approved, challenging, vetoed, stats, mastery_summary
|
| 144 |
-
"""
|
| 145 |
-
if self._model is None:
|
| 146 |
-
raise RuntimeError(
|
| 147 |
-
"No model loaded. Pass model_path to PLRSPipeline() to use history-based inference."
|
| 148 |
-
)
|
| 149 |
-
|
| 150 |
-
skill_probs = self._model.predict_mastery(skill_seq, correct_seq, device=self.device)
|
| 151 |
-
|
| 152 |
-
if skill_to_topic:
|
| 153 |
-
mastery_scores = {}
|
| 154 |
-
for skill_id, prob in skill_probs.items():
|
| 155 |
-
topic_id = skill_to_topic.get(skill_id)
|
| 156 |
-
if topic_id:
|
| 157 |
-
mastery_scores[topic_id] = max(mastery_scores.get(topic_id, 0.0), prob)
|
| 158 |
-
else:
|
| 159 |
-
# Without mapping, return raw skill probabilities (limited utility)
|
| 160 |
-
mastery_scores = {str(k): v for k, v in skill_probs.items()}
|
| 161 |
-
|
| 162 |
-
mastery = self._build_mastery_vector(mastery_scores)
|
| 163 |
-
if cascade:
|
| 164 |
-
mastery.cascade_up()
|
| 165 |
-
return self._run(mastery)
|
| 166 |
-
|
| 167 |
-
def what_if(self, topic_id: str) -> dict[str, Any]:
|
| 168 |
-
"""
|
| 169 |
-
What-if analysis: what unlocks if a student masters this topic?
|
| 170 |
-
|
| 171 |
-
Parameters
|
| 172 |
-
----------
|
| 173 |
-
topic_id : str
|
| 174 |
-
|
| 175 |
-
Returns
|
| 176 |
-
-------
|
| 177 |
-
dict with direct_unlocks, all_unlocks, blocked_by, total_unlocked
|
| 178 |
-
"""
|
| 179 |
-
graph = self.curriculum.graph
|
| 180 |
-
direct = self.curriculum.successors(topic_id)
|
| 181 |
-
all_unlocks = self.curriculum.descendants(topic_id)
|
| 182 |
-
blocked_by = self.curriculum.prerequisites(topic_id)
|
| 183 |
-
|
| 184 |
-
return {
|
| 185 |
-
"topic_id": topic_id,
|
| 186 |
-
"topic_label": self.curriculum.label(topic_id),
|
| 187 |
-
"direct_unlocks": [
|
| 188 |
-
{"id": n, "label": self.curriculum.label(n)} for n in direct
|
| 189 |
-
],
|
| 190 |
-
"all_unlocks": [
|
| 191 |
-
{"id": n, "label": self.curriculum.label(n)} for n in all_unlocks
|
| 192 |
-
],
|
| 193 |
-
"blocked_by": [
|
| 194 |
-
{"id": n, "label": self.curriculum.label(n)} for n in blocked_by
|
| 195 |
-
],
|
| 196 |
-
"total_unlocked": len(all_unlocks),
|
| 197 |
-
}
|
| 198 |
-
|
| 199 |
-
# ------------------------------------------------------------------ #
|
| 200 |
-
# Internal helpers #
|
| 201 |
-
# ------------------------------------------------------------------ #
|
| 202 |
-
|
| 203 |
-
def _build_mastery_vector(self, mastery_scores: dict[str, float]) -> MasteryVector:
|
| 204 |
-
mv = MasteryVector(self.curriculum, self.threshold, self.soft_threshold)
|
| 205 |
-
mv.update_batch(mastery_scores)
|
| 206 |
-
return mv
|
| 207 |
-
|
| 208 |
-
def _run(self, mastery: MasteryVector) -> dict[str, Any]:
|
| 209 |
-
constraint_results = self.constraint_layer.validate_all(mastery)
|
| 210 |
-
ranked = self.ranker.rank(constraint_results, mastery, top_n=self.top_n)
|
| 211 |
-
ranked["mastery_summary"] = mastery.summary()
|
| 212 |
-
|
| 213 |
-
# Serialise to plain dicts for API/JSON friendliness
|
| 214 |
-
for key in ("approved", "challenging", "vetoed"):
|
| 215 |
-
ranked[key] = [self._rec_to_dict(r) for r in ranked[key]]
|
| 216 |
-
|
| 217 |
-
return ranked
|
| 218 |
-
|
| 219 |
-
def _load_model(self, path: str | Path) -> None:
|
| 220 |
-
from plrs.model.sakt import SAKTModel
|
| 221 |
-
self._model = SAKTModel.load(path, device=self.device)
|
| 222 |
-
|
| 223 |
-
@staticmethod
|
| 224 |
-
def _rec_to_dict(rec: RankedRecommendation) -> dict[str, Any]:
|
| 225 |
-
return {
|
| 226 |
-
"topic_id": rec.topic_id,
|
| 227 |
-
"topic_label": rec.topic_label,
|
| 228 |
-
"status": rec.status,
|
| 229 |
-
"mastery": rec.mastery,
|
| 230 |
-
"score": rec.score,
|
| 231 |
-
"reasoning": rec.reasoning,
|
| 232 |
-
"prerequisites": rec.prerequisites,
|
| 233 |
-
"unmet_prerequisites": rec.unmet_prerequisites,
|
| 234 |
-
"downstream_count": rec.downstream_count,
|
| 235 |
-
"score_breakdown": rec.score_breakdown,
|
| 236 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
plrs/ranking/__init__.py
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
from plrs.ranking.ranker import MultiObjectiveRanker, RankedRecommendation
|
| 2 |
-
|
| 3 |
-
__all__ = ["MultiObjectiveRanker", "RankedRecommendation"]
|
|
|
|
|
|
|
|
|
|
|
|
plrs/ranking/ranker.py
DELETED
|
@@ -1,189 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
plrs.ranking.ranker
|
| 3 |
-
===================
|
| 4 |
-
Multi-objective ranking function for approved/challenging topics.
|
| 5 |
-
|
| 6 |
-
Scoring signals:
|
| 7 |
-
1. Mastery gap — how close the student is to mastering this topic
|
| 8 |
-
2. Readiness — fraction of prerequisites met
|
| 9 |
-
3. Downstream value — how many future topics this unlocks (normalised)
|
| 10 |
-
|
| 11 |
-
Weights are configurable. Default: gap=0.4, readiness=0.4, downstream=0.2
|
| 12 |
-
"""
|
| 13 |
-
|
| 14 |
-
from __future__ import annotations
|
| 15 |
-
|
| 16 |
-
from dataclasses import dataclass
|
| 17 |
-
|
| 18 |
-
import networkx as nx
|
| 19 |
-
|
| 20 |
-
from plrs.constraints.dag import ConstraintResult, MasteryVector
|
| 21 |
-
from plrs.curriculum.loader import CurriculumGraph
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
@dataclass
|
| 25 |
-
class RankedRecommendation:
|
| 26 |
-
topic_id: str
|
| 27 |
-
topic_label: str
|
| 28 |
-
status: str # "approved" | "challenging"
|
| 29 |
-
mastery: float
|
| 30 |
-
score: float
|
| 31 |
-
reasoning: str
|
| 32 |
-
prerequisites: list[str]
|
| 33 |
-
unmet_prerequisites: list[str]
|
| 34 |
-
downstream_count: int
|
| 35 |
-
score_breakdown: dict[str, float]
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
class MultiObjectiveRanker:
|
| 39 |
-
"""
|
| 40 |
-
Ranks constraint-validated topics by a weighted combination of signals.
|
| 41 |
-
|
| 42 |
-
Parameters
|
| 43 |
-
----------
|
| 44 |
-
curriculum : CurriculumGraph
|
| 45 |
-
w_gap : float
|
| 46 |
-
Weight for mastery gap signal (default 0.4).
|
| 47 |
-
w_readiness : float
|
| 48 |
-
Weight for prerequisite readiness signal (default 0.4).
|
| 49 |
-
w_downstream : float
|
| 50 |
-
Weight for downstream unlock value (default 0.2).
|
| 51 |
-
"""
|
| 52 |
-
|
| 53 |
-
def __init__(
|
| 54 |
-
self,
|
| 55 |
-
curriculum: CurriculumGraph,
|
| 56 |
-
w_gap: float = 0.4,
|
| 57 |
-
w_readiness: float = 0.4,
|
| 58 |
-
w_downstream: float = 0.2,
|
| 59 |
-
) -> None:
|
| 60 |
-
self.curriculum = curriculum
|
| 61 |
-
self.w_gap = w_gap
|
| 62 |
-
self.w_readiness = w_readiness
|
| 63 |
-
self.w_downstream = w_downstream
|
| 64 |
-
|
| 65 |
-
# Pre-compute downstream counts (expensive on large graphs; cache it)
|
| 66 |
-
self._downstream_counts = self._compute_downstream_counts()
|
| 67 |
-
max_d = max(self._downstream_counts.values(), default=1)
|
| 68 |
-
self._downstream_norm = {
|
| 69 |
-
node: count / max(max_d, 1)
|
| 70 |
-
for node, count in self._downstream_counts.items()
|
| 71 |
-
}
|
| 72 |
-
|
| 73 |
-
def _compute_downstream_counts(self) -> dict[str, int]:
|
| 74 |
-
return {
|
| 75 |
-
node: len(nx.descendants(self.curriculum.graph, node))
|
| 76 |
-
for node in self.curriculum.nodes
|
| 77 |
-
}
|
| 78 |
-
|
| 79 |
-
def score(self, result: ConstraintResult, mastery: MasteryVector) -> float:
|
| 80 |
-
"""Compute composite score for a single topic."""
|
| 81 |
-
topic_id = result.topic_id
|
| 82 |
-
|
| 83 |
-
# 1. Mastery gap: student is close but not mastered → higher priority
|
| 84 |
-
gap = max(0.0, mastery.threshold - mastery.get(topic_id))
|
| 85 |
-
gap_score = gap / mastery.threshold # normalise to [0, 1]
|
| 86 |
-
|
| 87 |
-
# 2. Readiness: fraction of prerequisites above soft threshold
|
| 88 |
-
prereqs = self.curriculum.prerequisites(topic_id)
|
| 89 |
-
if prereqs:
|
| 90 |
-
readiness = sum(
|
| 91 |
-
1 for p in prereqs if mastery.get(p) >= mastery.soft_threshold
|
| 92 |
-
) / len(prereqs)
|
| 93 |
-
else:
|
| 94 |
-
readiness = 1.0
|
| 95 |
-
|
| 96 |
-
# 3. Downstream value
|
| 97 |
-
downstream = self._downstream_norm.get(topic_id, 0.0)
|
| 98 |
-
|
| 99 |
-
score = (
|
| 100 |
-
self.w_gap * gap_score
|
| 101 |
-
+ self.w_readiness * readiness
|
| 102 |
-
+ self.w_downstream * downstream
|
| 103 |
-
)
|
| 104 |
-
|
| 105 |
-
return round(score, 4)
|
| 106 |
-
|
| 107 |
-
def rank(
|
| 108 |
-
self,
|
| 109 |
-
results: list[ConstraintResult],
|
| 110 |
-
mastery: MasteryVector,
|
| 111 |
-
top_n: int = 5,
|
| 112 |
-
challenging_penalty: float = 0.8,
|
| 113 |
-
) -> dict[str, list[RankedRecommendation]]:
|
| 114 |
-
"""
|
| 115 |
-
Rank a list of constraint results into approved / challenging / vetoed.
|
| 116 |
-
|
| 117 |
-
Parameters
|
| 118 |
-
----------
|
| 119 |
-
results : list[ConstraintResult]
|
| 120 |
-
mastery : MasteryVector
|
| 121 |
-
top_n : int
|
| 122 |
-
Number of top approved recommendations to return.
|
| 123 |
-
challenging_penalty : float
|
| 124 |
-
Score multiplier applied to challenging topics (default 0.8).
|
| 125 |
-
|
| 126 |
-
Returns
|
| 127 |
-
-------
|
| 128 |
-
dict with keys: "approved", "challenging", "vetoed", "stats"
|
| 129 |
-
"""
|
| 130 |
-
approved: list[RankedRecommendation] = []
|
| 131 |
-
challenging: list[RankedRecommendation] = []
|
| 132 |
-
vetoed: list[RankedRecommendation] = []
|
| 133 |
-
|
| 134 |
-
for result in results:
|
| 135 |
-
# Skip already-mastered topics
|
| 136 |
-
if mastery.is_mastered(result.topic_id):
|
| 137 |
-
continue
|
| 138 |
-
|
| 139 |
-
base_score = self.score(result, mastery)
|
| 140 |
-
topic_id = result.topic_id
|
| 141 |
-
|
| 142 |
-
breakdown = {
|
| 143 |
-
"gap": round(
|
| 144 |
-
self.w_gap * max(0.0, mastery.threshold - mastery.get(topic_id)) / mastery.threshold, 4
|
| 145 |
-
),
|
| 146 |
-
"readiness": round(self.w_readiness * (
|
| 147 |
-
sum(1 for p in self.curriculum.prerequisites(topic_id)
|
| 148 |
-
if mastery.get(p) >= mastery.soft_threshold)
|
| 149 |
-
/ max(len(self.curriculum.prerequisites(topic_id)), 1)
|
| 150 |
-
), 4),
|
| 151 |
-
"downstream": round(self.w_downstream * self._downstream_norm.get(topic_id, 0.0), 4),
|
| 152 |
-
}
|
| 153 |
-
|
| 154 |
-
rec = RankedRecommendation(
|
| 155 |
-
topic_id=result.topic_id,
|
| 156 |
-
topic_label=result.topic_label,
|
| 157 |
-
status=result.status,
|
| 158 |
-
mastery=round(result.mastery, 3),
|
| 159 |
-
score=round(base_score * (challenging_penalty if result.status == "challenging" else 1.0), 4),
|
| 160 |
-
reasoning=result.reasoning,
|
| 161 |
-
prerequisites=result.prerequisites,
|
| 162 |
-
unmet_prerequisites=result.unmet_prerequisites,
|
| 163 |
-
downstream_count=self._downstream_counts.get(result.topic_id, 0),
|
| 164 |
-
score_breakdown=breakdown,
|
| 165 |
-
)
|
| 166 |
-
|
| 167 |
-
if result.status == "approved":
|
| 168 |
-
approved.append(rec)
|
| 169 |
-
elif result.status == "challenging":
|
| 170 |
-
challenging.append(rec)
|
| 171 |
-
else:
|
| 172 |
-
vetoed.append(rec)
|
| 173 |
-
|
| 174 |
-
approved.sort(key=lambda r: r.score, reverse=True)
|
| 175 |
-
challenging.sort(key=lambda r: r.score, reverse=True)
|
| 176 |
-
|
| 177 |
-
total = len(results)
|
| 178 |
-
return {
|
| 179 |
-
"approved": approved[:top_n],
|
| 180 |
-
"challenging": challenging[:3],
|
| 181 |
-
"vetoed": vetoed[:5],
|
| 182 |
-
"stats": {
|
| 183 |
-
"total_topics": total,
|
| 184 |
-
"approved_count": len(approved),
|
| 185 |
-
"challenging_count": len(challenging),
|
| 186 |
-
"vetoed_count": len(vetoed),
|
| 187 |
-
"prerequisite_violation_rate": round(len(vetoed) / max(total, 1), 3),
|
| 188 |
-
},
|
| 189 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,9 +1,7 @@
|
|
| 1 |
-
streamlit>=1.
|
| 2 |
-
torch>=2.
|
| 3 |
pandas>=2.0.0
|
| 4 |
numpy>=1.24.0
|
| 5 |
-
networkx>=3.
|
| 6 |
scikit-learn>=1.3.0
|
| 7 |
huggingface_hub>=0.20.0
|
| 8 |
-
fastapi>=0.110.0
|
| 9 |
-
pydantic>=2.0
|
|
|
|
| 1 |
+
streamlit>=1.32.0
|
| 2 |
+
torch>=2.9.0
|
| 3 |
pandas>=2.0.0
|
| 4 |
numpy>=1.24.0
|
| 5 |
+
networkx>=3.1
|
| 6 |
scikit-learn>=1.3.0
|
| 7 |
huggingface_hub>=0.20.0
|
|
|
|
|
|
sakt_decay_best.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:673a79d8b9cd8a6cb4bb1081817a806c23f15ff54708e5546750e564bfc728f0
|
| 3 |
+
size 171713
|
models/sakt_model.pt → sakt_model.pt
RENAMED
|
File without changes
|
sakt_vanilla_best.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b93a62b1132e17dde47ba1667e881a20b9614eaf3caad4494642cf2fbce8c2b
|
| 3 |
+
size 171713
|
training_curves.png
ADDED
|
Git LFS Details
|