#!/usr/bin/env python3
"""
PLRS — Logic Engine v0.2.0
HuggingFace Space entry point (self-contained).

Loads all assets from HuggingFace Hub (Clementio/PLRS).
Works without local files — everything is downloaded on first run.
"""

import json
import logging
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import numpy as np
import pandas as pd
import streamlit as st
import torch
import torch.nn as nn
import networkx as nx

# ── HuggingFace Hub helpers ─────────────────────────────────────────────────
from huggingface_hub import hf_hub_download

logger = logging.getLogger(__name__)

HF_REPO = "Clementio/PLRS"


def _hub_download(filename: str) -> str:
    """Download a file from the HF repo, cached.

    Args:
        filename: Path of the file inside the ``HF_REPO`` repository.

    Returns:
        The local filesystem path returned by ``hf_hub_download``.
    """
    return hf_hub_download(repo_id=HF_REPO, filename=filename)


# ═════════════════════════════════════════════════════════════════════════════
# INLINE: plrs.model.sakt (SAKTModel — v0.1.0 architecture)
# ═════════════════════════════════════════════════════════════════════════════

class SAKTModel(nn.Module):
    """Self-Attentive Knowledge Tracing (SAKT) — PyTorch implementation.

    Embeds an interaction sequence, adds a learned positional embedding, runs
    a causally-masked Transformer encoder, adds the target-skill embedding and
    projects every position to a single scalar.
    """

    def __init__(
        self,
        num_skills: int,
        embed_dim: int = 128,
        num_heads: int = 8,
        num_layers: int = 2,
        max_seq_len: int = 200,
        dropout: float = 0.2,
    ):
        super().__init__()
        self.num_skills = num_skills
        # Index 0 is the padding index in both embedding tables.
        # NOTE(review): the 2 * num_skills + 1 vocabulary presumably encodes
        # (skill, correct/incorrect) pairs — confirm against the training code.
        self.interaction_embed = nn.Embedding(num_skills * 2 + 1, embed_dim, padding_idx=0)
        self.skill_embed = nn.Embedding(num_skills + 1, embed_dim, padding_idx=0)
        self.pos_embed = nn.Embedding(max_seq_len + 1, embed_dim)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_dim,
            nhead=num_heads,
            dropout=dropout,
            batch_first=True,
            dim_feedforward=embed_dim * 4,
            norm_first=True,
        )
        self.transformer = nn.TransformerEncoder(
            encoder_layer, num_layers=num_layers, enable_nested_tensor=False
        )
        self.dropout = nn.Dropout(dropout)
        self.output = nn.Linear(embed_dim, 1)

    def forward(self, interactions, target_skills, mask):
        """Score every position of a batch of interaction sequences.

        Args:
            interactions: 2-D integer tensor ``(batch, seq_len)`` of
                interaction ids.
            target_skills: Integer tensor of skill ids; its embedding is added
                to the encoder output, so it must broadcast against
                ``(batch, seq_len, embed_dim)``.
            mask: Tensor multiplied into the sequence as
                ``mask.unsqueeze(-1)``; zero entries blank out a position.

        Returns:
            Tensor with the trailing singleton dimension squeezed away
            (``(batch, seq_len)``). These are raw linear outputs; no sigmoid
            is applied here.
        """
        batch_size, seq_len = interactions.shape
        positions = (
            torch.arange(seq_len, device=interactions.device)
            .unsqueeze(0)
            .expand(batch_size, -1)
        )
        x = self.interaction_embed(interactions)
        x = x + self.pos_embed(positions)
        x = x * mask.unsqueeze(-1).float()
        x = self.dropout(x)
        # Additive mask: -inf strictly above the diagonal, so a position
        # cannot attend to later positions.
        causal_mask = torch.triu(
            torch.full((seq_len, seq_len), float("-inf"), device=interactions.device),
            diagonal=1,
        )
        x = self.transformer(x, mask=causal_mask, is_causal=False)
        x = x * mask.unsqueeze(-1).float()
        x = x + self.skill_embed(target_skills)
        return self.output(x).squeeze(-1)


# ═════════════════════════════════════════════════════════════════════════════
# INLINE: plrs.model.sakt_decay (SAKTWithDecay — v0.2.0)
# ═════════════════════════════════════════════════════════════════════════════

class SAKTWithDecay(nn.Module):
    """SAKT with a learnable exponential position weighting (v0.2.0).

    Identical to ``SAKTModel`` except that the embedded sequence is scaled by
    ``exp(-decay * position_index)`` before masking and encoding.
    """

    def __init__(
        self,
        num_skills: int,
        embed_dim: int = 128,
        num_heads: int = 8,
        num_layers: int = 2,
        max_seq_len: int = 200,
        dropout: float = 0.2,
        decay_init: float = 1.0,
    ):
        super().__init__()
        self.num_skills = num_skills
        self.embed_dim = embed_dim
        # Learnable scalar decay rate, initialised to ``decay_init``.
        self.decay = nn.Parameter(torch.tensor(decay_init))
        self.interaction_embed = nn.Embedding(num_skills * 2 + 1, embed_dim, padding_idx=0)
        self.skill_embed = nn.Embedding(num_skills + 1, embed_dim, padding_idx=0)
        self.pos_embed = nn.Embedding(max_seq_len + 1, embed_dim)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_dim,
            nhead=num_heads,
            dropout=dropout,
            batch_first=True,
            dim_feedforward=embed_dim * 4,
            norm_first=True,
        )
        self.transformer = nn.TransformerEncoder(
            encoder_layer, num_layers=num_layers, enable_nested_tensor=False
        )
        self.dropout = nn.Dropout(dropout)
        self.output = nn.Linear(embed_dim, 1)

    def forward(self, interactions, target_skills, mask):
        """Score every position of a batch of interaction sequences.

        Arguments and return value match ``SAKTModel.forward``: a 2-D
        ``(batch, seq_len)`` interaction tensor, target skill ids, and a mask
        multiplied in as ``mask.unsqueeze(-1)``; returns the raw linear
        outputs with the trailing dimension squeezed.
        """
        batch_size, seq_len = interactions.shape
        positions = (
            torch.arange(seq_len, device=interactions.device)
            .unsqueeze(0)
            .expand(batch_size, -1)
        )
        x = self.interaction_embed(interactions)
        x = x + self.pos_embed(positions)
        # Position t is weighted by exp(-decay * t): index 0 keeps weight 1
        # and larger indices are attenuated.
        # NOTE(review): if index 0 is the oldest interaction this attenuates
        # the *recent* positions, not the older ones. Left unchanged because
        # trained checkpoints depend on it — confirm against the training code.
        decay_weights = torch.exp(
            -self.decay * torch.arange(seq_len, device=interactions.device).float()
        )
        x = x * decay_weights.view(1, seq_len, 1)
        x = x * mask.unsqueeze(-1).float()
        x = self.dropout(x)
        causal_mask = torch.triu(
            torch.full((seq_len, seq_len), float("-inf"), device=interactions.device),
            diagonal=1,
        )
        x = self.transformer(x, mask=causal_mask, is_causal=False)
        x = x * mask.unsqueeze(-1).float()
        x = x + self.skill_embed(target_skills)
        return self.output(x).squeeze(-1)


# ═════════════════════════════════════════════════════════════════════════════
# INLINE: plrs.curriculum.loader + Curriculum class
# ═════════════════════════════════════════════════════════════════════════════

class Curriculum:
    """Wrapper around a NetworkX DiGraph for prerequisite knowledge maps.

    An edge ``a -> b`` means ``a`` is a prerequisite of ``b``.
    """

    def __init__(self, graph: nx.DiGraph, domain: str = ""):
        self._g = graph
        self.domain = domain

    @property
    def nodes(self) -> List[str]:
        """Node ids as a fresh list."""
        return list(self._g.nodes())

    @property
    def num_nodes(self) -> int:
        """Number of nodes in the graph."""
        return self._g.number_of_nodes()

    @property
    def num_edges(self) -> int:
        """Number of prerequisite edges in the graph."""
        return self._g.number_of_edges()

    def label(self, node: str) -> str:
        """Return the node's ``label`` attribute, or the node id if unset."""
        return self._g.nodes[node].get("label", node)

    def level(self, node: str) -> str:
        """Return the node's ``level`` attribute, or ``""`` if unset."""
        return self._g.nodes[node].get("level", "")

    def prerequisites(self, node: str) -> List[str]:
        """Return the direct predecessors of ``node``."""
        return list(self._g.predecessors(node))

    def successors(self, node: str) -> List[str]:
        """Return the direct successors of ``node``."""
        return list(self._g.successors(node))

    def descendants(self, node: str) -> List[str]:
        """Return every node reachable from ``node``."""
        return list(nx.descendants(self._g, node))


def load_dag(path: str, domain: str = "") -> Curriculum:
    """Load a knowledge map from a JSON file into a ``Curriculum``.

    The JSON document must contain ``"nodes"`` (objects with ``"id"`` and
    optional ``"label"``, ``"level"``, ``"term"``) and ``"edges"`` (objects
    with ``"from"`` and ``"to"``).

    Args:
        path: Path of the JSON file.
        domain: Display name stored on the resulting curriculum.

    Returns:
        The curriculum wrapping the loaded graph.

    Raises:
        AssertionError: If the edges form a cycle.
    """
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    g = nx.DiGraph()
    for node in data["nodes"]:
        g.add_node(
            node["id"],
            label=node.get("label", node["id"]),
            level=node.get("level", ""),
            term=node.get("term", ""),
        )
    for edge in data["edges"]:
        g.add_edge(edge["from"], edge["to"])

    # Raised explicitly rather than via `assert`, so the check still runs
    # under `python -O`; the exception type is kept for existing callers.
    if not nx.is_directed_acyclic_graph(g):
        raise AssertionError("Cycle detected in knowledge map!")
    return Curriculum(g, domain=domain)


# ═════════════════════════════════════════════════════════════════════════════
# INLINE: plrs.pipeline (PLRSPipeline)
# ═════════════════════════════════════════════════════════════════════════════

class PLRSPipeline:
    """Three-layer recommendation pipeline: SAKT → DAG constraints → Ranking."""

    def __init__(
        self,
        curriculum: Curriculum,
        threshold: float = 0.70,
        soft_threshold: float = 0.50,
        top_n: int = 5,
    ):
        """Store the settings and pre-compute downstream importance.

        Args:
            curriculum: Prerequisite graph to recommend over.
            threshold: Mastery at or above which a topic counts as mastered.
            soft_threshold: A prerequisite below this value vetoes a topic.
            top_n: Maximum number of approved recommendations returned.
        """
        self.curriculum = curriculum
        self.threshold = threshold
        self.soft_threshold = soft_threshold
        self.top_n = top_n
        self._model = None

        # Pre-compute downstream importance scores
        self._downstream_counts = {
            n: len(self.curriculum.descendants(n)) for n in self.curriculum.nodes
        }
        # `or 1` also covers a graph whose nodes all have zero descendants,
        # which would otherwise divide by zero.
        mx = max(self._downstream_counts.values(), default=0) or 1
        self._downstream = {n: s / mx for n, s in self._downstream_counts.items()}

    def recommend_from_mastery(self, mastery_scores: Dict[str, float]) -> Dict:
        """Classify and rank every topic for the given mastery scores.

        Args:
            mastery_scores: Mapping of topic id to mastery in ``[0, 1]``;
                missing topics count as ``0.0``.

        Returns:
            Dict with ``"approved"`` (top ``top_n``), ``"challenging"`` (top
            3), ``"vetoed"`` (first 5), ``"mastery_summary"`` and ``"stats"``.
            The counts in ``"stats"`` refer to the untruncated lists.
        """
        approved, challenging, vetoed = [], [], []
        for node in self.curriculum.nodes:
            status, reasoning, unmet = self._validate(node, mastery_scores)
            current = mastery_scores.get(node, 0.0)
            entry = {
                "topic_id": node,
                "topic_label": self.curriculum.label(node),
                "mastery": round(current, 3),
                "reasoning": reasoning,
                "status": status,
                "unmet_prerequisites": unmet,
                "downstream_count": self._downstream_counts.get(node, 0),
            }
            # Already-mastered topics are dropped from approved/challenging.
            if status == "approved" and current < self.threshold:
                entry["score"] = self._rank(node, mastery_scores)
                approved.append(entry)
            elif status == "challenging" and current < self.threshold:
                entry["score"] = self._rank(node, mastery_scores) * 0.8
                challenging.append(entry)
            elif status == "vetoed":
                vetoed.append(entry)

        approved.sort(key=lambda x: x["score"], reverse=True)
        challenging.sort(key=lambda x: x["score"], reverse=True)
        mastered = [
            n for n in self.curriculum.nodes
            if mastery_scores.get(n, 0.0) >= self.threshold
        ]
        total = max(self.curriculum.num_nodes, 1)
        return {
            "approved": approved[: self.top_n],
            "challenging": challenging[:3],
            "vetoed": vetoed[:5],
            "mastery_summary": {
                "total_topics": self.curriculum.num_nodes,
                "mastered": len(mastered),
                "mastery_rate": round(len(mastered) / total, 3),
            },
            "stats": {
                "approved_count": len(approved),
                "challenging_count": len(challenging),
                "vetoed_count": len(vetoed),
                "prerequisite_violation_rate": round(len(vetoed) / total, 3),
            },
        }

    def _validate(self, node: str, mastery: Dict[str, float]) -> Tuple[str, str, List[str]]:
        """Check a topic's direct prerequisites against the thresholds.

        Returns:
            ``(status, reasoning, labels)`` where status is ``"approved"``,
            ``"challenging"`` or ``"vetoed"`` and ``labels`` holds the display
            labels (not ids) of the prerequisites that caused the status.
        """
        prereqs = self.curriculum.prerequisites(node)
        if not prereqs:
            return "approved", "✅ Foundational topic — no prerequisites.", []

        hard_fails = []
        soft_fails = []
        for p in prereqs:
            m = mastery.get(p, 0.0)
            if m < self.soft_threshold:
                hard_fails.append(p)
            elif m < self.threshold:
                soft_fails.append(p)

        if hard_fails:
            labels = [self.curriculum.label(p) for p in hard_fails]
            return "vetoed", f"❌ Prerequisites not met: {', '.join(labels)}", labels
        if soft_fails:
            labels = [self.curriculum.label(p) for p in soft_fails]
            return (
                "challenging",
                f"⚠️ Challenging — prerequisites nearly met: {', '.join(labels)}",
                labels,
            )
        return "approved", "✅ All prerequisites mastered.", []

    def _rank(self, node: str, mastery: Dict[str, float]) -> float:
        """Score a topic from mastery gap, readiness and downstream reach.

        Combines 0.40 * gap + 0.35 * readiness + 0.25 * downstream plus a
        partial-progress boost, rounded to three decimals.
        """
        current = mastery.get(node, 0.0)
        gap = min(max(0.0, self.threshold - current) / self.threshold, 1.0)
        prereqs = self.curriculum.prerequisites(node)
        if prereqs:
            met = sum(1 for p in prereqs if mastery.get(p, 0.0) >= self.threshold)
            readiness = met / len(prereqs)
        else:
            readiness = 1.0
        downstream = self._downstream.get(node, 0.0)
        boost = 0.0
        # Topics the learner has started but not mastered get a small boost.
        if 0.10 <= current < self.threshold:
            boost = 0.15 * (current / self.threshold)
        return round(0.40 * gap + 0.35 * readiness + 0.25 * downstream + boost, 3)

    def what_if(self, node: str) -> Dict:
        """Describe what a topic unlocks and what its prerequisites are.

        Returns:
            Dict with ``"direct_unlocks"``, ``"all_unlocks"`` and
            ``"blocked_by"`` (lists of ``{"id", "label"}``) and
            ``"total_unlocked"`` (the number of descendants).
        """
        direct = [
            {"id": s, "label": self.curriculum.label(s)}
            for s in self.curriculum.successors(node)
        ]
        all_down = [
            {"id": d, "label": self.curriculum.label(d)}
            for d in self.curriculum.descendants(node)
        ]
        blocked = [
            {"id": p, "label": self.curriculum.label(p)}
            for p in self.curriculum.prerequisites(node)
        ]
        return {
            "direct_unlocks": direct,
            "all_unlocks": all_down,
            "blocked_by": blocked,
            "total_unlocked": len(all_down),
        }


# ═════════════════════════════════════════════════════════════════════════════
# MODEL LOADER (from your model_loader.py, adapted for inline use)
# ═════════════════════════════════════════════════════════════════════════════

def _build_model(model_type: str, cfg: Dict) -> nn.Module:
    """Instantiate the model class named by ``model_type`` from a config dict."""
    common = {
        "num_skills": cfg.get("num_skills", 5737),
        "embed_dim": cfg.get("embed_dim", 128),
        "num_heads": cfg.get("num_heads", 8),
        # Must match the checkpoint; with strict=False a mismatch would
        # silently leave layers randomly initialised.
        "num_layers": cfg.get("num_layers", 2),
        "dropout": cfg.get("dropout", 0.2),
        "max_seq_len": cfg.get("max_seq_len", 200),
    }
    if model_type == "SAKTWithDecay":
        return SAKTWithDecay(decay_init=cfg.get("decay_init", 1.0), **common)
    return SAKTModel(**common)


def load_model_from_hub(device: str = "cpu"):
    """
    Load SAKT model from HuggingFace Hub.
    Tries v0.2.0 (SAKTWithDecay) first, then v0.1.0 (SAKTModel).
    Returns (model, model_type_str) or (None, "unavailable").

    Each candidate checkpoint is tried in order; a candidate that fails to
    download or load is logged and skipped (best effort).
    """
    candidates = [
        ("models/sakt_decay_best.pt", "SAKTWithDecay"),
        ("models/sakt_vanilla_best.pt", "SAKTModel"),
        ("models/sakt_model.pt", "SAKTModel"),
        ("sakt_model.pt", "SAKTModel"),
    ]
    for filename, model_type in candidates:
        try:
            path = _hub_download(filename)
            # SECURITY NOTE: weights_only=False unpickles arbitrary objects.
            # Acceptable only while HF_REPO is a trusted repository.
            payload = torch.load(path, map_location=device, weights_only=False)

            if isinstance(payload, dict) and "state_dict" in payload:
                # v0.2.0 format: {"state_dict": ..., "model_type": ..., "config": ...}
                cfg = payload.get("config", {}) or {}
                state_dict = payload["state_dict"]
                resolved_type = model_type
            else:
                # v0.1.0 format: raw state_dict + separate config.json
                with open(_hub_download("config.json"), encoding="utf-8") as f:
                    cfg = json.load(f)
                state_dict = payload
                resolved_type = "SAKTModel"

            model = _build_model(resolved_type, cfg)
            load_result = model.load_state_dict(state_dict, strict=False)
            if load_result.missing_keys or load_result.unexpected_keys:
                logger.warning(
                    "Checkpoint %s loaded with key mismatches (missing=%s, unexpected=%s)",
                    filename,
                    load_result.missing_keys,
                    load_result.unexpected_keys,
                )
            model.eval()
            model.to(device)
            return model, resolved_type
        except Exception:
            # Best effort: fall through to the next candidate, but leave a trace.
            logger.warning("Could not load checkpoint %s", filename, exc_info=True)
            continue
    return None, "unavailable"


# ═════════════════════════════════════════════════════════════════════════════
# STREAMLIT UI (your exact design, adapted for self-contained backend)
# ═════════════════════════════════════════════════════════════════════════════

st.set_page_config(
    page_title="PLRS · Logic Engine",
    page_icon="🧠",
    layout="wide",
    initial_sidebar_state="expanded",
)

st.markdown(""" """, unsafe_allow_html=True)


# ── Load everything from HF Hub ─────────────────────────────────────────────

@st.cache_resource(show_spinner="Loading curriculum & model from HuggingFace...")
def load_pipelines():
    """Download both knowledge maps and the model; build one pipeline per domain.

    Returns:
        ``(pipelines, has_model, model_type)`` where ``pipelines`` maps
        ``"math"`` / ``"cs"`` to a ``PLRSPipeline``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Download knowledge maps
    math_path = _hub_download("data/knowledge_maps/math_dag.json")
    cs_path = _hub_download("data/knowledge_maps/cs_dag.json")
    math_curriculum = load_dag(math_path, domain="Nigerian SS Mathematics")
    cs_curriculum = load_dag(cs_path, domain="CS Fundamentals")

    # Load model (v0.2.0 or v0.1.0)
    model, model_type = load_model_from_hub(device=str(device))

    pipelines = {}
    for key, curriculum in [("math", math_curriculum), ("cs", cs_curriculum)]:
        pipeline = PLRSPipeline(curriculum)
        if model is not None:
            pipeline._model = model
        pipelines[key] = pipeline
    return pipelines, model is not None, model_type


@st.cache_data
def load_skill_encoder():
    """Download and load the skill encoder CSV.

    Returns ``None`` when the file cannot be downloaded or parsed.
    """
    try:
        path = _hub_download("data/skill_encoder_v2.csv")
        return pd.read_csv(path)
    except Exception:
        return None


pipelines, has_model, model_type = load_pipelines()
skill_encoder = load_skill_encoder()

# ── Sidebar ─────────────────────────────────────────────────────────────────
with st.sidebar:
    st.markdown("### 🧠 PLRS")
    st.markdown('\n\nLOGIC ENGINE v0.2.0\n\n', unsafe_allow_html=True)
    if has_model:
        st.markdown(f'\n\n● {model_type} LOADED\n\n', unsafe_allow_html=True)
    else:
        st.markdown('\n\n● MANUAL MODE\n\n', unsafe_allow_html=True)
    st.markdown("---")

    domain_label = st.selectbox("Curriculum", ["Nigerian SS Mathematics", "CS Fundamentals"])
    domain_key = "math" if "Mathematics" in domain_label else "cs"
    pipeline = pipelines[domain_key]
    curriculum = pipeline.curriculum
    st.markdown("---")

    threshold = st.slider("Mastery threshold", 0.50, 0.90, 0.70, 0.05)
    soft_threshold = st.slider("Challenging threshold", 0.20, 0.65, 0.50, 0.05)
    top_n = st.slider("Top N recommendations", 3, 10, 5)
    pipeline.threshold = threshold
    pipeline.soft_threshold = soft_threshold
    pipeline.top_n = top_n
    st.markdown("---")

    st.markdown(f'\n\nNODES: {curriculum.num_nodes}\n\n', unsafe_allow_html=True)
    st.markdown(f'\n\nEDGES: {curriculum.num_edges}\n\n', unsafe_allow_html=True)
    st.markdown(f'\n\nMODEL: {model_type}\n\n', unsafe_allow_html=True)
    st.markdown(f'\n\nVIOLATION RATE: 0.0%\n\n', unsafe_allow_html=True)
    st.markdown("---")
    st.markdown('\n\ngithub.com/clementina-tom/plrs\n\n', unsafe_allow_html=True)

# ── Header ──────────────────────────────────────────────────────────────────
st.markdown("""
Logic Engine Personalized Learning · Constraint-Aware · SAKT + DAG
""", unsafe_allow_html=True)

# ── Tabs ────────────────────────────────────────────────────────────────────
tab1, tab2, tab3 = st.tabs(["RECOMMENDATIONS", "WHAT-IF SIMULATOR", "CURRICULUM MAP"])

# Maps an activity type from the skill encoder to a curriculum topic id,
# per domain.
ACTIVITY_TO_DOMAIN = {
    "math": {
        "oucontent": "algebraic_expressions",
        "forumng": "statistics_basic",
        "homepage": "whole_numbers",
        "subpage": "plane_shapes",
        "resource": "indices",
        "url": "number_bases",
        "ouwiki": "proportion_variation",
        "glossary": "algebraic_factorization",
        "quiz": "quadratic_equations",
    },
    "cs": {
        "oucontent": "programming_concepts",
        "forumng": "ethics_technology",
        "homepage": "computer_basics",
        "subpage": "html_basics",
        "resource": "networking_fundamentals",
        "url": "internet_basics",
        "ouwiki": "cloud_basics",
        "glossary": "intro_databases",
        "quiz": "python_basics",
    },
}

# ════════════════════════════════════════════════════════════════════════════
# TAB 1 — RECOMMENDATIONS
# ════════════════════════════════════════════════════════════════════════════
with tab1:
    col_left, col_right = st.columns([1, 1.4], gap="large")

    with col_left:
        st.markdown('\nLearner Profile\n', unsafe_allow_html=True)
        mode = st.radio(
            "Input mode",
            ["Manual sliders", "Simulate student"],
            horizontal=True,
            label_visibility="collapsed",
        )
        mastery_scores = {}

        if mode == "Manual sliders":
            for node in curriculum.nodes:
                label = curriculum.label(node)
                level = curriculum.level(node)
                val = st.slider(
                    f"{label}", 0.0, 1.0, 0.0, 0.05,
                    key=f"mastery_{node}",
                    help=f"Level: {level}"
                )
                mastery_scores[node] = val
        else:
            seq_len = st.slider("Sequence length", 10, 200, 50)
            seed = st.number_input("Student seed", 1, 9999, 42)
            # Global seeding keeps a given student seed reproducible.
            np.random.seed(int(seed))
            mapping = ACTIVITY_TO_DOMAIN[domain_key]
            node_ids = set(curriculum.nodes)

            if skill_encoder is not None:
                available_skills = skill_encoder["skill_id"].tolist()
                sim_skills = np.random.choice(available_skills, seq_len).tolist()
                # One-time lookup table (first row wins per skill id) instead
                # of scanning the DataFrame for every simulated interaction.
                activity_by_skill = {}
                for sid, act_type in zip(
                    available_skills, skill_encoder["activity_type"].tolist()
                ):
                    activity_by_skill.setdefault(sid, act_type)
            else:
                n_skills = 5736
                sim_skills = np.random.randint(0, n_skills, seq_len).tolist()
                # Statistical fallback logic: skill_id % 100 is bucketed into
                # activity types with these relative widths.
                activity_types = list(mapping.keys())
                activity_probs = [0.38, 0.20, 0.15, 0.10, 0.06, 0.04, 0.03, 0.02, 0.02]
                thresholds = [int(p * 100) for p in activity_probs]
                # Give any rounding remainder to the last bucket so the
                # widths cover 0..99.
                thresholds[-1] += 100 - sum(thresholds)

            sim_corrects = np.random.randint(0, 2, seq_len).tolist()

            topic_scores = {}
            for skill_id, correct in zip(sim_skills, sim_corrects):
                if skill_encoder is not None:
                    if skill_id not in activity_by_skill:
                        continue
                    act = activity_by_skill[skill_id]
                else:
                    act_idx = skill_id % 100
                    cumulative = 0
                    act = activity_types[-1]
                    for a, thresh in zip(activity_types, thresholds):
                        cumulative += thresh
                        if act_idx < cumulative:
                            act = a
                            break

                topic_id = mapping.get(act)
                if topic_id and topic_id in node_ids:
                    prob = float(correct) * 0.6 + 0.1 + np.random.random() * 0.3
                    # Keep the best score seen for each topic.
                    topic_scores[topic_id] = max(topic_scores.get(topic_id, 0.0), min(prob, 1.0))

            mastery_scores = {n: 0.0 for n in curriculum.nodes}
            mastery_scores.update(topic_scores)
            st.success(f"Simulated {seq_len} real interactions → {len(topic_scores)} topics mapped")

            if topic_scores:
                st.markdown('\nMapped Mastery Signal\n', unsafe_allow_html=True)
                for tid, score in sorted(topic_scores.items(), key=lambda x: -x[1]):
                    pct = int(score * 100)
                    color = "#22c55e" if score >= threshold else "#f59e0b" if score >= soft_threshold else "#ef4444"
                    st.markdown(f"""
{curriculum.label(tid)} {pct}%
""", unsafe_allow_html=True)

        run = st.button("⚡ Generate Recommendations", type="primary", use_container_width=True)

    with col_right:
        if run or mode == "Simulate student":
            results = pipeline.recommend_from_mastery(mastery_scores)
            summary = results["mastery_summary"]
            stats = results["stats"]
            mastery_pct = int(summary["mastery_rate"] * 100)
            vrate_pct = int(stats["prerequisite_violation_rate"] * 100)

            st.markdown(f"""
Mastered
{summary['mastered']}/{summary['total_topics']}
{mastery_pct}% rate
Approved
{stats['approved_count']}
ready to learn
Challenging
{stats['challenging_count']}
partial prereqs
Violation rate
{vrate_pct}%
blocked topics
""", unsafe_allow_html=True)

            if results["approved"]:
                st.markdown('\n✅ Approved Recommendations\n', unsafe_allow_html=True)
                for i, rec in enumerate(results["approved"]):
                    score_pct = int(rec["score"] * 100)
                    st.markdown(f"""
{i+1}. {rec['topic_label']}
score: {rec['score']:.3f}  ·  mastery: {int(rec['mastery']*100)}%  ·  unlocks: {rec['downstream_count']}
{rec['reasoning']}
""", unsafe_allow_html=True)
            else:
                st.info("No approved topics — lower the mastery threshold or set some mastery levels.")

            if results["challenging"]:
                st.markdown('\n⚠️ Challenging\n', unsafe_allow_html=True)
                for rec in results["challenging"]:
                    score_pct = int(rec["score"] * 100)
                    unmet = ", ".join(rec["unmet_prerequisites"]) or "—"
                    st.markdown(f"""
{rec['topic_label']}
score: {rec['score']:.3f}  ·  strengthen: {unmet}
{rec['reasoning']}
""", unsafe_allow_html=True)

            if results["vetoed"]:
                with st.expander(f"❌ Vetoed topics ({stats['vetoed_count']} total — prerequisite check failed)"):
                    for rec in results["vetoed"]:
                        unmet = ", ".join(rec["unmet_prerequisites"]) or "—"
                        st.markdown(f"""
{rec['topic_label']}
blocked by: {unmet}
""", unsafe_allow_html=True)
        else:
            st.markdown("""
SET MASTERY LEVELS · THEN GENERATE
""", unsafe_allow_html=True)

# ════════════════════════════════════════════════════════════════════════════
# TAB 2 — WHAT-IF SIMULATOR
# ════════════════════════════════════════════════════════════════════════════
with tab2:
    st.markdown('\nPrerequisite Impact Simulator\n', unsafe_allow_html=True)
    st.markdown(
        '\n\nSelect any topic to see what it unlocks and what currently blocks it.\n\n',
        unsafe_allow_html=True,
    )

    node_options = {curriculum.label(n): n for n in curriculum.nodes}
    selected_label = st.selectbox("Select topic", list(node_options.keys()))
    selected_id = node_options[selected_label]
    wi = pipeline.what_if(selected_id)

    col_a, col_b = st.columns(2, gap="large")

    with col_a:
        st.markdown('\n🔓 What This Unlocks\n', unsafe_allow_html=True)
        if wi["direct_unlocks"]:
            st.markdown("**Directly unlocks:**")
            st.markdown("".join(f'{u["label"]}' for u in wi["direct_unlocks"]), unsafe_allow_html=True)
        else:
            st.markdown('Leaf node — no further topics.', unsafe_allow_html=True)

        if wi["all_unlocks"]:
            st.markdown(f"**All downstream ({wi['total_unlocked']}):**")
            st.markdown("".join(f'{u["label"]}' for u in wi["all_unlocks"]), unsafe_allow_html=True)

        st.markdown(f"""
Total Unlocked
{wi['total_unlocked']}
""", unsafe_allow_html=True)

    with col_b:
        st.markdown('\n🔒 What Blocks This\n', unsafe_allow_html=True)
        if wi["blocked_by"]:
            st.markdown("**Prerequisites:**")
            st.markdown("".join(f'{b["label"]}' for b in wi["blocked_by"]), unsafe_allow_html=True)
        else:
            st.markdown('Root topic — no prerequisites.', unsafe_allow_html=True)

# ════════════════════════════════════════════════════════════════════════════
# TAB 3 — CURRICULUM MAP
# ════════════════════════════════════════════════════════════════════════════
with tab3:
    st.markdown('\nCurriculum Knowledge Graph\n', unsafe_allow_html=True)
    col_info, col_table = st.columns([1, 2], gap="large")

    with col_info:
        roots = [n for n in curriculum.nodes if not curriculum.prerequisites(n)]
        leaves = [n for n in curriculum.nodes if not curriculum.successors(n)]
        st.markdown(f"""
Domain
{curriculum.domain}
Topics
{curriculum.num_nodes}
Prerequisite Edges
{curriculum.num_edges}
""", unsafe_allow_html=True)
        st.markdown('\nRoot Topics\n', unsafe_allow_html=True)
        st.markdown("".join(f'{curriculum.label(r)}' for r in roots), unsafe_allow_html=True)
        st.markdown('\nLeaf Topics\n', unsafe_allow_html=True)
        st.markdown("".join(f'{curriculum.label(leaf)}' for leaf in leaves), unsafe_allow_html=True)

    with col_table:
        st.markdown('\nAll Topics\n', unsafe_allow_html=True)
        rows = [
            {
                "Topic": curriculum.label(node),
                "Level": curriculum.level(node),
                "Prerequisites": len(curriculum.prerequisites(node)),
                "Unlocks (direct)": len(curriculum.successors(node)),
                "Total Downstream": len(curriculum.descendants(node)),
            }
            for node in curriculum.nodes
        ]
        df = pd.DataFrame(rows).sort_values("Total Downstream", ascending=False)
        st.dataframe(df, use_container_width=True, height=480, hide_index=True)