#!/usr/bin/env python3
"""
PLRS — Logic Engine v0.2.0
HuggingFace Space entry point (self-contained).

Loads all assets from HuggingFace Hub (Clementio/PLRS).
Works without local files — everything is downloaded on first run.
"""

import json
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import numpy as np
import pandas as pd
import streamlit as st
import torch
import torch.nn as nn
import networkx as nx

# ── HuggingFace Hub helpers ─────────────────────────────────────────────────
from huggingface_hub import hf_hub_download

# Hub repository that hosts every asset this app downloads.
HF_REPO = "Clementio/PLRS"


def _hub_download(filename: str) -> str:
    """Fetch ``filename`` from the ``HF_REPO`` Hub repository.

    Thin wrapper over ``hf_hub_download``; the Hub client handles caching.

    Args:
        filename: Path of the file inside the repository.

    Returns:
        Whatever ``hf_hub_download`` returns (annotated as ``str``; the local
        path of the downloaded file per the Hub client documentation).
    """
    local_path = hf_hub_download(filename=filename, repo_id=HF_REPO)
    return local_path
# ═════════════════════════════════════════════════════════════════════════════
# INLINE: plrs.model.sakt  (SAKTModel — v0.1.0 architecture)
# ═════════════════════════════════════════════════════════════════════════════

def _causal_mask(seq_len, device):
    """Return a (seq_len, seq_len) additive mask with -inf above the diagonal.

    Position ``i`` may attend to positions ``<= i`` only.
    """
    blocked = torch.full((seq_len, seq_len), float("-inf"), device=device)
    return torch.triu(blocked, diagonal=1)


class SAKTModel(nn.Module):
    """Self-Attentive Knowledge Tracing (SAKT) — PyTorch implementation.

    Args:
        num_skills: Sizes the embedding tables: the interaction table has
            ``2 * num_skills + 1`` rows and the target-skill table
            ``num_skills + 1`` rows; index 0 is the padding row in both.
        embed_dim: Width of all embeddings and of the transformer.
        num_heads: Attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
        max_seq_len: The positional table has ``max_seq_len + 1`` rows, so
            ``forward`` accepts sequences of at most that many steps.
        dropout: Dropout probability (encoder layers and input embeddings).
    """

    def __init__(
        self,
        num_skills: int,
        embed_dim: int = 128,
        num_heads: int = 8,
        num_layers: int = 2,
        max_seq_len: int = 200,
        dropout: float = 0.2,
    ):
        super().__init__()
        self.num_skills = num_skills

        # Attribute names are part of the checkpoint format (state_dict keys).
        self.interaction_embed = nn.Embedding(num_skills * 2 + 1, embed_dim, padding_idx=0)
        self.skill_embed = nn.Embedding(num_skills + 1, embed_dim, padding_idx=0)
        self.pos_embed = nn.Embedding(max_seq_len + 1, embed_dim)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_dim,
            nhead=num_heads,
            dropout=dropout,
            batch_first=True,
            dim_feedforward=embed_dim * 4,
            norm_first=True,
        )
        self.transformer = nn.TransformerEncoder(
            encoder_layer, num_layers=num_layers, enable_nested_tensor=False
        )
        self.dropout = nn.Dropout(dropout)
        self.output = nn.Linear(embed_dim, 1)

    def forward(self, interactions, target_skills, mask):
        """Score every step of a batch of interaction sequences.

        Args:
            interactions: 2-D integer tensor ``(batch, seq_len)`` of indices
                into the interaction embedding table.
            target_skills: Integer tensor of indices into the skill table
                (presumably also ``(batch, seq_len)`` — TODO confirm).
            mask: Tensor multiplied onto the hidden states after
                ``unsqueeze(-1)``; presumably 1 for real steps and 0 for
                padding — TODO confirm.

        Returns:
            Tensor ``(batch, seq_len)`` of raw scores (no sigmoid is applied
            here).
        """
        batch_size, seq_len = interactions.shape
        device = interactions.device
        keep = mask.unsqueeze(-1).float()

        positions = torch.arange(seq_len, device=device).unsqueeze(0).expand(batch_size, -1)
        x = self.interaction_embed(interactions) + self.pos_embed(positions)
        x = self.dropout(x * keep)

        # The mask is passed explicitly, so is_causal stays False.
        x = self.transformer(x, mask=_causal_mask(seq_len, device), is_causal=False)

        # Zero masked steps again, then add the target-skill embedding.
        x = x * keep + self.skill_embed(target_skills)
        return self.output(x).squeeze(-1)


# ═════════════════════════════════════════════════════════════════════════════
# INLINE: plrs.model.sakt_decay  (SAKTWithDecay — v0.2.0)
# ═════════════════════════════════════════════════════════════════════════════

class SAKTWithDecay(nn.Module):
    """SAKT with a learnable exponential position decay on the inputs (v0.2.0).

    Same architecture and constructor arguments as ``SAKTModel`` plus:

    Args:
        decay_init: Initial value of the learnable scalar ``decay``. Integers
            are accepted and converted to float.
    """

    def __init__(
        self,
        num_skills: int,
        embed_dim: int = 128,
        num_heads: int = 8,
        num_layers: int = 2,
        max_seq_len: int = 200,
        dropout: float = 0.2,
        decay_init: float = 1.0,
    ):
        super().__init__()
        self.num_skills = num_skills
        self.embed_dim = embed_dim

        # float(): an integer tensor cannot require gradients, so an int
        # decay_init (e.g. 1 read from a saved config) used to raise here.
        self.decay = nn.Parameter(torch.tensor(float(decay_init)))

        self.interaction_embed = nn.Embedding(num_skills * 2 + 1, embed_dim, padding_idx=0)
        self.skill_embed = nn.Embedding(num_skills + 1, embed_dim, padding_idx=0)
        self.pos_embed = nn.Embedding(max_seq_len + 1, embed_dim)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_dim,
            nhead=num_heads,
            dropout=dropout,
            batch_first=True,
            dim_feedforward=embed_dim * 4,
            norm_first=True,
        )
        self.transformer = nn.TransformerEncoder(
            encoder_layer, num_layers=num_layers, enable_nested_tensor=False
        )
        self.dropout = nn.Dropout(dropout)
        self.output = nn.Linear(embed_dim, 1)

    def forward(self, interactions, target_skills, mask):
        """Score every step; same contract as ``SAKTModel.forward``.

        The summed interaction + position embeddings are scaled by
        ``exp(-decay * position_index)`` before entering the transformer.

        Returns:
            Tensor ``(batch, seq_len)`` of raw scores.
        """
        batch_size, seq_len = interactions.shape
        device = interactions.device
        keep = mask.unsqueeze(-1).float()

        positions = torch.arange(seq_len, device=device).unsqueeze(0).expand(batch_size, -1)
        x = self.interaction_embed(interactions) + self.pos_embed(positions)

        # NOTE(review): for decay > 0 the weight shrinks as the position index
        # grows, so *later* steps are attenuated most. The original comment
        # said "apply decay to older positions" — confirm the intended
        # direction. Left unchanged because trained checkpoints depend on it.
        steps = torch.arange(seq_len, device=device).float()
        decay_weights = torch.exp(-self.decay * steps)
        x = x * decay_weights.view(1, seq_len, 1)

        x = self.dropout(x * keep)
        x = self.transformer(x, mask=_causal_mask(seq_len, device), is_causal=False)
        x = x * keep + self.skill_embed(target_skills)
        return self.output(x).squeeze(-1)
# ═════════════════════════════════════════════════════════════════════════════
# INLINE: plrs.curriculum.loader + Curriculum class
# ═════════════════════════════════════════════════════════════════════════════

class Curriculum:
    """Wrapper around a NetworkX DiGraph for prerequisite knowledge maps.

    An edge ``u -> v`` is read as "``u`` is a prerequisite of ``v``":
    ``prerequisites`` returns graph predecessors, ``successors`` returns
    graph successors.
    """

    def __init__(self, graph: "nx.DiGraph", domain: str = ""):
        self._g = graph
        self.domain = domain

    @property
    def nodes(self) -> List[str]:
        """All node ids, in the graph's iteration order."""
        return list(self._g.nodes())

    @property
    def num_nodes(self) -> int:
        """Number of nodes in the graph."""
        return self._g.number_of_nodes()

    @property
    def num_edges(self) -> int:
        """Number of edges in the graph."""
        return self._g.number_of_edges()

    def label(self, node: str) -> str:
        """Return the node's ``label`` attribute, or the id itself if unset."""
        return self._g.nodes[node].get("label", node)

    def level(self, node: str) -> str:
        """Return the node's ``level`` attribute, or ``""`` if unset."""
        return self._g.nodes[node].get("level", "")

    def prerequisites(self, node: str) -> List[str]:
        """Direct predecessors of ``node``."""
        return list(self._g.predecessors(node))

    def successors(self, node: str) -> List[str]:
        """Direct successors of ``node``."""
        return list(self._g.successors(node))

    def descendants(self, node: str) -> List[str]:
        """Every node reachable from ``node`` (order is not guaranteed)."""
        return list(nx.descendants(self._g, node))


def load_dag(path: str, domain: str = "") -> Curriculum:
    """Build a ``Curriculum`` from a JSON knowledge-map file.

    The file must contain ``"nodes"`` (objects with ``"id"`` and optional
    ``"label"``, ``"level"``, ``"term"``) and ``"edges"`` (objects with
    ``"from"`` and ``"to"``).

    Args:
        path: Location of the JSON file.
        domain: Free-form domain name stored on the returned curriculum.

    Returns:
        The curriculum wrapping the constructed directed graph.

    Raises:
        ValueError: If the edges form a cycle. (Previously an ``assert``,
            which is skipped when Python runs with ``-O``.)
        KeyError: If a required key is missing from the JSON data.
    """
    # Explicit encoding: labels may contain non-ASCII text and the platform
    # default is not guaranteed to be UTF-8.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    g = nx.DiGraph()
    for node in data["nodes"]:
        node_id = node["id"]
        g.add_node(
            node_id,
            label=node.get("label", node_id),
            level=node.get("level", ""),
            term=node.get("term", ""),
        )
    for edge in data["edges"]:
        g.add_edge(edge["from"], edge["to"])

    if not nx.is_directed_acyclic_graph(g):
        raise ValueError("Cycle detected in knowledge map!")
    return Curriculum(g, domain=domain)
# ═════════════════════════════════════════════════════════════════════════════
# INLINE: plrs.pipeline (PLRSPipeline)
# ═════════════════════════════════════════════════════════════════════════════

class PLRSPipeline:
    """Three-layer recommendation pipeline: SAKT → DAG constraints → Ranking.

    Args:
        curriculum: Prerequisite graph wrapper; must provide ``nodes``,
            ``num_nodes``, ``label``, ``prerequisites``, ``successors`` and
            ``descendants``.
        threshold: Mastery score at or above which a topic counts as mastered.
        soft_threshold: A prerequisite below this score vetoes the topic; one
            between ``soft_threshold`` and ``threshold`` makes it
            "challenging".
        top_n: Maximum number of approved recommendations returned.
    """

    def __init__(self, curriculum: "Curriculum", threshold: float = 0.70,
                 soft_threshold: float = 0.50, top_n: int = 5):
        self.curriculum = curriculum
        self.threshold = threshold
        self.soft_threshold = soft_threshold
        self.top_n = top_n
        self._model = None

        # Descendant counts are computed once and reused for both the
        # normalised importance score and the per-entry "downstream_count".
        self._descendant_counts = {
            n: len(curriculum.descendants(n)) for n in curriculum.nodes
        }
        # `or 1`: a curriculum with nodes but no edges has a maximum of 0,
        # which used to raise ZeroDivisionError below.
        largest = max(self._descendant_counts.values(), default=0) or 1
        self._downstream = {
            n: count / largest for n, count in self._descendant_counts.items()
        }

    def _downstream_count(self, node: str) -> int:
        """Number of descendants of ``node`` (cached at construction)."""
        cached = self._descendant_counts.get(node)
        if cached is None:
            # Node unknown at construction time: compute on demand.
            cached = len(self.curriculum.descendants(node))
        return cached

    def recommend_from_mastery(self, mastery_scores: Dict[str, float]) -> Dict:
        """Classify every topic and return ranked recommendations.

        Args:
            mastery_scores: Topic id → mastery score; missing topics count
                as 0.0.

        Returns:
            A dict with:
              * ``"approved"``: up to ``top_n`` unmastered topics whose
                prerequisites are all met, best score first;
              * ``"challenging"``: up to 3 unmastered topics with
                nearly-met prerequisites (score scaled by 0.8);
              * ``"vetoed"``: up to 5 topics with a prerequisite below
                ``soft_threshold``;
              * ``"mastery_summary"`` and ``"stats"``: aggregate counts and
                rates, computed before the lists are truncated.
        """
        approved, challenging, vetoed = [], [], []
        for node in self.curriculum.nodes:
            status, reasoning, unmet = self._validate(node, mastery_scores)
            current = mastery_scores.get(node, 0.0)
            entry = {
                "topic_id": node,
                "topic_label": self.curriculum.label(node),
                "mastery": round(current, 3),
                "reasoning": reasoning,
                "status": status,
                "unmet_prerequisites": unmet,
                "downstream_count": self._downstream_count(node),
            }
            # Already-mastered topics are never recommended; vetoed topics
            # are reported regardless of their own mastery.
            needs_work = current < self.threshold
            if status == "approved" and needs_work:
                entry["score"] = self._rank(node, mastery_scores)
                approved.append(entry)
            elif status == "challenging" and needs_work:
                entry["score"] = self._rank(node, mastery_scores) * 0.8
                challenging.append(entry)
            elif status == "vetoed":
                vetoed.append(entry)

        approved.sort(key=lambda x: x["score"], reverse=True)
        challenging.sort(key=lambda x: x["score"], reverse=True)

        total = self.curriculum.num_nodes
        mastered_count = sum(
            1 for n in self.curriculum.nodes
            if mastery_scores.get(n, 0.0) >= self.threshold
        )
        return {
            "approved": approved[: self.top_n],
            "challenging": challenging[:3],
            "vetoed": vetoed[:5],
            "mastery_summary": {
                "total_topics": total,
                "mastered": mastered_count,
                "mastery_rate": round(mastered_count / max(total, 1), 3),
            },
            "stats": {
                "approved_count": len(approved),
                "challenging_count": len(challenging),
                "vetoed_count": len(vetoed),
                "prerequisite_violation_rate": round(len(vetoed) / max(total, 1), 3),
            },
        }

    def _validate(self, node: str, mastery: Dict[str, float]) -> Tuple[str, str, List[str]]:
        """Check ``node``'s direct prerequisites against the thresholds.

        Returns:
            ``(status, reasoning, unmet)`` where ``status`` is ``"approved"``,
            ``"challenging"`` or ``"vetoed"``, ``reasoning`` is a display
            string, and ``unmet`` lists the *labels* (not ids) of the
            prerequisites responsible for the status.
        """
        prereqs = self.curriculum.prerequisites(node)
        if not prereqs:
            return "approved", "✅ Foundational topic — no prerequisites.", []

        hard_fails = []
        soft_fails = []
        for p in prereqs:
            m = mastery.get(p, 0.0)
            if m < self.soft_threshold:
                hard_fails.append(p)
            elif m < self.threshold:
                soft_fails.append(p)

        # Any hard failure wins over soft failures.
        if hard_fails:
            labels = [self.curriculum.label(p) for p in hard_fails]
            return "vetoed", f"❌ Prerequisites not met: {', '.join(labels)}", labels
        if soft_fails:
            labels = [self.curriculum.label(p) for p in soft_fails]
            return "challenging", f"⚠️ Challenging — prerequisites nearly met: {', '.join(labels)}", labels
        return "approved", "✅ All prerequisites mastered.", []

    def _rank(self, node: str, mastery: Dict[str, float]) -> float:
        """Priority score for ``node``, rounded to 3 decimals.

        ``0.40 * gap + 0.35 * readiness + 0.25 * downstream + boost`` where
        ``gap`` is the relative distance to the mastery threshold,
        ``readiness`` the share of direct prerequisites at or above the
        threshold, ``downstream`` the normalised descendant count, and
        ``boost`` up to 0.15 for topics already started (mastery >= 0.10).
        """
        current = mastery.get(node, 0.0)
        threshold = self.threshold

        gap = 0.0
        boost = 0.0
        if threshold > 0:  # guards the divisions below
            gap = min(max(0.0, threshold - current) / threshold, 1.0)
            if 0.10 <= current < threshold:
                boost = 0.15 * (current / threshold)

        prereqs = self.curriculum.prerequisites(node)
        if prereqs:
            met = sum(1 for p in prereqs if mastery.get(p, 0.0) >= threshold)
            readiness = met / len(prereqs)
        else:
            readiness = 1.0

        downstream = self._downstream.get(node, 0.0)
        return round(0.40 * gap + 0.35 * readiness + 0.25 * downstream + boost, 3)

    def what_if(self, node: str) -> Dict:
        """Describe what ``node`` leads to and what precedes it.

        Returns:
            A dict of ``{"id", "label"}`` lists: ``"direct_unlocks"`` (direct
            successors), ``"all_unlocks"`` (all descendants) and
            ``"blocked_by"`` (direct prerequisites, regardless of mastery),
            plus ``"total_unlocked"`` (the number of descendants).
        """
        label = self.curriculum.label

        def describe(ids):
            return [{"id": i, "label": label(i)} for i in ids]

        all_down = describe(self.curriculum.descendants(node))
        return {
            "direct_unlocks": describe(self.curriculum.successors(node)),
            "all_unlocks": all_down,
            "blocked_by": describe(self.curriculum.prerequisites(node)),
            "total_unlocked": len(all_down),
        }
# ═════════════════════════════════════════════════════════════════════════════
# MODEL LOADER (from your model_loader.py, adapted for inline use)
# ═════════════════════════════════════════════════════════════════════════════

def _build_sakt_model(model_type: str, cfg: dict):
    """Instantiate the SAKT variant named by ``model_type`` from ``cfg``.

    Missing config keys fall back to the defaults below. ``num_layers`` is
    read from the config too (default 2, the constructors' own default), so
    checkpoints trained with a different depth get a matching architecture.
    """
    kwargs = dict(
        num_skills=cfg.get("num_skills", 5737),
        embed_dim=cfg.get("embed_dim", 128),
        num_heads=cfg.get("num_heads", 8),
        num_layers=cfg.get("num_layers", 2),
        dropout=cfg.get("dropout", 0.2),
        max_seq_len=cfg.get("max_seq_len", 200),
    )
    if model_type == "SAKTWithDecay":
        return SAKTWithDecay(decay_init=cfg.get("decay_init", 1.0), **kwargs)
    return SAKTModel(**kwargs)


def load_model_from_hub(device: str = "cpu"):
    """
    Load SAKT model from HuggingFace Hub.
    Tries v0.2.0 (SAKTWithDecay) first, then v0.1.0 (SAKTModel).
    Returns (model, model_type_str) or (None, "unavailable").

    Candidate checkpoints are tried in order; the first one that downloads
    and loads wins. Failures are logged and the next candidate is tried, so
    this function never raises for a missing or unreadable checkpoint.
    """
    import logging

    log = logging.getLogger(__name__)

    candidates = [
        ("models/sakt_decay_best.pt", "SAKTWithDecay"),
        ("models/sakt_vanilla_best.pt", "SAKTModel"),
        ("models/sakt_model.pt", "SAKTModel"),
        ("sakt_model.pt", "SAKTModel"),
    ]
    for filename, model_type in candidates:
        try:
            path = _hub_download(filename)
            # NOTE(security): weights_only=False unpickles arbitrary Python
            # objects. Acceptable only because the checkpoint comes from the
            # project's own Hub repository; never point this at untrusted
            # files.
            payload = torch.load(path, map_location=device, weights_only=False)

            if isinstance(payload, dict) and "state_dict" in payload:
                # v0.2.0 format: {"state_dict": ..., "model_type": ..., "config": ...}
                cfg = payload.get("config") or {}
                state_dict = payload["state_dict"]
                loaded_type = model_type
            else:
                # v0.1.0 format: raw state_dict + separate config.json
                with open(_hub_download("config.json"), encoding="utf-8") as f:
                    cfg = json.load(f)
                state_dict = payload
                loaded_type = "SAKTModel"

            model = _build_sakt_model(loaded_type, cfg)
            # strict=False is kept for compatibility with existing
            # checkpoints, but mismatches are reported: missing keys mean
            # those weights stay randomly initialised.
            mismatch = model.load_state_dict(state_dict, strict=False)
            if mismatch.missing_keys or mismatch.unexpected_keys:
                log.warning(
                    "%s loaded with key mismatch (missing=%s, unexpected=%s)",
                    filename, mismatch.missing_keys, mismatch.unexpected_keys,
                )
            model.eval()
            model.to(device)
            return model, loaded_type
        except Exception as exc:  # best-effort: fall through to next candidate
            log.warning("Skipping checkpoint %s: %s", filename, exc)
            continue
    return None, "unavailable"


# ═════════════════════════════════════════════════════════════════════════════
# STREAMLIT UI (your exact design, adapted for self-contained backend)
# ═════════════════════════════════════════════════════════════════════════════

st.set_page_config(
    page_title="PLRS · Logic Engine",
    page_icon="🧠",
    layout="wide",
    initial_sidebar_state="expanded",
)

# NOTE(review): this markup string is empty in the visible source; the
# original styling block appears to have been lost — restore from upstream.
st.markdown(""" """, unsafe_allow_html=True)


# ── Load everything from HF Hub ─────────────────────────────────────────────

@st.cache_resource(show_spinner="Loading curriculum & model from HuggingFace...")
def load_pipelines():
    """Download both knowledge maps and the model; build one pipeline each.

    Returns:
        ``(pipelines, has_model, model_type)`` where ``pipelines`` maps
        ``"math"`` / ``"cs"`` to a ``PLRSPipeline``, ``has_model`` tells
        whether a checkpoint was loaded, and ``model_type`` is the string
        returned by ``load_model_from_hub``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Download knowledge maps
    math_path = _hub_download("data/knowledge_maps/math_dag.json")
    cs_path = _hub_download("data/knowledge_maps/cs_dag.json")
    curricula = {
        "math": load_dag(math_path, domain="Nigerian SS Mathematics"),
        "cs": load_dag(cs_path, domain="CS Fundamentals"),
    }

    # Load model (v0.2.0 or v0.1.0)
    model, model_type = load_model_from_hub(device=str(device))

    pipelines = {}
    for key, curriculum in curricula.items():
        pipeline = PLRSPipeline(curriculum)
        if model is not None:
            # The same model instance is shared by both pipelines.
            pipeline._model = model
        pipelines[key] = pipeline

    return pipelines, model is not None, model_type


@st.cache_data
def load_skill_encoder():
    """Download and load the skill encoder CSV.

    Returns:
        The parsed ``DataFrame``, or ``None`` if the download or parsing
        fails for any reason (deliberately best-effort).
    """
    try:
        return pd.read_csv(_hub_download("data/skill_encoder_v2.csv"))
    except Exception:
        return None


pipelines, has_model, model_type = load_pipelines()
skill_encoder = load_skill_encoder()

# ── Sidebar ─────────────────────────────────────────────────────────────────
with st.sidebar:
    st.markdown("### 🧠 PLRS")
    # NOTE(review): the rest of the sidebar markup (following source lines)
    # lost its HTML during extraction and is left untouched here.
LOGIC ENGINE v0.2.0
', unsafe_allow_html=True) if has_model: st.markdown(f'● {model_type} LOADED
', unsafe_allow_html=True) else: st.markdown('● MANUAL MODE
', unsafe_allow_html=True) st.markdown("---") domain_label = st.selectbox("Curriculum", ["Nigerian SS Mathematics", "CS Fundamentals"]) domain_key = "math" if "Mathematics" in domain_label else "cs" pipeline = pipelines[domain_key] curriculum = pipeline.curriculum st.markdown("---") threshold = st.slider("Mastery threshold", 0.50, 0.90, 0.70, 0.05) soft_threshold = st.slider("Challenging threshold", 0.20, 0.65, 0.50, 0.05) top_n = st.slider("Top N recommendations", 3, 10, 5) pipeline.threshold = threshold pipeline.soft_threshold = soft_threshold pipeline.top_n = top_n st.markdown("---") st.markdown(f'NODES: {curriculum.num_nodes}
', unsafe_allow_html=True) st.markdown(f'EDGES: {curriculum.num_edges}
', unsafe_allow_html=True) st.markdown(f'MODEL: {model_type}
', unsafe_allow_html=True) st.markdown(f'VIOLATION RATE: 0.0%
', unsafe_allow_html=True) st.markdown("---") st.markdown('github.com/clementina-tom/plrs
', unsafe_allow_html=True) # ── Header ────────────────────────────────────────────────────────────────── st.markdown("""Select any topic to see what it unlocks and what currently blocks it.
', unsafe_allow_html=True) node_options = {curriculum.label(n): n for n in curriculum.nodes} selected_label = st.selectbox("Select topic", list(node_options.keys())) selected_id = node_options[selected_label] wi = pipeline.what_if(selected_id) col_a, col_b = st.columns(2, gap="large") with col_a: st.markdown('