# PLRS / app.py -- Streamlit "Logic Engine" app for the Clementio/PLRS Space.
# NOTE: the following lines were Hugging Face file-viewer chrome captured when
# this file was scraped, kept here as a comment for provenance:
#   Clementio's picture / Upload app.py with huggingface_hub / a15bdc1 verified
#   raw / history blame / 23.5 kB
import streamlit as st
import torch
import torch.nn as nn
import json
import pandas as pd
import networkx as nx
import numpy as np
from huggingface_hub import hf_hub_download
from typing import Dict, List, Optional, Tuple
# Page-level Streamlit configuration (title, icon, wide layout).
st.set_page_config(page_title='Logic Engine', page_icon='🧠', layout='wide')
# Hugging Face Hub repository holding the trained SAKT checkpoint and config.
HF_REPO = 'Clementio/PLRS'
@st.cache_resource
def load_model():
    """Download the SAKT checkpoint and config from the HF Hub, load on CPU.

    Returns:
        (model, config, device): the eval-mode SAKT model, the raw config
        dict from config.json, and the CPU torch.device.

    Cached for the app process lifetime via st.cache_resource.
    """
    config_path = hf_hub_download(repo_id=HF_REPO, filename='config.json')
    with open(config_path) as f:
        config = json.load(f)
    model_path = hf_hub_download(repo_id=HF_REPO, filename='sakt_model.pt')

    class SAKT(nn.Module):
        # Self-Attentive Knowledge Tracing model.  Defined inside load_model
        # so the architecture stays private to checkpoint loading; attribute
        # names must match the saved state_dict exactly.
        def __init__(self, num_skills, embed_dim, num_heads, num_layers, max_seq_len, dropout):
            super(SAKT, self).__init__()
            self.num_skills = num_skills
            # Interaction ids jointly encode (skill, correctness); index 0 is
            # padding, hence num_skills * 2 + 1 rows.
            self.interaction_embed = nn.Embedding(num_skills * 2 + 1, embed_dim, padding_idx=0)
            self.skill_embed = nn.Embedding(num_skills + 1, embed_dim, padding_idx=0)
            self.pos_embed = nn.Embedding(max_seq_len + 1, embed_dim)
            encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads, dropout=dropout, batch_first=True, dim_feedforward=embed_dim * 4, norm_first=True)
            self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers, enable_nested_tensor=False)
            self.dropout = nn.Dropout(dropout)
            self.output = nn.Linear(embed_dim, 1)

        def forward(self, interactions, target_skills, mask, return_attention=False):
            # interactions / target_skills: (batch, seq) long ids.
            # mask: (batch, seq) bool marking real (non-padding) positions.
            batch_size, seq_len = interactions.shape
            positions = torch.arange(seq_len, device=interactions.device).unsqueeze(0).expand(batch_size, -1)
            x = self.interaction_embed(interactions)
            x = x + self.pos_embed(positions)
            # Zero out padded positions before the transformer.
            x = x * mask.unsqueeze(-1).float()
            x = self.dropout(x)
            # Upper-triangular -inf mask prevents attending to future steps.
            # NOTE(review): created on the default device; fine because this
            # app runs on CPU, but would need moving to interactions.device
            # if the model were ever run on GPU.
            causal_mask = torch.triu(torch.full((seq_len, seq_len), float('-inf')), diagonal=1)
            x = self.transformer(x, mask=causal_mask, is_causal=False)
            x = x * mask.unsqueeze(-1).float()
            # Condition each position on the skill being predicted.
            x = x + self.skill_embed(target_skills)
            return self.output(x).squeeze(-1)

    device = torch.device('cpu')
    model = SAKT(num_skills=config['num_skills'], embed_dim=config['embed_dim'], num_heads=config['num_heads'], num_layers=config['num_layers'], max_seq_len=config['max_seq_len'], dropout=config['dropout'])
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()  # inference only: disables dropout
    return model, config, device
@st.cache_resource
def load_knowledge_maps():
    """Load the Mathematics and CS prerequisite DAGs from JSON files on disk.

    Returns:
        (math_dag, cs_dag): two nx.DiGraph objects whose nodes carry
        label/level/term attributes and whose edges point prerequisite -> topic.
    """
    def _dag_from_json(json_path):
        # Each file holds a node list (id/label/level/term) and an edge list.
        with open(json_path) as fh:
            spec = json.load(fh)
        dag = nx.DiGraph()
        for entry in spec['nodes']:
            dag.add_node(entry['id'], label=entry['label'], level=entry['level'], term=entry['term'])
        for link in spec['edges']:
            dag.add_edge(link['from'], link['to'])
        return dag

    math_dag = _dag_from_json('knowledge_maps/math_dag.json')
    cs_dag = _dag_from_json('knowledge_maps/cs_dag.json')
    return math_dag, cs_dag
@st.cache_data
def load_skill_encoder():
    """Return the skill-id -> activity metadata table as a DataFrame."""
    encoder_table = pd.read_csv('data/skill_encoder.csv')
    return encoder_table
class MasteryVector:
    """Per-topic mastery probabilities for one learner over a prerequisite DAG.

    Holds one probability in [0, 1] per DAG node; a topic counts as mastered
    when its probability reaches `threshold`.
    """

    def __init__(self, graph, threshold=0.70):
        self.graph = graph
        self.threshold = threshold
        # One probability per DAG node, initialised to "no evidence" (0.0).
        self.mastery = {node: 0.0 for node in graph.nodes}

    def update(self, topic_id, probability):
        """Set the mastery probability for a known topic; unknown ids are ignored
        so stale skill ids cannot grow the vector."""
        if topic_id in self.mastery:
            self.mastery[topic_id] = probability

    def is_mastered(self, topic_id):
        """True when the topic's probability meets the mastery threshold."""
        return self.mastery.get(topic_id, 0.0) >= self.threshold

    def get_mastery(self, topic_id):
        """Return the topic's probability, 0.0 for unknown topics."""
        return self.mastery.get(topic_id, 0.0)

    def get_mastery_summary(self):
        """Return counts, rate, and the list of mastered topic ids."""
        mastered = [t for t in self.mastery if self.is_mastered(t)]
        total = len(self.mastery)
        # Fix: guard against an empty graph so the rate never divides by zero.
        rate = round(len(mastered) / total, 3) if total else 0.0
        return {'total_topics': total, 'mastered': len(mastered), 'mastery_rate': rate, 'mastered_topics': mastered}
class DAGConstraintLayer:
    """Vetoes or flags topics whose DAG prerequisites are not sufficiently mastered."""

    def __init__(self, graph, threshold=0.70, soft_threshold=0.50):
        self.graph = graph
        self.threshold = threshold
        # Prereqs below soft_threshold are hard failures; between the two
        # thresholds the topic is merely "challenging".
        self.soft_threshold = soft_threshold

    def validate(self, topic_id, mastery_vector):
        """Return a (status, reasoning) pair.

        status is one of 'approved', 'challenging', 'vetoed'; reasoning is a
        human-readable explanation for the UI.
        """
        if topic_id not in self.graph.nodes:
            return 'vetoed', 'Topic not found.'
        prerequisites = list(self.graph.predecessors(topic_id))
        label = self.graph.nodes[topic_id].get('label', topic_id)  # currently unused
        if not prerequisites:
            return 'approved', f'βœ… Foundational topic β€” no prerequisites.'
        hard_fails, soft_fails = [], []
        for prereq in prerequisites:
            level = mastery_vector.get_mastery(prereq)
            prereq_label = self.graph.nodes[prereq].get('label', prereq)
            if level < self.soft_threshold:
                hard_fails.append((prereq_label, level))
            elif level < self.threshold:
                soft_fails.append((prereq_label, level))
        if hard_fails:
            gaps = ', '.join(f"{l} ({m:.0%} mastered, need {self.threshold:.0%})" for l, m in hard_fails)
            return 'vetoed', f'❌ Prerequisites not met: {gaps}'
        if soft_fails:
            gaps = ', '.join(f"{l} ({m:.0%} mastered, need {self.threshold:.0%})" for l, m in soft_fails)
            return 'challenging', f'⚠️ Challenging β€” prerequisites nearly met: {gaps}. Proceed with caution.'
        prereq_labels = [self.graph.nodes[p].get('label', p) for p in prerequisites]
        return 'approved', f'βœ… Prerequisites mastered: {", ".join(prereq_labels)}'
class RankingFunction:
    """Scores candidate topics: weighted mix of mastery gap, prerequisite
    readiness, and downstream influence, plus a near-mastery boost.

    score = w_gap*gap + w_ready*readiness + w_downstream*downstream + boost
    """

    def __init__(self, graph, threshold=0.70, w_gap=0.40, w_ready=0.35, w_downstream=0.25):
        self.graph = graph
        self.threshold = threshold
        self.w_gap = w_gap
        self.w_ready = w_ready
        self.w_downstream = w_downstream
        # Precompute normalised downstream influence (transitive descendant
        # count) per node.  Fix: in a graph with nodes but no edges every
        # count is 0 and the old `max(...) if scores else 1` normaliser was 0,
        # causing ZeroDivisionError; `or 1` covers that case too.
        counts = {n: len(nx.descendants(graph, n)) for n in graph.nodes}
        mx = max(counts.values(), default=0) or 1
        self._downstream = {n: c / mx for n, c in counts.items()}

    def score(self, topic_id, mastery_vector):
        """Return the rounded composite priority score for *topic_id*."""
        current = mastery_vector.get_mastery(topic_id)
        # Gap: how far below the mastery threshold the learner currently is.
        gap = min(max(0.0, self.threshold - current) / self.threshold, 1.0)
        prereqs = list(self.graph.predecessors(topic_id))
        # Readiness: fraction of prerequisites already mastered (1.0 if none).
        readiness = 1.0 if not prereqs else sum(1 for p in prereqs if mastery_vector.is_mastered(p)) / len(prereqs)
        downstream = self._downstream.get(topic_id, 0.0)
        # Near-mastery boost: topics the student has already started
        # rank higher than untouched topics with the same gap score
        near_mastery_boost = 0.0
        if 0.10 <= current < self.threshold:
            near_mastery_boost = 0.15 * (current / self.threshold)
        return round(self.w_gap * gap + self.w_ready * readiness + self.w_downstream * downstream + near_mastery_boost, 3)
class LearningRecommendationPipeline:
    """End-to-end recommender: constraint-check every topic, then rank survivors."""

    def __init__(self, graph, threshold=0.70, soft_threshold=0.50, top_n=5):
        self.graph = graph
        self.constraint = DAGConstraintLayer(graph, threshold, soft_threshold)
        self.ranker = RankingFunction(graph, threshold)
        self.top_n = top_n

    def run(self, mastery_vector):
        """Classify every DAG topic and return ranked recommendation buckets."""
        approved, challenging, vetoed = [], [], []
        for topic_id in self.graph.nodes:
            status, reasoning = self.constraint.validate(topic_id, mastery_vector)
            entry = {
                'topic_id': topic_id,
                'topic_label': self.graph.nodes[topic_id].get('label', topic_id),
                'mastery': round(mastery_vector.get_mastery(topic_id), 3),
                'reasoning': reasoning,
                'status': status,
            }
            already_mastered = mastery_vector.is_mastered(topic_id)
            if status == 'approved' and not already_mastered:
                entry['score'] = self.ranker.score(topic_id, mastery_vector)
                approved.append(entry)
            elif status == 'challenging' and not already_mastered:
                # Slight penalty so challenging topics rank below equal approved ones.
                entry['score'] = self.ranker.score(topic_id, mastery_vector) * 0.8
                challenging.append(entry)
            elif status == 'vetoed':
                vetoed.append(entry)
            # Topics that are approved/challenging but already mastered are dropped.
        for bucket in (approved, challenging):
            bucket.sort(key=lambda e: e['score'], reverse=True)
        return {
            'top_recommendations': approved[:self.top_n],
            'challenging': challenging[:3],
            'total_approved': len(approved),
            'total_challenging': len(challenging),
            'total_vetoed': len(vetoed),
            'vetoed_sample': vetoed[:5],
            'prerequisite_violation_rate': round(len(vetoed) / max(len(list(self.graph.nodes)), 1), 3),
        }
# Maps VLE activity types to topic ids in the math DAG.
# NOTE(review): activity names ('oucontent', 'forumng', ...) look like Open
# University / OULAD activity types - confirm against the training data.
ACTIVITY_TO_MATH = {'oucontent':'algebraic_expressions','forumng':'statistics_basic','homepage':'whole_numbers','subpage':'plane_shapes','resource':'indices','url':'number_bases','ouwiki':'proportion_variation','glossary':'algebraic_factorization','quiz':'quadratic_equations'}
# Same activity types mapped to topic ids in the CS-fundamentals DAG.
ACTIVITY_TO_CS = {'oucontent':'programming_concepts','forumng':'ethics_technology','homepage':'computer_basics','subpage':'html_basics','resource':'networking_fundamentals','url':'internet_basics','ouwiki':'cloud_basics','glossary':'intro_databases','quiz':'python_basics'}
def run_sakt_inference(model, config, skill_seq, correct_seq, device):
    """Run one SAKT forward pass and return {skill_id: predicted P(correct)}.

    The model predicts each step from the preceding interactions, so the first
    event only provides context and the returned dict covers skill_seq[1:].
    Sequences longer than max_seq_len keep only their most recent window.
    """
    max_len = config['max_seq_len']
    n_skills = config['num_skills']
    if len(skill_seq) > max_len:
        skill_seq = skill_seq[-max_len:]
        correct_seq = correct_seq[-max_len:]
    # Interaction id encodes (skill, correctness) jointly: wrong answers keep
    # the raw skill id, right answers are offset by n_skills.
    history = [skill + correct * n_skills for skill, correct in zip(skill_seq[:-1], correct_seq[:-1])]
    targets = list(skill_seq[1:])
    valid = len(history)
    pad = max_len - valid
    # Left-pad with 0 (the embedding padding index) up to the model window.
    padded_history = [0] * pad + history
    padded_targets = [0] * pad + targets
    attn_mask = [False] * pad + [True] * valid
    with torch.no_grad():
        logits = model(
            torch.LongTensor([padded_history]).to(device),
            torch.LongTensor([padded_targets]).to(device),
            torch.BoolTensor([attn_mask]).to(device),
        )
        probs = torch.sigmoid(logits).squeeze(0)
    real_probs = probs[torch.BoolTensor(attn_mask)].cpu().numpy()
    # Later predictions overwrite earlier ones, so each skill keeps its most
    # recent probability.
    return {int(skill): float(p) for skill, p in zip(padded_targets[pad:], real_probs)}
def build_mastery_vector(skill_probs, graph, skill_encoder_df, domain, threshold, soft_threshold):
    """Project per-skill SAKT probabilities onto DAG topics via activity types.

    Each skill id resolves (through the encoder table) to an activity type,
    which maps to one topic in the chosen domain.  When several skills land on
    the same topic, the highest probability wins.  soft_threshold is accepted
    for interface symmetry but not used here.
    """
    mv = MasteryVector(graph, threshold)
    mapping = ACTIVITY_TO_MATH if domain == 'math' else ACTIVITY_TO_CS
    best_per_topic = {}
    for skill_id, prob in skill_probs.items():
        match = skill_encoder_df[skill_encoder_df['skill_id'] == skill_id]
        if match.empty:
            continue
        activity = match['activity_type'].values[0] if 'activity_type' in match.columns else None
        topic = mapping.get(activity) if activity else None
        if topic:
            best_per_topic[topic] = max(best_per_topic.get(topic, 0.0), prob)
    for topic, prob in best_per_topic.items():
        mv.update(topic, prob)
    return mv
def what_if_analysis(topic_id, graph):
    """Summarise what mastering *topic_id* unlocks and what currently gates it."""
    all_descendants = list(nx.descendants(graph, topic_id))
    children = list(graph.successors(topic_id))
    parents = list(graph.predecessors(topic_id))

    def as_label(node):
        # Prefer the human-readable label, falling back to the node id.
        return graph.nodes[node].get('label', node)

    return {
        'direct_unlocks': [as_label(n) for n in children],
        'all_unlocks': [as_label(n) for n in all_descendants],
        'blocked_by': [as_label(n) for n in parents],
        'total_unlocked': len(all_descendants),
    }
def cascade_mastery(mastery_vector, graph):
    """
    If a student has high mastery on a topic, infer that their
    prerequisites are also likely mastered (propagate upward).
    A student who scores 80% on Modular Arithmetic almost certainly
    knows Whole Numbers β€” cascade fills these realistic gaps.

    Mutates and returns *mastery_vector*.
    """
    # Fix: this function was defined twice (byte-identical copies); the
    # duplicate definition has been removed.
    changed = True
    # Fixed-point iteration: keep propagating until no prerequisite estimate
    # improves.  Terminates because estimates only increase and are capped.
    while changed:
        changed = False
        for node in graph.nodes:
            node_mastery = mastery_vector.get_mastery(node)
            # Weak evidence (< 0.40) says nothing reliable about prerequisites.
            if node_mastery < 0.40:
                continue
            # For each prerequisite of this node
            for prereq in graph.predecessors(node):
                prereq_mastery = mastery_vector.get_mastery(prereq)
                # Infer prerequisite mastery as 85% of the descendant's,
                # capped at 0.95 (never fully certain).
                inferred = min(node_mastery * 0.85, 0.95)
                if inferred > prereq_mastery:
                    mastery_vector.update(prereq, inferred)
                    changed = True
    return mastery_vector
def get_attention_weights(model, config, skill_seq, correct_seq, device):
    """Return (recent_skills, recent_probs, seq_len) for the diagnostics view.

    NOTE(review): despite its name this function never extracted attention
    weights β€” the original hook machinery and embedding pre-computation were
    dead code and have been removed.  It runs a plain forward pass and returns
    the up-to-10 most recent target skills with their predicted correctness
    probabilities, plus the number of valid (non-padding) steps.  As before,
    tensors stay on the default device; `device` is accepted but unused.
    """
    max_len = config['max_seq_len']
    n_skills = config['num_skills']
    if len(skill_seq) > max_len:
        skill_seq = skill_seq[-max_len:]
        correct_seq = correct_seq[-max_len:]
    # Interaction ids jointly encode (skill, correctness); see run_sakt_inference.
    interactions = [s + c * n_skills for s, c in zip(skill_seq[:-1], correct_seq[:-1])]
    target_skills = skill_seq[1:]
    seq_len = len(interactions)
    pad_len = max_len - seq_len
    # Left-pad to the model window; False mask marks padding positions.
    interactions = [0] * pad_len + interactions
    target_skills = [0] * pad_len + target_skills
    mask_list = [False] * pad_len + [True] * seq_len
    interactions_t = torch.LongTensor([interactions])
    target_t = torch.LongTensor([target_skills])
    mask_t = torch.BoolTensor([mask_list])
    with torch.no_grad():
        real_mask = mask_t.squeeze(0)
        real_skills = target_skills[pad_len:]
        real_probs = torch.sigmoid(model(interactions_t, target_t, mask_t)).squeeze(0)[real_mask].numpy()
    return real_skills[-10:], real_probs[-10:], seq_len
def main():
    """Render the Streamlit UI: sidebar configuration plus four tabs
    (recommendations, what-if simulator, knowledge map, diagnostics)."""
    # Cached resources: SAKT model/config, both domain DAGs, skill-id table.
    model, config, device = load_model()
    math_graph, cs_graph = load_knowledge_maps()
    skill_encoder = load_skill_encoder()
    # NOTE(review): model, device, skill_encoder and domain_key (below) are
    # loaded/derived but never referenced later in this function - presumably
    # kept for a live-inference path; confirm before removing.
    st.title('🧠 Logic Engine')
    st.subheader('Domain-Agnostic Constraint-Aware Learning Recommender')
    st.markdown('---')
    # --- Sidebar: domain selection and threshold configuration ---
    st.sidebar.title('βš™οΈ Configuration')
    domain = st.sidebar.selectbox('Select Domain', ['Mathematics', 'CS Fundamentals'])
    threshold = st.sidebar.slider('Mastery Threshold', 0.50, 0.90, 0.70, 0.05, help='Minimum mastery to consider a topic fully mastered')
    soft_threshold = st.sidebar.slider('Challenging Threshold', 0.30, 0.70, 0.50, 0.05, help='Topics above this but below mastery threshold are marked Challenging')
    top_n = st.sidebar.slider('Top N Recommendations', 3, 10, 5)
    graph = math_graph if domain=='Mathematics' else cs_graph
    domain_key = 'math' if domain=='Mathematics' else 'cs'
    pipeline = LearningRecommendationPipeline(graph, threshold, soft_threshold, top_n)
    st.sidebar.markdown('---')
    st.sidebar.markdown('**About**')
    st.sidebar.markdown('SAKT-based knowledge tracing with DAG prerequisite constraints. Three-tier recommendations: βœ… Approved, ⚠️ Challenging, ❌ Vetoed.')
    tab1, tab2, tab3, tab4 = st.tabs(['🎯 Recommendations','πŸ” What-If Simulator','πŸ—ΊοΈ Knowledge Map','πŸ“Š Diagnostics'])
    # --- Tab 1: learner profile input + ranked recommendations ---
    with tab1:
        st.header('Learner Profile')
        mode = st.radio('Input Mode', ['Manual Mastery Input','Simulate Student Sequence'], horizontal=True)
        mastery_vector = MasteryVector(graph, threshold)
        if mode=='Manual Mastery Input':
            # One slider per topic, laid out in two columns.
            st.markdown('Set your current mastery level for each topic:')
            cols=st.columns(2); nodes=list(graph.nodes)
            for i,node in enumerate(nodes):
                label=graph.nodes[node].get('label',node); level=graph.nodes[node].get('level','')
                val=cols[i%2].slider(f'{label} ({level})',0.0,1.0,0.0,0.05,key=f'mastery_{node}')
                mastery_vector.update(node,val)
        else:
            # Synthetic learner: Beta-distributed per-topic scores scaled by
            # sequence length (longer history -> stronger mastery signal).
            seq_length=st.slider('Sequence Length',10,200,50)
            seed=st.number_input('Student Seed',1,1000,42,1)
            np.random.seed(int(seed))  # deterministic per chosen seed
            topic_nodes = list(graph.nodes)
            n_topics = len(topic_nodes)
            raw_scores = np.random.beta(1.5, 3.0, size=n_topics)
            scale = min(seq_length / 200.0 * 1.4, 1.0)
            scores = np.clip(raw_scores * scale, 0.0, 1.0)
            for topic_id, score in zip(topic_nodes, scores):
                mastery_vector.update(topic_id, float(score))
            mastery_df = pd.DataFrame({
                'Topic': [graph.nodes[t].get('label', t)[:25] for t in topic_nodes],
                'Mastery': [round(float(s), 3) for s in scores]
            }).sort_values('Mastery', ascending=False).head(10)
            st.markdown('**πŸ“ˆ Simulated Learner Mastery Signal (top 10 topics):**')
            st.bar_chart(mastery_df.set_index('Topic'))
            # Cascade mastery upward through DAG
            mastery_vector = cascade_mastery(mastery_vector, graph)
            n_mastered = sum(1 for t in topic_nodes if mastery_vector.is_mastered(t))
            st.success(f'Learner simulation complete β€” {n_mastered}/{n_topics} topics above mastery threshold')
        if st.button('πŸš€ Generate Recommendations', type='primary'):
            output=pipeline.run(mastery_vector)
            summary=mastery_vector.get_mastery_summary()
            # Headline metrics row.
            col1,col2,col3,col4,col5=st.columns(5)
            col1.metric('Topics Mastered',f"{summary['mastered']} / {summary['total_topics']}")
            col2.metric('Mastery Rate',f"{summary['mastery_rate']:.1%}")
            col3.metric('βœ… Approved',output['total_approved'])
            col4.metric('⚠️ Challenging',output['total_challenging'])
            col5.metric('Violation Rate',f"{output['prerequisite_violation_rate']:.1%}")
            st.markdown('---')
            st.subheader(f'βœ… Top {top_n} Approved Recommendations')
            if not output['top_recommendations']: st.warning('No approved recommendations β€” adjust mastery or lower threshold.')
            else:
                for i,rec in enumerate(output['top_recommendations'],1):
                    # Top-3 expanders start open.
                    with st.expander(f"{i}. {rec['topic_label']} β€” Score: {rec['score']} | Mastery: {rec['mastery']:.1%}", expanded=(i<=3)):
                        st.markdown(f"**Reasoning:** {rec['reasoning']}")
                        st.progress(rec['mastery'])
            if output['challenging']:
                st.markdown('---')
                st.subheader('⚠️ Challenging Topics (proceed with caution)')
                for rec in output['challenging']:
                    with st.expander(f"{rec['topic_label']} | Mastery: {rec['mastery']:.1%}"):
                        st.markdown(f"**Reasoning:** {rec['reasoning']}")
                        st.progress(rec['mastery'])
            if output['vetoed_sample']:
                st.markdown('---'); st.subheader('❌ Sample Vetoed Topics')
                for rec in output['vetoed_sample']:
                    with st.expander(f"βœ— {rec['topic_label']}"):
                        st.markdown(f"**Reason:** {rec['reasoning']}")
    # --- Tab 2: what-if prerequisite simulator ---
    with tab2:
        st.header('πŸ” What-If Prerequisite Simulator')
        st.markdown('Explore how mastering a topic unlocks future learning paths β€” or what is blocking you from starting it.')
        nodes_list = list(graph.nodes)
        labels_list = [graph.nodes[n].get('label',n) for n in nodes_list]
        selected_label = st.selectbox('Select a topic to analyse:', labels_list)
        # Map the chosen label back to its node id.
        selected_node = nodes_list[labels_list.index(selected_label)]
        if st.button('πŸ” Analyse Topic', type='primary'):
            result = what_if_analysis(selected_node, graph)
            col1, col2 = st.columns(2)
            with col1:
                st.subheader('πŸ”“ If you master this topic...')
                if result['direct_unlocks']:
                    st.markdown(f"**Directly unlocks {len(result['direct_unlocks'])} topic(s):**")
                    for t in result['direct_unlocks']: st.markdown(f' β†’ {t}')
                else:
                    st.info('This is a terminal topic β€” it does not unlock further topics in this map.')
                if result['all_unlocks']:
                    st.markdown(f"**Total topics eventually unlocked: {result['total_unlocked']}**")
            with col2:
                st.subheader('πŸ”’ To start this topic you need...')
                if result['blocked_by']:
                    st.markdown('**Prerequisites required:**')
                    for t in result['blocked_by']: st.markdown(f' βœ“ {t}')
                else:
                    st.success('This is a foundational topic β€” no prerequisites needed. You can start it now!')
            if result['all_unlocks']:
                st.markdown('---')
                st.markdown('**Full learning path unlocked:**')
                # Show at most the first 8 downstream topics.
                st.markdown(' β†’ '.join([selected_label] + result['all_unlocks'][:8]) + ('...' if len(result['all_unlocks'])>8 else ''))
    # --- Tab 3: tabular view of the active knowledge map ---
    with tab3:
        st.header(f'{domain} Knowledge Map')
        st.markdown(f"**{graph.number_of_nodes()} topics** | **{graph.number_of_edges()} prerequisite relationships**")
        rows=[]
        for node in graph.nodes:
            label=graph.nodes[node].get('label',node); level=graph.nodes[node].get('level',''); term=graph.nodes[node].get('term','')
            prereqs=[graph.nodes[p].get('label',p) for p in graph.predecessors(node)]
            rows.append({'Topic':label,'Level':level,'Term':term,'Prerequisites':', '.join(prereqs) if prereqs else 'None (Foundational)'})
        st.dataframe(pd.DataFrame(rows), use_container_width=True, hide_index=True)
        longest=nx.dag_longest_path(graph)
        st.markdown('**Longest prerequisite chain:**')
        st.markdown(' β†’ '.join([graph.nodes[n].get('label',n) for n in longest]))
    # --- Tab 4: model configuration and DAG diagnostics ---
    with tab4:
        st.header('System Diagnostics')
        col1,col2=st.columns(2)
        with col1: st.subheader('Model Configuration'); st.json(config)
        with col2:
            st.subheader('DAG Statistics')
            st.json({'domain':domain,'nodes':graph.number_of_nodes(),'edges':graph.number_of_edges(),'is_valid_dag':nx.is_directed_acyclic_graph(graph),'longest_path':len(nx.dag_longest_path(graph))})
        st.subheader('Constraint Layer')
        st.markdown(f'**Mastery threshold:** {threshold:.0%} β€” topics above this are considered mastered')
        st.markdown(f'**Challenging threshold:** {soft_threshold:.0%} β€” topics between this and mastery threshold are marked ⚠️ Challenging')
        st.markdown('**Hard veto:** topics with prerequisites below challenging threshold are fully blocked')
        st.subheader('Domain Switching')
        dcol1,dcol2=st.columns(2)
        with dcol1: st.metric('Math DAG',f'{math_graph.number_of_nodes()} topics')
        with dcol2: st.metric('CS DAG',f'{cs_graph.number_of_nodes()} topics')
if __name__ == '__main__':
    main()