Spaces:
Sleeping
Sleeping
Upload 6 files
Browse files- src/agentic_optimizer.py +400 -371
- src/curriculum_analyzer.py +146 -83
- src/curriculum_optimizer.py +292 -495
- src/inspect_graph.py +249 -72
- src/neu_graph_clean8.pkl +3 -0
- src/neu_scraper.py +235 -234
src/agentic_optimizer.py
CHANGED
|
@@ -1,413 +1,442 @@
|
|
| 1 |
"""
|
| 2 |
-
Agentic Curriculum Optimizer
|
| 3 |
-
|
| 4 |
-
"""
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
import json
|
| 7 |
-
import
|
|
|
|
| 8 |
import networkx as nx
|
| 9 |
-
import
|
| 10 |
-
from dataclasses import dataclass, asdict
|
| 11 |
-
from typing import Dict, List, Tuple, Optional
|
| 12 |
from datetime import datetime
|
| 13 |
-
import
|
| 14 |
import torch
|
| 15 |
-
from sentence_transformers import SentenceTransformer
|
| 16 |
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
| 17 |
-
import schedule
|
| 18 |
-
import time
|
| 19 |
-
|
| 20 |
-
@dataclass
|
| 21 |
-
class StudentProfile:
|
| 22 |
-
student_id: str
|
| 23 |
-
completed_courses: List[str]
|
| 24 |
-
current_gpa: float
|
| 25 |
-
interests: List[str]
|
| 26 |
-
career_goals: str
|
| 27 |
-
learning_style: str
|
| 28 |
-
time_commitment: int
|
| 29 |
-
preferred_difficulty: str
|
| 30 |
|
| 31 |
@dataclass
|
| 32 |
-
class
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
dropped_courses: List[str]
|
| 41 |
|
| 42 |
-
class
|
| 43 |
"""
|
| 44 |
-
Autonomous agent that
|
| 45 |
-
1. Monitors student progress
|
| 46 |
-
2. Adapts recommendations based on feedback
|
| 47 |
-
3. Proactively suggests adjustments
|
| 48 |
-
4. Learns from outcomes
|
| 49 |
"""
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
self.graph = None
|
| 58 |
self.courses = {}
|
|
|
|
| 59 |
|
| 60 |
-
#
|
| 61 |
-
self.
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
self.feedback_history = []
|
| 66 |
-
|
| 67 |
-
def _init_database(self):
|
| 68 |
-
"""Create tables for agent memory"""
|
| 69 |
-
conn = sqlite3.connect(self.db_path)
|
| 70 |
-
c = conn.cursor()
|
| 71 |
-
|
| 72 |
-
# Student profiles
|
| 73 |
-
c.execute('''CREATE TABLE IF NOT EXISTS students
|
| 74 |
-
(id TEXT PRIMARY KEY,
|
| 75 |
-
profile TEXT,
|
| 76 |
-
created_at TIMESTAMP)''')
|
| 77 |
-
|
| 78 |
-
# Generated plans
|
| 79 |
-
c.execute('''CREATE TABLE IF NOT EXISTS plans
|
| 80 |
-
(id TEXT PRIMARY KEY,
|
| 81 |
-
student_id TEXT,
|
| 82 |
-
plan_data TEXT,
|
| 83 |
-
created_at TIMESTAMP,
|
| 84 |
-
performance_score REAL)''')
|
| 85 |
-
|
| 86 |
-
# Feedback for learning
|
| 87 |
-
c.execute('''CREATE TABLE IF NOT EXISTS feedback
|
| 88 |
-
(id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 89 |
-
plan_id TEXT,
|
| 90 |
-
student_id TEXT,
|
| 91 |
-
feedback_data TEXT,
|
| 92 |
-
timestamp TIMESTAMP)''')
|
| 93 |
-
|
| 94 |
-
# Agent learning patterns
|
| 95 |
-
c.execute('''CREATE TABLE IF NOT EXISTS patterns
|
| 96 |
-
(id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 97 |
-
pattern_type TEXT,
|
| 98 |
-
pattern_data TEXT,
|
| 99 |
-
success_rate REAL,
|
| 100 |
-
discovered_at TIMESTAMP)''')
|
| 101 |
-
|
| 102 |
-
conn.commit()
|
| 103 |
-
conn.close()
|
| 104 |
|
| 105 |
-
def
|
| 106 |
-
"""
|
| 107 |
-
|
| 108 |
-
"""
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
"
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
-
def
|
| 118 |
-
"""
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
for student_id in perceptions["active_students"]:
|
| 125 |
-
if self._needs_plan_update(student_id, perceptions):
|
| 126 |
-
decisions.append({
|
| 127 |
-
"action": "update_plan",
|
| 128 |
-
"student_id": student_id,
|
| 129 |
-
"reason": "Poor performance feedback"
|
| 130 |
-
})
|
| 131 |
-
|
| 132 |
-
# Decision 2: Identify at-risk students
|
| 133 |
-
at_risk = self._identify_at_risk_students(perceptions["recent_feedback"])
|
| 134 |
-
for student_id in at_risk:
|
| 135 |
-
decisions.append({
|
| 136 |
-
"action": "intervention",
|
| 137 |
-
"student_id": student_id,
|
| 138 |
-
"reason": "Risk of dropping out"
|
| 139 |
-
})
|
| 140 |
-
|
| 141 |
-
# Decision 3: Optimize based on patterns
|
| 142 |
-
if perceptions["success_patterns"]:
|
| 143 |
-
decisions.append({
|
| 144 |
-
"action": "update_algorithm",
|
| 145 |
-
"patterns": perceptions["success_patterns"]
|
| 146 |
-
})
|
| 147 |
-
|
| 148 |
-
return decisions
|
| 149 |
|
| 150 |
-
def
|
| 151 |
-
"""
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
for
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
})
|
| 164 |
-
|
| 165 |
-
elif decision["action"] == "intervention":
|
| 166 |
-
intervention = self._create_intervention(decision["student_id"])
|
| 167 |
-
results.append({
|
| 168 |
-
"action": "intervention_created",
|
| 169 |
-
"student_id": decision["student_id"],
|
| 170 |
-
"intervention": intervention
|
| 171 |
-
})
|
| 172 |
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
|
| 182 |
-
def
|
| 183 |
-
"""
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
if result["action"] == "plan_updated":
|
| 191 |
-
# Track plan performance
|
| 192 |
-
self._track_plan_performance(result["student_id"], result["plan"])
|
| 193 |
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
# 4. LEARN
|
| 227 |
-
self.learn(results)
|
| 228 |
-
print("Learning cycle complete")
|
| 229 |
-
|
| 230 |
-
# Wait before next cycle (in production, this could be daily)
|
| 231 |
-
time.sleep(60) # Run every minute for demo
|
| 232 |
-
|
| 233 |
-
# --- Helper Methods ---
|
| 234 |
-
|
| 235 |
-
def _get_active_students(self) -> List[str]:
|
| 236 |
-
"""Get list of active students"""
|
| 237 |
-
conn = sqlite3.connect(self.db_path)
|
| 238 |
-
c = conn.cursor()
|
| 239 |
-
c.execute("SELECT id FROM students")
|
| 240 |
-
students = [row[0] for row in c.fetchall()]
|
| 241 |
-
conn.close()
|
| 242 |
-
return students
|
| 243 |
-
|
| 244 |
-
def _get_recent_feedback(self) -> List[Dict]:
|
| 245 |
-
"""Get recent feedback"""
|
| 246 |
-
conn = sqlite3.connect(self.db_path)
|
| 247 |
-
c = conn.cursor()
|
| 248 |
-
c.execute("SELECT feedback_data FROM feedback ORDER BY timestamp DESC LIMIT 10")
|
| 249 |
-
feedback = [json.loads(row[0]) for row in c.fetchall()]
|
| 250 |
-
conn.close()
|
| 251 |
-
return feedback
|
| 252 |
-
|
| 253 |
-
def _check_course_updates(self) -> Dict:
|
| 254 |
-
"""Check for course changes (mock for demo)"""
|
| 255 |
-
return {"updated_courses": [], "new_prerequisites": {}}
|
| 256 |
-
|
| 257 |
-
def _analyze_success_patterns(self) -> List[Dict]:
|
| 258 |
-
"""Identify successful patterns"""
|
| 259 |
-
conn = sqlite3.connect(self.db_path)
|
| 260 |
-
c = conn.cursor()
|
| 261 |
-
c.execute("SELECT pattern_data, success_rate FROM patterns WHERE success_rate > 0.7")
|
| 262 |
-
patterns = [{"data": json.loads(row[0]), "success_rate": row[1]} for row in c.fetchall()]
|
| 263 |
-
conn.close()
|
| 264 |
-
return patterns
|
| 265 |
-
|
| 266 |
-
def _needs_plan_update(self, student_id: str, perceptions: Dict) -> bool:
|
| 267 |
-
"""Determine if student needs plan update"""
|
| 268 |
-
# Check if recent feedback shows issues
|
| 269 |
-
for feedback in perceptions["recent_feedback"]:
|
| 270 |
-
if feedback.get("student_id") == student_id:
|
| 271 |
-
if feedback.get("satisfaction", 5) < 3:
|
| 272 |
-
return True
|
| 273 |
-
return False
|
| 274 |
-
|
| 275 |
-
def _identify_at_risk_students(self, feedback: List[Dict]) -> List[str]:
|
| 276 |
-
"""Identify students at risk"""
|
| 277 |
-
at_risk = []
|
| 278 |
-
for fb in feedback:
|
| 279 |
-
if fb.get("difficulty_rating", 0) > 4 or fb.get("dropped_courses", []):
|
| 280 |
-
at_risk.append(fb.get("student_id"))
|
| 281 |
-
return at_risk
|
| 282 |
-
|
| 283 |
-
def _regenerate_plan(self, student_id: str) -> Dict:
|
| 284 |
-
"""Generate new plan for student"""
|
| 285 |
-
# This would use your existing optimizer
|
| 286 |
-
return {"plan": "new_optimized_plan", "adjustments": ["reduced_difficulty"]}
|
| 287 |
|
| 288 |
-
def
|
| 289 |
-
"""
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
|
|
|
|
|
|
|
|
|
| 293 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
|
| 295 |
-
def
|
| 296 |
-
"""
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
def _monitor_intervention(self, student_id: str, intervention: Dict):
|
| 310 |
-
"""Monitor intervention effectiveness"""
|
| 311 |
-
print(f"Monitoring intervention for {student_id}")
|
| 312 |
-
|
| 313 |
-
def _discover_patterns(self) -> List[Dict]:
|
| 314 |
-
"""Discover new patterns from data"""
|
| 315 |
-
# Example: Find that students who take CS2500 before CS2510 do better
|
| 316 |
-
patterns = []
|
| 317 |
-
|
| 318 |
-
# Analyze database for patterns
|
| 319 |
-
conn = sqlite3.connect(self.db_path)
|
| 320 |
-
c = conn.cursor()
|
| 321 |
-
|
| 322 |
-
# Example pattern discovery
|
| 323 |
-
c.execute("""
|
| 324 |
-
SELECT COUNT(*) FROM feedback
|
| 325 |
-
WHERE feedback_data LIKE '%CS2500%CS2510%'
|
| 326 |
-
AND json_extract(feedback_data, '$.satisfaction') > 4
|
| 327 |
-
""")
|
| 328 |
-
|
| 329 |
-
result = c.fetchone()
|
| 330 |
-
if result and result[0] > 5: # If pattern appears frequently
|
| 331 |
-
patterns.append({
|
| 332 |
-
"type": "course_sequence",
|
| 333 |
-
"data": {"sequence": ["CS2500", "CS2510"]},
|
| 334 |
-
"success_rate": 0.85
|
| 335 |
-
})
|
| 336 |
-
|
| 337 |
-
conn.close()
|
| 338 |
-
return patterns
|
| 339 |
|
|
|
|
|
|
|
| 340 |
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
conn = sqlite3.connect(self.agent.db_path)
|
| 358 |
-
c = conn.cursor()
|
| 359 |
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
|
|
|
|
|
|
| 363 |
|
| 364 |
-
|
| 365 |
-
conn.close()
|
| 366 |
|
| 367 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 368 |
|
| 369 |
-
def
|
| 370 |
-
"""
|
| 371 |
-
|
| 372 |
-
|
| 373 |
|
| 374 |
-
|
| 375 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
|
| 377 |
-
|
| 378 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
|
| 380 |
-
def
|
| 381 |
-
"""
|
| 382 |
-
print("
|
| 383 |
-
print("
|
| 384 |
-
print("
|
| 385 |
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 390 |
|
| 391 |
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
|
|
|
|
|
|
|
|
|
| 396 |
|
| 397 |
-
|
| 398 |
-
student = StudentProfile(
|
| 399 |
-
student_id="test_001",
|
| 400 |
-
completed_courses=["CS1800", "CS2500"],
|
| 401 |
-
current_gpa=3.5,
|
| 402 |
-
interests=["AI", "Machine Learning"],
|
| 403 |
-
career_goals="ML Engineer",
|
| 404 |
-
learning_style="Visual",
|
| 405 |
-
time_commitment=40,
|
| 406 |
-
preferred_difficulty="moderate"
|
| 407 |
-
)
|
| 408 |
|
| 409 |
-
|
| 410 |
-
|
|
|
|
|
|
|
| 411 |
|
| 412 |
-
|
| 413 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
+
Agentic Curriculum Optimizer - Autonomous Graph Validator & Fixer
|
| 3 |
+
Detects missing courses, suggests replacements, and directly patches the graph.
|
|
|
|
| 4 |
|
| 5 |
+
Usage:
|
| 6 |
+
python agentic_optimizer.py --graph neu_graph_clean6.pkl --validate
|
| 7 |
+
python agentic_optimizer.py --graph neu_graph_clean6.pkl --fix --output neu_graph_fixed.pkl
|
| 8 |
+
"""
|
| 9 |
+
import pickle
|
| 10 |
import json
|
| 11 |
+
import re
|
| 12 |
+
import argparse
|
| 13 |
import networkx as nx
|
| 14 |
+
from typing import Dict, Set, List, Tuple, Optional
|
|
|
|
|
|
|
| 15 |
from datetime import datetime
|
| 16 |
+
from dataclasses import dataclass, asdict
|
| 17 |
import torch
|
|
|
|
| 18 |
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
@dataclass
|
| 21 |
+
class CourseChange:
|
| 22 |
+
"""Detected change in course catalog"""
|
| 23 |
+
old_code: str
|
| 24 |
+
new_code: str = None
|
| 25 |
+
status: str = "missing" # missing, renamed, moved, deprecated
|
| 26 |
+
replacement_suggestion: str = None
|
| 27 |
+
confidence: float = 0.0
|
| 28 |
+
evidence: str = ""
|
|
|
|
| 29 |
|
| 30 |
+
class AgenticOptimizer:
|
| 31 |
"""
|
| 32 |
+
Autonomous agent that validates requirements AND fixes graph automatically
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
"""
|
| 34 |
|
| 35 |
+
# Requirements synced with curriculum_optimizer.py
|
| 36 |
+
CONCENTRATION_REQUIREMENTS = {
|
| 37 |
+
"ai_ml": {
|
| 38 |
+
"foundations": {
|
| 39 |
+
"required": ["CS1800", "CS2500", "CS2510", "CS2800"],
|
| 40 |
+
},
|
| 41 |
+
"core": {
|
| 42 |
+
"required": ["CS3000", "CS3500"],
|
| 43 |
+
"pick_1_from": ["CS3200", "CS3650", "CS5700"] # FIXED: CS3700 → CS5700
|
| 44 |
+
},
|
| 45 |
+
"concentration_specific": {
|
| 46 |
+
"required": ["CS4100", "DS4400"],
|
| 47 |
+
"pick_2_from": ["CS4120", "CS4180", "DS4420", "DS4440"],
|
| 48 |
+
"pick_1_systems": ["CS4730", "CS4700"] # REMOVED: CS4750 (doesn't exist)
|
| 49 |
+
},
|
| 50 |
+
"math": {
|
| 51 |
+
"required": ["MATH1341", "MATH1342"],
|
| 52 |
+
"pick_1_from": ["MATH2331", "MATH3081"] # REMOVED: STAT3150
|
| 53 |
+
}
|
| 54 |
+
},
|
| 55 |
+
"systems": {
|
| 56 |
+
"foundations": {
|
| 57 |
+
"required": ["CS1800", "CS2500", "CS2510", "CS2800"]
|
| 58 |
+
},
|
| 59 |
+
"core": {
|
| 60 |
+
"required": ["CS3000", "CS3500", "CS3650"],
|
| 61 |
+
"pick_1_from": ["CS5700", "CS3200"] # FIXED: CS3700 → CS5700
|
| 62 |
+
},
|
| 63 |
+
"concentration_specific": {
|
| 64 |
+
"required": ["CS4700"],
|
| 65 |
+
"pick_2_from": ["CS4730"], # REMOVED: CS4750, CS4770
|
| 66 |
+
"pick_1_from": ["CS4400", "CS4500", "CS4520"]
|
| 67 |
+
},
|
| 68 |
+
"math": {
|
| 69 |
+
"required": ["MATH1341", "MATH1342"]
|
| 70 |
+
}
|
| 71 |
+
},
|
| 72 |
+
"security": {
|
| 73 |
+
"foundations": {
|
| 74 |
+
"required": ["CS1800", "CS2500", "CS2510", "CS2800"]
|
| 75 |
+
},
|
| 76 |
+
"core": {
|
| 77 |
+
"required": ["CS3000", "CS3650", "CY2550"],
|
| 78 |
+
"pick_1_from": ["CS5700", "CS3500"] # FIXED: CS3700 → CS5700
|
| 79 |
+
},
|
| 80 |
+
"concentration_specific": {
|
| 81 |
+
"required": ["CY3740"],
|
| 82 |
+
"pick_2_from": ["CY4740", "CY4760", "CY4770"], # CY4770 (moved from CS)
|
| 83 |
+
"pick_1_from": ["CS4700", "CS4730"]
|
| 84 |
+
},
|
| 85 |
+
"math": {
|
| 86 |
+
"required": ["MATH1342"],
|
| 87 |
+
"pick_1_from": ["MATH3527", "MATH3081"]
|
| 88 |
+
}
|
| 89 |
+
}
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
# Known manual additions for courses that don't appear in scraper
|
| 93 |
+
MANUAL_COURSES = {
|
| 94 |
+
"CS5700": {
|
| 95 |
+
"name": "Fundamentals of Networks",
|
| 96 |
+
"subject": "CS",
|
| 97 |
+
"classId": "5700",
|
| 98 |
+
"description": "Networks and distributed systems (grad level, no prereqs)",
|
| 99 |
+
"minCredits": 4,
|
| 100 |
+
"maxCredits": 4,
|
| 101 |
+
"prerequisites": [] # Open to undergrads
|
| 102 |
+
},
|
| 103 |
+
"CY4770": {
|
| 104 |
+
"name": "Foundations of Cryptography",
|
| 105 |
+
"subject": "CY",
|
| 106 |
+
"classId": "4770",
|
| 107 |
+
"description": "Mathematical cryptography (moved from CS dept)",
|
| 108 |
+
"minCredits": 4,
|
| 109 |
+
"maxCredits": 4,
|
| 110 |
+
"prerequisites": ["CS3000"] # Simplified prereq
|
| 111 |
+
}
|
| 112 |
+
}
|
| 113 |
+
|
| 114 |
+
def __init__(self, graph_path: str, use_llm: bool = True):
|
| 115 |
+
self.graph_path = graph_path
|
| 116 |
+
self.use_llm = use_llm
|
| 117 |
self.graph = None
|
| 118 |
self.courses = {}
|
| 119 |
+
self.changes = []
|
| 120 |
|
| 121 |
+
# Load LLM if needed
|
| 122 |
+
self.llm = None
|
| 123 |
+
self.tokenizer = None
|
| 124 |
+
if use_llm:
|
| 125 |
+
self._load_llm()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
+
def _load_llm(self):
|
| 128 |
+
"""Load local LLM for intelligent validation"""
|
| 129 |
+
print("🤖 Loading LLM for catalog analysis...")
|
| 130 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 131 |
+
|
| 132 |
+
if device.type == 'cuda':
|
| 133 |
+
model_name = "meta-llama/Llama-3.1-8B-Instruct"
|
| 134 |
+
quant_config = BitsAndBytesConfig(
|
| 135 |
+
load_in_4bit=True,
|
| 136 |
+
bnb_4bit_quant_type="nf4",
|
| 137 |
+
bnb_4bit_compute_dtype=torch.bfloat16
|
| 138 |
+
)
|
| 139 |
+
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 140 |
+
self.tokenizer.pad_token = self.tokenizer.eos_token
|
| 141 |
+
self.llm = AutoModelForCausalLM.from_pretrained(
|
| 142 |
+
model_name,
|
| 143 |
+
quantization_config=quant_config,
|
| 144 |
+
device_map="auto"
|
| 145 |
+
)
|
| 146 |
+
print("✅ LLM loaded")
|
| 147 |
+
else:
|
| 148 |
+
print("⚠️ No GPU available, LLM disabled")
|
| 149 |
+
self.use_llm = False
|
| 150 |
|
| 151 |
+
def load_graph(self):
|
| 152 |
+
"""Load curriculum graph"""
|
| 153 |
+
print(f"📚 Loading graph: {self.graph_path}")
|
| 154 |
+
with open(self.graph_path, 'rb') as f:
|
| 155 |
+
self.graph = pickle.load(f)
|
| 156 |
+
self.courses = dict(self.graph.nodes(data=True))
|
| 157 |
+
print(f"✅ Loaded {len(self.courses)} courses")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
|
| 159 |
+
def validate_requirements(self) -> Dict[str, List[CourseChange]]:
|
| 160 |
+
"""Check which required courses are missing from graph"""
|
| 161 |
+
print("\n🔍 Validating CONCENTRATION_REQUIREMENTS against graph...")
|
| 162 |
+
|
| 163 |
+
track_changes = {}
|
| 164 |
+
|
| 165 |
+
for track, track_reqs in self.CONCENTRATION_REQUIREMENTS.items():
|
| 166 |
+
print(f"\n📋 Checking {track} track:")
|
| 167 |
+
track_changes[track] = []
|
| 168 |
+
|
| 169 |
+
for category, reqs in track_reqs.items():
|
| 170 |
+
if not isinstance(reqs, dict):
|
| 171 |
+
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
+
for key, courses in reqs.items():
|
| 174 |
+
if not isinstance(courses, list):
|
| 175 |
+
continue
|
| 176 |
+
|
| 177 |
+
for course in courses:
|
| 178 |
+
if course not in self.courses:
|
| 179 |
+
change = CourseChange(
|
| 180 |
+
old_code=course,
|
| 181 |
+
status="missing",
|
| 182 |
+
evidence=f"Not found in scraped graph ({len(self.courses)} courses)"
|
| 183 |
+
)
|
| 184 |
+
track_changes[track].append(change)
|
| 185 |
+
print(f" ❌ {course} - MISSING")
|
| 186 |
+
else:
|
| 187 |
+
print(f" ✅ {course}")
|
| 188 |
+
|
| 189 |
+
return track_changes
|
| 190 |
|
| 191 |
+
def find_replacements(self, changes: Dict[str, List[CourseChange]]) -> Dict[str, List[CourseChange]]:
|
| 192 |
+
"""Use pattern matching + LLM to suggest replacements"""
|
| 193 |
+
print("\n🤖 Analyzing missing courses...")
|
| 194 |
+
|
| 195 |
+
for track, track_changes in changes.items():
|
| 196 |
+
for change in track_changes:
|
| 197 |
+
if change.status != "missing":
|
| 198 |
+
continue
|
|
|
|
|
|
|
|
|
|
| 199 |
|
| 200 |
+
# Try pattern matching first (instant)
|
| 201 |
+
replacement = self._pattern_match_replacement(change.old_code)
|
| 202 |
+
if replacement:
|
| 203 |
+
change.new_code = replacement
|
| 204 |
+
change.status = "renamed"
|
| 205 |
+
change.confidence = 0.7
|
| 206 |
+
change.evidence = "Pattern matching"
|
| 207 |
+
print(f" 🔄 {change.old_code} → {replacement} (pattern)")
|
| 208 |
+
continue
|
| 209 |
+
|
| 210 |
+
# Check manual course database
|
| 211 |
+
if change.old_code in self.MANUAL_COURSES:
|
| 212 |
+
change.new_code = change.old_code # Will be added to graph
|
| 213 |
+
change.status = "manual_add"
|
| 214 |
+
change.confidence = 1.0
|
| 215 |
+
change.evidence = "Manual course database"
|
| 216 |
+
print(f" ➕ {change.old_code} - Will be added manually")
|
| 217 |
+
continue
|
| 218 |
+
|
| 219 |
+
# Use LLM for ambiguous cases
|
| 220 |
+
if self.use_llm and self.llm:
|
| 221 |
+
replacement = self._llm_suggest_replacement(change.old_code, track)
|
| 222 |
+
if replacement:
|
| 223 |
+
change.new_code = replacement
|
| 224 |
+
change.status = "renamed"
|
| 225 |
+
change.confidence = 0.9
|
| 226 |
+
change.evidence = "LLM analysis"
|
| 227 |
+
print(f" 🔄 {change.old_code} → {replacement} (LLM)")
|
| 228 |
+
else:
|
| 229 |
+
print(f" ⚠️ {change.old_code} - No replacement found")
|
| 230 |
+
|
| 231 |
+
return changes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
|
| 233 |
+
def _pattern_match_replacement(self, course_code: str) -> Optional[str]:
|
| 234 |
+
"""Fast pattern-based replacement detection"""
|
| 235 |
+
|
| 236 |
+
# Known replacements from manual verification
|
| 237 |
+
known_replacements = {
|
| 238 |
+
"CS3700": "CS5700",
|
| 239 |
+
"CS4770": "CY4770",
|
| 240 |
+
"STAT3150": "MATH3081",
|
| 241 |
}
|
| 242 |
+
|
| 243 |
+
if course_code in known_replacements:
|
| 244 |
+
if known_replacements[course_code] in self.courses:
|
| 245 |
+
return known_replacements[course_code]
|
| 246 |
+
|
| 247 |
+
# Try subject swap (CS ↔ CY)
|
| 248 |
+
if course_code.startswith("CS"):
|
| 249 |
+
alt_code = "CY" + course_code[2:]
|
| 250 |
+
if alt_code in self.courses:
|
| 251 |
+
return alt_code
|
| 252 |
+
elif course_code.startswith("CY"):
|
| 253 |
+
alt_code = "CS" + course_code[2:]
|
| 254 |
+
if alt_code in self.courses:
|
| 255 |
+
return alt_code
|
| 256 |
+
|
| 257 |
+
# Try grad-level version (3XXX/4XXX → 5XXX)
|
| 258 |
+
match = re.match(r'([A-Z]+)(\d)(\d{3})', course_code)
|
| 259 |
+
if match:
|
| 260 |
+
subject, first_digit, rest = match.groups()
|
| 261 |
+
if first_digit in ['3', '4']:
|
| 262 |
+
grad_code = f"{subject}5{rest}"
|
| 263 |
+
if grad_code in self.courses:
|
| 264 |
+
return grad_code
|
| 265 |
+
|
| 266 |
+
return None
|
| 267 |
|
| 268 |
+
def _llm_suggest_replacement(self, missing_course: str, track: str) -> Optional[str]:
|
| 269 |
+
"""Use LLM to intelligently suggest replacement"""
|
| 270 |
+
|
| 271 |
+
subject = re.match(r'([A-Z]+)', missing_course).group(1)
|
| 272 |
+
similar_courses = [
|
| 273 |
+
(cid, data.get('name', ''))
|
| 274 |
+
for cid, data in self.courses.items()
|
| 275 |
+
if cid.startswith(subject) and cid != missing_course
|
| 276 |
+
][:10]
|
| 277 |
+
|
| 278 |
+
course_list = "\n".join([f"- {cid}: {name}" for cid, name in similar_courses])
|
| 279 |
+
|
| 280 |
+
prompt = f"""Course catalog expert analyzing NEU curriculum changes.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
|
| 282 |
+
**Missing:** {missing_course}
|
| 283 |
+
**Track:** {track}
|
| 284 |
|
| 285 |
+
**Available courses:**
|
| 286 |
+
{course_list}
|
| 287 |
+
|
| 288 |
+
Which course replaced {missing_course}? Return ONLY the code or "NONE".
|
| 289 |
+
|
| 290 |
+
Rules:
|
| 291 |
+
- Networks: CS3700 → CS5700
|
| 292 |
+
- Crypto: CS → CY dept
|
| 293 |
+
- STAT → MATH
|
| 294 |
+
- Game courses often don't exist
|
| 295 |
+
"""
|
| 296 |
|
| 297 |
+
try:
|
| 298 |
+
inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(self.llm.device)
|
| 299 |
+
with torch.no_grad():
|
| 300 |
+
outputs = self.llm.generate(
|
| 301 |
+
**inputs,
|
| 302 |
+
max_new_tokens=50,
|
| 303 |
+
temperature=0.1,
|
| 304 |
+
do_sample=True,
|
| 305 |
+
pad_token_id=self.tokenizer.eos_token_id
|
| 306 |
+
)
|
| 307 |
+
response = self.tokenizer.decode(outputs[0][len(inputs['input_ids'][0]):], skip_special_tokens=True).strip()
|
| 308 |
+
|
| 309 |
+
match = re.search(r'([A-Z]{2,4}\d{4})', response)
|
| 310 |
+
if match:
|
| 311 |
+
suggested = match.group(1)
|
| 312 |
+
if suggested in self.courses:
|
| 313 |
+
return suggested
|
| 314 |
|
| 315 |
+
except Exception as e:
|
| 316 |
+
print(f" ⚠️ LLM error: {e}")
|
|
|
|
|
|
|
| 317 |
|
| 318 |
+
return None
|
| 319 |
+
|
| 320 |
+
def fix_graph(self, changes: Dict[str, List[CourseChange]]) -> int:
|
| 321 |
+
"""Directly add missing courses to the graph"""
|
| 322 |
+
print("\n🔧 Fixing graph by adding missing courses...")
|
| 323 |
|
| 324 |
+
added_count = 0
|
|
|
|
| 325 |
|
| 326 |
+
for track, track_changes in changes.items():
|
| 327 |
+
for change in track_changes:
|
| 328 |
+
if change.status == "manual_add" and change.old_code in self.MANUAL_COURSES:
|
| 329 |
+
course_data = self.MANUAL_COURSES[change.old_code]
|
| 330 |
+
cid = change.old_code
|
| 331 |
+
|
| 332 |
+
# Add node
|
| 333 |
+
self.graph.add_node(cid, **course_data)
|
| 334 |
+
self.courses[cid] = course_data
|
| 335 |
+
|
| 336 |
+
# Add prerequisite edges
|
| 337 |
+
for prereq in course_data.get("prerequisites", []):
|
| 338 |
+
if prereq in self.graph:
|
| 339 |
+
self.graph.add_edge(prereq, cid, relationship="prerequisite")
|
| 340 |
+
else:
|
| 341 |
+
print(f" ⚠️ Prereq {prereq} for {cid} not in graph")
|
| 342 |
+
|
| 343 |
+
print(f" ✅ Added {cid}: {course_data['name']}")
|
| 344 |
+
added_count += 1
|
| 345 |
+
|
| 346 |
+
return added_count
|
| 347 |
|
| 348 |
+
def save_report(self, changes: Dict[str, List[CourseChange]], output_path: str = None):
|
| 349 |
+
"""Save validation report"""
|
| 350 |
+
if not output_path:
|
| 351 |
+
output_path = f"catalog_validation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
|
| 352 |
|
| 353 |
+
report = {
|
| 354 |
+
"timestamp": datetime.now().isoformat(),
|
| 355 |
+
"graph_file": self.graph_path,
|
| 356 |
+
"total_courses_in_graph": len(self.courses),
|
| 357 |
+
"changes": {
|
| 358 |
+
track: [asdict(c) for c in track_changes]
|
| 359 |
+
for track, track_changes in changes.items()
|
| 360 |
+
}
|
| 361 |
+
}
|
| 362 |
|
| 363 |
+
with open(output_path, 'w') as f:
|
| 364 |
+
json.dump(report, f, indent=2)
|
| 365 |
+
|
| 366 |
+
print(f"\n💾 Report saved: {output_path}")
|
| 367 |
+
|
| 368 |
+
def save_graph(self, output_path: str):
|
| 369 |
+
"""Save the fixed graph"""
|
| 370 |
+
with open(output_path, 'wb') as f:
|
| 371 |
+
pickle.dump(self.graph, f)
|
| 372 |
+
print(f"💾 Fixed graph saved: {output_path}")
|
| 373 |
+
print(f"📊 Final graph: {self.graph.number_of_nodes()} courses, {self.graph.number_of_edges()} edges")
|
| 374 |
|
| 375 |
+
def run(self, fix: bool = False, output: str = None):
|
| 376 |
+
"""Main agent workflow"""
|
| 377 |
+
print("="*70)
|
| 378 |
+
print("AGENTIC OPTIMIZER - Autonomous Graph Validator & Fixer")
|
| 379 |
+
print("="*70)
|
| 380 |
|
| 381 |
+
# Step 1: Load data
|
| 382 |
+
self.load_graph()
|
| 383 |
+
|
| 384 |
+
# Step 2: Validate requirements
|
| 385 |
+
changes = self.validate_requirements()
|
| 386 |
+
|
| 387 |
+
# Count issues
|
| 388 |
+
total_missing = sum(len(c) for c in changes.values())
|
| 389 |
+
if total_missing == 0:
|
| 390 |
+
print("\n✅ All requirements valid! No changes needed.")
|
| 391 |
+
return
|
| 392 |
+
|
| 393 |
+
print(f"\n⚠️ Found {total_missing} missing courses across all tracks")
|
| 394 |
+
|
| 395 |
+
# Step 3: Find replacements
|
| 396 |
+
changes = self.find_replacements(changes)
|
| 397 |
+
|
| 398 |
+
# Step 4: Generate report
|
| 399 |
+
self.save_report(changes)
|
| 400 |
+
|
| 401 |
+
# Step 5: Fix graph if requested
|
| 402 |
+
if fix:
|
| 403 |
+
added = self.fix_graph(changes)
|
| 404 |
+
|
| 405 |
+
if added > 0:
|
| 406 |
+
print(f"\n✅ Added {added} courses to graph")
|
| 407 |
+
|
| 408 |
+
if output:
|
| 409 |
+
self.save_graph(output)
|
| 410 |
+
else:
|
| 411 |
+
# Default output name
|
| 412 |
+
default_output = self.graph_path.replace('.pkl', '_fixed.pkl')
|
| 413 |
+
self.save_graph(default_output)
|
| 414 |
+
else:
|
| 415 |
+
print("\n⚠️ No courses added (all issues are renamings, not missing)")
|
| 416 |
+
|
| 417 |
+
print("\n✨ Optimization complete!")
|
| 418 |
|
| 419 |
|
| 420 |
+
def main():
|
| 421 |
+
parser = argparse.ArgumentParser(description="Agentic Optimizer - Auto-validate & fix curriculum graph")
|
| 422 |
+
parser.add_argument('--graph', required=True, help="Path to curriculum graph .pkl")
|
| 423 |
+
parser.add_argument('--validate', action='store_true', help="Only validate, don't fix")
|
| 424 |
+
parser.add_argument('--fix', action='store_true', help="Fix graph by adding missing courses")
|
| 425 |
+
parser.add_argument('--output', help="Output path for fixed graph")
|
| 426 |
+
parser.add_argument('--no-llm', action='store_true', help="Disable LLM (use pattern matching only)")
|
| 427 |
|
| 428 |
+
args = parser.parse_args()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 429 |
|
| 430 |
+
agent = AgenticOptimizer(
|
| 431 |
+
graph_path=args.graph,
|
| 432 |
+
use_llm=not args.no_llm
|
| 433 |
+
)
|
| 434 |
|
| 435 |
+
agent.run(
|
| 436 |
+
fix=args.fix,
|
| 437 |
+
output=args.output
|
| 438 |
+
)
|
| 439 |
+
|
| 440 |
+
|
| 441 |
+
if __name__ == "__main__":
|
| 442 |
+
main()
|
src/curriculum_analyzer.py
CHANGED
|
@@ -1,13 +1,11 @@
|
|
| 1 |
"""
|
| 2 |
-
Curriculum Analyzer
|
| 3 |
-
Analyzes, CLEANS, and enriches scraped NEU curriculum data.
|
| 4 |
"""
|
| 5 |
import pickle
|
| 6 |
-
import json
|
| 7 |
import argparse
|
| 8 |
import networkx as nx
|
| 9 |
import re
|
| 10 |
-
from
|
| 11 |
|
| 12 |
def get_course_level(cid):
|
| 13 |
"""Extracts the numerical part of a course ID for level checking."""
|
|
@@ -16,112 +14,177 @@ def get_course_level(cid):
|
|
| 16 |
|
| 17 |
class CurriculumAnalyzer:
|
| 18 |
def __init__(self, graph_path, courses_path):
|
| 19 |
-
self.graph_path = graph_path
|
| 20 |
-
self.courses_path = courses_path
|
| 21 |
-
self.graph = None
|
| 22 |
-
self.courses = None
|
| 23 |
-
self.load_data()
|
| 24 |
-
|
| 25 |
-
def load_data(self):
|
| 26 |
print("📚 Loading raw curriculum data...")
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
print(f"✅ Loaded raw data with {self.graph.number_of_nodes()} courses.")
|
| 39 |
-
except FileNotFoundError as e:
|
| 40 |
-
print(f"❌ Error: Data file not found. {e}")
|
| 41 |
-
exit(1)
|
| 42 |
|
| 43 |
def pre_filter_graph(self):
|
| 44 |
-
"""
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
"""
|
| 48 |
-
print("\n🧹 Pre-filtering graph to remove irrelevant courses...")
|
| 49 |
-
|
| 50 |
-
# Define what subjects are considered relevant for a tech-focused degree
|
| 51 |
-
RELEVANT_SUBJECTS = {
|
| 52 |
-
"CS", "DS", "CY",
|
| 53 |
-
}
|
| 54 |
|
| 55 |
-
nodes_to_remove =
|
| 56 |
for node, data in self.graph.nodes(data=True):
|
| 57 |
-
subject = data.get('subject')
|
|
|
|
| 58 |
level = get_course_level(node)
|
| 59 |
|
| 60 |
-
#
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
self.graph.remove_nodes_from(nodes_to_remove)
|
| 65 |
-
print(f"✅
|
|
|
|
| 66 |
|
| 67 |
-
def
|
| 68 |
-
"""
|
| 69 |
-
print("\n
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
-
|
|
|
|
|
|
|
| 74 |
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
-
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
-
#
|
| 89 |
-
score = (
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
'prereq_count': in_deg,
|
| 94 |
-
'unlocks_count': out_deg
|
| 95 |
-
}
|
| 96 |
|
| 97 |
-
|
| 98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
def save_enriched_graph(self, output_path):
|
| 101 |
"""Saves the final, clean, and enriched graph."""
|
| 102 |
-
print(f"\n💾 Saving
|
| 103 |
with open(output_path, 'wb') as f:
|
| 104 |
pickle.dump(self.graph, f)
|
| 105 |
-
print("✅ Graph saved
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
def main(args):
|
| 108 |
"""Main execution flow."""
|
| 109 |
analyzer = CurriculumAnalyzer(args.graph, args.courses)
|
| 110 |
-
|
| 111 |
-
# Run the new cleaning step first!
|
| 112 |
analyzer.pre_filter_graph()
|
| 113 |
-
|
|
|
|
| 114 |
analyzer.calculate_and_add_complexity()
|
| 115 |
|
| 116 |
-
analyzer.
|
| 117 |
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
if __name__ == "__main__":
|
| 122 |
-
parser = argparse.ArgumentParser(description="NEU Curriculum Analyzer and
|
| 123 |
-
parser.add_argument('--graph', required=True, help="Path to
|
| 124 |
-
parser.add_argument('--courses', required=True, help="Path to
|
| 125 |
-
parser.add_argument('--output-graph', default='
|
| 126 |
args = parser.parse_args()
|
| 127 |
-
main(args)
|
|
|
|
| 1 |
"""
|
| 2 |
+
Fixed Curriculum Analyzer - Better handling of incomplete data
|
|
|
|
| 3 |
"""
|
| 4 |
import pickle
|
|
|
|
| 5 |
import argparse
|
| 6 |
import networkx as nx
|
| 7 |
import re
|
| 8 |
+
from typing import Set, Dict
|
| 9 |
|
| 10 |
def get_course_level(cid):
|
| 11 |
"""Extracts the numerical part of a course ID for level checking."""
|
|
|
|
| 14 |
|
| 15 |
class CurriculumAnalyzer:
|
| 16 |
def __init__(self, graph_path, courses_path):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
print("📚 Loading raw curriculum data...")
|
| 18 |
+
with open(graph_path, 'rb') as f:
|
| 19 |
+
self.graph = pickle.load(f)
|
| 20 |
+
with open(courses_path, 'rb') as f:
|
| 21 |
+
self.courses = pickle.load(f)
|
| 22 |
+
|
| 23 |
+
# Merge course data into graph nodes
|
| 24 |
+
for course_id, course_data in self.courses.items():
|
| 25 |
+
if self.graph.has_node(course_id):
|
| 26 |
+
self.graph.nodes[course_id].update(course_data)
|
| 27 |
+
|
| 28 |
+
print(f"✅ Loaded {self.graph.number_of_nodes()} courses, {self.graph.number_of_edges()} edges")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
def pre_filter_graph(self):
|
| 31 |
+
"""Keeps only relevant subjects and removes labs/high-level courses."""
|
| 32 |
+
print("\n🧹 Pre-filtering graph...")
|
| 33 |
+
|
| 34 |
+
KEEP_SUBJECTS = {"CS", "DS", "IS", "CY", "MATH", "PHYS", "ENGW", "STAT", "EECE"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
+
nodes_to_remove = set()
|
| 37 |
for node, data in self.graph.nodes(data=True):
|
| 38 |
+
subject = data.get('subject', '')
|
| 39 |
+
name = data.get('name', '').lower()
|
| 40 |
level = get_course_level(node)
|
| 41 |
|
| 42 |
+
# Remove if:
|
| 43 |
+
# - Not in whitelist
|
| 44 |
+
# - Too advanced (5000+)
|
| 45 |
+
# - Lab/recitation/etc
|
| 46 |
+
if (subject not in KEEP_SUBJECTS or
|
| 47 |
+
level >= 5000 or
|
| 48 |
+
any(skip in name for skip in ['lab', 'recitation', 'seminar', 'practicum', 'co-op'])):
|
| 49 |
+
nodes_to_remove.add(node)
|
| 50 |
+
|
| 51 |
self.graph.remove_nodes_from(nodes_to_remove)
|
| 52 |
+
print(f"✅ Removed {len(nodes_to_remove)} irrelevant courses")
|
| 53 |
+
print(f" Remaining: {self.graph.number_of_nodes()} courses")
|
| 54 |
|
| 55 |
+
def fix_chains(self):
|
| 56 |
+
"""Adds critical prerequisite chains that might be missing."""
|
| 57 |
+
print("\n🔗 Validating and fixing critical prerequisite chains...")
|
| 58 |
+
|
| 59 |
+
critical_chains = {
|
| 60 |
+
("CS1800", "CS2800", "Discrete → Logic"),
|
| 61 |
+
("CS2500", "CS2510", "Fundies 1 → Fundies 2"),
|
| 62 |
+
("CS2510", "CS3500", "Fundies 2 → OOD"),
|
| 63 |
+
("CS2510", "CS3000", "Fundies 2 → Algorithms"),
|
| 64 |
+
("CS3000", "CS4100", "Algorithms → AI"), # NEW
|
| 65 |
+
("MATH1341", "MATH1342", "Calc 1 → Calc 2"),
|
| 66 |
+
("DS2000", "DS2500", "Prog w/ Data → Intermediate"),
|
| 67 |
+
("DS2500", "DS3500", "Intermediate → Advanced"),
|
| 68 |
+
("DS3500", "DS4400", "Advanced → ML1"), # NEW
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
added = 0
|
| 72 |
+
for prereq, course, desc in critical_chains:
|
| 73 |
+
if self.graph.has_node(prereq) and self.graph.has_node(course):
|
| 74 |
+
if not self.graph.has_edge(prereq, course):
|
| 75 |
+
self.graph.add_edge(prereq, course)
|
| 76 |
+
print(f" 🔧 FIXED: Added {prereq} → {course} ({desc})")
|
| 77 |
+
added += 1
|
| 78 |
+
|
| 79 |
+
if added == 0:
|
| 80 |
+
print(" ✅ All critical chains present")
|
| 81 |
|
| 82 |
+
def remove_spurious_chains(self):
|
| 83 |
+
"""Removes known incorrect prerequisite edges."""
|
| 84 |
+
print("\n🗑️ Removing spurious prerequisite chains...")
|
| 85 |
|
| 86 |
+
spurious_chains = {
|
| 87 |
+
("MATH1365", "CS2800"), # Not a real prereq
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
removed = 0
|
| 91 |
+
for prereq, course in spurious_chains:
|
| 92 |
+
if self.graph.has_edge(prereq, course):
|
| 93 |
+
self.graph.remove_edge(prereq, course)
|
| 94 |
+
print(f" ✅ REMOVED: {prereq} → {course}")
|
| 95 |
+
removed += 1
|
| 96 |
+
|
| 97 |
+
if removed == 0:
|
| 98 |
+
print(" ✅ No spurious chains found")
|
| 99 |
|
| 100 |
+
def calculate_and_add_complexity(self):
|
| 101 |
+
"""Calculates and adds complexity score to each course."""
|
| 102 |
+
print("\n🧮 Calculating complexity scores...")
|
| 103 |
+
|
| 104 |
+
for node in self.graph.nodes():
|
| 105 |
+
in_degree = self.graph.in_degree(node)
|
| 106 |
+
out_degree = self.graph.out_degree(node)
|
| 107 |
|
| 108 |
+
# Complexity heuristic: weighted by prerequisites and courses unlocked
|
| 109 |
+
score = (in_degree * 10) + (out_degree * 5)
|
| 110 |
+
nx.set_node_attributes(self.graph, {node: {'complexity': score}})
|
| 111 |
+
|
| 112 |
+
print("✅ Complexity scores calculated")
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
+
def validate_critical_courses(self) -> Dict[str, Set[str]]:
|
| 115 |
+
"""Check if all critical courses exist in the graph."""
|
| 116 |
+
print("\n🎯 Validating critical course coverage...")
|
| 117 |
+
|
| 118 |
+
required_courses = {
|
| 119 |
+
"foundations": {"CS1800", "CS2500", "CS2510", "CS2800"},
|
| 120 |
+
"core": {"CS3000", "CS3500", "CS3650", "CS3700", "CS3200"},
|
| 121 |
+
"ai_ml": {"CS4100", "DS4400", "CS4120", "DS4420", "CS4180", "DS4440"},
|
| 122 |
+
"systems": {"CS4730", "CS4400", "CS4500"}, # Removed often-missing courses
|
| 123 |
+
"security": {"CY2550", "CY3740", "CY4740", "CY4760"},
|
| 124 |
+
"math": {"MATH1341", "MATH1342", "MATH2331", "MATH3081"}, # No STAT courses at NEU
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
missing = {}
|
| 128 |
+
for category, courses in required_courses.items():
|
| 129 |
+
missing_in_cat = courses - set(self.graph.nodes())
|
| 130 |
+
if missing_in_cat:
|
| 131 |
+
missing[category] = missing_in_cat
|
| 132 |
+
print(f" ⚠️ {category}: Missing {missing_in_cat}")
|
| 133 |
+
else:
|
| 134 |
+
print(f" ✅ {category}: All courses present")
|
| 135 |
+
|
| 136 |
+
return missing
|
| 137 |
|
| 138 |
def save_enriched_graph(self, output_path):
|
| 139 |
"""Saves the final, clean, and enriched graph."""
|
| 140 |
+
print(f"\n💾 Saving cleaned graph to {output_path}...")
|
| 141 |
with open(output_path, 'wb') as f:
|
| 142 |
pickle.dump(self.graph, f)
|
| 143 |
+
print("✅ Graph saved")
|
| 144 |
+
|
| 145 |
+
# Save a summary report
|
| 146 |
+
report_path = output_path.replace('.pkl', '_report.txt')
|
| 147 |
+
with open(report_path, 'w') as f:
|
| 148 |
+
f.write("Curriculum Graph Analysis Report\n")
|
| 149 |
+
f.write("="*70 + "\n\n")
|
| 150 |
+
f.write(f"Total courses: {self.graph.number_of_nodes()}\n")
|
| 151 |
+
f.write(f"Total prerequisites: {self.graph.number_of_edges()}\n\n")
|
| 152 |
+
|
| 153 |
+
# Subject breakdown
|
| 154 |
+
from collections import defaultdict
|
| 155 |
+
subject_counts = defaultdict(int)
|
| 156 |
+
for node in self.graph.nodes():
|
| 157 |
+
subject = self.graph.nodes[node].get('subject', 'UNKNOWN')
|
| 158 |
+
subject_counts[subject] += 1
|
| 159 |
+
|
| 160 |
+
f.write("Subject breakdown:\n")
|
| 161 |
+
for subject in sorted(subject_counts.keys()):
|
| 162 |
+
f.write(f" {subject}: {subject_counts[subject]}\n")
|
| 163 |
+
|
| 164 |
+
print(f"✅ Report saved to {report_path}")
|
| 165 |
|
| 166 |
def main(args):
|
| 167 |
"""Main execution flow."""
|
| 168 |
analyzer = CurriculumAnalyzer(args.graph, args.courses)
|
|
|
|
|
|
|
| 169 |
analyzer.pre_filter_graph()
|
| 170 |
+
analyzer.fix_chains()
|
| 171 |
+
analyzer.remove_spurious_chains()
|
| 172 |
analyzer.calculate_and_add_complexity()
|
| 173 |
|
| 174 |
+
missing = analyzer.validate_critical_courses()
|
| 175 |
|
| 176 |
+
if missing:
|
| 177 |
+
print("\n⚠️ WARNING: Some critical courses are missing!")
|
| 178 |
+
print(" Consider re-scraping with additional terms or subjects.")
|
| 179 |
+
print(" Missing courses will be excluded from planning.")
|
| 180 |
+
|
| 181 |
+
analyzer.save_enriched_graph(args.output_graph)
|
| 182 |
+
print("\n✨ Analysis complete!")
|
| 183 |
|
| 184 |
if __name__ == "__main__":
|
| 185 |
+
parser = argparse.ArgumentParser(description="NEU Curriculum Analyzer - Cleans and validates data")
|
| 186 |
+
parser.add_argument('--graph', required=True, help="Path to RAW curriculum graph")
|
| 187 |
+
parser.add_argument('--courses', required=True, help="Path to RAW courses data")
|
| 188 |
+
parser.add_argument('--output-graph', default='neu_graph_clean.pkl', help="Output path")
|
| 189 |
args = parser.parse_args()
|
| 190 |
+
main(args)
|
src/curriculum_optimizer.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
"""
|
| 2 |
Fixed Hybrid Curriculum Optimizer
|
| 3 |
-
|
| 4 |
-
WITH MUTUAL EXCLUSION AND SEQUENCE VALIDATION
|
| 5 |
"""
|
| 6 |
import torch
|
| 7 |
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
|
|
@@ -27,10 +26,15 @@ class StudentProfile:
|
|
| 27 |
|
| 28 |
class HybridOptimizer:
|
| 29 |
"""
|
| 30 |
-
Fixed optimizer with
|
| 31 |
"""
|
| 32 |
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
COURSE_TRACKS = {
|
| 35 |
"physics": {
|
| 36 |
"engineering": ["PHYS1151", "PHYS1155"],
|
|
@@ -43,83 +47,56 @@ class HybridOptimizer:
|
|
| 43 |
}
|
| 44 |
}
|
| 45 |
|
| 46 |
-
# CONCENTRATION REQUIREMENTS - Structured with pick lists
|
| 47 |
CONCENTRATION_REQUIREMENTS = {
|
| 48 |
"ai_ml": {
|
| 49 |
"foundations": {
|
| 50 |
-
"required": ["CS1800", "CS2500", "CS2510", "CS2800"]
|
|
|
|
| 51 |
},
|
| 52 |
"core": {
|
| 53 |
"required": ["CS3000", "CS3500"],
|
| 54 |
-
"pick_1_from": ["CS3200", "CS3650", "
|
| 55 |
},
|
| 56 |
"concentration_specific": {
|
| 57 |
"required": ["CS4100", "DS4400"],
|
| 58 |
"pick_2_from": ["CS4120", "CS4180", "DS4420", "DS4440"],
|
| 59 |
-
"pick_1_systems": ["CS4730", "CS4700"
|
| 60 |
},
|
| 61 |
"math": {
|
| 62 |
"required": ["MATH1341", "MATH1342"],
|
| 63 |
-
"pick_1_from": ["MATH2331", "MATH3081"
|
| 64 |
}
|
| 65 |
},
|
| 66 |
"systems": {
|
| 67 |
-
"foundations": {
|
| 68 |
-
|
| 69 |
-
},
|
| 70 |
-
"
|
| 71 |
-
"required": ["CS3000", "CS3500", "CS3650"],
|
| 72 |
-
"pick_1_from": ["CS3700", "CS3200"]
|
| 73 |
-
},
|
| 74 |
-
"concentration_specific": {
|
| 75 |
-
"required": ["CS4700"],
|
| 76 |
-
"pick_2_from": ["CS4730", "CS4750", "CS4770"],
|
| 77 |
-
"pick_1_from": ["CS4400", "CS4500", "CS4520"]
|
| 78 |
-
},
|
| 79 |
-
"math": {
|
| 80 |
-
"required": ["MATH1341", "MATH1342"]
|
| 81 |
-
}
|
| 82 |
},
|
| 83 |
"security": {
|
| 84 |
-
"foundations": {
|
| 85 |
-
|
| 86 |
-
},
|
| 87 |
-
"
|
| 88 |
-
"required": ["CS3000", "CS3650", "CY2550"],
|
| 89 |
-
"pick_1_from": ["CS3700", "CS3500"]
|
| 90 |
-
},
|
| 91 |
-
"concentration_specific": {
|
| 92 |
-
"required": ["CY3740"],
|
| 93 |
-
"pick_2_from": ["CY4740", "CY4760", "CY4770"],
|
| 94 |
-
"pick_1_from": ["CS4700", "CS4730"]
|
| 95 |
-
},
|
| 96 |
-
"math": {
|
| 97 |
-
"required": ["MATH1342"],
|
| 98 |
-
"pick_1_from": ["MATH3527", "MATH3081"]
|
| 99 |
-
}
|
| 100 |
}
|
| 101 |
}
|
| 102 |
|
| 103 |
def __init__(self):
|
| 104 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 105 |
-
|
| 106 |
-
# Use smaller model for efficiency
|
| 107 |
self.model_name = "meta-llama/Llama-3.1-8B-Instruct"
|
| 108 |
self.embedding_model_name = 'BAAI/bge-large-en-v1.5'
|
| 109 |
-
|
| 110 |
self.llm = None
|
| 111 |
self.tokenizer = None
|
| 112 |
self.embedding_model = None
|
| 113 |
self.curriculum_graph = None
|
| 114 |
self.courses = {}
|
| 115 |
-
|
|
|
|
| 116 |
def load_models(self):
|
| 117 |
-
"""Load embedding model and optionally LLM"""
|
| 118 |
print("Loading embedding model...")
|
| 119 |
self.embedding_model = SentenceTransformer(self.embedding_model_name, device=self.device)
|
| 120 |
|
| 121 |
def load_llm(self):
|
| 122 |
-
"""Load LLM separately for when needed"""
|
| 123 |
if self.device.type == 'cuda' and self.llm is None:
|
| 124 |
print("Loading LLM for intelligent planning...")
|
| 125 |
quant_config = BitsAndBytesConfig(
|
|
@@ -134,272 +111,218 @@ class HybridOptimizer:
|
|
| 134 |
quantization_config=quant_config,
|
| 135 |
device_map="auto"
|
| 136 |
)
|
| 137 |
-
|
| 138 |
def load_data(self, graph: nx.DiGraph):
|
| 139 |
-
"""Load and preprocess curriculum data"""
|
| 140 |
self.curriculum_graph = graph
|
| 141 |
self.courses = dict(graph.nodes(data=True))
|
| 142 |
-
|
| 143 |
-
# Filter valid courses
|
| 144 |
self.valid_courses = []
|
| 145 |
course_texts = []
|
| 146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
for cid, data in self.courses.items():
|
| 148 |
-
# Skip labs/recitations
|
| 149 |
name = data.get('name', '')
|
| 150 |
-
if any(skip in name for skip in ['
|
| 151 |
continue
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
if
|
| 155 |
continue
|
| 156 |
|
| 157 |
self.valid_courses.append(cid)
|
| 158 |
course_texts.append(f"{name} {data.get('description', '')}")
|
| 159 |
|
| 160 |
-
|
|
|
|
|
|
|
|
|
|
| 161 |
print(f"Computing embeddings for {len(self.valid_courses)} courses...")
|
| 162 |
-
self.course_embeddings = self.embedding_model.encode(
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
)
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
for
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
def _validate_sequence(self, selected: List[str], candidate: str) -> bool:
|
| 177 |
-
"""Ensure course sequences stay consistent - no mixing tracks"""
|
| 178 |
for track_type, tracks in self.COURSE_TRACKS.items():
|
| 179 |
for track_name, sequence in tracks.items():
|
| 180 |
if candidate in sequence:
|
| 181 |
-
# Check if any course from different track already selected
|
| 182 |
for other_track, other_seq in tracks.items():
|
| 183 |
-
if other_track != track_name:
|
| 184 |
-
|
| 185 |
-
return False # Don't mix sequences
|
| 186 |
return True
|
| 187 |
-
|
| 188 |
-
def
|
| 189 |
-
"""
|
| 190 |
-
issues = {
|
| 191 |
-
"errors": [],
|
| 192 |
-
"warnings": [],
|
| 193 |
-
"info": []
|
| 194 |
-
}
|
| 195 |
-
|
| 196 |
-
all_courses = []
|
| 197 |
-
for year_key, year_data in plan.items():
|
| 198 |
-
if isinstance(year_data, dict) and year_key.startswith("year_"):
|
| 199 |
-
all_courses.extend(year_data.get("fall", []))
|
| 200 |
-
all_courses.extend(year_data.get("spring", []))
|
| 201 |
-
|
| 202 |
-
# Check for sequence mixing
|
| 203 |
-
for track_type, tracks in self.COURSE_TRACKS.items():
|
| 204 |
-
tracks_used = set()
|
| 205 |
-
for track_name, courses in tracks.items():
|
| 206 |
-
if any(c in all_courses for c in courses):
|
| 207 |
-
tracks_used.add(track_name)
|
| 208 |
-
|
| 209 |
-
if len(tracks_used) > 1:
|
| 210 |
-
issues["errors"].append(
|
| 211 |
-
f"Mixed {track_type} tracks: {', '.join(tracks_used)}. Must choose one sequence."
|
| 212 |
-
)
|
| 213 |
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
for year in range(1, 5):
|
| 217 |
-
for sem in ["fall", "spring"]:
|
| 218 |
-
year_key = f"year_{year}"
|
| 219 |
-
if year_key in plan:
|
| 220 |
-
courses = plan[year_key].get(sem, [])
|
| 221 |
-
for course in courses:
|
| 222 |
-
if course in self.curriculum_graph:
|
| 223 |
-
prereqs = set(self.curriculum_graph.predecessors(course))
|
| 224 |
-
missing = prereqs - completed
|
| 225 |
-
if missing:
|
| 226 |
-
issues["errors"].append(
|
| 227 |
-
f"{course} in Year {year} {sem} missing prereqs: {', '.join(missing)}"
|
| 228 |
-
)
|
| 229 |
-
completed.update(courses)
|
| 230 |
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
def generate_llm_plan(self, student: StudentProfile) -> Dict:
|
| 234 |
-
"""Generate AI-powered plan with LLM course selection"""
|
| 235 |
-
print("--- Generating AI-Optimized Plan ---")
|
| 236 |
|
| 237 |
-
|
| 238 |
-
self.load_llm()
|
| 239 |
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
|
| 244 |
-
#
|
| 245 |
-
|
| 246 |
-
|
| 247 |
|
| 248 |
-
#
|
| 249 |
-
|
|
|
|
| 250 |
|
| 251 |
-
#
|
| 252 |
-
|
|
|
|
|
|
|
| 253 |
|
| 254 |
-
#
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
print(f"Plan validation errors: {validation['errors']}")
|
| 258 |
-
# Try to fix errors
|
| 259 |
-
plan = self._fix_plan_errors(plan, validation, student)
|
| 260 |
|
| 261 |
-
#
|
| 262 |
-
|
| 263 |
|
| 264 |
-
return
|
| 265 |
-
|
| 266 |
def generate_simple_plan(self, student: StudentProfile) -> Dict:
|
| 267 |
-
"""Generate rule-based plan that considers student preferences"""
|
| 268 |
print("--- Generating Enhanced Rule-Based Plan ---")
|
|
|
|
| 269 |
return self.generate_enhanced_rule_plan(student)
|
| 270 |
-
|
| 271 |
-
def generate_enhanced_rule_plan(self, student: StudentProfile) -> Dict:
|
| 272 |
-
"""Enhanced rule-based plan with proper sequencing"""
|
| 273 |
|
| 274 |
-
|
|
|
|
| 275 |
track = self._identify_track(student)
|
| 276 |
-
|
| 277 |
-
# Step 2: Build structured plan
|
| 278 |
plan = self._build_structured_plan(student, track, None)
|
|
|
|
| 279 |
|
| 280 |
-
# Step 3: Validate
|
| 281 |
-
validation = self.validate_plan(plan)
|
| 282 |
if validation["errors"]:
|
| 283 |
plan = self._fix_plan_errors(plan, validation, student)
|
| 284 |
-
validation = self.validate_plan(plan)
|
| 285 |
|
| 286 |
-
# Step 4: Generate explanation
|
| 287 |
difficulty_level = self._map_difficulty(student.preferred_difficulty)
|
| 288 |
courses_per_semester = self._calculate_course_load(student.time_commitment)
|
| 289 |
explanation = f"Personalized {track} track ({difficulty_level} difficulty, {courses_per_semester} courses/semester)"
|
| 290 |
|
| 291 |
return self._finalize_plan(plan, explanation, validation)
|
| 292 |
-
|
| 293 |
-
def
|
| 294 |
-
|
| 295 |
-
student
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
|
| 301 |
completed = set(student.completed_courses)
|
| 302 |
plan = {}
|
| 303 |
requirements = self.CONCENTRATION_REQUIREMENTS.get(track, self.CONCENTRATION_REQUIREMENTS["ai_ml"])
|
| 304 |
|
| 305 |
-
# Determine course load
|
| 306 |
courses_per_semester = self._calculate_course_load(student.time_commitment)
|
| 307 |
|
| 308 |
-
#
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
# Build queue of required courses
|
| 313 |
for category, reqs in requirements.items():
|
| 314 |
if "required" in reqs:
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
# Handle pick lists
|
| 318 |
for key, courses in reqs.items():
|
| 319 |
if key.startswith("pick_"):
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
# Handle course track commitments (physics/calculus)
|
| 328 |
-
physics_track = self._get_track_commitment(completed, "physics")
|
| 329 |
-
calc_track = self._get_track_commitment(completed, "calculus")
|
| 330 |
-
|
| 331 |
-
# Build semesters
|
| 332 |
for sem_num in range(1, 9):
|
| 333 |
year = ((sem_num - 1) // 2) + 1
|
| 334 |
-
is_fall = (sem_num % 2) == 1
|
| 335 |
|
| 336 |
-
|
| 337 |
-
selected = []
|
| 338 |
|
| 339 |
-
#
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
if
|
| 343 |
-
|
| 344 |
-
else:
|
| 345 |
-
physics_track = "science"
|
| 346 |
|
| 347 |
-
#
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
selected.append(course)
|
| 352 |
-
required_queue.remove(course)
|
| 353 |
-
available.remove(course)
|
| 354 |
-
|
| 355 |
-
# Priority 2: Handle pick lists
|
| 356 |
-
for pick_list in pick_lists:
|
| 357 |
-
if len(selected) >= courses_per_semester:
|
| 358 |
-
break
|
| 359 |
-
|
| 360 |
-
# Filter available courses from this pick list
|
| 361 |
-
available_from_list = [c for c in pick_list["courses"] if c in available]
|
| 362 |
|
| 363 |
-
#
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
# Fill remaining slots
|
| 374 |
-
for course in available_from_list[:pick_list["num_to_pick"]]:
|
| 375 |
-
if len(selected) < courses_per_semester and course in available:
|
| 376 |
-
if self._validate_sequence(selected, course):
|
| 377 |
-
selected.append(course)
|
| 378 |
-
available.remove(course)
|
| 379 |
-
pick_list["num_to_pick"] -= 1
|
| 380 |
-
|
| 381 |
-
# Priority 3: Track-specific courses (physics/calc)
|
| 382 |
-
if physics_track and year <= 2:
|
| 383 |
-
physics_courses = self.COURSE_TRACKS["physics"].get(physics_track, [])
|
| 384 |
-
for course in physics_courses:
|
| 385 |
-
if course in available and len(selected) < courses_per_semester:
|
| 386 |
-
selected.append(course)
|
| 387 |
-
available.remove(course)
|
| 388 |
-
|
| 389 |
-
# Priority 4: Fill with electives
|
| 390 |
-
if len(selected) < courses_per_semester and available:
|
| 391 |
-
semantic_scores = self._compute_semantic_scores(student)
|
| 392 |
-
electives = sorted(
|
| 393 |
-
available,
|
| 394 |
-
key=lambda c: self._score_elective(c, semantic_scores, completed),
|
| 395 |
reverse=True
|
| 396 |
)
|
| 397 |
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
|
| 404 |
# Add to plan
|
| 405 |
if selected:
|
|
@@ -407,310 +330,184 @@ class HybridOptimizer:
|
|
| 407 |
if year_key not in plan:
|
| 408 |
plan[year_key] = {}
|
| 409 |
|
| 410 |
-
sem_type = 'fall' if
|
| 411 |
-
plan[year_key][sem_type] = selected
|
| 412 |
completed.update(selected)
|
| 413 |
|
| 414 |
return plan
|
| 415 |
-
|
| 416 |
-
def
|
| 417 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 418 |
|
| 419 |
-
#
|
| 420 |
-
|
| 421 |
-
|
|
|
|
|
|
|
|
|
|
| 422 |
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
if "Mixed physics" in error:
|
| 426 |
-
# Force engineering track (most common)
|
| 427 |
-
self.COURSE_TRACKS["physics"] = {"engineering": ["PHYS1151", "PHYS1155"]}
|
| 428 |
-
elif "Mixed calculus" in error:
|
| 429 |
-
# Force standard calc
|
| 430 |
-
self.COURSE_TRACKS["calculus"] = {"standard": ["MATH1341", "MATH1342"]}
|
| 431 |
|
| 432 |
-
|
| 433 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 434 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 435 |
return plan
|
| 436 |
-
|
| 437 |
-
def _get_llm_course_suggestions(self, student: StudentProfile, track: str) -> List[str]:
|
| 438 |
-
"""Use LLM to suggest personalized course priorities"""
|
| 439 |
-
|
| 440 |
-
requirements = self.CONCENTRATION_REQUIREMENTS.get(track, self.CONCENTRATION_REQUIREMENTS["ai_ml"])
|
| 441 |
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
|
|
|
| 445 |
for key, courses in reqs.items():
|
| 446 |
-
if key.startswith("pick_"):
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
- Career Goal: {student.career_goals}
|
| 461 |
-
- Interests: {', '.join(student.interests)}
|
| 462 |
-
- Time Commitment: {student.time_commitment} hours/week
|
| 463 |
-
- Preferred Difficulty: {student.preferred_difficulty}
|
| 464 |
-
- Current GPA: {student.current_gpa}
|
| 465 |
-
|
| 466 |
-
Available Courses:
|
| 467 |
-
{chr(10).join(course_options)}
|
| 468 |
-
|
| 469 |
-
Return ONLY the top 5 course IDs in order of priority, one per line. Example:
|
| 470 |
-
CS4100
|
| 471 |
-
DS4400
|
| 472 |
-
CS4120
|
| 473 |
-
CS4180
|
| 474 |
-
DS4440"""
|
| 475 |
-
|
| 476 |
try:
|
| 477 |
-
inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=
|
| 478 |
-
|
| 479 |
with torch.no_grad():
|
| 480 |
-
outputs = self.llm.generate(
|
| 481 |
-
**inputs,
|
| 482 |
-
max_new_tokens=100,
|
| 483 |
-
temperature=0.3,
|
| 484 |
-
do_sample=True,
|
| 485 |
-
pad_token_id=self.tokenizer.eos_token_id
|
| 486 |
-
)
|
| 487 |
-
|
| 488 |
response = self.tokenizer.decode(outputs[0][len(inputs['input_ids'][0]):], skip_special_tokens=True)
|
| 489 |
-
|
| 490 |
-
# Extract course IDs
|
| 491 |
-
suggested_courses = []
|
| 492 |
-
for line in response.strip().split('\n'):
|
| 493 |
-
line = line.strip()
|
| 494 |
-
match = re.search(r'([A-Z]{2,4}\d{4})', line)
|
| 495 |
-
if match:
|
| 496 |
-
suggested_courses.append(match.group(1))
|
| 497 |
-
|
| 498 |
return suggested_courses[:5]
|
| 499 |
-
|
| 500 |
except Exception as e:
|
| 501 |
print(f"LLM suggestion failed: {e}")
|
| 502 |
-
return all_options[:5]
|
| 503 |
-
|
| 504 |
def _map_difficulty(self, preferred_difficulty: str) -> str:
|
| 505 |
-
"""
|
| 506 |
-
|
| 507 |
-
"easy": "easy",
|
| 508 |
-
"moderate": "medium",
|
| 509 |
-
"challenging": "hard"
|
| 510 |
-
}
|
| 511 |
-
return mapping.get(preferred_difficulty.lower(), "medium")
|
| 512 |
-
|
| 513 |
def _calculate_course_load(self, time_commitment: int) -> int:
|
| 514 |
-
|
| 515 |
-
if time_commitment
|
| 516 |
-
|
| 517 |
-
elif time_commitment < 30:
|
| 518 |
-
return 4 # Standard
|
| 519 |
-
elif time_commitment < 40:
|
| 520 |
-
return 4 # Standard-heavy
|
| 521 |
-
else:
|
| 522 |
-
return 4 # Max (prerequisites limit anyway)
|
| 523 |
-
|
| 524 |
-
def _identify_track(self, student: StudentProfile) -> str:
|
| 525 |
-
"""Use embeddings to identify best track"""
|
| 526 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 527 |
profile_text = f"{student.career_goals} {' '.join(student.interests)}"
|
| 528 |
profile_emb = self.embedding_model.encode(profile_text, convert_to_tensor=True)
|
| 529 |
-
|
| 530 |
track_descriptions = {
|
| 531 |
-
"ai_ml": "artificial intelligence machine learning deep learning neural networks data science
|
| 532 |
-
"systems": "operating systems distributed systems networks compilers databases performance
|
| 533 |
-
"security": "cybersecurity cryptography
|
| 534 |
}
|
| 535 |
-
|
| 536 |
-
best_track = "ai_ml"
|
| 537 |
-
best_score = -1
|
| 538 |
-
|
| 539 |
for track, description in track_descriptions.items():
|
| 540 |
track_emb = self.embedding_model.encode(description, convert_to_tensor=True)
|
| 541 |
score = float(util.cos_sim(profile_emb, track_emb))
|
| 542 |
if score > best_score:
|
| 543 |
-
best_score = score
|
| 544 |
-
best_track = track
|
| 545 |
-
|
| 546 |
return best_track
|
| 547 |
-
|
| 548 |
-
def _compute_semantic_scores(self, student: StudentProfile) -> Dict[str, float]:
|
| 549 |
-
"""Compute semantic alignment for all courses"""
|
| 550 |
|
|
|
|
| 551 |
query_text = f"{student.career_goals} {' '.join(student.interests)}"
|
| 552 |
query_emb = self.embedding_model.encode(query_text, convert_to_tensor=True)
|
| 553 |
-
|
| 554 |
similarities = util.cos_sim(query_emb, self.course_embeddings)[0]
|
|
|
|
| 555 |
|
| 556 |
-
scores = {}
|
| 557 |
-
for idx, cid in enumerate(self.valid_courses):
|
| 558 |
-
scores[cid] = float(similarities[idx])
|
| 559 |
-
|
| 560 |
-
return scores
|
| 561 |
-
|
| 562 |
-
def _get_available_courses(self, completed: Set[str], year: int) -> List[str]:
|
| 563 |
-
"""Get schedulable courses with year restrictions"""
|
| 564 |
-
|
| 565 |
-
available = []
|
| 566 |
-
max_level = 2999 if year == 1 else 3999 if year == 2 else 9999
|
| 567 |
-
|
| 568 |
-
for cid in self.valid_courses:
|
| 569 |
-
if cid in completed:
|
| 570 |
-
continue
|
| 571 |
-
|
| 572 |
-
if self._get_level(cid) > max_level:
|
| 573 |
-
continue
|
| 574 |
-
|
| 575 |
-
# Check prerequisites
|
| 576 |
-
if cid in self.curriculum_graph:
|
| 577 |
-
prereqs = set(self.curriculum_graph.predecessors(cid))
|
| 578 |
-
if not prereqs.issubset(completed):
|
| 579 |
-
continue
|
| 580 |
-
|
| 581 |
-
available.append(cid)
|
| 582 |
-
|
| 583 |
-
return available
|
| 584 |
-
|
| 585 |
-
def _score_elective(
|
| 586 |
-
self,
|
| 587 |
-
course_id: str,
|
| 588 |
-
semantic_scores: Dict[str, float],
|
| 589 |
-
completed: Set[str]
|
| 590 |
-
) -> float:
|
| 591 |
-
"""Basic elective scoring"""
|
| 592 |
-
|
| 593 |
-
score = 0.0
|
| 594 |
-
|
| 595 |
-
# Semantic alignment (50%)
|
| 596 |
-
score += semantic_scores.get(course_id, 0) * 0.5
|
| 597 |
-
|
| 598 |
-
# Unlocks future courses (30%)
|
| 599 |
-
if course_id in self.curriculum_graph:
|
| 600 |
-
unlocks = len(list(self.curriculum_graph.successors(course_id)))
|
| 601 |
-
score += min(unlocks / 5, 1.0) * 0.3
|
| 602 |
-
|
| 603 |
-
# Subject relevance (20%)
|
| 604 |
-
subject = self.courses.get(course_id, {}).get('subject', '')
|
| 605 |
-
subject_scores = {"CS": 1.0, "DS": 0.9, "IS": 0.6, "MATH": 0.7, "CY": 0.8}
|
| 606 |
-
score += subject_scores.get(subject, 0.3) * 0.2
|
| 607 |
-
|
| 608 |
-
return score
|
| 609 |
-
|
| 610 |
def _generate_explanation(self, student: StudentProfile, plan: Dict, track: str, plan_type: str) -> str:
|
| 611 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 612 |
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
total_courses = sum(
|
| 618 |
-
len(plan.get(f"year_{y}", {}).get(sem, []))
|
| 619 |
-
for y in range(1, 5)
|
| 620 |
-
for sem in ["fall", "spring"]
|
| 621 |
-
)
|
| 622 |
-
|
| 623 |
-
prompt = f"""Explain this curriculum plan in 1-2 sentences:
|
| 624 |
-
Plan Type: {plan_type}
|
| 625 |
-
Track: {track}
|
| 626 |
-
Student Goal: {student.career_goals}
|
| 627 |
-
Interests: {', '.join(student.interests[:2])}
|
| 628 |
-
Difficulty: {student.preferred_difficulty}
|
| 629 |
-
Time Commitment: {student.time_commitment}h/week
|
| 630 |
-
Total Courses: {total_courses}
|
| 631 |
|
| 632 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 633 |
|
| 634 |
-
try:
|
| 635 |
-
inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True).to(self.device)
|
| 636 |
-
|
| 637 |
-
with torch.no_grad():
|
| 638 |
-
outputs = self.llm.generate(
|
| 639 |
-
**inputs,
|
| 640 |
-
max_new_tokens=150,
|
| 641 |
-
temperature=0.7,
|
| 642 |
-
do_sample=True,
|
| 643 |
-
pad_token_id=self.tokenizer.eos_token_id
|
| 644 |
-
)
|
| 645 |
-
|
| 646 |
-
explanation = self.tokenizer.decode(outputs[0][len(inputs['input_ids'][0]):], skip_special_tokens=True)
|
| 647 |
-
return explanation.strip()
|
| 648 |
-
|
| 649 |
-
except Exception as e:
|
| 650 |
-
print(f"Explanation generation failed: {e}")
|
| 651 |
-
return f"{plan_type} {track} track plan optimized for {student.career_goals}"
|
| 652 |
-
|
| 653 |
-
def _get_level(self, course_id: str) -> int:
|
| 654 |
-
"""Extract course level"""
|
| 655 |
-
match = re.search(r'\d+', course_id)
|
| 656 |
-
return int(match.group()) if match else 9999
|
| 657 |
-
|
| 658 |
def _finalize_plan(self, plan: Dict, explanation: str, validation: Dict = None) -> Dict:
|
| 659 |
-
""
|
| 660 |
-
|
| 661 |
-
structured = {
|
| 662 |
-
"reasoning": explanation,
|
| 663 |
-
"validation": validation if validation else {"errors": [], "warnings": [], "info": []}
|
| 664 |
-
}
|
| 665 |
-
|
| 666 |
-
# Ensure all years present
|
| 667 |
for year in range(1, 5):
|
| 668 |
year_key = f"year_{year}"
|
| 669 |
-
|
| 670 |
-
plan
|
| 671 |
-
|
| 672 |
-
structured[year_key] = {
|
| 673 |
-
"fall": plan[year_key].get("fall", []),
|
| 674 |
-
"spring": plan[year_key].get("spring", []),
|
| 675 |
"summer": "co-op" if year in [2, 3] else []
|
| 676 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 677 |
|
| 678 |
-
|
| 679 |
-
complexities = []
|
| 680 |
-
for year_key in structured:
|
| 681 |
-
if year_key.startswith("year_"):
|
| 682 |
-
for sem in ["fall", "spring"]:
|
| 683 |
-
courses = structured[year_key].get(sem, [])
|
| 684 |
-
if courses:
|
| 685 |
-
sem_complexity = sum(
|
| 686 |
-
self.courses.get(c, {}).get('complexity', 50)
|
| 687 |
-
for c in courses
|
| 688 |
-
)
|
| 689 |
-
complexities.append(sem_complexity)
|
| 690 |
-
|
| 691 |
-
structured["complexity_analysis"] = {
|
| 692 |
"average_semester_complexity": float(np.mean(complexities)) if complexities else 0,
|
| 693 |
"peak_semester_complexity": float(np.max(complexities)) if complexities else 0,
|
| 694 |
"total_complexity": float(np.sum(complexities)) if complexities else 0,
|
| 695 |
"balance_score (std_dev)": float(np.std(complexities)) if complexities else 0
|
| 696 |
}
|
| 697 |
-
|
| 698 |
-
# Add metadata
|
| 699 |
-
structured["metadata"] = {
|
| 700 |
"generated": datetime.now().isoformat(),
|
| 701 |
"valid": len(validation.get("errors", [])) == 0 if validation else True,
|
| 702 |
-
"has_warnings": len(validation.get("warnings", [])) > 0 if validation else False
|
| 703 |
}
|
| 704 |
-
|
| 705 |
-
return {"pathway": structured}
|
| 706 |
|
| 707 |
-
# Backward compatibility wrapper
|
| 708 |
class CurriculumOptimizer(HybridOptimizer):
|
| 709 |
-
"""
|
| 710 |
-
|
| 711 |
def __init__(self):
|
| 712 |
super().__init__()
|
| 713 |
|
| 714 |
def generate_plan(self, student: StudentProfile) -> Dict:
|
| 715 |
-
"""Default plan generation - uses enhanced rules"""
|
| 716 |
return self.generate_enhanced_rule_plan(student)
|
|
|
|
| 1 |
"""
|
| 2 |
Fixed Hybrid Curriculum Optimizer
|
| 3 |
+
WITH PROPER COURSE DISCOVERY, SUBJECT-AWARE SCORING, AND CONCENTRATION FOCUS
|
|
|
|
| 4 |
"""
|
| 5 |
import torch
|
| 6 |
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
|
|
|
|
| 26 |
|
| 27 |
class HybridOptimizer:
|
| 28 |
"""
|
| 29 |
+
Fixed optimizer with subject-aware scoring and concentration focus
|
| 30 |
"""
|
| 31 |
|
| 32 |
+
EQUIVALENCY_GROUPS = [
|
| 33 |
+
{"MATH1341", "MATH1241", "MATH1231"}, # Calculus 1
|
| 34 |
+
{"MATH1342", "MATH1242"}, # Calculus 2
|
| 35 |
+
{"PHYS1151", "PHYS1161", "PHYS1145"}, # Physics 1
|
| 36 |
+
{"PHYS1155", "PHYS1165", "PHYS1147"}, # Physics 2
|
| 37 |
+
]
|
| 38 |
COURSE_TRACKS = {
|
| 39 |
"physics": {
|
| 40 |
"engineering": ["PHYS1151", "PHYS1155"],
|
|
|
|
| 47 |
}
|
| 48 |
}
|
| 49 |
|
|
|
|
| 50 |
CONCENTRATION_REQUIREMENTS = {
|
| 51 |
"ai_ml": {
|
| 52 |
"foundations": {
|
| 53 |
+
"required": ["CS1800", "CS2500", "CS2510", "CS2800"],
|
| 54 |
+
"sequence": True
|
| 55 |
},
|
| 56 |
"core": {
|
| 57 |
"required": ["CS3000", "CS3500"],
|
| 58 |
+
"pick_1_from": ["CS3200", "CS3650", "CS5700"]
|
| 59 |
},
|
| 60 |
"concentration_specific": {
|
| 61 |
"required": ["CS4100", "DS4400"],
|
| 62 |
"pick_2_from": ["CS4120", "CS4180", "DS4420", "DS4440"],
|
| 63 |
+
"pick_1_systems": ["CS4730", "CS4700"]
|
| 64 |
},
|
| 65 |
"math": {
|
| 66 |
"required": ["MATH1341", "MATH1342"],
|
| 67 |
+
"pick_1_from": ["MATH2331", "MATH3081"]
|
| 68 |
}
|
| 69 |
},
|
| 70 |
"systems": {
|
| 71 |
+
"foundations": { "required": ["CS1800", "CS2500", "CS2510", "CS2800"] },
|
| 72 |
+
"core": { "required": ["CS3000", "CS3500", "CS3650"], "pick_1_from": ["CS5700", "CS3200"] },
|
| 73 |
+
"concentration_specific": { "required": ["CS4700"], "pick_2_from": ["CS4730"], "pick_1_from": ["CS4400", "CS4500", "CS4520"] },
|
| 74 |
+
"math": { "required": ["MATH1341", "MATH1342"] }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
},
|
| 76 |
"security": {
|
| 77 |
+
"foundations": { "required": ["CS1800", "CS2500", "CS2510", "CS2800"] },
|
| 78 |
+
"core": { "required": ["CS3000", "CS3650", "CY2550"], "pick_1_from": ["CS5700", "CS3500"] },
|
| 79 |
+
"concentration_specific": { "required": ["CY3740"], "pick_2_from": ["CY4740", "CY4760", "CY4770"], "pick_1_from": ["CS4700", "CS4730"] },
|
| 80 |
+
"math": { "required": ["MATH1342"], "pick_1_from": ["MATH3527", "MATH3081"] }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
}
|
| 82 |
}
|
| 83 |
|
| 84 |
def __init__(self):
|
| 85 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
|
|
|
| 86 |
self.model_name = "meta-llama/Llama-3.1-8B-Instruct"
|
| 87 |
self.embedding_model_name = 'BAAI/bge-large-en-v1.5'
|
|
|
|
| 88 |
self.llm = None
|
| 89 |
self.tokenizer = None
|
| 90 |
self.embedding_model = None
|
| 91 |
self.curriculum_graph = None
|
| 92 |
self.courses = {}
|
| 93 |
+
self.current_student = None
|
| 94 |
+
|
| 95 |
def load_models(self):
|
|
|
|
| 96 |
print("Loading embedding model...")
|
| 97 |
self.embedding_model = SentenceTransformer(self.embedding_model_name, device=self.device)
|
| 98 |
|
| 99 |
def load_llm(self):
|
|
|
|
| 100 |
if self.device.type == 'cuda' and self.llm is None:
|
| 101 |
print("Loading LLM for intelligent planning...")
|
| 102 |
quant_config = BitsAndBytesConfig(
|
|
|
|
| 111 |
quantization_config=quant_config,
|
| 112 |
device_map="auto"
|
| 113 |
)
|
| 114 |
+
|
| 115 |
def load_data(self, graph: nx.DiGraph):
|
|
|
|
| 116 |
self.curriculum_graph = graph
|
| 117 |
self.courses = dict(graph.nodes(data=True))
|
| 118 |
+
UNDERGRAD_ACCESSIBLE_GRAD = {"CS5700", "CY5700", "DS5110", "CS5010"}
|
|
|
|
| 119 |
self.valid_courses = []
|
| 120 |
course_texts = []
|
| 121 |
|
| 122 |
+
concentration_courses = set()
|
| 123 |
+
for track_reqs in self.CONCENTRATION_REQUIREMENTS.values():
|
| 124 |
+
for category, reqs in track_reqs.items():
|
| 125 |
+
if isinstance(reqs, dict):
|
| 126 |
+
for key, courses in reqs.items():
|
| 127 |
+
if isinstance(courses, list):
|
| 128 |
+
concentration_courses.update(courses)
|
| 129 |
+
|
| 130 |
for cid, data in self.courses.items():
|
|
|
|
| 131 |
name = data.get('name', '')
|
| 132 |
+
if not name or name.strip() == '' or any(skip in name.lower() for skip in ['lab', 'recitation', 'seminar', 'practicum']):
|
| 133 |
continue
|
| 134 |
+
|
| 135 |
+
course_level = self._get_level(cid)
|
| 136 |
+
if course_level >= 5000 and cid not in UNDERGRAD_ACCESSIBLE_GRAD:
|
| 137 |
continue
|
| 138 |
|
| 139 |
self.valid_courses.append(cid)
|
| 140 |
course_texts.append(f"{name} {data.get('description', '')}")
|
| 141 |
|
| 142 |
+
missing_required = concentration_courses - set(self.valid_courses)
|
| 143 |
+
if missing_required:
|
| 144 |
+
print(f"\n⚠️ WARNING: {len(missing_required)} required courses missing from graph: {sorted(missing_required)}\n")
|
| 145 |
+
|
| 146 |
print(f"Computing embeddings for {len(self.valid_courses)} courses...")
|
| 147 |
+
self.course_embeddings = self.embedding_model.encode(course_texts, convert_to_tensor=True, show_progress_bar=True)
|
| 148 |
+
print(f"\nTotal valid courses: {len(self.valid_courses)}")
|
| 149 |
+
|
| 150 |
+
def _get_level(self, course_id: str) -> int:
|
| 151 |
+
match = re.search(r'\d+', course_id)
|
| 152 |
+
return int(match.group()) if match else 9999
|
| 153 |
+
|
| 154 |
+
def _get_completed_with_equivalents(self, completed: Set[str]) -> Set[str]:
|
| 155 |
+
expanded_completed = completed.copy()
|
| 156 |
+
for course in completed:
|
| 157 |
+
for group in self.EQUIVALENCY_GROUPS:
|
| 158 |
+
if course in group:
|
| 159 |
+
expanded_completed.update(group)
|
| 160 |
+
return expanded_completed
|
| 161 |
+
|
| 162 |
+
def _can_take_course(self, course_id: str, completed: Set[str]) -> bool:
|
| 163 |
+
effective_completed = self._get_completed_with_equivalents(completed)
|
| 164 |
+
if course_id not in self.curriculum_graph:
|
| 165 |
+
return True
|
| 166 |
+
prereqs = set(self.curriculum_graph.predecessors(course_id))
|
| 167 |
+
return prereqs.issubset(effective_completed)
|
| 168 |
+
|
| 169 |
def _validate_sequence(self, selected: List[str], candidate: str) -> bool:
|
|
|
|
| 170 |
for track_type, tracks in self.COURSE_TRACKS.items():
|
| 171 |
for track_name, sequence in tracks.items():
|
| 172 |
if candidate in sequence:
|
|
|
|
| 173 |
for other_track, other_seq in tracks.items():
|
| 174 |
+
if other_track != track_name and any(c in selected for c in other_seq):
|
| 175 |
+
return False
|
|
|
|
| 176 |
return True
|
| 177 |
+
|
| 178 |
+
def _score_course(self, course_id: str, semantic_scores: Dict[str, float], required_set: Set[str], picklist_set: Set[str]) -> float:
|
| 179 |
+
"""FIXED: Proper scoring with IS heavy penalty"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
|
| 181 |
+
if course_id not in self.courses or not self.courses[course_id].get('name', '').strip():
|
| 182 |
+
return -10000.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
+
course_data = self.courses[course_id]
|
| 185 |
+
subject = course_data.get('subject', '')
|
|
|
|
|
|
|
|
|
|
| 186 |
|
| 187 |
+
score = 0.0
|
|
|
|
| 188 |
|
| 189 |
+
# Subject bonuses/penalties
|
| 190 |
+
if subject in ["CS", "DS", "CY"]:
|
| 191 |
+
score += 300.0
|
| 192 |
+
elif subject == "MATH":
|
| 193 |
+
score += 100.0
|
| 194 |
+
else:
|
| 195 |
+
score -= 1000.0 # Heavy penalty for everything else (including IS)
|
| 196 |
|
| 197 |
+
# Required courses: massive boost
|
| 198 |
+
if course_id in required_set:
|
| 199 |
+
score += 10000.0 # INCREASED from 1000
|
| 200 |
|
| 201 |
+
# Pick-list courses: high boost
|
| 202 |
+
if course_id in picklist_set:
|
| 203 |
+
score += 5000.0 # INCREASED from 500
|
| 204 |
|
| 205 |
+
# Unlocking factor (reduced weight)
|
| 206 |
+
if course_id in self.curriculum_graph:
|
| 207 |
+
unlocks = self.curriculum_graph.out_degree(course_id)
|
| 208 |
+
score += min(unlocks, 5) * 2.0 # REDUCED
|
| 209 |
|
| 210 |
+
# Level preference
|
| 211 |
+
level = self._get_level(course_id)
|
| 212 |
+
score -= (level / 100.0)
|
|
|
|
|
|
|
|
|
|
| 213 |
|
| 214 |
+
# Semantic alignment (reduced weight)
|
| 215 |
+
score += semantic_scores.get(course_id, 0.0) * 5.0 # REDUCED from 15
|
| 216 |
|
| 217 |
+
return score
|
| 218 |
+
|
| 219 |
def generate_simple_plan(self, student: StudentProfile) -> Dict:
|
|
|
|
| 220 |
print("--- Generating Enhanced Rule-Based Plan ---")
|
| 221 |
+
self.current_student = student
|
| 222 |
return self.generate_enhanced_rule_plan(student)
|
|
|
|
|
|
|
|
|
|
| 223 |
|
| 224 |
+
def generate_enhanced_rule_plan(self, student: StudentProfile) -> Dict:
|
| 225 |
+
self.current_student = student
|
| 226 |
track = self._identify_track(student)
|
|
|
|
|
|
|
| 227 |
plan = self._build_structured_plan(student, track, None)
|
| 228 |
+
validation = self.validate_plan(plan, student)
|
| 229 |
|
|
|
|
|
|
|
| 230 |
if validation["errors"]:
|
| 231 |
plan = self._fix_plan_errors(plan, validation, student)
|
| 232 |
+
validation = self.validate_plan(plan, student)
|
| 233 |
|
|
|
|
| 234 |
difficulty_level = self._map_difficulty(student.preferred_difficulty)
|
| 235 |
courses_per_semester = self._calculate_course_load(student.time_commitment)
|
| 236 |
explanation = f"Personalized {track} track ({difficulty_level} difficulty, {courses_per_semester} courses/semester)"
|
| 237 |
|
| 238 |
return self._finalize_plan(plan, explanation, validation)
|
| 239 |
+
|
| 240 |
+
def generate_llm_plan(self, student: StudentProfile) -> Dict:
|
| 241 |
+
print("--- Generating AI-Optimized Plan ---")
|
| 242 |
+
self.current_student = student
|
| 243 |
+
self.load_llm()
|
| 244 |
+
if not self.llm:
|
| 245 |
+
return self.generate_enhanced_rule_plan(student)
|
| 246 |
+
|
| 247 |
+
track = self._identify_track(student)
|
| 248 |
+
llm_suggestions = self._get_llm_course_suggestions(student, track)
|
| 249 |
+
plan = self._build_structured_plan(student, track, llm_suggestions)
|
| 250 |
+
validation = self.validate_plan(plan, student)
|
| 251 |
+
if validation["errors"]:
|
| 252 |
+
plan = self._fix_plan_errors(plan, validation, student)
|
| 253 |
+
validation = self.validate_plan(plan, student)
|
| 254 |
+
|
| 255 |
+
explanation = self._generate_explanation(student, plan, track, "AI-optimized")
|
| 256 |
+
return self._finalize_plan(plan, explanation, validation)
|
| 257 |
+
|
| 258 |
+
def _build_structured_plan(self, student: StudentProfile, track: str, llm_suggestions: Optional[List[str]] = None) -> Dict:
|
| 259 |
+
"""FIXED with hardcoded Year 2 priorities"""
|
| 260 |
|
| 261 |
completed = set(student.completed_courses)
|
| 262 |
plan = {}
|
| 263 |
requirements = self.CONCENTRATION_REQUIREMENTS.get(track, self.CONCENTRATION_REQUIREMENTS["ai_ml"])
|
| 264 |
|
|
|
|
| 265 |
courses_per_semester = self._calculate_course_load(student.time_commitment)
|
| 266 |
|
| 267 |
+
# Build required and pick sets
|
| 268 |
+
required_set = set()
|
| 269 |
+
picklist_set = set()
|
|
|
|
|
|
|
| 270 |
for category, reqs in requirements.items():
|
| 271 |
if "required" in reqs:
|
| 272 |
+
required_set.update(reqs["required"])
|
|
|
|
|
|
|
| 273 |
for key, courses in reqs.items():
|
| 274 |
if key.startswith("pick_"):
|
| 275 |
+
picklist_set.update(courses)
|
| 276 |
+
|
| 277 |
+
semantic_scores = self._compute_semantic_scores(student)
|
| 278 |
+
|
| 279 |
+
# HARDCODED FIX: Force Year 2 to prioritize core courses
|
| 280 |
+
YEAR2_MUST_TAKE = ["CS3000", "CS3500", "DS2500", "MATH2331", "MATH3081"]
|
| 281 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
for sem_num in range(1, 9):
|
| 283 |
year = ((sem_num - 1) // 2) + 1
|
|
|
|
| 284 |
|
| 285 |
+
available_courses = self._get_available_courses(completed, year, sem_num, track)
|
|
|
|
| 286 |
|
| 287 |
+
# Filter: must be takeable
|
| 288 |
+
schedulable = [
|
| 289 |
+
c for c in available_courses
|
| 290 |
+
if c not in completed and self._can_take_course(c, completed)
|
| 291 |
+
]
|
|
|
|
|
|
|
| 292 |
|
| 293 |
+
# HARDCODED: In Year 2, force core courses to the top
|
| 294 |
+
if year == 2:
|
| 295 |
+
priority_courses = [c for c in YEAR2_MUST_TAKE if c in schedulable]
|
| 296 |
+
other_courses = [c for c in schedulable if c not in YEAR2_MUST_TAKE]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
|
| 298 |
+
# Score priority courses separately
|
| 299 |
+
scored_priority = sorted(
|
| 300 |
+
priority_courses,
|
| 301 |
+
key=lambda c: self._score_course(c, semantic_scores, required_set, picklist_set),
|
| 302 |
+
reverse=True
|
| 303 |
+
)
|
| 304 |
+
scored_others = sorted(
|
| 305 |
+
other_courses,
|
| 306 |
+
key=lambda c: self._score_course(c, semantic_scores, required_set, picklist_set),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
reverse=True
|
| 308 |
)
|
| 309 |
|
| 310 |
+
scored_courses = scored_priority + scored_others
|
| 311 |
+
else:
|
| 312 |
+
# Normal scoring for other years
|
| 313 |
+
scored_courses = sorted(
|
| 314 |
+
schedulable,
|
| 315 |
+
key=lambda c: self._score_course(c, semantic_scores, required_set, picklist_set),
|
| 316 |
+
reverse=True
|
| 317 |
+
)
|
| 318 |
+
|
| 319 |
+
# Select top N courses
|
| 320 |
+
selected = []
|
| 321 |
+
for course in scored_courses:
|
| 322 |
+
if len(selected) >= courses_per_semester:
|
| 323 |
+
break
|
| 324 |
+
if self._validate_sequence(selected, course):
|
| 325 |
+
selected.append(course)
|
| 326 |
|
| 327 |
# Add to plan
|
| 328 |
if selected:
|
|
|
|
| 330 |
if year_key not in plan:
|
| 331 |
plan[year_key] = {}
|
| 332 |
|
| 333 |
+
sem_type = 'fall' if (sem_num % 2) == 1 else 'spring'
|
| 334 |
+
plan[year_key][sem_type] = selected
|
| 335 |
completed.update(selected)
|
| 336 |
|
| 337 |
return plan
|
| 338 |
+
|
| 339 |
+
def _get_available_courses(self, completed: Set[str], year: int, sem_num: int = None, track: str = "ai_ml") -> List[str]:
|
| 340 |
+
"""FIXED: Return ALL courses that COULD be taken in this year"""
|
| 341 |
+
|
| 342 |
+
# Year 1: Hardcoded foundation
|
| 343 |
+
if year == 1:
|
| 344 |
+
if not completed or len(completed) < 2:
|
| 345 |
+
return [c for c in ["CS1800", "CS2500", "MATH1341", "ENGW1111"] if c in self.valid_courses]
|
| 346 |
+
else:
|
| 347 |
+
next_courses = []
|
| 348 |
+
for course, prereq in [("CS2800", "CS1800"), ("CS2510", "CS2500"), ("MATH1342", "MATH1341"), ("DS2000", None)]:
|
| 349 |
+
if course in self.valid_courses and course not in completed:
|
| 350 |
+
if prereq is None or prereq in completed:
|
| 351 |
+
next_courses.append(course)
|
| 352 |
+
return next_courses
|
| 353 |
+
|
| 354 |
+
# Years 2-4: Filter by subject and level
|
| 355 |
+
available = []
|
| 356 |
|
| 357 |
+
# ONLY CS/DS/CY/MATH allowed
|
| 358 |
+
ALLOWED_SUBJECTS = {"CS", "DS", "CY", "MATH"}
|
| 359 |
+
|
| 360 |
+
for cid in self.valid_courses:
|
| 361 |
+
if cid in completed:
|
| 362 |
+
continue
|
| 363 |
|
| 364 |
+
course_data = self.courses.get(cid, {})
|
| 365 |
+
subject = course_data.get('subject')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 366 |
|
| 367 |
+
if subject not in ALLOWED_SUBJECTS:
|
| 368 |
+
continue
|
| 369 |
+
|
| 370 |
+
course_level = self._get_level(cid)
|
| 371 |
+
|
| 372 |
+
# Year-based level filtering
|
| 373 |
+
if year == 2 and course_level > 3999:
|
| 374 |
+
continue # No 4000+ in Year 2
|
| 375 |
+
if year >= 3 and course_level < 2000:
|
| 376 |
+
continue # No intro courses in Years 3-4
|
| 377 |
+
|
| 378 |
+
available.append(cid)
|
| 379 |
|
| 380 |
+
return available
|
| 381 |
+
|
| 382 |
+
def _fix_plan_errors(self, plan: Dict, validation: Dict, student: StudentProfile) -> Dict:
|
| 383 |
+
if any("Mixed" in error for error in validation["errors"]):
|
| 384 |
+
return self._build_structured_plan(student, self._identify_track(student), None)
|
| 385 |
return plan
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
|
| 387 |
+
def _get_llm_course_suggestions(self, student: StudentProfile, track: str) -> List[str]:
|
| 388 |
+
requirements = self.CONCENTRATION_REQUIREMENTS.get(track, {})
|
| 389 |
+
all_options = set()
|
| 390 |
+
for reqs in requirements.values():
|
| 391 |
for key, courses in reqs.items():
|
| 392 |
+
if key.startswith("pick_"): all_options.update(courses)
|
| 393 |
+
|
| 394 |
+
course_options_text = [f"{cid}: {self.courses[cid].get('name', cid)} - {self.courses[cid].get('description', '')[:100].strip()}"
|
| 395 |
+
for cid in list(all_options)[:15] if cid in self.courses]
|
| 396 |
+
|
| 397 |
+
prompt = f"""You are an expert curriculum advisor. Based on the student profile, rank the top 5 most relevant courses from the list below.
|
| 398 |
+
### Student Profile:
|
| 399 |
+
- **Career Goal:** {student.career_goals}
|
| 400 |
+
- **Interests:** {', '.join(student.interests)}
|
| 401 |
+
- **Preferred Difficulty:** {student.preferred_difficulty}
|
| 402 |
+
### Available Elective Courses:
|
| 403 |
+
{chr(10).join(course_options_text)}
|
| 404 |
+
Return ONLY the top 5 course IDs, each on a new line.
|
| 405 |
+
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 406 |
try:
|
| 407 |
+
inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=4096).to(self.device)
|
|
|
|
| 408 |
with torch.no_grad():
|
| 409 |
+
outputs = self.llm.generate(**inputs, max_new_tokens=100, temperature=0.2, do_sample=True, pad_token_id=self.tokenizer.eos_token_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 410 |
response = self.tokenizer.decode(outputs[0][len(inputs['input_ids'][0]):], skip_special_tokens=True)
|
| 411 |
+
suggested_courses = re.findall(r'([A-Z]{2,4}\d{4})', response)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 412 |
return suggested_courses[:5]
|
|
|
|
| 413 |
except Exception as e:
|
| 414 |
print(f"LLM suggestion failed: {e}")
|
| 415 |
+
return list(all_options)[:5]
|
| 416 |
+
|
| 417 |
def _map_difficulty(self, preferred_difficulty: str) -> str:
|
| 418 |
+
return {"easy": "easy", "moderate": "medium", "challenging": "hard"}.get(preferred_difficulty.lower(), "medium")
|
| 419 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 420 |
def _calculate_course_load(self, time_commitment: int) -> int:
|
| 421 |
+
if time_commitment <= 20: return 3
|
| 422 |
+
if time_commitment <= 40: return 4 # Setting hours to 40 will now correctly return 4.
|
| 423 |
+
return 5
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 424 |
|
| 425 |
+
def _identify_track(self, student: StudentProfile) -> str:
|
| 426 |
+
if not hasattr(self, 'embedding_model') or self.embedding_model is None:
|
| 427 |
+
combined = f"{student.career_goals.lower()} {' '.join(student.interests).lower()}"
|
| 428 |
+
if any(word in combined for word in ['ai', 'ml', 'machine learning', 'data']): return "ai_ml"
|
| 429 |
+
if any(word in combined for word in ['systems', 'distributed', 'backend']): return "systems"
|
| 430 |
+
if any(word in combined for word in ['security', 'cyber']): return "security"
|
| 431 |
+
return "ai_ml"
|
| 432 |
profile_text = f"{student.career_goals} {' '.join(student.interests)}"
|
| 433 |
profile_emb = self.embedding_model.encode(profile_text, convert_to_tensor=True)
|
|
|
|
| 434 |
track_descriptions = {
|
| 435 |
+
"ai_ml": "artificial intelligence machine learning deep learning neural networks data science",
|
| 436 |
+
"systems": "operating systems distributed systems networks compilers databases performance backend",
|
| 437 |
+
"security": "cybersecurity cryptography network security ethical hacking vulnerabilities"
|
| 438 |
}
|
| 439 |
+
best_track, best_score = "ai_ml", -1.0
|
|
|
|
|
|
|
|
|
|
| 440 |
for track, description in track_descriptions.items():
|
| 441 |
track_emb = self.embedding_model.encode(description, convert_to_tensor=True)
|
| 442 |
score = float(util.cos_sim(profile_emb, track_emb))
|
| 443 |
if score > best_score:
|
| 444 |
+
best_score, best_track = score, track
|
|
|
|
|
|
|
| 445 |
return best_track
|
|
|
|
|
|
|
|
|
|
| 446 |
|
| 447 |
+
def _compute_semantic_scores(self, student: StudentProfile) -> Dict[str, float]:
|
| 448 |
query_text = f"{student.career_goals} {' '.join(student.interests)}"
|
| 449 |
query_emb = self.embedding_model.encode(query_text, convert_to_tensor=True)
|
|
|
|
| 450 |
similarities = util.cos_sim(query_emb, self.course_embeddings)[0]
|
| 451 |
+
return {cid: float(similarities[idx]) for idx, cid in enumerate(self.valid_courses)}
|
| 452 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 453 |
def _generate_explanation(self, student: StudentProfile, plan: Dict, track: str, plan_type: str) -> str:
|
| 454 |
+
return f"{plan_type.title()} plan for the {track} track, tailored to your goal of becoming a {student.career_goals}."
|
| 455 |
+
|
| 456 |
+
def validate_plan(self, plan: Dict, student: StudentProfile = None) -> Dict[str, List[str]]:
|
| 457 |
+
issues = {"errors": [], "warnings": [], "info": []}
|
| 458 |
+
all_courses = [course for year in plan.values() for sem in year.values() for course in sem if isinstance(sem, list)]
|
| 459 |
|
| 460 |
+
for track_type, tracks in self.COURSE_TRACKS.items():
|
| 461 |
+
tracks_used = {name for name, courses in tracks.items() if any(c in all_courses for c in courses)}
|
| 462 |
+
if len(tracks_used) > 1:
|
| 463 |
+
issues["errors"].append(f"Mixed {track_type} tracks: {', '.join(tracks_used)}. Choose one sequence.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 464 |
|
| 465 |
+
completed_for_validation = set(student.completed_courses) if student else set()
|
| 466 |
+
for year in range(1, 5):
|
| 467 |
+
for sem in ["fall", "spring"]:
|
| 468 |
+
year_key = f"year_{year}"
|
| 469 |
+
sem_courses = plan.get(year_key, {}).get(sem, [])
|
| 470 |
+
for course in sem_courses:
|
| 471 |
+
if course in self.curriculum_graph:
|
| 472 |
+
prereqs = set(self.curriculum_graph.predecessors(course))
|
| 473 |
+
if not prereqs.issubset(self._get_completed_with_equivalents(completed_for_validation)):
|
| 474 |
+
missing = prereqs - completed_for_validation
|
| 475 |
+
issues["errors"].append(f"{course} in Year {year} {sem} is missing prereqs: {', '.join(missing)}")
|
| 476 |
+
completed_for_validation.update(sem_courses)
|
| 477 |
+
return issues
|
| 478 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 479 |
def _finalize_plan(self, plan: Dict, explanation: str, validation: Dict = None) -> Dict:
|
| 480 |
+
structured_plan = {"reasoning": explanation, "validation": validation or {"errors": [], "warnings": [], "info": []}}
|
| 481 |
+
complexities = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
for year in range(1, 5):
|
| 483 |
year_key = f"year_{year}"
|
| 484 |
+
structured_plan[year_key] = {
|
| 485 |
+
"fall": plan.get(year_key, {}).get("fall", []),
|
| 486 |
+
"spring": plan.get(year_key, {}).get("spring", []),
|
|
|
|
|
|
|
|
|
|
| 487 |
"summer": "co-op" if year in [2, 3] else []
|
| 488 |
}
|
| 489 |
+
for sem in ["fall", "spring"]:
|
| 490 |
+
courses = structured_plan[year_key][sem]
|
| 491 |
+
if courses:
|
| 492 |
+
sem_complexity = sum(self.courses.get(c, {}).get('complexity', 50) for c in courses)
|
| 493 |
+
complexities.append(sem_complexity)
|
| 494 |
|
| 495 |
+
structured_plan["complexity_analysis"] = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 496 |
"average_semester_complexity": float(np.mean(complexities)) if complexities else 0,
|
| 497 |
"peak_semester_complexity": float(np.max(complexities)) if complexities else 0,
|
| 498 |
"total_complexity": float(np.sum(complexities)) if complexities else 0,
|
| 499 |
"balance_score (std_dev)": float(np.std(complexities)) if complexities else 0
|
| 500 |
}
|
| 501 |
+
structured_plan["metadata"] = {
|
|
|
|
|
|
|
| 502 |
"generated": datetime.now().isoformat(),
|
| 503 |
"valid": len(validation.get("errors", [])) == 0 if validation else True,
|
|
|
|
| 504 |
}
|
| 505 |
+
return {"pathway": structured_plan}
|
|
|
|
| 506 |
|
|
|
|
| 507 |
class CurriculumOptimizer(HybridOptimizer):
    """Wrapper to maintain compatibility with older script calls."""

    def __init__(self):
        super().__init__()

    def generate_plan(self, student: "StudentProfile") -> Dict:
        """Legacy entry point; delegates to the enhanced rule-based planner."""
        return self.generate_enhanced_rule_plan(student)
|
src/inspect_graph.py
CHANGED
|
@@ -1,88 +1,265 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import pickle
|
| 2 |
import networkx as nx
|
| 3 |
-
import
|
|
|
|
| 4 |
|
| 5 |
-
def
|
| 6 |
-
"""
|
| 7 |
-
|
| 8 |
-
""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
try:
|
| 10 |
-
with open(
|
| 11 |
graph = pickle.load(f)
|
| 12 |
-
print(f"✅ Successfully loaded graph '{graph_path}'")
|
| 13 |
-
print(f" - Total Courses (Nodes): {graph.number_of_nodes()}")
|
| 14 |
-
print(f" - Prerequisite Links (Edges): {graph.number_of_edges()}")
|
| 15 |
-
except FileNotFoundError:
|
| 16 |
-
print(f"❌ ERROR: File not found at '{graph_path}'. Please check the path.")
|
| 17 |
-
return
|
| 18 |
except Exception as e:
|
| 19 |
-
print(f"❌ ERROR: Could not load
|
| 20 |
return
|
| 21 |
-
|
| 22 |
-
print("\n
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
]
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
| 35 |
if graph.has_edge(prereq, course):
|
| 36 |
-
print(f"
|
| 37 |
else:
|
| 38 |
-
|
| 39 |
-
|
| 40 |
else:
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
| 43 |
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
print(f"
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
else:
|
| 58 |
-
print("
|
| 59 |
-
|
| 60 |
-
#
|
| 61 |
-
print("\n
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
print("
|
| 78 |
else:
|
| 79 |
-
print("
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
if __name__ == "__main__":
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Comprehensive Graph Data Inspector
|
| 4 |
+
Diagnoses all potential issues with the curriculum graph data
|
| 5 |
+
"""
|
| 6 |
import pickle
|
| 7 |
import networkx as nx
|
| 8 |
+
from collections import defaultdict
|
| 9 |
+
import sys
|
| 10 |
|
| 11 |
+
def inspect_graph_thoroughly(graph_file):
    """Run a full diagnostic pass over a pickled curriculum graph.

    Loads a NetworkX DiGraph from ``graph_file`` (nodes = course IDs like
    "CS2500", edges = prerequisite -> course) and prints a seven-section
    report: subject distribution, presence of critical courses, prerequisite
    chains, a CS2800-specific check, redundant course variants,
    lab/recitation leftovers, and 4000-level coverage, followed by a
    verdict with recommended fixes.

    Args:
        graph_file: Path to a pickle file containing a networkx.DiGraph.

    Returns:
        None. All output goes to stdout; returns early if loading fails.
    """

    print("=" * 70)
    print("COMPREHENSIVE CURRICULUM GRAPH INSPECTION")
    print("=" * 70)

    # Load the graph; bail out early on any failure (bad path, bad pickle).
    try:
        with open(graph_file, 'rb') as f:
            graph = pickle.load(f)
    except Exception as e:
        print(f"❌ ERROR: Could not load graph: {e}")
        return

    print(f"\n📊 BASIC STATS:")
    print(f"   Total nodes: {graph.number_of_nodes()}")
    print(f"   Total edges: {graph.number_of_edges()}")

    # 1. CHECK SUBJECT DISTRIBUTION
    print("\n📚 SUBJECT ANALYSIS:")
    subject_counts = defaultdict(int)
    courses_by_subject = defaultdict(list)

    for node, data in graph.nodes(data=True):
        subject = data.get('subject', 'UNKNOWN')
        subject_counts[subject] += 1
        courses_by_subject[subject].append(node)

    # Categorize subjects: CS_RELEVANT must all be present; MAYBE_RELEVANT
    # are tolerated; anything else is flagged as noise to be removed.
    CS_RELEVANT = {"CS", "DS", "IS", "CY", "MATH", "PHYS", "ENGW", "STAT", "EECE"}
    MAYBE_RELEVANT = {"CHEM", "BIOL", "PSYC", "PHIL", "ECON"}

    print("\n   Relevant CS Subjects:")
    for subj in sorted(CS_RELEVANT):
        count = subject_counts.get(subj, 0)
        if count > 0:
            sample = courses_by_subject[subj][:3]
            print(f"   ✅ {subj:8s}: {count:3d} courses (e.g., {', '.join(sample)})")
        else:
            print(f"   ❌ {subj:8s}: 0 courses - MISSING!")

    print("\n   Irrelevant Subjects (should be removed):")
    irrelevant_found = False
    for subj, count in sorted(subject_counts.items()):
        if subj not in CS_RELEVANT and subj not in MAYBE_RELEVANT and count > 0:
            irrelevant_found = True
            sample = courses_by_subject[subj][:3]
            print(f"   ❌ {subj:8s}: {count:3d} courses (e.g., {', '.join(sample)})")

    if not irrelevant_found:
        print("   ✅ None found - graph is clean!")

    # 2. CHECK CRITICAL COURSES EXISTENCE
    print("\n🎯 CRITICAL COURSES CHECK:")

    # Foundation courses every CS student takes first.
    foundation_courses = ["CS1800", "CS2500", "CS2510", "CS2800"]
    print("\n   Foundation Courses:")
    for course in foundation_courses:
        if course in graph:
            data = graph.nodes[course]
            print(f"   ✅ {course}: {data.get('name', 'Unknown')}")
        else:
            print(f"   ❌ {course}: MISSING!")

    # Core CS courses
    core_courses = ["CS3000", "CS3500", "CS3650", "CS3700", "CS3200"]
    print("\n   Core CS Courses:")
    for course in core_courses:
        if course in graph:
            data = graph.nodes[course]
            print(f"   ✅ {course}: {data.get('name', 'Unknown')}")
        else:
            print(f"   ❌ {course}: MISSING!")

    # AI/ML concentration courses; missing ones are collected for the verdict.
    ai_ml_courses = ["CS4100", "DS4400", "CS4120", "DS4420", "CS4180", "DS4440"]
    print("\n   AI/ML Concentration:")
    missing_concentration = []
    for course in ai_ml_courses:
        if course in graph:
            data = graph.nodes[course]
            print(f"   ✅ {course}: {data.get('name', 'Unknown')}")
        else:
            missing_concentration.append(course)
            print(f"   ❌ {course}: MISSING!")

    # 3. CHECK PREREQUISITE CHAINS
    # Each tuple is (prereq, course, human-readable description). An edge
    # prereq -> course must exist whenever both nodes are present.
    print("\n🔗 PREREQUISITE CHAINS:")

    critical_chains = [
        ("CS1800", "CS2800", "Discrete Structures → Logic"),
        ("CS2500", "CS2510", "Fundies 1 → Fundies 2"),
        ("CS2510", "CS3500", "Fundies 2 → OOD"),
        ("CS2510", "CS3000", "Fundies 2 → Algorithms"),
        ("MATH1341", "MATH1342", "Calc 1 → Calc 2"),
        ("DS2000", "DS2500", "Prog w/ Data → Intermediate"),
        ("DS2500", "DS3500", "Intermediate → Advanced")
    ]

    broken_chains = []
    for prereq, course, desc in critical_chains:
        if prereq in graph and course in graph:
            if graph.has_edge(prereq, course):
                print(f"   ✅ {prereq} → {course} ({desc})")
            else:
                broken_chains.append((prereq, course))
                print(f"   ❌ {prereq} → {course} ({desc}) - EDGE MISSING!")
        else:
            # Missing nodes are only warnings: the chain cannot be checked.
            if prereq not in graph:
                print(f"   ⚠️ {prereq} → {course} - {prereq} doesn't exist")
            if course not in graph:
                print(f"   ⚠️ {prereq} → {course} - {course} doesn't exist")

    # 4. CS2800 SPECIFIC DIAGNOSIS
    # CS2800 has historically been the problem node, so it gets its own
    # deep-dive: attributes, predecessors, successors, and the CS1800 edge.
    print("\n🔍 CS2800 DETAILED ANALYSIS:")

    if "CS2800" in graph:
        cs2800_data = graph.nodes["CS2800"]
        print(f"   ✅ CS2800 exists")
        print(f"   Name: {cs2800_data.get('name', 'Unknown')}")
        print(f"   Subject: {cs2800_data.get('subject', 'Unknown')}")
        print(f"   Credits: {cs2800_data.get('maxCredits', 'Unknown')}")

        # Check prerequisites (incoming edges).
        prereqs = list(graph.predecessors("CS2800"))
        print(f"   Prerequisites: {prereqs if prereqs else 'NONE (this is wrong!)'}")

        # What it unlocks (outgoing edges, first 5 only).
        unlocks = list(graph.successors("CS2800"))[:5]
        print(f"   Unlocks: {unlocks if unlocks else 'Nothing (suspicious...)'}")

        # Specific CS1800 connection
        if "CS1800" in graph:
            if graph.has_edge("CS1800", "CS2800"):
                print(f"   ✅ CS1800 → CS2800 connection exists")
            else:
                print(f"   ❌ CS1800 → CS2800 connection MISSING!")
    else:
        print(f"   ❌ CS2800 is completely MISSING from the graph!")

    # 5. CHECK FOR DUPLICATE/REDUNDANT COURSES
    # Multiple variants of calculus/physics satisfy the same requirement;
    # more than one in the graph means deduplication is still needed.
    print("\n🔄 CHECKING FOR REDUNDANT COURSES:")

    calc_variants = ["MATH1341", "MATH1241", "MATH1231", "MATH1340"]
    physics_variants = ["PHYS1151", "PHYS1161", "PHYS1145"]

    print("\n   Calculus variants in graph:")
    calc_found = [c for c in calc_variants if c in graph]
    if len(calc_found) > 1:
        print(f"   ⚠️ Multiple calculus courses found: {calc_found}")
        print(f"   These satisfy the same requirement - graph needs deduplication")
    else:
        print(f"   ✅ Only one variant: {calc_found}")

    print("\n   Physics variants in graph:")
    phys_found = [c for c in physics_variants if c in graph]
    if len(phys_found) > 1:
        print(f"   ⚠️ Multiple physics courses found: {phys_found}")
    else:
        print(f"   ✅ Only one variant: {phys_found}")

    # 6. CHECK FOR LABS/RECITATIONS (should have been stripped upstream)
    # Matched purely by keywords in the course name, so false positives are
    # possible (e.g. a lecture titled "...Seminar").
    print("\n🧪 CHECKING FOR LABS/RECITATIONS (should be removed):")

    labs_found = []
    for node, data in graph.nodes(data=True):
        name = data.get('name', '').lower()
        if any(word in name for word in ['lab', 'recitation', 'seminar', 'practicum']):
            labs_found.append((node, data.get('name', node)))

    if labs_found:
        print(f"   ❌ Found {len(labs_found)} lab/recitation courses:")
        for course_id, name in labs_found[:5]:
            print(f"      - {course_id}: {name}")
    else:
        print(f"   ✅ No labs/recitations found")

    # 7. CHECK 4000-LEVEL COURSES (needed for concentrations/capstones)
    print("\n🎓 4000-LEVEL COURSES:")

    cs4000_courses = [n for n in graph.nodes() if n.startswith("CS4")]
    ds4000_courses = [n for n in graph.nodes() if n.startswith("DS4")]

    print(f"   CS 4000-level: {len(cs4000_courses)} courses")
    if cs4000_courses:
        print(f"      Examples: {', '.join(cs4000_courses[:5])}")
    else:
        print(f"   ❌ NO CS 4000-level courses found!")

    print(f"   DS 4000-level: {len(ds4000_courses)} courses")
    if ds4000_courses:
        print(f"      Examples: {', '.join(ds4000_courses[:5])}")
    else:
        print(f"   ❌ NO DS 4000-level courses found!")

    # FINAL VERDICT: aggregate every issue flag collected above.
    print("\n" + "=" * 70)
    print("VERDICT:")
    print("=" * 70)

    issues = []

    if irrelevant_found:
        issues.append("Contains irrelevant subjects (ARTH, FRNH, etc.)")

    if missing_concentration:
        issues.append(f"Missing critical courses: {', '.join(missing_concentration)}")

    if broken_chains:
        issues.append(f"Broken prerequisite chains: {len(broken_chains)}")

    if not cs4000_courses or not ds4000_courses:
        issues.append("Missing 4000-level courses")

    if labs_found:
        issues.append(f"Contains {len(labs_found)} lab/recitation courses")

    if issues:
        print("❌ GRAPH HAS ISSUES:")
        for i, issue in enumerate(issues, 1):
            print(f"   {i}. {issue}")

        print("\n📋 RECOMMENDED ACTIONS:")
        print("1. Re-scrape with more subjects: CS DS IS CY MATH PHYS STAT EECE")
        print("2. Re-run analyzer with stricter filtering")
        print("3. Manually add missing prerequisite edges if needed")
    else:
        print("✅ Graph appears to be clean and complete!")
| 242 |
+
def suggest_fix_commands(graph_file):
    """Suggest specific commands to fix issues"""
    # NOTE(review): graph_file is accepted but not used here — the suggested
    # step 3 deliberately points at the freshly cleaned pickle instead.

    print("\n" + "=" * 70)
    print("FIX COMMANDS:")
    print("=" * 70)

    # (header, command) pairs, printed in order.
    steps = [
        ("\n1️⃣ If courses are missing, re-scrape with expanded subjects:",
         "   python neu_scraper.py --term 202510 --subjects CS DS IS CY MATH PHYS STAT EECE --prefix neu_complete"),
        ("\n2️⃣ Clean the new data:",
         "   python curriculum_analyzer.py --graph neu_complete_graph_*.pkl --courses neu_complete_courses_*.pkl --output-graph neu_graph_ultra_clean.pkl"),
        ("\n3️⃣ Test the cleaned data:",
         f"   python {sys.argv[0]} neu_graph_ultra_clean.pkl"),
    ]
    for header, command in steps:
        print(header)
        print(command)
| 258 |
if __name__ == "__main__":
    # CLI entry point: expects the graph pickle path as the sole argument.
    if len(sys.argv) >= 2:
        target = sys.argv[1]
        inspect_graph_thoroughly(target)
        suggest_fix_commands(target)
    else:
        print("Usage: python inspect_graph.py <graph.pkl>")
        print("Example: python inspect_graph.py neu_graph_clean3.pkl")
src/neu_graph_clean8.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a587cdbcc482e13aff07b62e79a4d1c8732c1ab1cb41f1d699ed6f50148f4db4
|
| 3 |
+
size 244756
|
src/neu_scraper.py
CHANGED
|
@@ -1,235 +1,236 @@
|
|
| 1 |
-
"""
|
| 2 |
-
NEU Course
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
import
|
| 10 |
-
import
|
| 11 |
-
|
| 12 |
-
import
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
self.
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
"
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
}
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
logger.info(f"
|
| 218 |
-
|
| 219 |
-
def main():
|
| 220 |
-
import argparse
|
| 221 |
-
parser = argparse.ArgumentParser(description="
|
| 222 |
-
parser.add_argument("--
|
| 223 |
-
parser.add_argument("--subjects", nargs="+", required=True, help="Subjects
|
| 224 |
-
parser.add_argument("--prefix", default="
|
| 225 |
-
parser.add_argument("--batch-size", type=int, default=100, help="
|
| 226 |
-
args = parser.parse_args()
|
| 227 |
-
|
| 228 |
-
scraper =
|
| 229 |
-
scraper.
|
| 230 |
-
scraper.build_graph()
|
| 231 |
-
scraper.save_data(args.prefix)
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
|
|
|
| 235 |
main()
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Multi-Term NEU Course Scraper - Merges data from multiple terms
|
| 3 |
+
Fixes: Missing courses by scraping Fall/Spring/Summer catalogs
|
| 4 |
+
"""
|
| 5 |
+
import requests
|
| 6 |
+
import pickle
|
| 7 |
+
import networkx as nx
|
| 8 |
+
import time
|
| 9 |
+
import logging
|
| 10 |
+
from typing import List, Dict, Set, Any
|
| 11 |
+
from datetime import datetime
|
| 12 |
+
from collections import defaultdict
|
| 13 |
+
|
| 14 |
+
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
|
| 17 |
+
class MultiTermScraper:
    """Scrape the SearchNEU GraphQL API across multiple terms and merge
    the results into one course catalog plus a prerequisite DiGraph.

    Workflow: scrape_all_terms() -> build_graph() -> save_data().
    """

    def __init__(self, term_ids: List[str], api_url: str = "https://searchneu.com/graphql"):
        """
        Args:
            term_ids: Term identifiers to scrape (e.g. ["202510", "202530"]).
            api_url: GraphQL endpoint; defaults to the public SearchNEU API.
        """
        self.term_ids = term_ids
        self.api_url = api_url
        self.headers = {"Content-Type": "application/json"}
        self.merged_courses: Dict[str, Dict] = {}  # cid (e.g. "CS2500") -> raw course dict
        self.graph = nx.DiGraph()  # edges point prerequisite -> dependent course

    def get_all_courses_by_subject(self, term_id: str, subject: str, batch_size: int = 100) -> List[Dict]:
        """Fetch ALL courses for a specific subject/term via pagination.

        Pages through the search endpoint in steps of ``batch_size`` until a
        short page signals the end. Any request/GraphQL error stops pagination
        and returns whatever was collected so far (best-effort).
        """
        all_courses = []
        offset = 0
        page = 1

        while True:
            query = """
            query searchQuery($termId: String!, $query: String!, $first: Int, $offset: Int) {
              search(termId: $termId, query: $query, first: $first, offset: $offset) {
                totalCount
                nodes {
                  __typename
                  ... on ClassOccurrence {
                    subject
                    classId
                    name
                    desc
                    prereqs
                    coreqs
                    minCredits
                    maxCredits
                  }
                }
              }
            }
            """
            variables = {
                "termId": term_id,
                "query": subject,
                "first": batch_size,
                "offset": offset
            }

            try:
                resp = requests.post(self.api_url, json={"query": query, "variables": variables}, headers=self.headers, timeout=10)
                resp.raise_for_status()
                data = resp.json()

                if "errors" in data:
                    logger.error(f"GraphQL errors for {term_id}/{subject}: {data['errors']}")
                    break

                search_data = data.get("data", {}).get("search", {})
                nodes = search_data.get("nodes", [])
                # Search results may include non-course node types; keep only classes.
                page_courses = [c for c in nodes if c.get("__typename") == "ClassOccurrence"]
                all_courses.extend(page_courses)

                logger.info(f"[{term_id}] {subject} Page {page}: {len(page_courses)} courses (Total: {len(all_courses)})")

                # A short page means we've reached the end of the results.
                if len(page_courses) < batch_size:
                    break

                offset += batch_size
                page += 1
                time.sleep(0.1)  # be polite to the API between pages

            except Exception as e:
                logger.error(f"Error fetching {term_id}/{subject} page {page}: {e}")
                break

        logger.info(f"[{term_id}] {subject}: {len(all_courses)} total courses")
        return all_courses

    def _recursive_parse_prereqs(self, prereq_obj: Any) -> Set[str]:
        """Extract course IDs from nested prereq structures.

        Flattens and/or trees into the set of all mentioned course IDs
        ("<subject><classId>"); the and/or distinction is intentionally
        discarded. Non-dict leaves (e.g. strings like "Graduate Admission")
        are ignored.
        """
        ids = set()
        if not isinstance(prereq_obj, dict):
            return ids

        # Leaf node: a single course reference.
        if "classId" in prereq_obj and "subject" in prereq_obj:
            ids.add(f"{prereq_obj['subject']}{prereq_obj['classId']}")
            return ids

        # Both branches recurse over "values"; kept separate in case other
        # node types with "values" ever need different handling.
        if prereq_obj.get("type") in ["and", "or"]:
            for val in prereq_obj.get("values", []):
                ids |= self._recursive_parse_prereqs(val)

        elif "values" in prereq_obj:
            for val in prereq_obj.get("values", []):
                ids |= self._recursive_parse_prereqs(val)

        return ids

    def scrape_all_terms(self, subjects: List[str]):
        """Scrape courses from all terms and merge by course ID.

        Populates ``self.merged_courses``. Merge policy: the first term in
        ``self.term_ids`` wins for duplicates; a later term's record replaces
        it only when the kept record lacks a description and the new one has
        one.
        """
        term_data = defaultdict(lambda: defaultdict(list))  # term_id -> subject -> courses

        for term_id in self.term_ids:
            logger.info(f"\n{'='*70}")
            logger.info(f"SCRAPING TERM: {term_id}")
            logger.info(f"{'='*70}")

            for subject in subjects:
                courses = self.get_all_courses_by_subject(term_id, subject)
                term_data[term_id][subject] = courses
                time.sleep(0.5)  # throttle between subject queries

        # Merge courses across terms (first term listed takes precedence;
        # later terms only fill in missing descriptions).
        for term_id in self.term_ids:
            for subject in subjects:
                for course in term_data[term_id][subject]:
                    cid = f"{course['subject']}{course['classId']}"

                    if cid not in self.merged_courses:
                        self.merged_courses[cid] = course
                        logger.debug(f"Added {cid} from {term_id}")
                    else:
                        # Upgrade only if the new record has a description
                        # the existing one is missing.
                        existing = self.merged_courses[cid]
                        if not existing.get('desc') and course.get('desc'):
                            self.merged_courses[cid] = course
                            logger.debug(f"Updated {cid} from {term_id} (better description)")

        logger.info(f"\n{'='*70}")
        logger.info(f"MERGE COMPLETE: {len(self.merged_courses)} unique courses")
        logger.info(f"{'='*70}")

        # Log subject breakdown
        subject_counts = defaultdict(int)
        for cid in self.merged_courses:
            subject = self.merged_courses[cid].get('subject', 'UNKNOWN')
            subject_counts[subject] += 1

        logger.info("\nSubject breakdown:")
        for subject in sorted(subject_counts.keys()):
            logger.info(f"  {subject}: {subject_counts[subject]} courses")

    def build_graph(self):
        """Build NetworkX graph from merged course data.

        Nodes carry name/subject/classId/description/credit attributes;
        edges run prerequisite -> course. Prereqs referencing courses not in
        the merged set are logged and skipped (no dangling nodes created).
        """
        logger.info("\nBuilding course dependency graph...")

        # Add all courses as nodes
        for cid, cdata in self.merged_courses.items():
            self.graph.add_node(cid, **{
                "name": cdata.get("name", ""),
                "subject": cdata.get("subject", ""),
                "classId": cdata.get("classId", ""),
                "description": cdata.get("desc", ""),
                "minCredits": cdata.get("minCredits", 0),
                "maxCredits": cdata.get("maxCredits", 0)
            })

        # Add prerequisite edges
        edge_count = 0
        for cid, cdata in self.merged_courses.items():
            prereqs = cdata.get("prereqs", {})
            if prereqs:
                prereq_ids = self._recursive_parse_prereqs(prereqs)
                for pid in prereq_ids:
                    if pid in self.graph:
                        self.graph.add_edge(pid, cid, relationship="prerequisite")
                        edge_count += 1
                    else:
                        logger.warning(f"Prerequisite {pid} for {cid} not in graph")

        logger.info(f"Graph built: {self.graph.number_of_nodes()} nodes, {edge_count} edges")

    def save_data(self, prefix: str):
        """Save merged graph and courses.

        Writes three timestamped files next to the CWD:
        ``<prefix>_graph_<ts>.pkl`` (DiGraph pickle),
        ``<prefix>_courses_<ts>.pkl`` (merged course dict pickle), and
        ``<prefix>_merge_report_<ts>.txt`` (human-readable summary).
        """
        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
        gfile = f"{prefix}_graph_{ts}.pkl"
        cfile = f"{prefix}_courses_{ts}.pkl"

        with open(gfile, "wb") as gf:
            pickle.dump(self.graph, gf)
        with open(cfile, "wb") as cf:
            pickle.dump(self.merged_courses, cf)

        logger.info(f"\nData saved:")
        logger.info(f"  Graph: {gfile}")
        logger.info(f"  Courses: {cfile}")

        # Save merge report
        report_file = f"{prefix}_merge_report_{ts}.txt"
        with open(report_file, "w") as rf:
            rf.write(f"Multi-Term Scrape Report\n")
            rf.write(f"{'='*70}\n\n")
            rf.write(f"Terms scraped: {', '.join(self.term_ids)}\n")
            rf.write(f"Total unique courses: {len(self.merged_courses)}\n")
            rf.write(f"Total edges: {self.graph.number_of_edges()}\n\n")

            rf.write("Subject breakdown:\n")
            subject_counts = defaultdict(int)
            for cid in self.merged_courses:
                subject = self.merged_courses[cid].get('subject', 'UNKNOWN')
                subject_counts[subject] += 1

            for subject in sorted(subject_counts.keys()):
                rf.write(f"  {subject}: {subject_counts[subject]}\n")

        logger.info(f"  Report: {report_file}")
+
def main():
    """Parse CLI options and run the multi-term scrape pipeline."""
    import argparse

    cli = argparse.ArgumentParser(description="Multi-Term NEU Catalog Scraper")
    cli.add_argument("--terms", nargs="+", required=True, help="Term IDs (e.g., 202510 202520 202530)")
    cli.add_argument("--subjects", nargs="+", required=True, help="Subjects (e.g., CS DS STAT)")
    cli.add_argument("--prefix", default="neu_merged", help="Output prefix")
    # NOTE(review): --batch-size is parsed but never forwarded to the
    # scraper (scrape_all_terms takes no such parameter) — confirm intent.
    cli.add_argument("--batch-size", type=int, default=100, help="Courses per page")
    opts = cli.parse_args()

    # Pipeline: scrape every term, build the prereq graph, persist results.
    scraper = MultiTermScraper(term_ids=opts.terms)
    scraper.scrape_all_terms(opts.subjects)
    scraper.build_graph()
    scraper.save_data(opts.prefix)

    logger.info("\n✅ Multi-term scraping complete!")

if __name__ == "__main__":
    main()
|