""" Curriculum Optimizer - PRODUCTION VERSION All redundant code removed, all critical issues fixed """ import torch from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig from sentence_transformers import SentenceTransformer, util import networkx as nx import numpy as np from typing import Dict, List, Set, Optional from dataclasses import dataclass import re from datetime import datetime @dataclass class StudentProfile: completed_courses: List[str] time_commitment: int preferred_difficulty: str career_goals: str interests: List[str] current_gpa: float = 3.5 learning_style: str = "Visual" class HybridOptimizer: EQUIVALENCY_GROUPS = [ {"MATH1341", "MATH1241", "MATH1231"}, {"MATH1342", "MATH1242"}, {"PHYS1151", "PHYS1161", "PHYS1145"}, {"PHYS1155", "PHYS1165", "PHYS1147"}, ] COURSE_TRACKS = { "physics": { "engineering": ["PHYS1151", "PHYS1155"], "science": ["PHYS1161", "PHYS1165"], "life_sciences": ["PHYS1145", "PHYS1147"] }, "calculus": { "standard": ["MATH1341", "MATH1342"], "computational": ["MATH156", "MATH256"] } } CONCENTRATION_REQUIREMENTS = { "ai_ml": { "foundations": { "required": ["CS1800", "CS2500", "CS2510", "CS2800"], "sequence": True }, "core": { "required": ["CS3000", "CS3500"], "pick_1_from": ["CS3200", "CS3650", "CS5700"] }, "concentration_specific": { "required": ["CS4100", "DS4400"], "pick_2_from": ["CS4120", "CS4180", "DS4420", "DS4440"], "pick_1_systems": ["CS4730", "CS4700"] }, "math": { "required": ["MATH1341", "MATH1342"], "pick_1_from": ["MATH2331", "MATH3081"] } }, "systems": { "foundations": {"required": ["CS1800", "CS2500", "CS2510", "CS2800"]}, "core": {"required": ["CS3000", "CS3500", "CS3650"], "pick_1_from": ["CS5700", "CS3200"]}, "concentration_specific": {"required": ["CS4700"], "pick_2_from": ["CS4730"], "pick_1_from": ["CS4400", "CS4500", "CS4520"]}, "math": {"required": ["MATH1341", "MATH1342"]} }, "security": { "foundations": {"required": ["CS1800", "CS2500", "CS2510", "CS2800"]}, "core": {"required": ["CS3000", "CS3650", "CY2550"], "pick_1_from": ["CS5700", "CS3500"]}, "concentration_specific": {"required": ["CY3740"], "pick_2_from": ["CY4740", "CY4760", "CY4770"], "pick_1_from": ["CS4700", "CS4730"]}, "math": {"required": ["MATH1342"], "pick_1_from": ["MATH3527", "MATH3081"]} } } def __init__(self): self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.model_name = "meta-llama/Llama-3.1-8B-Instruct" self.embedding_model_name = 'BAAI/bge-large-en-v1.5' self.llm = None self.tokenizer = None self.embedding_model = None self.curriculum_graph = None self.courses = {} self.current_student = None def load_models(self): print("Loading embedding model...") self.embedding_model = SentenceTransformer(self.embedding_model_name, device=self.device) def load_llm(self): if self.device.type == 'cuda' and self.llm is None: print("Loading LLM for intelligent planning...") quant_config = BitsAndBytesConfig( load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16 ) self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) self.tokenizer.pad_token = self.tokenizer.eos_token self.llm = AutoModelForCausalLM.from_pretrained( self.model_name, quantization_config=quant_config, device_map="auto" ) def load_data(self, graph: nx.DiGraph): self.curriculum_graph = graph self.courses = dict(graph.nodes(data=True)) UNDERGRAD_ACCESSIBLE_GRAD = {"CS5700", "CY5700", "DS5110", "CS5010"} self.valid_courses = [] course_texts = [] concentration_courses = set() for track_reqs in self.CONCENTRATION_REQUIREMENTS.values(): for category, reqs in track_reqs.items(): if isinstance(reqs, dict): for key, courses in reqs.items(): if isinstance(courses, list): concentration_courses.update(courses) for cid, data in self.courses.items(): name = data.get('name', '') if not name or name.strip() == '' or any(skip in name.lower() for skip in ['lab', 'recitation', 'seminar', 'practicum']): continue course_level = self._get_level(cid) if course_level >= 5000 and cid not in UNDERGRAD_ACCESSIBLE_GRAD: continue self.valid_courses.append(cid) course_texts.append(f"{name} {data.get('description', '')}") missing_required = concentration_courses - set(self.valid_courses) if missing_required: print(f"\n⚠️ WARNING: {len(missing_required)} required courses missing from graph: {sorted(missing_required)}\n") print(f"Computing embeddings for {len(self.valid_courses)} courses...") self.course_embeddings = self.embedding_model.encode(course_texts, convert_to_tensor=True, show_progress_bar=True) print(f"\nTotal valid courses: {len(self.valid_courses)}") def _get_level(self, course_id: str) -> int: match = re.search(r'\d+', course_id) return int(match.group()) if match else 9999 def _get_completed_with_equivalents(self, completed: Set[str]) -> Set[str]: expanded_completed = completed.copy() for course in completed: for group in self.EQUIVALENCY_GROUPS: if course in group: expanded_completed.update(group) return expanded_completed def _can_take_course(self, course_id: str, completed: Set[str]) -> bool: effective_completed = self._get_completed_with_equivalents(completed) if course_id not in self.curriculum_graph: return True prereqs = set(self.curriculum_graph.predecessors(course_id)) return prereqs.issubset(effective_completed) def _validate_sequence(self, selected: List[str], candidate: str) -> bool: for track_type, tracks in self.COURSE_TRACKS.items(): for track_name, sequence in tracks.items(): if candidate in sequence: for other_track, other_seq in tracks.items(): if other_track != track_name and any(c in selected for c in other_seq): return False return True def _score_course(self, course_id: str, semantic_scores: Dict[str, float], required_set: Set[str], picklist_set: Set[str], year: int, track: str) -> float: """ PRODUCTION SCORING - NOW TRACK AWARE Applies different boosts based on the selected track. """ if course_id not in self.courses or not self.courses[course_id].get('name', '').strip(): return -10000.0 course_data = self.courses[course_id] subject = course_data.get('subject', '') level = self._get_level(course_id) name = course_data.get('name', '').lower() score = 0.0 # --- SEMANTICS APPLIED FIRST --- semantic_weight = 15.0 if year == 4 else 5.0 score += semantic_scores.get(course_id, 0.0) * semantic_weight # --- PENALTY APPLIED AFTER SEMANTICS --- non_technical_keywords = ['society', 'ethics', 'law', 'policy', 'mobile', 'game', 'visualiz', 'web'] if any(keyword in name for keyword in non_technical_keywords): # Exception: allow 'game' and 'mobile' if game_dev track is selected if track == "game_dev" and any(k in name for k in ['game', 'mobile']): pass # Do not penalize else: score -= 10000.0 # Subject-aware scoring if subject in ["CS", "DS"]: score += 300.0 elif subject == "CY": if level < 3000: score -= 500.0 else: score += 300.0 # Allow CY electives if not intro elif subject == "MATH": score += 100.0 else: score -= 1000.0 # --- TRACK-AWARE CRITICAL PATH BOOSTS --- if track == "ai_ml": if course_id in ["DS2500", "DS3000", "DS3500"]: score += 7000.0 elif track == "security": if course_id in ["CY2550", "CY3740"]: score += 7000.0 elif track == "systems": if course_id == "CS3650": score += 7000.0 elif track == "game_dev": if course_id == "CS3540": # Game Programming score += 8000.0 # Main course for this track # "general" track gets no special boosts # Hard requirements if course_id in required_set: score += 10000.0 # Pick-list courses if course_id in picklist_set: score += 5000.0 # Unlocking factor if course_id in self.curriculum_graph: unlocks = self.curriculum_graph.out_degree(course_id) score += min(unlocks, 5) * 2.0 # Level preference score -= (level / 100.0) # Year-specific penalties if year == 4 and level < 4000: score -= 3000.0 elif year == 3 and level < 3000: score -= 2000.0 return score def generate_simple_plan(self, student: StudentProfile, track_override: Optional[str] = None) -> Dict: print("--- Generating Enhanced Rule-Based Plan ---") self.current_student = student return self.generate_enhanced_rule_plan(student, track_override) def generate_enhanced_rule_plan(self, student: StudentProfile, track_override: Optional[str] = None) -> Dict: self.current_student = student # --- FIX: Logic corrected to respect "general" override --- if track_override: track = track_override print(f"--- Using user-selected track: {track} ---") else: track = self._identify_track(student) print(f"--- Auto-identified track: {track} ---") if not track: track = "general" plan = self._build_structured_plan(student, track, None) validation = self.validate_plan(plan, student) if validation["errors"]: plan = self._fix_plan_errors(plan, validation, student) validation = self.validate_plan(plan, student) difficulty_level = self._map_difficulty(student.preferred_difficulty) courses_per_semester = self._calculate_course_load(student.time_commitment) track_name = track.replace("_", " ").title() explanation = f"Personalized {track_name} track ({difficulty_level} difficulty, {courses_per_semester} courses/semester)" return self._finalize_plan(plan, explanation, validation) def generate_llm_plan(self, student: StudentProfile, track_override: Optional[str] = None) -> Dict: print("--- Generating AI-Optimized Plan ---") self.current_student = student self.load_llm() if not self.llm: return self.generate_enhanced_rule_plan(student, track_override) # Pass override # --- FIX: Use override if provided, otherwise identify --- if track_override and track_override != "general": track = track_override print(f"--- Using user-selected track: {track} ---") else: track = self._identify_track(student) print(f"--- Auto-identified track: {track} ---") if not track: track = "general" llm_suggestions = self._get_llm_course_suggestions(student, track) plan = self._build_structured_plan(student, track, llm_suggestions) validation = self.validate_plan(plan, student) if validation["errors"]: plan = self._fix_plan_errors(plan, validation, student) validation = self.validate_plan(plan, student) track_name = track.replace("_", " ").title() explanation = self._generate_explanation(student, plan, track, f"AI-optimized {track_name}") return self._finalize_plan(plan, explanation, validation) def _build_structured_plan(self, student: StudentProfile, track: str, llm_suggestions: Optional[List[str]] = None) -> Dict: """ PRODUCTION PLANNER - NOW FULLY TRACK-AWARE Uses different priority lists based on the selected track. """ completed = set(student.completed_courses) plan = {} # --- FIX: TRACK-AWARE REQUIREMENTS --- if track == "general": print("--- Using General CS requirements ---") requirements = { "foundations": {"required": ["CS1800", "CS2500", "CS2510", "CS2800"]}, "core": {"required": ["CS3000", "CS3500", "CS3650"]}, "math": {"required": ["MATH1341", "MATH1342"], "pick_1_from": ["MATH2331", "MATH3081"]} } elif track == "game_dev": print("--- Using Game Dev (AI/ML base) requirements ---") # Use ai_ml as a base, scoring/priorities will handle the rest requirements = self.CONCENTRATION_REQUIREMENTS["ai_ml"] else: requirements = self.CONCENTRATION_REQUIREMENTS.get(track, self.CONCENTRATION_REQUIREMENTS["ai_ml"]) courses_per_semester = self._calculate_course_load(student.time_commitment) # Build required and pick sets required_set = set() picklist_set = set() for category, reqs in requirements.items(): if "required" in reqs: required_set.update(reqs["required"]) for key, courses in reqs.items(): if key.startswith("pick_"): picklist_set.update(courses) semantic_scores = self._compute_semantic_scores(student) # --- FIX: TRACK-AWARE PRIORITIES --- TRACK_YEAR_PRIORITIES = { "general": { 2: ["CS3000", "CS3500", "CS3650", "MATH2331", "MATH3081", "CS3200"], 3: ["CS4700", "CS4400", "CS4500", "CS4100"], 4: ["CS5700", "CS4730", "CS4530", "CS4550", "CS4410"] }, "ai_ml": { 2: ["CS3000", "CS3500", "DS2500", "DS3000", "DS3500", "MATH2331", "MATH3081", "CS3650"], 3: ["CS4100", "DS4400", "CS4120", "DS4420", "DS4440", "CS4180"], 4: ["CS4730", "CS4700", "CS5700", "DS4300", "CS4400", "CS4500"] }, "security": { 2: ["CS3000", "CS3650", "CY2550", "MATH2331", "MATH3081", "CS3500"], 3: ["CY3740", "CS4700", "CS5700", "CS4730"], 4: ["CY4740", "CY4760", "CS4400"] # CY4770 is missing from graph }, "systems": { 2: ["CS3000", "CS3500", "CS3650", "MATH2331", "CS3200"], 3: ["CS4700", "CS5700", "CS4730", "CS4500", "CS4400"], 4: ["CS4520", "CS4410"] }, "game_dev": { 2: ["CS3000", "CS3500", "CS3540", "MATH2331", "MATH3081", "CS3650"], 3: ["CS4520", "CS4300", "CS4100", "CS4700"], 4: ["CS4550", "CS4410", "CS4180"] } } for sem_num in range(1, 9): year = ((sem_num - 1) // 2) + 1 available_courses = self._get_available_courses(completed, year, sem_num, track) schedulable = [ c for c in available_courses if c not in completed and self._can_take_course(c, completed) ] # Use track-specific priorities, default to "general" if track is unknown current_year_priorities = TRACK_YEAR_PRIORITIES.get(track, TRACK_YEAR_PRIORITIES["general"]).get(year) if current_year_priorities: priority_courses = [c for c in current_year_priorities if c in schedulable] other_courses = [c for c in schedulable if c not in current_year_priorities] scored_priority = sorted( priority_courses, # --- FIX: Pass 'track' to score_course --- key=lambda c: self._score_course(c, semantic_scores, required_set, picklist_set, year, track), reverse=True ) scored_others = sorted( other_courses, key=lambda c: self._score_course(c, semantic_scores, required_set, picklist_set, year, track), reverse=True ) scored_courses = scored_priority + scored_others else: # Year 1: normal scoring scored_courses = sorted( schedulable, key=lambda c: self._score_course(c, semantic_scores, required_set, picklist_set, year, track), reverse=True ) # Select top N courses selected = [] for course in scored_courses: if len(selected) >= courses_per_semester: break if self._validate_sequence(selected, course): selected.append(course) if selected: year_key = f"year_{year}" if year_key not in plan: plan[year_key] = {} sem_type = 'fall' if (sem_num % 2) == 1 else 'spring' plan[year_key][sem_type] = selected completed.update(selected) return plan def _get_available_courses(self, completed: Set[str], year: int, sem_num: int = None, track: str = "ai_ml") -> List[str]: """ PRODUCTION COURSE FILTER - Strict level enforcement """ # Year 1: Hardcoded foundation if year == 1: if not completed or len(completed) < 2: return [c for c in ["CS1800", "CS2500", "MATH1341", "ENGW1111"] if c in self.valid_courses] else: next_courses = [] prereq_map = [ ("CS2800", "CS1800"), ("CS2510", "CS2500"), ("MATH1342", "MATH1341"), ("DS2000", None), ("DS2500", "DS2000") ] for course, prereq in prereq_map: if course in self.valid_courses and course not in completed: if prereq is None or prereq in completed: next_courses.append(course) return next_courses # Years 2-4: Strict filtering by subject and level available = [] ALLOWED_SUBJECTS = {"CS", "DS", "CY", "MATH"} for cid in self.valid_courses: if cid in completed: continue course_data = self.courses.get(cid, {}) subject = course_data.get('subject') if subject not in ALLOWED_SUBJECTS: continue course_level = self._get_level(cid) # FIX: Strict year-based level filtering if year == 2: if course_level < 2000 or course_level > 3999: continue # Year 2: only 2000-3999 elif year == 3: if course_level < 3000: continue # Year 3: 3000+ only elif year == 4: if course_level < 4000: continue # Year 4: 4000+ only (including CS5700) available.append(cid) return available def _fix_plan_errors(self, plan: Dict, validation: Dict, student: StudentProfile) -> Dict: if any("Mixed" in error for error in validation["errors"]): return self._build_structured_plan(student, self._identify_track(student), None) return plan def _get_llm_course_suggestions(self, student: StudentProfile, track: str) -> List[str]: requirements = self.CONCENTRATION_REQUIREMENTS.get(track, {}) all_options = set() for reqs in requirements.values(): for key, courses in reqs.items(): if key.startswith("pick_"): all_options.update(courses) course_options_text = [ f"{cid}: {self.courses[cid].get('name', cid)} - {self.courses[cid].get('description', '')[:100].strip()}" for cid in list(all_options)[:15] if cid in self.courses ] prompt = f"""Expert curriculum advisor ranking courses for student. Student Profile: - Career Goal: {student.career_goals} - Interests: {', '.join(student.interests)} - Difficulty: {student.preferred_difficulty} Available Courses: {chr(10).join(course_options_text)} Return ONLY top 5 course IDs, one per line.""" try: inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=4096).to(self.device) with torch.no_grad(): outputs = self.llm.generate( **inputs, max_new_tokens=100, temperature=0.2, do_sample=True, pad_token_id=self.tokenizer.eos_token_id ) response = self.tokenizer.decode(outputs[0][len(inputs['input_ids'][0]):], skip_special_tokens=True) suggested_courses = re.findall(r'([A-Z]{2,4}\d{4})', response) return suggested_courses[:5] except Exception as e: print(f"LLM suggestion failed: {e}") return list(all_options)[:5] def _map_difficulty(self, preferred_difficulty: str) -> str: return {"easy": "easy", "moderate": "medium", "challenging": "hard"}.get(preferred_difficulty.lower(), "medium") def _calculate_course_load(self, time_commitment: int) -> int: if time_commitment <= 20: return 3 if time_commitment <= 40: return 4 return 5 def _identify_track(self, student: StudentProfile) -> str: if not hasattr(self, 'embedding_model') or self.embedding_model is None: combined = f"{student.career_goals.lower()} {' '.join(student.interests).lower()}" if any(word in combined for word in ['ai', 'ml', 'machine learning', 'data']): return "ai_ml" if any(word in combined for word in ['systems', 'distributed', 'backend']): return "systems" if any(word in combined for word in ['security', 'cyber']): return "security" return "ai_ml" profile_text = f"{student.career_goals} {' '.join(student.interests)}" profile_emb = self.embedding_model.encode(profile_text, convert_to_tensor=True) track_descriptions = { "ai_ml": "artificial intelligence machine learning deep learning neural networks data science", "systems": "operating systems distributed systems networks compilers databases performance backend", "security": "cybersecurity cryptography network security ethical hacking vulnerabilities" } best_track, best_score = "ai_ml", -1.0 for track, description in track_descriptions.items(): track_emb = self.embedding_model.encode(description, convert_to_tensor=True) score = float(util.cos_sim(profile_emb, track_emb)) if score > best_score: best_score, best_track = score, track return best_track def _compute_semantic_scores(self, student: StudentProfile) -> Dict[str, float]: query_text = f"{student.career_goals} {' '.join(student.interests)}" query_emb = self.embedding_model.encode(query_text, convert_to_tensor=True) similarities = util.cos_sim(query_emb, self.course_embeddings)[0] return {cid: float(similarities[idx]) for idx, cid in enumerate(self.valid_courses)} def _generate_explanation(self, student: StudentProfile, plan: Dict, track: str, plan_type: str) -> str: return f"{plan_type.title()} plan for the {track} track, tailored to your goal of becoming a {student.career_goals}." def validate_plan(self, plan: Dict, student: StudentProfile = None) -> Dict[str, List[str]]: issues = {"errors": [], "warnings": [], "info": []} all_courses = [course for year in plan.values() for sem in year.values() for course in sem if isinstance(sem, list)] # Check for mixed tracks for track_type, tracks in self.COURSE_TRACKS.items(): tracks_used = {name for name, courses in tracks.items() if any(c in all_courses for c in courses)} if len(tracks_used) > 1: issues["errors"].append(f"Mixed {track_type} tracks: {', '.join(tracks_used)}. Choose one sequence.") # Validate prerequisites completed_for_validation = set(student.completed_courses) if student else set() for year in range(1, 5): for sem in ["fall", "spring"]: year_key = f"year_{year}" sem_courses = plan.get(year_key, {}).get(sem, []) for course in sem_courses: if course in self.curriculum_graph: prereqs = set(self.curriculum_graph.predecessors(course)) if not prereqs.issubset(self._get_completed_with_equivalents(completed_for_validation)): missing = prereqs - completed_for_validation issues["errors"].append(f"{course} in Year {year} {sem} is missing prereqs: {', '.join(missing)}") completed_for_validation.update(sem_courses) return issues def _finalize_plan(self, plan: Dict, explanation: str, validation: Dict = None) -> Dict: structured_plan = { "reasoning": explanation, "validation": validation or {"errors": [], "warnings": [], "info": []} } complexities = [] for year in range(1, 5): year_key = f"year_{year}" structured_plan[year_key] = { "fall": plan.get(year_key, {}).get("fall", []), "spring": plan.get(year_key, {}).get("spring", []), "summer": "co-op" if year in [2, 3] else [] } for sem in ["fall", "spring"]: courses = structured_plan[year_key][sem] if courses: sem_complexity = sum(self.courses.get(c, {}).get('complexity', 50) for c in courses) complexities.append(sem_complexity) structured_plan["complexity_analysis"] = { "average_semester_complexity": float(np.mean(complexities)) if complexities else 0, "peak_semester_complexity": float(np.max(complexities)) if complexities else 0, "total_complexity": float(np.sum(complexities)) if complexities else 0, "balance_score (std_dev)": float(np.std(complexities)) if complexities else 0 } structured_plan["metadata"] = { "generated": datetime.now().isoformat(), "valid": len(validation.get("errors", [])) == 0 if validation else True, } return {"pathway": structured_plan} class CurriculumOptimizer(HybridOptimizer): """Compatibility wrapper""" def __init__(self): super().__init__() def generate_plan(self, student: StudentProfile, track_override: Optional[str] = None) -> Dict: return self.generate_enhanced_rule_plan(student, track_override)