ckharche committed on
Commit
722bc5e
·
verified ·
1 Parent(s): 19445a8

Upload curriculum_optimizer.py

Browse files
Files changed (1) hide show
  1. src/curriculum_optimizer.py +716 -0
src/curriculum_optimizer.py ADDED
@@ -0,0 +1,716 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Fixed Hybrid Curriculum Optimizer
3
+ Actually personalizes plans based on student profile
4
+ WITH MUTUAL EXCLUSION AND SEQUENCE VALIDATION
5
+ """
6
+ import torch
7
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
8
+ from sentence_transformers import SentenceTransformer, util
9
+ import networkx as nx
10
+ import numpy as np
11
+ from typing import Dict, List, Set, Tuple, Optional
12
+ from dataclasses import dataclass
13
+ import re
14
+ import json
15
+ import random
16
+ from datetime import datetime
17
+
18
@dataclass
class StudentProfile:
    """Student inputs that drive plan personalization."""

    # Course IDs the student has already finished; used to seed the planner.
    completed_courses: List[str]
    # Weekly hours available for study; converted into a per-semester course load.
    time_commitment: int
    # UI difficulty label; "easy", "moderate" and "challenging" are the mapped values.
    preferred_difficulty: str
    # Free-text career goal; embedded together with the interests for matching.
    career_goals: str
    # Free-text interest keywords.
    interests: List[str]
    # Only quoted in the LLM prompt by the visible code.
    current_gpa: float = 3.5
    # Not read by any visible code in this module.
    learning_style: str = "Visual"
27
+
28
class HybridOptimizer:
    """
    Curriculum planner combining rule-based sequencing with optional LLM input.

    The class-level tables below are shared by every instance: COURSE_TRACKS
    lists alternative course sequences that must not be mixed, and
    CONCENTRATION_REQUIREMENTS lists what each concentration needs.
    """

    # COURSE_TRACKS - Mutually exclusive sequences.
    # Shape: {subject area: {track name: [course IDs in that sequence]}}.
    # Tracks under the same subject area are treated as alternatives.
    # NOTE(review): "MATH156" / "MATH256" use three-digit numbers, unlike every
    # other ID in this module - confirm they exist in the catalog.
    COURSE_TRACKS = {
        "physics": {
            "engineering": ["PHYS1151", "PHYS1155"],
            "science": ["PHYS1161", "PHYS1165"],
            "life_sciences": ["PHYS1145", "PHYS1147"]
        },
        "calculus": {
            "standard": ["MATH1341", "MATH1342"],
            "computational": ["MATH156", "MATH256"]
        }
    }

    # CONCENTRATION_REQUIREMENTS - Structured with pick lists.
    # Shape: {concentration: {category: {"required": [...], "pick_<N>_<label>": [...]}}}.
    # Any key starting with "pick_" is a choose-N list; N is the first number
    # found in the key name (1 when the key contains no number).
    # NOTE(review): "STAT315" also has a three-digit number - confirm the ID.
    CONCENTRATION_REQUIREMENTS = {
        "ai_ml": {
            "foundations": {
                "required": ["CS1800", "CS2500", "CS2510", "CS2800"]
            },
            "core": {
                "required": ["CS3000", "CS3500"],
                "pick_1_from": ["CS3200", "CS3650", "CS3700"]
            },
            "concentration_specific": {
                "required": ["CS4100", "DS4400"],
                "pick_2_from": ["CS4120", "CS4180", "DS4420", "DS4440"],
                "pick_1_systems": ["CS4730", "CS4700", "CS4750"]
            },
            "math": {
                "required": ["MATH1341", "MATH1342"],
                "pick_1_from": ["MATH2331", "MATH3081", "STAT315"]
            }
        },
        "systems": {
            "foundations": {
                "required": ["CS1800", "CS2500", "CS2510", "CS2800"]
            },
            "core": {
                "required": ["CS3000", "CS3500", "CS3650"],
                "pick_1_from": ["CS3700", "CS3200"]
            },
            "concentration_specific": {
                "required": ["CS4700"],
                "pick_2_from": ["CS4730", "CS4750", "CS4770"],
                "pick_1_from": ["CS4400", "CS4500", "CS4520"]
            },
            "math": {
                "required": ["MATH1341", "MATH1342"]
            }
        },
        "security": {
            "foundations": {
                "required": ["CS1800", "CS2500", "CS2510", "CS2800"]
            },
            "core": {
                "required": ["CS3000", "CS3650", "CY2550"],
                "pick_1_from": ["CS3700", "CS3500"]
            },
            "concentration_specific": {
                "required": ["CY3740"],
                "pick_2_from": ["CY4740", "CY4760", "CY4770"],
                "pick_1_from": ["CS4700", "CS4730"]
            },
            "math": {
                "required": ["MATH1342"],
                "pick_1_from": ["MATH3527", "MATH3081"]
            }
        }
    }
102
+
103
+ def __init__(self):
104
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
105
+
106
+ # Use smaller model for efficiency
107
+ self.model_name = "meta-llama/Llama-3.1-8B-Instruct"
108
+ self.embedding_model_name = 'BAAI/bge-large-en-v1.5'
109
+
110
+ self.llm = None
111
+ self.tokenizer = None
112
+ self.embedding_model = None
113
+ self.curriculum_graph = None
114
+ self.courses = {}
115
+
116
def load_models(self):
    """Instantiate the sentence-embedding model on ``self.device``.

    Only the embedding model is created here; the LLM is handled by
    ``load_llm``.
    """
    print("Loading embedding model...")
    encoder = SentenceTransformer(self.embedding_model_name, device=self.device)
    self.embedding_model = encoder
120
+
121
def load_llm(self):
    """Load the 4-bit quantized LLM and its tokenizer on first use.

    Does nothing when the device is not CUDA or when an LLM is already
    loaded, so it is safe to call before every LLM-backed operation.
    """
    if self.device.type != 'cuda' or self.llm is not None:
        return

    print("Loading LLM for intelligent planning...")
    four_bit = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16
    )

    tokenizer = AutoTokenizer.from_pretrained(self.model_name)
    # Reuse the end-of-sequence token as the padding token.
    tokenizer.pad_token = tokenizer.eos_token
    self.tokenizer = tokenizer

    self.llm = AutoModelForCausalLM.from_pretrained(
        self.model_name,
        quantization_config=four_bit,
        device_map="auto"
    )
137
+
138
def load_data(self, graph: nx.DiGraph):
    """Store the curriculum graph and precompute course embeddings.

    Courses whose name contains 'Lab', 'Recitation', 'Seminar' or
    'Practicum', and courses numbered 5000 or above, are left out of
    ``self.valid_courses``. ``self.course_embeddings`` is index-aligned with
    ``self.valid_courses``.

    Calls ``self.embedding_model.encode``, so the embedding model must
    already be loaded.
    """
    self.curriculum_graph = graph
    self.courses = dict(graph.nodes(data=True))

    excluded_words = ('Lab', 'Recitation', 'Seminar', 'Practicum')
    self.valid_courses = kept_ids = []
    kept_texts = []

    for course_id, attrs in self.courses.items():
        title = attrs.get('name', '')
        is_auxiliary = any(word in title for word in excluded_words)
        # Auxiliary sections and graduate-level numbers are not schedulable.
        if is_auxiliary or self._get_level(course_id) >= 5000:
            continue

        kept_ids.append(course_id)
        kept_texts.append(f"{title} {attrs.get('description', '')}")

    print(f"Computing embeddings for {len(self.valid_courses)} courses...")
    self.course_embeddings = self.embedding_model.encode(
        kept_texts,
        convert_to_tensor=True,
        show_progress_bar=True
    )
167
+
168
+ def _get_track_commitment(self, completed: Set[str], track_type: str) -> Optional[str]:
169
+ """Once a student takes one course in a track, commit to that track"""
170
+ tracks = self.COURSE_TRACKS.get(track_type, {})
171
+ for track_name, courses in tracks.items():
172
+ if any(c in completed for c in courses):
173
+ return track_name
174
+ return None
175
+
176
+ def _validate_sequence(self, selected: List[str], candidate: str) -> bool:
177
+ """Ensure course sequences stay consistent - no mixing tracks"""
178
+ for track_type, tracks in self.COURSE_TRACKS.items():
179
+ for track_name, sequence in tracks.items():
180
+ if candidate in sequence:
181
+ # Check if any course from different track already selected
182
+ for other_track, other_seq in tracks.items():
183
+ if other_track != track_name:
184
+ if any(c in selected for c in other_seq):
185
+ return False # Don't mix sequences
186
+ return True
187
+
188
def validate_plan(self, plan: Dict, completed: Optional[List[str]] = None) -> Dict[str, List[str]]:
    """Check a plan for mixed course tracks and unmet prerequisites.

    Args:
        plan: Mapping of ``"year_<n>"`` to ``{"fall": [...], "spring": [...]}``.
            Other keys, and year entries that are not dicts, are ignored.
        completed: Course IDs finished before the plan starts. They count as
            satisfied prerequisites. Defaults to none, which matches the
            behavior of calling this method with a plan only.

    Returns:
        ``{"errors": [...], "warnings": [...], "info": [...]}``. Only
        ``errors`` is populated by the checks below. Messages list track and
        course names in sorted order so the output is reproducible.
    """
    issues = {
        "errors": [],
        "warnings": [],
        "info": []
    }

    planned = set()
    for year_key, year_data in plan.items():
        if isinstance(year_data, dict) and year_key.startswith("year_"):
            planned.update(year_data.get("fall", []))
            planned.update(year_data.get("spring", []))

    # Check for sequence mixing
    for track_type, tracks in self.COURSE_TRACKS.items():
        tracks_used = sorted(
            track_name
            for track_name, courses in tracks.items()
            if any(c in planned for c in courses)
        )
        if len(tracks_used) > 1:
            issues["errors"].append(
                f"Mixed {track_type} tracks: {', '.join(tracks_used)}. Must choose one sequence."
            )

    # Without a graph there is no prerequisite information to check against.
    graph = self.curriculum_graph
    if graph is None:
        return issues

    # Check prerequisites are satisfied, walking semesters chronologically.
    done = set(completed or ())
    for year in range(1, 5):
        year_data = plan.get(f"year_{year}")
        if not isinstance(year_data, dict):
            continue
        for sem in ("fall", "spring"):
            courses = year_data.get(sem, [])
            for course in courses:
                if course in graph:
                    # Every predecessor in the graph is treated as required.
                    missing = set(graph.predecessors(course)) - done
                    if missing:
                        issues["errors"].append(
                            f"{course} in Year {year} {sem} missing prereqs: {', '.join(sorted(missing))}"
                        )
            # Courses only count as done for later semesters.
            done.update(courses)

    return issues
232
+
233
def generate_llm_plan(self, student: "StudentProfile") -> Dict:
    """Generate a plan whose elective choices are steered by the LLM.

    Falls back to ``generate_enhanced_rule_plan`` when no LLM is available
    after ``load_llm``. When the first validation reports errors, the plan is
    repaired and validated again, so the validation attached to the result
    describes the plan that is actually returned.

    Args:
        student: Profile of the student to plan for.

    Returns:
        The finalized plan produced by ``_finalize_plan``.
    """
    print("--- Generating AI-Optimized Plan ---")

    # Ensure LLM is loaded
    self.load_llm()

    if not self.llm:
        print("LLM not available, falling back to enhanced rule-based plan")
        return self.generate_enhanced_rule_plan(student)

    # Step 1: Identify track
    track = self._identify_track(student)
    print(f"Identified track: {track}")

    # Step 2: Get LLM-suggested courses
    llm_suggestions = self._get_llm_course_suggestions(student, track)

    # Step 3: Build plan using LLM suggestions + rules
    plan = self._build_structured_plan(student, track, llm_suggestions)

    # Step 4: Validate plan
    validation = self.validate_plan(plan)
    if validation["errors"]:
        print(f"Plan validation errors: {validation['errors']}")
        # Try to fix errors, then re-validate so stale errors are not reported.
        plan = self._fix_plan_errors(plan, validation, student)
        validation = self.validate_plan(plan)

    # Step 5: Generate explanation
    explanation = self._generate_explanation(student, plan, track, "AI-optimized")

    return self._finalize_plan(plan, explanation, validation)
265
+
266
def generate_simple_plan(self, student: StudentProfile) -> Dict:
    """Produce a plan without the LLM.

    Prints a banner and delegates to ``generate_enhanced_rule_plan``.
    """
    print("--- Generating Enhanced Rule-Based Plan ---")
    rule_based_plan = self.generate_enhanced_rule_plan(student)
    return rule_based_plan
270
+
271
def generate_enhanced_rule_plan(self, student: StudentProfile) -> Dict:
    """Build, validate and finalize a plan using rules only.

    The plan is built for the track picked by ``_identify_track`` with no LLM
    suggestions. If validation reports errors, the plan goes through
    ``_fix_plan_errors`` and is validated a second time before finalizing.
    """
    chosen_track = self._identify_track(student)
    draft = self._build_structured_plan(student, chosen_track, None)

    report = self.validate_plan(draft)
    if report["errors"]:
        draft = self._fix_plan_errors(draft, report, student)
        report = self.validate_plan(draft)  # Re-validate

    # The summary sentence quotes the mapped difficulty and the course load.
    difficulty_label = self._map_difficulty(student.preferred_difficulty)
    load = self._calculate_course_load(student.time_commitment)
    summary = "Personalized {} track ({} difficulty, {} courses/semester)".format(
        chosen_track, difficulty_label, load
    )

    return self._finalize_plan(draft, summary, report)
292
+
293
+ def _build_structured_plan(
294
+ self,
295
+ student: StudentProfile,
296
+ track: str,
297
+ llm_suggestions: Optional[List[str]] = None
298
+ ) -> Dict:
299
+ """Build plan using structured concentration requirements"""
300
+
301
+ completed = set(student.completed_courses)
302
+ plan = {}
303
+ requirements = self.CONCENTRATION_REQUIREMENTS.get(track, self.CONCENTRATION_REQUIREMENTS["ai_ml"])
304
+
305
+ # Determine course load
306
+ courses_per_semester = self._calculate_course_load(student.time_commitment)
307
+
308
+ # Track which requirements have been satisfied
309
+ required_queue = []
310
+ pick_lists = []
311
+
312
+ # Build queue of required courses
313
+ for category, reqs in requirements.items():
314
+ if "required" in reqs:
315
+ required_queue.extend(reqs["required"])
316
+
317
+ # Handle pick lists
318
+ for key, courses in reqs.items():
319
+ if key.startswith("pick_"):
320
+ num_to_pick = int(re.search(r'\d+', key).group()) if re.search(r'\d+', key) else 1
321
+ pick_lists.append({
322
+ "courses": courses,
323
+ "num_to_pick": num_to_pick,
324
+ "category": category
325
+ })
326
+
327
+ # Handle course track commitments (physics/calculus)
328
+ physics_track = self._get_track_commitment(completed, "physics")
329
+ calc_track = self._get_track_commitment(completed, "calculus")
330
+
331
+ # Build semesters
332
+ for sem_num in range(1, 9):
333
+ year = ((sem_num - 1) // 2) + 1
334
+ is_fall = (sem_num % 2) == 1
335
+
336
+ available = self._get_available_courses(completed, year)
337
+ selected = []
338
+
339
+ # Apply track commitments
340
+ if not physics_track and year <= 2:
341
+ # Choose physics track based on difficulty preference
342
+ if student.preferred_difficulty == "challenging":
343
+ physics_track = "engineering"
344
+ else:
345
+ physics_track = "science"
346
+
347
+ # Priority 1: Required courses
348
+ for course in required_queue[:]:
349
+ if course in available and len(selected) < courses_per_semester:
350
+ if self._validate_sequence(selected, course):
351
+ selected.append(course)
352
+ required_queue.remove(course)
353
+ available.remove(course)
354
+
355
+ # Priority 2: Handle pick lists
356
+ for pick_list in pick_lists:
357
+ if len(selected) >= courses_per_semester:
358
+ break
359
+
360
+ # Filter available courses from this pick list
361
+ available_from_list = [c for c in pick_list["courses"] if c in available]
362
+
363
+ # Use LLM suggestions if available
364
+ if llm_suggestions:
365
+ # Prioritize LLM-suggested courses
366
+ for suggested in llm_suggestions:
367
+ if suggested in available_from_list and pick_list["num_to_pick"] > 0:
368
+ if self._validate_sequence(selected, suggested):
369
+ selected.append(suggested)
370
+ available.remove(suggested)
371
+ pick_list["num_to_pick"] -= 1
372
+
373
+ # Fill remaining slots
374
+ for course in available_from_list[:pick_list["num_to_pick"]]:
375
+ if len(selected) < courses_per_semester and course in available:
376
+ if self._validate_sequence(selected, course):
377
+ selected.append(course)
378
+ available.remove(course)
379
+ pick_list["num_to_pick"] -= 1
380
+
381
+ # Priority 3: Track-specific courses (physics/calc)
382
+ if physics_track and year <= 2:
383
+ physics_courses = self.COURSE_TRACKS["physics"].get(physics_track, [])
384
+ for course in physics_courses:
385
+ if course in available and len(selected) < courses_per_semester:
386
+ selected.append(course)
387
+ available.remove(course)
388
+
389
+ # Priority 4: Fill with electives
390
+ if len(selected) < courses_per_semester and available:
391
+ semantic_scores = self._compute_semantic_scores(student)
392
+ electives = sorted(
393
+ available,
394
+ key=lambda c: self._score_elective(c, semantic_scores, completed),
395
+ reverse=True
396
+ )
397
+
398
+ for elective in electives:
399
+ if len(selected) >= courses_per_semester:
400
+ break
401
+ if self._validate_sequence(selected, elective):
402
+ selected.append(elective)
403
+
404
+ # Add to plan
405
+ if selected:
406
+ year_key = f"year_{year}"
407
+ if year_key not in plan:
408
+ plan[year_key] = {}
409
+
410
+ sem_type = 'fall' if is_fall else 'spring'
411
+ plan[year_key][sem_type] = selected[:courses_per_semester]
412
+ completed.update(selected)
413
+
414
+ return plan
415
+
416
+ def _fix_plan_errors(self, plan: Dict, validation: Dict, student: StudentProfile) -> Dict:
417
+ """Attempt to fix validation errors in a plan"""
418
+
419
+ # For now, if there are sequence mixing errors, rebuild with enforced consistency
420
+ if any("Mixed" in error for error in validation["errors"]):
421
+ print("Fixing sequence mixing errors...")
422
+
423
+ # Find which tracks were mixed and pick the first one
424
+ for error in validation["errors"]:
425
+ if "Mixed physics" in error:
426
+ # Force engineering track (most common)
427
+ self.COURSE_TRACKS["physics"] = {"engineering": ["PHYS1151", "PHYS1155"]}
428
+ elif "Mixed calculus" in error:
429
+ # Force standard calc
430
+ self.COURSE_TRACKS["calculus"] = {"standard": ["MATH1341", "MATH1342"]}
431
+
432
+ # Rebuild plan with enforced tracks
433
+ return self._build_structured_plan(student, self._identify_track(student), None)
434
+
435
+ return plan
436
+
437
+ def _get_llm_course_suggestions(self, student: StudentProfile, track: str) -> List[str]:
438
+ """Use LLM to suggest personalized course priorities"""
439
+
440
+ requirements = self.CONCENTRATION_REQUIREMENTS.get(track, self.CONCENTRATION_REQUIREMENTS["ai_ml"])
441
+
442
+ # Gather all elective options from pick lists
443
+ all_options = []
444
+ for category, reqs in requirements.items():
445
+ for key, courses in reqs.items():
446
+ if key.startswith("pick_"):
447
+ all_options.extend(courses)
448
+
449
+ # Create course options text
450
+ course_options = []
451
+ for cid in all_options[:10]: # Limit to avoid token limits
452
+ if cid in self.courses:
453
+ name = self.courses[cid].get('name', cid)
454
+ desc = self.courses[cid].get('description', '')[:100]
455
+ course_options.append(f"{cid}: {name} - {desc}")
456
+
457
+ prompt = f"""You are a curriculum advisor. Given this student profile, rank the TOP 5 most relevant courses from the options below.
458
+
459
+ Student Profile:
460
+ - Career Goal: {student.career_goals}
461
+ - Interests: {', '.join(student.interests)}
462
+ - Time Commitment: {student.time_commitment} hours/week
463
+ - Preferred Difficulty: {student.preferred_difficulty}
464
+ - Current GPA: {student.current_gpa}
465
+
466
+ Available Courses:
467
+ {chr(10).join(course_options)}
468
+
469
+ Return ONLY the top 5 course IDs in order of priority, one per line. Example:
470
+ CS4100
471
+ DS4400
472
+ CS4120
473
+ CS4180
474
+ DS4440"""
475
+
476
+ try:
477
+ inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(self.device)
478
+
479
+ with torch.no_grad():
480
+ outputs = self.llm.generate(
481
+ **inputs,
482
+ max_new_tokens=100,
483
+ temperature=0.3,
484
+ do_sample=True,
485
+ pad_token_id=self.tokenizer.eos_token_id
486
+ )
487
+
488
+ response = self.tokenizer.decode(outputs[0][len(inputs['input_ids'][0]):], skip_special_tokens=True)
489
+
490
+ # Extract course IDs
491
+ suggested_courses = []
492
+ for line in response.strip().split('\n'):
493
+ line = line.strip()
494
+ match = re.search(r'([A-Z]{2,4}\d{4})', line)
495
+ if match:
496
+ suggested_courses.append(match.group(1))
497
+
498
+ return suggested_courses[:5]
499
+
500
+ except Exception as e:
501
+ print(f"LLM suggestion failed: {e}")
502
+ return all_options[:5] # Fallback
503
+
504
+ def _map_difficulty(self, preferred_difficulty: str) -> str:
505
+ """Map UI difficulty to internal levels"""
506
+ mapping = {
507
+ "easy": "easy",
508
+ "moderate": "medium",
509
+ "challenging": "hard"
510
+ }
511
+ return mapping.get(preferred_difficulty.lower(), "medium")
512
+
513
+ def _calculate_course_load(self, time_commitment: int) -> int:
514
+ """Calculate courses per semester based on time commitment"""
515
+ if time_commitment < 20:
516
+ return 3 # Part-time
517
+ elif time_commitment < 30:
518
+ return 4 # Standard
519
+ elif time_commitment < 40:
520
+ return 4 # Standard-heavy
521
+ else:
522
+ return 4 # Max (prerequisites limit anyway)
523
+
524
def _identify_track(self, student: StudentProfile) -> str:
    """Pick the concentration whose description best matches the student.

    The student's career goals and interests are embedded and compared by
    cosine similarity with a keyword description of each track. Returns
    "ai_ml", "systems" or "security"; "ai_ml" is the result when no score
    exceeds -1, and the earliest track wins ties.
    """
    track_descriptions = {
        "ai_ml": "artificial intelligence machine learning deep learning neural networks data science NLP computer vision LLM",
        "systems": "operating systems distributed systems networks compilers databases performance optimization backend",
        "security": "cybersecurity cryptography penetration testing security vulnerabilities network security ethical hacking"
    }

    profile_text = f"{student.career_goals} {' '.join(student.interests)}"
    profile_emb = self.embedding_model.encode(profile_text, convert_to_tensor=True)

    winner, top_similarity = "ai_ml", -1
    for name, keywords in track_descriptions.items():
        keyword_emb = self.embedding_model.encode(keywords, convert_to_tensor=True)
        similarity = float(util.cos_sim(profile_emb, keyword_emb))
        if similarity > top_similarity:
            winner, top_similarity = name, similarity

    return winner
547
+
548
def _compute_semantic_scores(self, student: StudentProfile) -> Dict[str, float]:
    """Score every valid course against the student's goals and interests.

    Returns a mapping from course ID to the cosine similarity between the
    student's text and that course's precomputed embedding. Relies on
    ``self.course_embeddings`` being index-aligned with
    ``self.valid_courses``.
    """
    student_text = f"{student.career_goals} {' '.join(student.interests)}"
    student_emb = self.embedding_model.encode(student_text, convert_to_tensor=True)

    # cos_sim returns one row per query; there is a single query here.
    row = util.cos_sim(student_emb, self.course_embeddings)[0]

    return {
        course_id: float(row[position])
        for position, course_id in enumerate(self.valid_courses)
    }
561
+
562
+ def _get_available_courses(self, completed: Set[str], year: int) -> List[str]:
563
+ """Get schedulable courses with year restrictions"""
564
+
565
+ available = []
566
+ max_level = 2999 if year == 1 else 3999 if year == 2 else 9999
567
+
568
+ for cid in self.valid_courses:
569
+ if cid in completed:
570
+ continue
571
+
572
+ if self._get_level(cid) > max_level:
573
+ continue
574
+
575
+ # Check prerequisites
576
+ if cid in self.curriculum_graph:
577
+ prereqs = set(self.curriculum_graph.predecessors(cid))
578
+ if not prereqs.issubset(completed):
579
+ continue
580
+
581
+ available.append(cid)
582
+
583
+ return available
584
+
585
+ def _score_elective(
586
+ self,
587
+ course_id: str,
588
+ semantic_scores: Dict[str, float],
589
+ completed: Set[str]
590
+ ) -> float:
591
+ """Basic elective scoring"""
592
+
593
+ score = 0.0
594
+
595
+ # Semantic alignment (50%)
596
+ score += semantic_scores.get(course_id, 0) * 0.5
597
+
598
+ # Unlocks future courses (30%)
599
+ if course_id in self.curriculum_graph:
600
+ unlocks = len(list(self.curriculum_graph.successors(course_id)))
601
+ score += min(unlocks / 5, 1.0) * 0.3
602
+
603
+ # Subject relevance (20%)
604
+ subject = self.courses.get(course_id, {}).get('subject', '')
605
+ subject_scores = {"CS": 1.0, "DS": 0.9, "IS": 0.6, "MATH": 0.7, "CY": 0.8}
606
+ score += subject_scores.get(subject, 0.3) * 0.2
607
+
608
+ return score
609
+
610
def _generate_explanation(self, student: StudentProfile, plan: Dict, track: str, plan_type: str) -> str:
    """Describe the plan in a sentence or two, using the LLM when loaded.

    Without an LLM a fixed template sentence is returned. If tokenizing,
    generating or decoding raises, the error is printed and a second
    template sentence is returned instead.
    """
    if not self.llm:
        return f"{plan_type} {track} track plan for {student.career_goals}"

    # Count courses across the fall and spring terms of years 1-4.
    total_courses = 0
    for y in range(1, 5):
        year_block = plan.get(f"year_{y}", {})
        for sem in ("fall", "spring"):
            total_courses += len(year_block.get(sem, []))

    prompt = f"""Explain this curriculum plan in 1-2 sentences:
Plan Type: {plan_type}
Track: {track}
Student Goal: {student.career_goals}
Interests: {', '.join(student.interests[:2])}
Difficulty: {student.preferred_difficulty}
Time Commitment: {student.time_commitment}h/week
Total Courses: {total_courses}

Be specific about how the plan matches their preferences."""

    try:
        inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True).to(self.device)

        with torch.no_grad():
            outputs = self.llm.generate(
                **inputs,
                max_new_tokens=150,
                temperature=0.7,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id
            )

        # Keep only the tokens produced after the prompt.
        prompt_length = len(inputs['input_ids'][0])
        generated = outputs[0][prompt_length:]
        return self.tokenizer.decode(generated, skip_special_tokens=True).strip()

    except Exception as e:
        print(f"Explanation generation failed: {e}")
        return f"{plan_type} {track} track plan optimized for {student.career_goals}"
652
+
653
+ def _get_level(self, course_id: str) -> int:
654
+ """Extract course level"""
655
+ match = re.search(r'\d+', course_id)
656
+ return int(match.group()) if match else 9999
657
+
658
+ def _finalize_plan(self, plan: Dict, explanation: str, validation: Dict = None) -> Dict:
659
+ """Add structure, metrics, and validation to plan"""
660
+
661
+ structured = {
662
+ "reasoning": explanation,
663
+ "validation": validation if validation else {"errors": [], "warnings": [], "info": []}
664
+ }
665
+
666
+ # Ensure all years present
667
+ for year in range(1, 5):
668
+ year_key = f"year_{year}"
669
+ if year_key not in plan:
670
+ plan[year_key] = {}
671
+
672
+ structured[year_key] = {
673
+ "fall": plan[year_key].get("fall", []),
674
+ "spring": plan[year_key].get("spring", []),
675
+ "summer": "co-op" if year in [2, 3] else []
676
+ }
677
+
678
+ # Calculate complexity metrics
679
+ complexities = []
680
+ for year_key in structured:
681
+ if year_key.startswith("year_"):
682
+ for sem in ["fall", "spring"]:
683
+ courses = structured[year_key].get(sem, [])
684
+ if courses:
685
+ sem_complexity = sum(
686
+ self.courses.get(c, {}).get('complexity', 50)
687
+ for c in courses
688
+ )
689
+ complexities.append(sem_complexity)
690
+
691
+ structured["complexity_analysis"] = {
692
+ "average_semester_complexity": float(np.mean(complexities)) if complexities else 0,
693
+ "peak_semester_complexity": float(np.max(complexities)) if complexities else 0,
694
+ "total_complexity": float(np.sum(complexities)) if complexities else 0,
695
+ "balance_score (std_dev)": float(np.std(complexities)) if complexities else 0
696
+ }
697
+
698
+ # Add metadata
699
+ structured["metadata"] = {
700
+ "generated": datetime.now().isoformat(),
701
+ "valid": len(validation.get("errors", [])) == 0 if validation else True,
702
+ "has_warnings": len(validation.get("warnings", [])) > 0 if validation else False
703
+ }
704
+
705
+ return {"pathway": structured}
706
+
707
+ # Backward compatibility wrapper
708
class CurriculumOptimizer(HybridOptimizer):
    """Thin subclass of ``HybridOptimizer`` that adds a ``generate_plan`` entry point."""

    def __init__(self):
        # No extra state; construction is entirely the parent's.
        super().__init__()

    def generate_plan(self, student: StudentProfile) -> Dict:
        """Return the rule-based plan for ``student``.

        Delegates to ``generate_enhanced_rule_plan``.
        """
        default_plan = self.generate_enhanced_rule_plan(student)
        return default_plan