"""
Personalized Learning Pathway System

Implements adaptive learning capabilities that customize instruction based on
comprehensive user profiling.
"""

import json
import os
import pickle
from collections import defaultdict, deque
from dataclasses import asdict, dataclass
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple

import numpy as np
import pandas as pd

# Bloom taxonomy levels ordered from the lowest to the highest cognitive level.
BLOOM_LEVELS = ("remember", "understand", "apply", "analyze", "evaluate", "create")

# Position of each Bloom level inside BLOOM_LEVELS (unknown levels rank as 0).
_BLOOM_RANK = {level: rank for rank, level in enumerate(BLOOM_LEVELS)}

# Node statuses that count as "done" when computing path progress.
_FINISHED_STATUSES = ("completed", "skipped")

# Weights used when blending a new measurement with the stored average.
_RECENT_WEIGHT = 0.7
_HISTORY_WEIGHT = 0.3


def _write_json_atomic(path: str, payload: Dict) -> None:
    """Serialize ``payload`` as JSON to ``path`` without truncating on failure.

    The data is first written to ``<path>.tmp`` and then moved over the target
    with ``os.replace``, so a serialization error leaves the previous file
    contents untouched.

    Args:
        path: Destination file path.
        payload: JSON-serializable mapping.

    Raises:
        OSError: If the file cannot be written or replaced.
        TypeError: If ``payload`` contains values that JSON cannot encode.
    """
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(payload, f, indent=2, ensure_ascii=False)
        os.replace(tmp_path, path)
    finally:
        # Only present here when the dump or the replace failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


@dataclass
class UserProfile:
    """User profile data structure"""
    user_id: str
    knowledge_level: Dict[str, float]  # Knowledge level for each topic (0-1)
    learning_style: str  # "visual", "textual", "practical", "mixed"
    learning_pace: str  # "slow", "medium", "fast"
    preferred_topics: List[str]
    weak_areas: List[str]
    strong_areas: List[str]
    test_scores: Dict[str, List[float]]  # Historical test scores
    question_history: List[Dict]  # Question and answer history
    learning_time: Dict[str, float]  # Learning time for each topic (minutes)
    last_activity: str
    total_questions_asked: int
    total_tests_completed: int
    bloom_level_performance: Dict[str, Dict[str, float]]  # Bloom level performance for each topic
    created_at: str
    updated_at: str
    # Cold start related fields
    has_completed_onboarding: bool = False  # Whether onboarding is completed
    background_experience: str = ""  # Background experience (e.g., "experienced", "beginner", "intermediate")
    learning_goals: Optional[List[str]] = None  # Learning goals, None requires special handling
    initial_assessment_completed: bool = False  # Whether initial assessment is completed
    initial_knowledge_survey: Optional[Dict[str, float]] = None  # Initial knowledge survey results, None requires special handling


@dataclass
class LearningPathNode:
    """Learning path node"""
    node_id: str
    topic: str
    bloom_level: str  # "remember", "understand", "apply", "analyze", "evaluate", "create"
    difficulty: float  # 0-1
    prerequisites: List[str]  # Prerequisite node IDs
    estimated_time: int  # Estimated time (minutes)
    content_type: str  # "reading", "quiz", "practical", "review"
    status: str  # "pending", "in_progress", "completed", "skipped"
    completion_date: Optional[str] = None
    score: Optional[float] = None


@dataclass
class LearningPath:
    """Learning path"""
    path_id: str
    user_id: str
    nodes: List[LearningPathNode]
    current_node_index: int
    completion_percentage: float
    created_at: str
    updated_at: str
    estimated_total_time: int


class UserProfilingSystem:
    """User profiling system

    Keeps one ``UserProfile`` per user in memory and persists the whole
    collection as JSON in ``<storage_dir>/user_profiles.json`` after every
    mutation.
    """

    def __init__(self, storage_dir: str = "user_data"):
        """Create the storage directory if needed and load saved profiles.

        Args:
            storage_dir: Directory holding ``user_profiles.json``.
        """
        self.storage_dir = storage_dir
        os.makedirs(storage_dir, exist_ok=True)
        self.profiles_file = os.path.join(storage_dir, "user_profiles.json")
        self.profiles = self._load_profiles()

    def _load_profiles(self) -> Dict[str, UserProfile]:
        """Load user profiles

        Returns:
            Mapping of user id to profile. Empty when the file is missing or
            cannot be parsed (the error is printed, not raised).
        """
        if os.path.exists(self.profiles_file):
            try:
                with open(self.profiles_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                return {uid: UserProfile(**profile) for uid, profile in data.items()}
            except Exception as e:
                # Deliberately best-effort: a broken store must not prevent start-up.
                print(f"Error loading profiles: {e}")
        return {}

    def _save_profiles(self):
        """Save user profiles

        Failures are printed rather than raised so that a persistence problem
        does not abort the in-memory update that triggered the save.
        """
        try:
            data = {uid: asdict(profile) for uid, profile in self.profiles.items()}
            _write_json_atomic(self.profiles_file, data)
        except Exception as e:
            print(f"Error saving profiles: {e}")

    def get_or_create_profile(self, user_id: str) -> UserProfile:
        """Get or create user profile (cold start)

        Args:
            user_id: Identifier of the user.

        Returns:
            The existing profile, or a freshly created and persisted default one.
        """
        if user_id not in self.profiles:
            now = datetime.now().isoformat()
            self.profiles[user_id] = UserProfile(
                user_id=user_id,
                knowledge_level={},
                learning_style="mixed",
                learning_pace="medium",
                preferred_topics=[],
                weak_areas=[],
                strong_areas=[],
                test_scores={},
                question_history=[],
                learning_time={},
                last_activity=now,
                total_questions_asked=0,
                total_tests_completed=0,
                bloom_level_performance={},
                created_at=now,
                updated_at=now,
                has_completed_onboarding=False,
                background_experience="",
                learning_goals=None,
                initial_assessment_completed=False,
                initial_knowledge_survey=None,
            )
            self._save_profiles()
        return self.profiles[user_id]

    def is_cold_start(self, user_id: str) -> bool:
        """Check if user is in cold start state

        Returns:
            True when the user has no profile or has not completed onboarding.
        """
        profile = self.profiles.get(user_id)
        if profile is None:
            return True
        return not profile.has_completed_onboarding

    def complete_onboarding(self, user_id: str, onboarding_data: Dict):
        """Complete cold start setup and collect initial user information

        Information collected during cold start:
        1. Learning preferences:
           - learning_style: Learning style preference
           - learning_pace: Learning pace preference
        2. Background information:
           - background_experience: Background experience
           - learning_goals: List of learning goals
        3. Initial knowledge assessment:
           - initial_knowledge_survey: Initial familiarity with each topic (0-1)
           - initial_assessment_completed: Whether initial assessment is completed

        Only keys present in ``onboarding_data`` are applied.

        Args:
            user_id: Identifier of the user.
            onboarding_data: Answers gathered during onboarding (see above).

        Returns:
            The updated profile, marked as onboarded and persisted.
        """
        profile = self.get_or_create_profile(user_id)

        # Plain attributes are copied over verbatim when supplied.
        for key in ('learning_style', 'learning_pace', 'background_experience',
                    'learning_goals', 'initial_assessment_completed'):
            if key in onboarding_data:
                setattr(profile, key, onboarding_data[key])

        # A survey explicitly set to None is treated the same as a missing one.
        survey = onboarding_data.get('initial_knowledge_survey')
        if survey is not None:
            profile.initial_knowledge_survey = survey
            # Convert initial survey results to knowledge level
            profile.knowledge_level = dict(survey)
            # The three least familiar topics (below 0.5) become the learning focus.
            low_knowledge = sorted(
                (topic for topic, level in survey.items() if level < 0.5),
                key=survey.__getitem__,
            )
            profile.preferred_topics = low_knowledge[:3]
            # Keep the derived weak/strong lists in sync with the new levels.
            self._update_weak_strong_areas(profile)

        # Mark cold start as completed
        profile.has_completed_onboarding = True
        profile.updated_at = datetime.now().isoformat()
        self._save_profiles()
        return profile

    def update_from_test_results(self, user_id: str, topic: str, test_results: List[Dict]):
        """Update user profile from test results

        Each result may carry ``score`` (used for the topic average, falling
        back to 1.0/0.0 from ``is_correct``), ``is_correct`` and ``level``
        (Bloom level, ``'unknown'`` when absent).

        New measurements are blended with stored averages using a 0.7 / 0.3
        weighting in favour of the latest batch.

        Args:
            user_id: Identifier of the user.
            topic: Topic the test covered.
            test_results: One dict per answered question.

        Returns:
            The updated and persisted profile.
        """
        profile = self.get_or_create_profile(user_id)

        # Calculate average score (0.5 is the neutral value for an empty batch)
        scores = [r.get('score', 1.0 if r.get('is_correct', False) else 0.0) for r in test_results]
        avg_score = float(np.mean(scores)) if scores else 0.5

        # Update knowledge level
        if topic not in profile.knowledge_level:
            profile.knowledge_level[topic] = avg_score
        else:
            profile.knowledge_level[topic] = (
                _RECENT_WEIGHT * avg_score + _HISTORY_WEIGHT * profile.knowledge_level[topic]
            )

        # Update test score history
        profile.test_scores.setdefault(topic, []).append(avg_score)

        # Update Bloom level performance. The stored value is an average (float),
        # so this batch is aggregated separately and then blended into it.
        batch_by_level = defaultdict(list)
        for result in test_results:
            level = result.get('level', 'unknown')
            batch_by_level[level].append(1.0 if result.get('is_correct', False) else 0.0)

        topic_performance = profile.bloom_level_performance.setdefault(topic, {})
        for level, batch in batch_by_level.items():
            batch_avg = float(np.mean(batch))
            previous = topic_performance.get(level)
            if isinstance(previous, list):
                # Tolerate raw score lists left behind by older data.
                previous = float(np.mean(previous)) if previous else None
            if previous is None:
                topic_performance[level] = batch_avg
            else:
                topic_performance[level] = _RECENT_WEIGHT * batch_avg + _HISTORY_WEIGHT * previous

        # Update weak and strong areas
        self._update_weak_strong_areas(profile)

        # Update learning pace
        profile.learning_pace = self._calculate_learning_pace(profile)

        profile.total_tests_completed += 1
        now = datetime.now().isoformat()
        profile.last_activity = now
        profile.updated_at = now
        self._save_profiles()
        return profile

    def update_from_question(self, user_id: str, question: str, topic: Optional[str] = None):
        """Update user profile from question history

        Records the question, re-infers the learning style from the full
        history and, when a topic is given, rebuilds the preferred topics as
        the five most frequently asked-about topics.

        Args:
            user_id: Identifier of the user.
            question: Text of the question.
            topic: Topic the question relates to, if known.

        Returns:
            The updated and persisted profile.
        """
        profile = self.get_or_create_profile(user_id)

        profile.question_history.append({
            "question": question,
            "topic": topic,
            "timestamp": datetime.now().isoformat()
        })

        # Analyze question type to infer learning style
        profile.learning_style = self._infer_learning_style(profile.question_history)

        # Update preferred topics
        if topic:
            topic_counts = defaultdict(int)
            for entry in profile.question_history:
                if entry.get('topic'):
                    topic_counts[entry['topic']] += 1
            # Stable sort: ties keep the order in which topics first appeared.
            ranked = sorted(topic_counts.items(), key=lambda item: item[1], reverse=True)
            profile.preferred_topics = [name for name, _ in ranked[:5]]

        profile.total_questions_asked += 1
        now = datetime.now().isoformat()
        profile.last_activity = now
        profile.updated_at = now
        self._save_profiles()
        return profile

    def update_learning_time(self, user_id: str, topic: str, minutes: float):
        """Update learning time

        Args:
            user_id: Identifier of the user.
            topic: Topic the time was spent on.
            minutes: Minutes to add to the topic's running total.

        Returns:
            The updated and persisted profile.
        """
        profile = self.get_or_create_profile(user_id)

        profile.learning_time[topic] = profile.learning_time.get(topic, 0.0) + minutes

        now = datetime.now().isoformat()
        profile.last_activity = now
        profile.updated_at = now
        self._save_profiles()
        return profile

    def _update_weak_strong_areas(self, profile: UserProfile):
        """Update weak and strong areas

        Topics below 0.6 are weak, topics at or above 0.8 are strong; anything
        in between belongs to neither list.
        """
        levels = profile.knowledge_level
        profile.weak_areas = [topic for topic, level in levels.items() if level < 0.6]
        profile.strong_areas = [topic for topic, level in levels.items() if level >= 0.8]

    def _calculate_learning_pace(self, profile: UserProfile) -> str:
        """Calculate learning pace

        Returns:
            "fast" when the pooled recent scores rose by more than 0.3, "slow"
            when they dropped by more than 0.1, otherwise "medium".
        """
        if not profile.test_scores or profile.total_tests_completed == 0:
            return "medium"

        # Pool the last 3 scores of every topic.
        # NOTE(review): scores of different topics are compared with each other
        # here; confirm whether a per-topic trend was intended.
        recent_scores = []
        for topic_scores in profile.test_scores.values():
            recent_scores.extend(topic_scores[-3:])

        # If recent scores improve quickly, likely a fast-paced learner
        if len(recent_scores) >= 3:
            improvement = recent_scores[-1] - recent_scores[0]
            if improvement > 0.3:
                return "fast"
            if improvement < -0.1:
                return "slow"

        return "medium"

    def _infer_learning_style(self, question_history: List[Dict]) -> str:
        """Infer learning style from question history

        Counts how many questions contain at least one keyword of each style
        and returns the dominant style when it covers more than 40% of the
        questions.

        Returns:
            "visual", "practical", "textual" or "mixed".
        """
        if not question_history:
            return "mixed"

        # Analyze question keywords (checked in this order to break ties)
        style_keywords = (
            ("visual", ["how", "what does", "show", "visual", "diagram", "illustration"]),
            ("practical", ["how to", "step", "procedure", "activate", "use", "operate"]),
            ("textual", ["what is", "explain", "define", "describe", "meaning"]),
        )

        questions = [q.get('question', '').lower() for q in question_history]
        total = len(questions)

        ratios = {
            style: sum(1 for text in questions if any(kw in text for kw in keywords)) / total
            for style, keywords in style_keywords
        }

        max_ratio = max(ratios.values())
        if max_ratio > 0.4:
            for style, _ in style_keywords:
                if ratios[style] == max_ratio:
                    return style

        return "mixed"

    def get_profile_summary(self, user_id: str) -> Dict:
        """Get user profile summary

        Returns:
            Dict with the main profile fields plus ``overall_progress``.
        """
        profile = self.get_or_create_profile(user_id)

        return {
            "user_id": profile.user_id,
            "knowledge_level": profile.knowledge_level,
            "learning_style": profile.learning_style,
            "learning_pace": profile.learning_pace,
            "preferred_topics": profile.preferred_topics,
            "weak_areas": profile.weak_areas,
            "strong_areas": profile.strong_areas,
            "total_questions": profile.total_questions_asked,
            "total_tests": profile.total_tests_completed,
            "overall_progress": self._calculate_overall_progress(profile)
        }

    def _calculate_overall_progress(self, profile: UserProfile) -> float:
        """Calculate overall progress

        Returns:
            Mean knowledge level over all known topics, 0.0 when there are none.
        """
        if not profile.knowledge_level:
            return 0.0
        return float(np.mean(list(profile.knowledge_level.values())))


class LearningPathGenerator:
    """Learning path generator

    Builds a ``LearningPath`` of Bloom-level nodes per topic, tailored to the
    user's knowledge level, learning pace and learning style.
    """

    def __init__(self, user_profiling: UserProfilingSystem, available_topics: List[str]):
        """Store the profiling system and the catalogue of topics.

        Args:
            user_profiling: Source of user profiles.
            available_topics: All topics that can be taught.
        """
        self.user_profiling = user_profiling
        self.available_topics = available_topics
        self.bloom_levels = list(BLOOM_LEVELS)

    def generate_path(self, user_id: str, focus_areas: Optional[List[str]] = None) -> LearningPath:
        """Generate personalized learning path

        Args:
            user_id: Identifier of the user.
            focus_areas: Topics to use instead of the automatically selected ones.

        Returns:
            A new, unsaved learning path with nodes ordered by prerequisites.
        """
        profile = self.user_profiling.get_or_create_profile(user_id)

        # Determine topics to learn
        topics_to_learn = self._determine_topics(profile, focus_areas)

        # Generate learning nodes
        nodes = []
        for topic in topics_to_learn:
            topic_level = profile.knowledge_level.get(topic, 0.0)
            bloom_performance = profile.bloom_level_performance.get(topic, {})

            # Generate nodes for different Bloom levels for each topic
            for bloom_level in self.bloom_levels:
                if not self._needs_node(profile, topic, bloom_level, topic_level, bloom_performance):
                    continue

                nodes.append(LearningPathNode(
                    # Ids are numbered in creation order across all topics.
                    node_id=f"node_{len(nodes)}",
                    topic=topic,
                    bloom_level=bloom_level,
                    difficulty=self._calculate_difficulty(topic_level, bloom_level),
                    prerequisites=self._get_prerequisites(nodes, topic, bloom_level),
                    estimated_time=self._estimate_time(bloom_level, profile.learning_pace),
                    content_type=self._determine_content_type(bloom_level, profile.learning_style),
                    status="pending"
                ))

        # Sort nodes (considering prerequisites)
        ordered_nodes = self._topological_sort(nodes)

        now = datetime.now()
        return LearningPath(
            path_id=f"path_{user_id}_{now.strftime('%Y%m%d%H%M%S')}",
            user_id=user_id,
            nodes=ordered_nodes,
            current_node_index=0,
            completion_percentage=0.0,
            created_at=now.isoformat(),
            updated_at=now.isoformat(),
            estimated_total_time=sum(node.estimated_time for node in ordered_nodes)
        )

    def _determine_topics(self, profile: UserProfile, focus_areas: Optional[List[str]]) -> List[str]:
        """Determine topics to learn

        Returns:
            ``focus_areas`` unchanged when given; otherwise at most five topics:
            weak areas first, then topics the user has no knowledge level for,
            falling back to preferred topics or the first available topics.
        """
        if focus_areas:
            return focus_areas

        # Prioritize weak areas
        topics = list(profile.weak_areas)

        # Add unlearned topics
        for topic in self.available_topics:
            if topic not in profile.knowledge_level and topic not in topics:
                topics.append(topic)

        # If nothing is weak or unlearned, recommend preferred topics
        if not topics:
            topics = profile.preferred_topics[:3] if profile.preferred_topics else self.available_topics[:3]

        return topics[:5]  # Limit to maximum 5 topics

    def _needs_node(self, profile: UserProfile, topic: str, bloom_level: str,
                    topic_level: float, bloom_performance: Dict) -> bool:
        """Determine if a node is needed

        Returns:
            False when the Bloom level is already mastered (>= 0.8) or when a
            higher-order level is requested without basic knowledge (< 0.3).
        """
        # If performance at this Bloom level is already good, skip
        if bloom_performance.get(bloom_level, 0.0) >= 0.8:
            return False

        # Insufficient foundational knowledge, learn basics first
        if topic_level < 0.3 and bloom_level in ("analyze", "evaluate", "create"):
            return False

        return True

    def _calculate_difficulty(self, topic_level: float, bloom_level: str) -> float:
        """Calculate node difficulty

        Returns:
            Bloom-level base difficulty reduced by up to 30% according to the
            current knowledge level, clamped to [0.1, 1.0].
        """
        bloom_weights = {
            "remember": 0.2,
            "understand": 0.3,
            "apply": 0.5,
            "analyze": 0.7,
            "evaluate": 0.85,
            "create": 1.0
        }

        base_difficulty = bloom_weights.get(bloom_level, 0.5)
        # Adjust based on current knowledge level
        adjusted = base_difficulty * (1 - topic_level * 0.3)
        return min(1.0, max(0.1, adjusted))

    def _get_prerequisites(self, existing_nodes: List[LearningPathNode],
                           topic: str, bloom_level: str) -> List[str]:
        """Get prerequisites

        Returns:
            Ids of already created nodes of the same topic whose Bloom level is
            lower than ``bloom_level``.
        """
        current_rank = _BLOOM_RANK.get(bloom_level, 0)
        return [
            node.node_id
            for node in existing_nodes
            if node.topic == topic and _BLOOM_RANK.get(node.bloom_level, 0) < current_rank
        ]

    def _estimate_time(self, bloom_level: str, learning_pace: str) -> int:
        """Estimate learning time (minutes)

        Returns:
            Base time of the Bloom level scaled by the pace multiplier and
            truncated to whole minutes.
        """
        base_times = {
            "remember": 10,
            "understand": 15,
            "apply": 20,
            "analyze": 25,
            "evaluate": 30,
            "create": 35
        }
        pace_multipliers = {
            "slow": 1.5,
            "medium": 1.0,
            "fast": 0.7
        }

        base_time = base_times.get(bloom_level, 20)
        return int(base_time * pace_multipliers.get(learning_pace, 1.0))

    def _determine_content_type(self, bloom_level: str, learning_style: str) -> str:
        """Determine content type

        Returns:
            "reading", "practical" or "quiz" depending on learning style and
            Bloom level.
        """
        if learning_style == "visual":
            return "reading" if bloom_level in ("remember", "understand") else "practical"
        if learning_style == "practical":
            return "practical"
        if learning_style == "textual":
            return "reading"
        # mixed (and any unrecognised style)
        if bloom_level in ("apply", "analyze", "evaluate", "create"):
            return "quiz"
        return "reading"

    def _topological_sort(self, nodes: List[LearningPathNode]) -> List[LearningPathNode]:
        """Topological sort to ensure prerequisites are completed first

        Nodes that can never be released (circular or unknown prerequisites)
        are appended at the end in their original order.

        Returns:
            A new list containing every input node exactly once.
        """
        node_map = {node.node_id: node for node in nodes}
        in_degree = {node.node_id: len(node.prerequisites) for node in nodes}

        # prerequisite id -> ids of the nodes waiting on it, in input order
        dependents = defaultdict(list)
        for node in nodes:
            for prereq_id in dict.fromkeys(node.prerequisites):
                dependents[prereq_id].append(node.node_id)

        queue = deque(node.node_id for node in nodes if in_degree[node.node_id] == 0)
        result = []

        while queue:
            current_id = queue.popleft()
            result.append(node_map[current_id])

            for dependent_id in dependents.get(current_id, ()):
                in_degree[dependent_id] -= 1
                if in_degree[dependent_id] == 0:
                    queue.append(dependent_id)

        # Add remaining nodes (may have circular dependencies)
        emitted = {id(node) for node in result}
        result.extend(node for node in nodes if id(node) not in emitted)

        return result


class AdaptiveLearningEngine:
    """Adaptive learning engine

    Owns the users' learning paths, persists them as JSON and adapts them to
    the progress recorded in the user profiles.
    """

    def __init__(self, user_profiling: UserProfilingSystem,
                 learning_path_generator: LearningPathGenerator,
                 storage_dir: str = "user_data"):
        """Load previously saved learning paths.

        Args:
            user_profiling: Source of user profiles.
            learning_path_generator: Generator used to create new paths.
            storage_dir: Directory holding ``learning_paths.json``.
        """
        self.user_profiling = user_profiling
        self.learning_path_generator = learning_path_generator
        self.storage_dir = storage_dir
        self.paths_file = os.path.join(storage_dir, "learning_paths.json")
        self.paths = self._load_paths()

    def _load_paths(self) -> Dict[str, LearningPath]:
        """Load learning paths

        Returns:
            Mapping of path id to path. Empty when the file is missing or
            cannot be parsed (the error is printed, not raised).
        """
        if os.path.exists(self.paths_file):
            try:
                with open(self.paths_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                paths = {}
                for pid, path_data in data.items():
                    paths[pid] = LearningPath(
                        path_id=path_data['path_id'],
                        user_id=path_data['user_id'],
                        nodes=[LearningPathNode(**node) for node in path_data['nodes']],
                        current_node_index=path_data['current_node_index'],
                        completion_percentage=path_data['completion_percentage'],
                        created_at=path_data['created_at'],
                        updated_at=path_data['updated_at'],
                        estimated_total_time=path_data['estimated_total_time']
                    )
                return paths
            except Exception as e:
                # Deliberately best-effort: a broken store must not prevent start-up.
                print(f"Error loading paths: {e}")
        return {}

    def _save_paths(self):
        """Save learning paths

        Failures are printed rather than raised.
        """
        try:
            os.makedirs(os.path.dirname(self.paths_file) or ".", exist_ok=True)
            data = {pid: asdict(path) for pid, path in self.paths.items()}
            _write_json_atomic(self.paths_file, data)
        except Exception as e:
            print(f"Error saving paths: {e}")

    def _refresh_progress(self, path: LearningPath) -> None:
        """Recompute completion percentage and current node index of ``path``.

        Completed and skipped nodes both count as finished. The current index
        points at the first unfinished node, or one past the end when every
        node is finished.
        """
        total = len(path.nodes)
        finished = sum(1 for node in path.nodes if node.status in _FINISHED_STATUSES)
        path.completion_percentage = finished / total if total else 0.0
        path.current_node_index = next(
            (i for i, node in enumerate(path.nodes) if node.status not in _FINISHED_STATUSES),
            total,
        )
        path.updated_at = datetime.now().isoformat()

    def create_or_update_path(self, user_id: str, focus_areas: Optional[List[str]] = None) -> LearningPath:
        """Create or update learning path

        Args:
            user_id: Identifier of the user.
            focus_areas: Topics for a newly generated path; not applied when an
                unfinished path already exists.

        Returns:
            The refreshed active path, or a newly generated and saved one.
        """
        active_path = self.get_active_path(user_id)

        if active_path is not None and active_path.completion_percentage < 1.0:
            # Update existing path
            return self._update_path(active_path)

        # Create new path
        path = self.learning_path_generator.generate_path(user_id, focus_areas)
        self.paths[path.path_id] = path
        self._save_paths()
        return path

    def get_active_path(self, user_id: str) -> Optional[LearningPath]:
        """Get user's current active path

        Returns:
            The most recently created unfinished path, else the most recently
            created path, else None when the user has no paths.
        """
        user_paths = [p for p in self.paths.values() if p.user_id == user_id]
        if not user_paths:
            return None

        # Prefer incomplete paths; fall back to all of the user's paths.
        unfinished = [p for p in user_paths if p.completion_percentage < 1.0]
        return max(unfinished or user_paths, key=lambda p: p.created_at)

    def complete_node(self, user_id: str, node_id: str, score: float):
        """Complete a node

        Marks the node as completed, refreshes the path progress and credits
        the node's estimated time to the user's learning time for its topic.

        Args:
            user_id: Identifier of the user.
            node_id: Id of the node that was finished.
            score: Score achieved on the node.

        Returns:
            The active path, or None when the user has no path. When
            ``node_id`` is not part of the path no node is changed and no
            learning time is credited.
        """
        path = self.get_active_path(user_id)
        if path is None:
            return None

        # Find node and mark as completed
        finished_node = next((node for node in path.nodes if node.node_id == node_id), None)
        if finished_node is not None:
            finished_node.status = "completed"
            finished_node.score = score
            finished_node.completion_date = datetime.now().isoformat()

        self._refresh_progress(path)
        self._save_paths()

        # Credit the time to the node that was actually worked on.
        if finished_node is not None:
            self.user_profiling.update_learning_time(
                user_id,
                finished_node.topic,
                finished_node.estimated_time
            )

        return path

    def _update_path(self, path: LearningPath) -> LearningPath:
        """Update path based on user performance

        Pending nodes whose Bloom level the user already masters (>= 0.8) are
        marked as skipped, then progress is recomputed and the paths are saved.
        """
        profile = self.user_profiling.get_or_create_profile(path.user_id)

        for node in path.nodes:
            if node.status != "pending":
                continue
            bloom_perf = profile.bloom_level_performance.get(node.topic, {}).get(node.bloom_level, 0.0)
            if bloom_perf >= 0.8:
                # Already mastered, can skip
                node.status = "skipped"
                node.completion_date = datetime.now().isoformat()

        self._refresh_progress(path)
        self._save_paths()
        return path

    def get_recommendations(self, user_id: str) -> Dict:
        """Get learning recommendations

        Returns:
            Dict with ``next_node`` (summary of the first unfinished node of
            the active path, or None), ``suggested_topics`` (up to three
            available topics without a knowledge level, in catalogue order),
            ``review_topics`` (up to three weak areas) and ``challenge_topics``
            (up to two strong areas).
        """
        profile = self.user_profiling.get_or_create_profile(user_id)
        path = self.get_active_path(user_id)

        recommendations = {
            "next_node": None,
            "suggested_topics": [],
            "review_topics": [],
            "challenge_topics": []
        }

        # Recommend next node (never one that is already finished)
        if path is not None and path.current_node_index < len(path.nodes):
            next_node = path.nodes[path.current_node_index]
            if next_node.status not in _FINISHED_STATUSES:
                recommendations["next_node"] = {
                    "node_id": next_node.node_id,
                    "topic": next_node.topic,
                    "bloom_level": next_node.bloom_level,
                    "content_type": next_node.content_type,
                    "estimated_time": next_node.estimated_time
                }

        # Recommend topics for review
        recommendations["review_topics"] = profile.weak_areas[:3]

        # Recommend challenge topics (advanced content for strong areas)
        recommendations["challenge_topics"] = list(dict.fromkeys(profile.strong_areas[:2]))

        # Recommend new topics, keeping the catalogue order so the result is stable
        unseen = dict.fromkeys(
            topic for topic in self.learning_path_generator.available_topics
            if topic not in profile.knowledge_level
        )
        recommendations["suggested_topics"] = list(unseen)[:3]

        return recommendations

    def get_path_visualization_data(self, user_id: str) -> Dict:
        """Get path visualization data

        Returns:
            Dict with ``nodes`` and ``edges`` (prerequisite -> node) of the
            active path plus its progress fields; only empty ``nodes`` and
            ``edges`` lists when the user has no path.
        """
        path = self.get_active_path(user_id)
        if path is None:
            return {"nodes": [], "edges": []}

        nodes_data = [
            {
                "id": node.node_id,
                "topic": node.topic,
                "bloom_level": node.bloom_level,
                "status": node.status,
                "difficulty": node.difficulty,
                "score": node.score
            }
            for node in path.nodes
        ]

        # Add edges (prerequisites)
        edges_data = [
            {"source": prereq_id, "target": node.node_id}
            for node in path.nodes
            for prereq_id in node.prerequisites
        ]

        return {
            "nodes": nodes_data,
            "edges": edges_data,
            "completion_percentage": path.completion_percentage,
            "current_node_index": path.current_node_index
        }