# CSRC-Car-Manual-RAG / modules / personalized_learning.py
# Bryceeee's picture
# Upload 34 files
# 6a11527 verified
"""
Personalized Learning Pathway System
Implements adaptive learning capabilities that customize instruction based on comprehensive user profiling
"""
import json
import os
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass, asdict
from collections import defaultdict
import pickle
@dataclass
class UserProfile:
    """Everything the system tracks about a single learner.

    The first sixteen fields are required. The trailing "cold start" fields
    describe onboarding and default to a not-yet-onboarded state. All fields
    use plain built-in types (str, int, float, bool, list, dict).
    """
    user_id: str
    # Knowledge level for each topic (0-1)
    knowledge_level: Dict[str, float]
    # One of "visual", "textual", "practical", "mixed"
    learning_style: str
    # One of "slow", "medium", "fast"
    learning_pace: str
    preferred_topics: List[str]
    weak_areas: List[str]
    strong_areas: List[str]
    # Historical test scores, one list per topic
    test_scores: Dict[str, List[float]]
    # Question and answer history
    question_history: List[Dict]
    # Learning time for each topic (minutes)
    learning_time: Dict[str, float]
    last_activity: str
    total_questions_asked: int
    total_tests_completed: int
    # Bloom level performance for each topic: topic -> bloom level -> value
    bloom_level_performance: Dict[str, Dict[str, float]]
    created_at: str
    updated_at: str

    # --- Cold start related fields ---
    # Whether onboarding is completed
    has_completed_onboarding: bool = False
    # Background experience (e.g., "experienced", "beginner", "intermediate")
    background_experience: str = ""
    # Learning goals; None means "not provided yet" (a list default would be
    # shared between instances, hence the None sentinel)
    learning_goals: Optional[List[str]] = None
    # Whether initial assessment is completed
    initial_assessment_completed: bool = False
    # Initial knowledge survey results (topic -> 0-1); None means "not taken"
    initial_knowledge_survey: Optional[Dict[str, float]] = None
@dataclass
class LearningPathNode:
    """One step of a learning path.

    Attributes:
        node_id: Identifier of this node.
        topic: Topic the node belongs to.
        bloom_level: "remember", "understand", "apply", "analyze",
            "evaluate" or "create".
        difficulty: Difficulty on a 0-1 scale.
        prerequisites: IDs of the prerequisite nodes.
        estimated_time: Estimated time in minutes.
        content_type: "reading", "quiz", "practical" or "review".
        status: "pending", "in_progress", "completed" or "skipped".
        completion_date: Optional string; defaults to None.
        score: Optional float; defaults to None.
    """
    node_id: str
    topic: str
    bloom_level: str
    difficulty: float
    prerequisites: List[str]
    estimated_time: int
    content_type: str
    status: str
    completion_date: Optional[str] = None
    score: Optional[float] = None
@dataclass
class LearningPath:
    """An ordered collection of learning nodes belonging to one user.

    Attributes:
        path_id: Identifier of the path.
        user_id: Identifier of the user the path belongs to.
        nodes: The nodes of the path, in order.
        current_node_index: Index into ``nodes`` of the current node.
        completion_percentage: Progress value stored as a float.
        created_at: Creation timestamp, stored as a string.
        updated_at: Last-update timestamp, stored as a string.
        estimated_total_time: Estimated total time, stored as an int.
    """
    path_id: str
    user_id: str
    nodes: List[LearningPathNode]
    current_node_index: int
    completion_percentage: float
    created_at: str
    updated_at: str
    estimated_total_time: int
class UserProfilingSystem:
    """Create, update and persist per-user learning profiles.

    Profiles are held in ``self.profiles`` (user id -> ``UserProfile``) and
    written to ``<storage_dir>/user_profiles.json`` by every mutating method.
    """

    # Blend applied when a new test result meets an existing estimate:
    # the newest result dominates the stored value.
    _NEW_RESULT_WEIGHT = 0.7
    _HISTORY_WEIGHT = 0.3

    def __init__(self, storage_dir: str = "user_data"):
        """Create the storage directory if needed and load saved profiles.

        Args:
            storage_dir: Directory that holds ``user_profiles.json``.
        """
        self.storage_dir = storage_dir
        os.makedirs(storage_dir, exist_ok=True)
        self.profiles_file = os.path.join(storage_dir, "user_profiles.json")
        self.profiles = self._load_profiles()

    def _load_profiles(self) -> Dict[str, "UserProfile"]:
        """Load user profiles from disk.

        Returns:
            Mapping of user id to profile; empty when the file is missing or
            cannot be read (the error is printed, not raised).
        """
        if not os.path.exists(self.profiles_file):
            return {}
        try:
            with open(self.profiles_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            return {uid: UserProfile(**profile) for uid, profile in data.items()}
        except Exception as e:
            # Deliberately best-effort: an unreadable store must not prevent
            # the application from starting.
            print(f"Error loading profiles: {e}")
            return {}

    def _save_profiles(self):
        """Write all profiles to disk (best-effort; errors are printed).

        The JSON text is produced before any file is opened and is then moved
        into place with ``os.replace``, so a serialization or write failure
        cannot truncate the previously saved profiles.
        """
        try:
            data = {uid: asdict(profile) for uid, profile in self.profiles.items()}
            payload = json.dumps(data, indent=2, ensure_ascii=False)
            tmp_file = self.profiles_file + ".tmp"
            with open(tmp_file, 'w', encoding='utf-8') as f:
                f.write(payload)
            os.replace(tmp_file, self.profiles_file)
        except Exception as e:
            print(f"Error saving profiles: {e}")

    def get_or_create_profile(self, user_id: str) -> "UserProfile":
        """Get or create user profile (cold start).

        A newly created profile is saved immediately.
        """
        if user_id not in self.profiles:
            now = datetime.now().isoformat()
            self.profiles[user_id] = UserProfile(
                user_id=user_id,
                knowledge_level={},
                learning_style="mixed",
                learning_pace="medium",
                preferred_topics=[],
                weak_areas=[],
                strong_areas=[],
                test_scores={},
                question_history=[],
                learning_time={},
                last_activity=now,
                total_questions_asked=0,
                total_tests_completed=0,
                bloom_level_performance={},
                created_at=now,
                updated_at=now,
                has_completed_onboarding=False,
                background_experience="",
                learning_goals=None,
                initial_assessment_completed=False,
                initial_knowledge_survey=None,
            )
            self._save_profiles()
        return self.profiles[user_id]

    def is_cold_start(self, user_id: str) -> bool:
        """Return True when the user is unknown or has not finished onboarding."""
        if user_id not in self.profiles:
            return True
        return not self.profiles[user_id].has_completed_onboarding

    def complete_onboarding(self, user_id: str, onboarding_data: Dict):
        """Complete cold start setup and collect initial user information.

        Only the keys present in ``onboarding_data`` are applied:

        1. Learning preferences:
           - learning_style: Learning style preference
           - learning_pace: Learning pace preference
        2. Background information:
           - background_experience: Background experience
           - learning_goals: List of learning goals
        3. Initial knowledge assessment:
           - initial_knowledge_survey: Initial familiarity with each topic (0-1)
           - initial_assessment_completed: Whether initial assessment is completed

        Returns:
            The updated profile, now marked as onboarded.
        """
        profile = self.get_or_create_profile(user_id)

        # Values that are copied onto the profile unchanged.
        for key in ('learning_style', 'learning_pace',
                    'background_experience', 'learning_goals'):
            if key in onboarding_data:
                setattr(profile, key, onboarding_data[key])

        if 'initial_knowledge_survey' in onboarding_data:
            survey = onboarding_data['initial_knowledge_survey']
            profile.initial_knowledge_survey = survey
            # A None survey means "not taken": keep existing knowledge data.
            if survey is not None:
                # Survey answers become the starting knowledge levels.
                profile.knowledge_level = survey.copy()
                # Topics with familiarity below 0.5 become the learning
                # focus; the first three in survey order are kept.
                low_knowledge_topics = [topic for topic, level in survey.items()
                                        if level < 0.5]
                profile.preferred_topics = low_knowledge_topics[:3]

        if 'initial_assessment_completed' in onboarding_data:
            profile.initial_assessment_completed = onboarding_data['initial_assessment_completed']

        # Mark cold start as completed
        profile.has_completed_onboarding = True
        profile.updated_at = datetime.now().isoformat()
        self._save_profiles()
        return profile

    def update_from_test_results(self, user_id: str, topic: str, test_results: List[Dict]):
        """Update user profile from test results.

        Args:
            user_id: User whose profile is updated.
            topic: Topic the test covered.
            test_results: One dict per question. Keys read: ``score``
                (optional; falls back to 1.0/0.0 from ``is_correct``),
                ``is_correct`` and ``level`` (Bloom level, default
                ``"unknown"``).

        Returns:
            The updated profile.
        """
        profile = self.get_or_create_profile(user_id)

        # Average score of this test (0.5 when no results were supplied)
        scores = [r.get('score', 1.0 if r.get('is_correct', False) else 0.0)
                  for r in test_results]
        avg_score = float(np.mean(scores)) if scores else 0.5

        # Knowledge level: first result is taken as-is, later results are
        # blended with more weight on the latest one.
        if topic not in profile.knowledge_level:
            profile.knowledge_level[topic] = avg_score
        else:
            profile.knowledge_level[topic] = (
                self._NEW_RESULT_WEIGHT * avg_score
                + self._HISTORY_WEIGHT * profile.knowledge_level[topic]
            )

        profile.test_scores.setdefault(topic, []).append(avg_score)

        # Bloom level performance. The stored value per level is a single
        # float, so this test's results are first averaged per level and then
        # blended with the stored value (appending to the stored value would
        # fail because it is not a list).
        batch_scores = {}
        for result in test_results:
            level = result.get('level', 'unknown')
            hit = 1.0 if result.get('is_correct', False) else 0.0
            batch_scores.setdefault(level, []).append(hit)

        topic_performance = profile.bloom_level_performance.setdefault(topic, {})
        for level, level_scores in batch_scores.items():
            batch_mean = float(np.mean(level_scores))
            previous = topic_performance.get(level)
            if previous is None:
                topic_performance[level] = batch_mean
            else:
                topic_performance[level] = (
                    self._NEW_RESULT_WEIGHT * batch_mean
                    + self._HISTORY_WEIGHT * previous
                )

        self._update_weak_strong_areas(profile)
        # Computed before the test counter is incremented.
        profile.learning_pace = self._calculate_learning_pace(profile)

        profile.total_tests_completed += 1
        now = datetime.now().isoformat()
        profile.last_activity = now
        profile.updated_at = now
        self._save_profiles()
        return profile

    def update_from_question(self, user_id: str, question: str, topic: Optional[str] = None):
        """Record a question and refresh learning style and preferred topics.

        Args:
            user_id: User who asked the question.
            question: The question text.
            topic: Topic of the question, if known.

        Returns:
            The updated profile.
        """
        profile = self.get_or_create_profile(user_id)
        now = datetime.now().isoformat()
        profile.question_history.append({
            "question": question,
            "topic": topic,
            "timestamp": now
        })

        # NOTE(review): this replaces any style chosen during onboarding,
        # even after a single question - confirm that is intended.
        profile.learning_style = self._infer_learning_style(profile.question_history)

        if topic:
            # Preferred topics are the five most frequently asked-about
            # topics in the whole question history.
            topic_counts = defaultdict(int)
            for q in profile.question_history:
                if q.get('topic'):
                    topic_counts[q['topic']] += 1
            ranked = sorted(topic_counts.items(), key=lambda x: x[1], reverse=True)[:5]
            profile.preferred_topics = [name for name, _ in ranked]

        profile.total_questions_asked += 1
        profile.last_activity = now
        profile.updated_at = now
        self._save_profiles()
        return profile

    def update_learning_time(self, user_id: str, topic: str, minutes: float):
        """Add ``minutes`` to the learning time recorded for ``topic``."""
        profile = self.get_or_create_profile(user_id)
        profile.learning_time[topic] = profile.learning_time.get(topic, 0.0) + minutes
        now = datetime.now().isoformat()
        profile.last_activity = now
        profile.updated_at = now
        self._save_profiles()
        return profile

    def _update_weak_strong_areas(self, profile: "UserProfile"):
        """Recompute weak (< 0.6) and strong (>= 0.8) areas from knowledge levels."""
        levels = profile.knowledge_level
        profile.weak_areas = [topic for topic, level in levels.items() if level < 0.6]
        profile.strong_areas = [topic for topic, level in levels.items() if level >= 0.8]

    def _calculate_learning_pace(self, profile: "UserProfile") -> str:
        """Estimate the learning pace from recent test scores.

        The last three scores of every topic are concatenated; the pace is
        "fast" when the last of them exceeds the first by more than 0.3,
        "slow" when it is lower by more than 0.1, otherwise "medium".

        With fewer than three recent scores there is no evidence either way,
        so the profile's current pace is kept (this preserves a pace chosen
        during onboarding); "medium" is used when no pace is set.
        """
        current_pace = profile.learning_pace or "medium"
        if not profile.test_scores or profile.total_tests_completed == 0:
            return current_pace

        recent_scores = []
        for topic_scores in profile.test_scores.values():
            recent_scores.extend(topic_scores[-3:])  # Last 3 tests
        if len(recent_scores) < 3:
            return current_pace

        improvement = recent_scores[-1] - recent_scores[0]
        if improvement > 0.3:
            return "fast"
        if improvement < -0.1:
            return "slow"
        return "medium"

    def _infer_learning_style(self, question_history: List[Dict]) -> str:
        """Infer learning style from keywords in the question history.

        Returns "visual", "practical" or "textual" when more than 40% of the
        questions match that category's keywords (ties resolve in that
        order), otherwise "mixed".
        """
        if not question_history:
            return "mixed"

        # NOTE(review): matching is by substring, so "how" also matches
        # "how to" and "show" - the categories overlap.
        visual_keywords = ["how", "what does", "show", "visual", "diagram", "illustration"]
        practical_keywords = ["how to", "step", "procedure", "activate", "use", "operate"]
        textual_keywords = ["what is", "explain", "define", "describe", "meaning"]

        visual_count = practical_count = textual_count = 0
        for entry in question_history:
            # Treat a missing or None question as empty text.
            text = (entry.get('question') or '').lower()
            if any(kw in text for kw in visual_keywords):
                visual_count += 1
            if any(kw in text for kw in practical_keywords):
                practical_count += 1
            if any(kw in text for kw in textual_keywords):
                textual_count += 1

        total = len(question_history)
        visual_ratio = visual_count / total
        practical_ratio = practical_count / total
        textual_ratio = textual_count / total
        max_ratio = max(visual_ratio, practical_ratio, textual_ratio)

        if max_ratio <= 0.4:
            return "mixed"
        if visual_ratio == max_ratio:
            return "visual"
        if practical_ratio == max_ratio:
            return "practical"
        return "textual"

    def get_profile_summary(self, user_id: str) -> Dict:
        """Return a dict summarising the user's profile (created if missing)."""
        profile = self.get_or_create_profile(user_id)
        return {
            "user_id": profile.user_id,
            "knowledge_level": profile.knowledge_level,
            "learning_style": profile.learning_style,
            "learning_pace": profile.learning_pace,
            "preferred_topics": profile.preferred_topics,
            "weak_areas": profile.weak_areas,
            "strong_areas": profile.strong_areas,
            "total_questions": profile.total_questions_asked,
            "total_tests": profile.total_tests_completed,
            "overall_progress": self._calculate_overall_progress(profile)
        }

    def _calculate_overall_progress(self, profile: "UserProfile") -> float:
        """Return the mean knowledge level over all topics (0.0 when none)."""
        if not profile.knowledge_level:
            return 0.0
        return float(np.mean(list(profile.knowledge_level.values())))
class LearningPathGenerator:
    """Build a personalized, prerequisite-ordered learning path for a user."""

    # Base difficulty of each Bloom level before knowledge adjustment.
    _BLOOM_DIFFICULTY = {
        "remember": 0.2,
        "understand": 0.3,
        "apply": 0.5,
        "analyze": 0.7,
        "evaluate": 0.85,
        "create": 1.0,
    }
    # Base study time per Bloom level, in minutes.
    _BASE_MINUTES = {
        "remember": 10,
        "understand": 15,
        "apply": 20,
        "analyze": 25,
        "evaluate": 30,
        "create": 35,
    }
    # Time multiplier per learning pace.
    _PACE_MULTIPLIERS = {
        "slow": 1.5,
        "medium": 1.0,
        "fast": 0.7,
    }

    def __init__(self, user_profiling: "UserProfilingSystem", available_topics: List[str]):
        """Store the profiling system and the catalogue of topics.

        Args:
            user_profiling: Source of user profiles.
            available_topics: All topics a path may draw from.
        """
        self.user_profiling = user_profiling
        self.available_topics = available_topics
        # Bloom levels from lowest to highest.
        self.bloom_levels = ["remember", "understand", "apply", "analyze", "evaluate", "create"]

    def generate_path(self, user_id: str, focus_areas: Optional[List[str]] = None) -> "LearningPath":
        """Generate personalized learning path.

        Args:
            user_id: User the path is generated for.
            focus_areas: When given (and non-empty), exactly these topics are
                used instead of the topics derived from the profile.

        Returns:
            A new path whose nodes are ordered so that prerequisites come
            first, starting at node index 0 with 0.0 completion.
        """
        profile = self.user_profiling.get_or_create_profile(user_id)
        topics_to_learn = self._determine_topics(profile, focus_areas)

        nodes = []
        for topic in topics_to_learn:
            topic_level = profile.knowledge_level.get(topic, 0.0)
            bloom_performance = profile.bloom_level_performance.get(topic, {})
            # One candidate node per Bloom level, lowest level first
            for bloom_level in self.bloom_levels:
                if not self._needs_node(profile, topic, bloom_level, topic_level, bloom_performance):
                    continue
                nodes.append(LearningPathNode(
                    # IDs are sequential within this path
                    node_id=f"node_{len(nodes)}",
                    topic=topic,
                    bloom_level=bloom_level,
                    difficulty=self._calculate_difficulty(topic_level, bloom_level),
                    prerequisites=self._get_prerequisites(nodes, topic, bloom_level),
                    estimated_time=self._estimate_time(bloom_level, profile.learning_pace),
                    content_type=self._determine_content_type(bloom_level, profile.learning_style),
                    status="pending"
                ))

        ordered_nodes = self._topological_sort(nodes)
        total_time = sum(node.estimated_time for node in ordered_nodes)
        now = datetime.now()
        return LearningPath(
            path_id=f"path_{user_id}_{now.strftime('%Y%m%d%H%M%S')}",
            user_id=user_id,
            nodes=ordered_nodes,
            current_node_index=0,
            completion_percentage=0.0,
            created_at=now.isoformat(),
            updated_at=now.isoformat(),
            estimated_total_time=total_time
        )

    def _determine_topics(self, profile: "UserProfile", focus_areas: Optional[List[str]]) -> List[str]:
        """Determine topics to learn.

        Explicit ``focus_areas`` win and are returned as given. Otherwise
        weak areas come first, followed by available topics the user has no
        knowledge level for; if that yields nothing, the first three
        preferred topics (or available topics) are used. At most five topics
        are returned in the derived case.
        """
        if focus_areas:
            return focus_areas

        topics = profile.weak_areas.copy()
        for topic in self.available_topics:
            if topic not in profile.knowledge_level and topic not in topics:
                topics.append(topic)

        if not topics:
            fallback = profile.preferred_topics or self.available_topics
            topics = fallback[:3]
        return topics[:5]

    def _needs_node(self, profile: "UserProfile", topic: str, bloom_level: str,
                    topic_level: float, bloom_performance: Dict) -> bool:
        """Determine if a node is needed.

        A node is skipped when the recorded performance at this Bloom level
        is already >= 0.8, or when the topic level is below 0.3 and the level
        is one of "analyze", "evaluate", "create" (basics first).
        """
        if bloom_performance.get(bloom_level, 0.0) >= 0.8:
            return False
        if topic_level < 0.3 and bloom_level in ["analyze", "evaluate", "create"]:
            return False
        return True

    def _calculate_difficulty(self, topic_level: float, bloom_level: str) -> float:
        """Return the node difficulty, clamped to the range 0.1-1.0.

        The Bloom level's base difficulty (0.5 for an unknown level) is
        reduced by up to 30% according to the current topic level.
        """
        base_difficulty = self._BLOOM_DIFFICULTY.get(bloom_level, 0.5)
        adjusted = base_difficulty * (1 - topic_level * 0.3)
        return min(1.0, max(0.1, adjusted))

    def _get_prerequisites(self, existing_nodes: List["LearningPathNode"],
                           topic: str, bloom_level: str) -> List[str]:
        """Return IDs of existing same-topic nodes at a lower Bloom level.

        Unknown Bloom levels are treated as the lowest level.
        """
        order = self.bloom_levels

        def rank(level: str) -> int:
            return order.index(level) if level in order else 0

        current_rank = rank(bloom_level)
        return [node.node_id for node in existing_nodes
                if node.topic == topic and rank(node.bloom_level) < current_rank]

    def _estimate_time(self, bloom_level: str, learning_pace: str) -> int:
        """Estimate learning time (minutes), truncated to an int.

        Unknown Bloom levels use 20 minutes; unknown paces use multiplier 1.0.
        """
        base_time = self._BASE_MINUTES.get(bloom_level, 20)
        return int(base_time * self._PACE_MULTIPLIERS.get(learning_pace, 1.0))

    def _determine_content_type(self, bloom_level: str, learning_style: str) -> str:
        """Pick "reading", "practical" or "quiz" from style and Bloom level."""
        if learning_style == "practical":
            return "practical"
        if learning_style == "textual":
            return "reading"
        if learning_style == "visual":
            # Lower levels are read, higher levels are practised
            return "reading" if bloom_level in ["remember", "understand"] else "practical"
        # mixed (and any other style)
        if bloom_level in ["apply", "analyze", "evaluate", "create"]:
            return "quiz"
        return "reading"

    def _topological_sort(self, nodes: List["LearningPathNode"]) -> List["LearningPathNode"]:
        """Order nodes so that every node follows its prerequisites.

        Nodes that become ready are emitted first-in, first-out, starting
        with the prerequisite-free nodes in input order. Nodes that can never
        become ready (circular dependencies, or a prerequisite ID that is not
        in ``nodes``) are appended at the end in input order.

        A prerequisite ID listed more than once counts once, so such a node
        is still scheduled after its prerequisite.
        """
        node_map = {node.node_id: node for node in nodes}
        in_degree = {}
        # prerequisite id -> ids of nodes waiting on it, in input order
        dependents = {}
        for node in nodes:
            unique_prereqs = set(node.prerequisites)
            in_degree[node.node_id] = len(unique_prereqs)
            for prereq_id in unique_prereqs:
                dependents.setdefault(prereq_id, []).append(node.node_id)

        # The list doubles as a FIFO queue read through ``head``; this avoids
        # the O(n) cost of popping from the front of a list.
        queue = [node.node_id for node in nodes if in_degree[node.node_id] == 0]
        ordered = []
        head = 0
        while head < len(queue):
            current_id = queue[head]
            head += 1
            ordered.append(node_map[current_id])
            for dependent_id in dependents.get(current_id, ()):
                in_degree[dependent_id] -= 1
                if in_degree[dependent_id] == 0:
                    queue.append(dependent_id)

        # Add remaining nodes (may have circular dependencies)
        emitted = set(queue)
        ordered.extend(node for node in nodes if node.node_id not in emitted)
        return ordered
class AdaptiveLearningEngine:
    """Track learning paths per user and adapt them to the user's progress.

    Paths are held in ``self.paths`` (path id -> ``LearningPath``) and written
    to ``<storage_dir>/learning_paths.json`` whenever they change.
    """

    # Node states that count as "done" for progress purposes.
    _FINISHED_STATES = ("completed", "skipped")

    def __init__(self, user_profiling: "UserProfilingSystem",
                 learning_path_generator: "LearningPathGenerator",
                 storage_dir: str = "user_data"):
        """Store collaborators and load previously saved paths.

        Args:
            user_profiling: Source of user profiles; also receives learning
                time updates.
            learning_path_generator: Used to build new paths.
            storage_dir: Directory that holds ``learning_paths.json``.
        """
        self.user_profiling = user_profiling
        self.learning_path_generator = learning_path_generator
        self.storage_dir = storage_dir
        self.paths_file = os.path.join(storage_dir, "learning_paths.json")
        self.paths = self._load_paths()

    def _load_paths(self) -> Dict[str, "LearningPath"]:
        """Load learning paths from disk.

        Returns:
            Mapping of path id to path; empty when the file is missing or
            cannot be read (the error is printed, not raised).
        """
        if not os.path.exists(self.paths_file):
            return {}
        try:
            with open(self.paths_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            paths = {}
            for pid, path_data in data.items():
                nodes = [LearningPathNode(**node) for node in path_data['nodes']]
                paths[pid] = LearningPath(
                    path_id=path_data['path_id'],
                    user_id=path_data['user_id'],
                    nodes=nodes,
                    current_node_index=path_data['current_node_index'],
                    completion_percentage=path_data['completion_percentage'],
                    created_at=path_data['created_at'],
                    updated_at=path_data['updated_at'],
                    estimated_total_time=path_data['estimated_total_time']
                )
            return paths
        except Exception as e:
            # Deliberately best-effort: an unreadable store must not prevent
            # the engine from starting.
            print(f"Error loading paths: {e}")
            return {}

    def _save_paths(self):
        """Write all paths to disk (best-effort; errors are printed).

        The JSON text is produced before any file is opened and is then moved
        into place with ``os.replace``, so a serialization or write failure
        cannot truncate the previously saved paths.
        """
        try:
            os.makedirs(self.storage_dir, exist_ok=True)
            data = {pid: asdict(path) for pid, path in self.paths.items()}
            payload = json.dumps(data, indent=2, ensure_ascii=False)
            tmp_file = self.paths_file + ".tmp"
            with open(tmp_file, 'w', encoding='utf-8') as f:
                f.write(payload)
            os.replace(tmp_file, self.paths_file)
        except Exception as e:
            print(f"Error saving paths: {e}")

    def _refresh_progress(self, path: "LearningPath"):
        """Recompute completion percentage and current node index of ``path``.

        Completed and skipped nodes both count as finished. The current index
        points at the first unfinished node, or one past the last node when
        every node is finished.
        """
        total = len(path.nodes)
        finished = sum(1 for n in path.nodes if n.status in self._FINISHED_STATES)
        path.completion_percentage = finished / total if total else 0.0
        path.current_node_index = next(
            (i for i, n in enumerate(path.nodes)
             if n.status not in self._FINISHED_STATES),
            total,
        )

    def create_or_update_path(self, user_id: str, focus_areas: Optional[List[str]] = None) -> "LearningPath":
        """Create or update learning path.

        An unfinished active path is refreshed against the user's profile and
        returned (``focus_areas`` is not used in that case); otherwise a new
        path is generated, stored and saved.
        """
        active_path = self.get_active_path(user_id)
        if active_path and active_path.completion_percentage < 1.0:
            return self._update_path(active_path)

        path = self.learning_path_generator.generate_path(user_id, focus_areas)
        self.paths[path.path_id] = path
        self._save_paths()
        return path

    def get_active_path(self, user_id: str) -> Optional["LearningPath"]:
        """Get user's current active path.

        Returns:
            The most recently created unfinished path; if every path is
            finished, the most recently created path; None when the user has
            no paths.
        """
        user_paths = [p for p in self.paths.values() if p.user_id == user_id]
        if not user_paths:
            return None
        unfinished = [p for p in user_paths if p.completion_percentage < 1.0]
        return max(unfinished or user_paths, key=lambda p: p.created_at)

    def complete_node(self, user_id: str, node_id: str, score: float):
        """Mark a node of the user's active path as completed.

        The node's estimated time is credited to the user's learning time for
        the node's topic.

        Args:
            user_id: Owner of the path.
            node_id: Node to complete.
            score: Score achieved on the node.

        Returns:
            The active path, or None when the user has no path. When
            ``node_id`` is not part of the path, the path is returned
            unchanged and nothing is saved or credited.
        """
        path = self.get_active_path(user_id)
        if not path:
            return None

        finished_node = next((n for n in path.nodes if n.node_id == node_id), None)
        if finished_node is None:
            return path

        now = datetime.now().isoformat()
        finished_node.status = "completed"
        finished_node.score = score
        finished_node.completion_date = now

        self._refresh_progress(path)
        path.updated_at = now
        self._save_paths()

        # Credit the time to the node that was just completed, not to the
        # node the path has advanced to.
        self.user_profiling.update_learning_time(
            user_id,
            finished_node.topic,
            finished_node.estimated_time
        )
        return path

    def _update_path(self, path: "LearningPath") -> "LearningPath":
        """Skip pending nodes the user has already mastered, then save.

        A pending node is skipped when the profile's Bloom level performance
        for the node's topic and level is >= 0.8.
        """
        profile = self.user_profiling.get_or_create_profile(path.user_id)
        now = datetime.now().isoformat()
        for node in path.nodes:
            if node.status != "pending":
                continue
            bloom_perf = profile.bloom_level_performance.get(node.topic, {}).get(node.bloom_level, 0.0)
            if bloom_perf >= 0.8:
                # Already mastered, can skip
                node.status = "skipped"
                node.completion_date = now

        self._refresh_progress(path)
        path.updated_at = now
        self._save_paths()
        return path

    def get_recommendations(self, user_id: str) -> Dict:
        """Get learning recommendations.

        Returns:
            Dict with ``next_node`` (summary of the current node of the
            active path, or None), ``review_topics`` (up to three weak
            areas), ``challenge_topics`` (from the first two strong areas)
            and ``suggested_topics`` (up to three available topics without a
            knowledge level, in ``available_topics`` order).
        """
        profile = self.user_profiling.get_or_create_profile(user_id)
        path = self.get_active_path(user_id)

        recommendations = {
            "next_node": None,
            "suggested_topics": [],
            "review_topics": [],
            "challenge_topics": []
        }

        if path and path.current_node_index < len(path.nodes):
            next_node = path.nodes[path.current_node_index]
            recommendations["next_node"] = {
                "node_id": next_node.node_id,
                "topic": next_node.topic,
                "bloom_level": next_node.bloom_level,
                "content_type": next_node.content_type,
                "estimated_time": next_node.estimated_time
            }

        recommendations["review_topics"] = profile.weak_areas[:3]
        # dict.fromkeys removes duplicates while keeping order
        recommendations["challenge_topics"] = list(dict.fromkeys(profile.strong_areas[:2]))

        # Keep catalogue order so the suggestions are deterministic.
        learned_topics = set(profile.knowledge_level.keys())
        new_topics = dict.fromkeys(
            topic for topic in self.learning_path_generator.available_topics
            if topic not in learned_topics
        )
        recommendations["suggested_topics"] = list(new_topics)[:3]
        return recommendations

    def get_path_visualization_data(self, user_id: str) -> Dict:
        """Return node and edge lists describing the user's active path.

        Each edge runs from a prerequisite node (``source``) to the node that
        requires it (``target``). Without a path only empty ``nodes`` and
        ``edges`` lists are returned.
        """
        path = self.get_active_path(user_id)
        if not path:
            return {"nodes": [], "edges": []}

        nodes_data = [
            {
                "id": node.node_id,
                "topic": node.topic,
                "bloom_level": node.bloom_level,
                "status": node.status,
                "difficulty": node.difficulty,
                "score": node.score
            }
            for node in path.nodes
        ]
        edges_data = [
            {"source": prereq_id, "target": node.node_id}
            for node in path.nodes
            for prereq_id in node.prerequisites
        ]
        return {
            "nodes": nodes_data,
            "edges": edges_data,
            "completion_percentage": path.completion_percentage,
            "current_node_index": path.current_node_index
        }