# what-if-simulation-app/src/utils/engagement_analysis.py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List, Tuple
import torch
from collections import defaultdict
def _new_chapter_bucket():
    """Create an empty per-chapter accumulator for engagement counters."""
    return {'time_spent': [], 'visit_count': defaultdict(int),
            'problem_attempts': 0, 'video_views': 0}
def _record_window(metrics, trajectory, world, start, stop):
    """
    Accumulate engagement counters for trajectory steps in [start, stop).

    Each step contributes a visit, a duration sample and either a problem
    attempt (Quiz/Assignment action on a problem state) or a video view
    (Watch action on a non-problem state) to the state's chapter bucket.
    """
    for idx in range(start, stop):
        state = int(trajectory[idx, 0])
        action = int(trajectory[idx, 1])
        chapter = int(world.values['chapter'][state])
        bucket = metrics[chapter]
        bucket['visit_count'][state] += 1
        action_name = world.dict_action[action]
        if world.values['is_problem'][state]:
            if 'Quiz' in action_name or 'Assignment' in action_name:
                bucket['problem_attempts'] += 1
        elif 'Watch' in action_name:
            bucket['video_views'] += 1
        bucket['time_spent'].append(world.values['duration'][state])
def _finalize(metrics):
    """Replace raw per-chapter samples with summary statistics, in place."""
    for chapter in metrics:
        bucket = metrics[chapter]
        bucket['avg_time_spent'] = np.mean(bucket['time_spent']) if bucket['time_spent'] else 0
        bucket['unique_states_visited'] = len(bucket['visit_count'])
        bucket['total_visits'] = sum(bucket['visit_count'].values())
        del bucket['time_spent']  # Remove raw time data
        del bucket['visit_count']  # Remove raw visit data
def calculate_engagement_metrics(trajectories: np.ndarray,
world,
new_state_id: int,
window_size: int = 5) -> Tuple[Dict, Dict]:
    """
    Calculate engagement metrics before and after encountering new state
    Parameters:
    -----------
    trajectories : np.ndarray
        Array of student trajectories; each trajectory is a sequence of
        (state, action) pairs, i.e. trajectory[idx] == [state_id, action_id]
    world : ClickstreamWorld
        World object containing state information
    new_state_id : int
        ID of the newly added state
    window_size : int
        Number of states to consider before and after encountering new state
    Returns:
    --------
    before_metrics, after_metrics : Dict, Dict
        Dictionaries containing engagement metrics by chapter.
        Keys are chapters, values are dictionaries containing:
        - 'avg_time_spent': Average duration of visited states in the chapter
        - 'unique_states_visited': Number of distinct states visited
        - 'total_visits': Total number of visits to states in the chapter
        - 'problem_attempts': Number of problem attempts in the chapter
        - 'video_views': Number of video views in the chapter
        Trajectories that never reach new_state_id contribute nothing.
    """
    before_metrics = defaultdict(_new_chapter_bucket)
    after_metrics = defaultdict(_new_chapter_bucket)
    for trajectory in trajectories:
        # Find where the new state appears in trajectory
        new_state_indices = np.where(trajectory[:, 0] == new_state_id)[0]
        if len(new_state_indices) == 0:
            continue
        first_encounter = int(new_state_indices[0])
        # Window before the first encounter (excludes the encounter itself)
        _record_window(before_metrics, trajectory, world,
                       max(0, first_encounter - window_size), first_encounter)
        # Window after the first encounter (excludes the encounter itself)
        _record_window(after_metrics, trajectory, world,
                       first_encounter + 1,
                       min(len(trajectory), first_encounter + window_size + 1))
    # Collapse raw samples into summary statistics
    _finalize(before_metrics)
    _finalize(after_metrics)
    return dict(before_metrics), dict(after_metrics)
def plot_engagement_comparison(before_metrics: Dict,
after_metrics: Dict,
new_state_chapter: int,
save_path: str = None):
    """
    Plot engagement metrics comparison before and after new state addition
    Parameters:
    -----------
    before_metrics : Dict
        Engagement metrics before new state encounter, keyed by chapter
    after_metrics : Dict
        Engagement metrics after new state encounter, keyed by chapter
    new_state_chapter : int
        Chapter where the new state was added
    save_path : str, optional
        Path to save the plots
    """
    metrics = ['avg_time_spent', 'unique_states_visited', 'total_visits',
               'problem_attempts', 'video_views']
    # Chapter list and bar positions are the same for every metric panel,
    # so compute them once outside the loop.
    chapters = sorted(set(before_metrics.keys()) | set(after_metrics.keys()))
    x = np.arange(len(chapters))
    width = 0.35
    # Bars sit at positions 0..len(chapters)-1, not at raw chapter ids, so
    # the highlight must use the chapter's *index* in the sorted list.
    highlight_pos = chapters.index(new_state_chapter) if new_state_chapter in chapters else None
    fig, axes = plt.subplots(len(metrics), 1, figsize=(12, 4*len(metrics)))
    fig.suptitle(f'Engagement Metrics Before and After New State Addition\n(New State in Chapter {new_state_chapter})',
                 fontsize=16)
    for idx, metric in enumerate(metrics):
        before_values = [before_metrics.get(ch, {}).get(metric, 0) for ch in chapters]
        after_values = [after_metrics.get(ch, {}).get(metric, 0) for ch in chapters]
        axes[idx].bar(x - width/2, before_values, width, label='Before', color='skyblue')
        axes[idx].bar(x + width/2, after_values, width, label='After', color='lightcoral')
        # Highlight the chapter where new state was added (skip if it has no bars)
        if highlight_pos is not None:
            axes[idx].axvspan(highlight_pos - 0.5, highlight_pos + 0.5,
                              color='yellow', alpha=0.2)
        axes[idx].set_xlabel('Chapter')
        axes[idx].set_ylabel(metric.replace('_', ' ').title())
        axes[idx].set_xticks(x)
        axes[idx].set_xticklabels(chapters)
        axes[idx].legend()
        # Add percentage change annotations above each bar pair
        for i in range(len(chapters)):
            before_val = before_values[i]
            after_val = after_values[i]
            if before_val > 0:
                pct_change = ((after_val - before_val) / before_val) * 100
                axes[idx].text(i, max(before_val, after_val),
                               f'{pct_change:+.1f}%',
                               ha='center', va='bottom')
    plt.tight_layout()
    if save_path:
        plt.savefig(save_path)
    plt.show()
def analyze_engagement_changes(world,
trajectories: np.ndarray,
new_state_id: int,
window_size: int = 5,
save_path: str = None) -> Tuple[Dict, Dict]:
    """
    Analyze and visualize engagement changes after adding a new state
    Parameters:
    -----------
    world : ClickstreamWorld
        World object containing state information
    trajectories : np.ndarray
        Array of student trajectories
    new_state_id : int
        ID of the newly added state
    window_size : int
        Number of states to consider before and after encountering new state
    save_path : str, optional
        Path to save the visualization
    Returns:
    --------
    before_metrics, after_metrics : Dict, Dict
        Engagement metrics before and after new state encounter
    """
    # Look up which chapter the new state lives in, then compute and plot
    # the per-chapter engagement metrics around its first encounter.
    chapter_of_new_state = int(world.values['chapter'][new_state_id])
    before, after = calculate_engagement_metrics(trajectories, world,
                                                 new_state_id, window_size)
    plot_engagement_comparison(before, after, chapter_of_new_state, save_path)
    return before, after