| | import pandas as pd |
| | import os |
| | import re |
| | from huggingface_hub import InferenceClient |
| | |
| |
|
class DataProcessor:
    """Clean intervention-session spreadsheets and derive attendance/engagement metrics.

    The class constants hold the exact spreadsheet column label and cell values
    that the metric computations match against.
    """

    INTERVENTION_COLUMN = 'Did the intervention happen today?'
    ENGAGED_STR = 'Engaged (Respect, Responsibility, Effort)'
    PARTIALLY_ENGAGED_STR = 'Partially Engaged (about 50%)'
    NOT_ENGAGED_STR = 'Not Engaged (less than 50%)'

    def __init__(self, student_metrics_df=None):
        """Create the inference client and store an optional metrics frame.

        Args:
            student_metrics_df: Optional DataFrame kept on the instance as
                ``self.student_metrics_df``.

        Raises:
            ValueError: If the ``HF_API_KEY`` environment variable is unset or empty.
        """
        self.hf_api_key = os.getenv('HF_API_KEY')
        if not self.hf_api_key:
            raise ValueError("HF_API_KEY not set in environment variables")
        self.client = InferenceClient(api_key=self.hf_api_key)
        self.student_metrics_df = student_metrics_df

    def read_excel(self, uploaded_file):
        """Load ``uploaded_file`` with ``pandas.read_excel`` and return the DataFrame."""
        return pd.read_excel(uploaded_file)

    def format_session_data(self, df):
        """Normalise the date/time columns of ``df`` to display strings.

        ``df`` is modified in place and also returned. Values that cannot be
        parsed become missing values.

        Raises:
            KeyError: If any of the four expected columns is absent.
        """
        df['Date of Session'] = self.safe_convert_to_datetime(df['Date of Session'], '%m/%d/%Y')
        # The timestamp is rendered as a time of day only; its date part is dropped.
        df['Timestamp'] = self.safe_convert_to_datetime(df['Timestamp'], '%I:%M %p')
        df['Session Start Time'] = self.safe_convert_to_time(df['Session Start Time'], '%I:%M %p')
        df['Session End Time'] = self.safe_convert_to_time(df['Session End Time'], '%I:%M %p')
        return df

    def _convert_series(self, series, format_str, label, **parse_kwargs):
        """Parse ``series`` with ``pd.to_datetime`` and optionally format it.

        Shared implementation of the two ``safe_convert_*`` methods. On any
        error the problem is printed and the input is returned unchanged
        (deliberate best-effort behaviour).
        """
        try:
            converted = pd.to_datetime(series, errors='coerce', **parse_kwargs)
            if format_str:
                return converted.dt.strftime(format_str)
            return converted
        except Exception as e:
            print(f"Error converting series to {label}: {e}")
            return series

    def safe_convert_to_time(self, series, format_str='%I:%M %p'):
        """Parse ``HH:MM:SS`` values and render them with ``format_str``.

        Args:
            series: Series of time values; parsing always uses ``'%H:%M:%S'``.
            format_str: ``strftime`` pattern for the output. A falsy value
                returns the parsed datetimes instead of strings.

        Returns:
            The converted Series, or ``series`` unchanged if conversion raised.
        """
        return self._convert_series(series, format_str, 'time', format='%H:%M:%S')

    def safe_convert_to_datetime(self, series, format_str=None):
        """Parse date-like values, optionally rendering them with ``format_str``.

        Args:
            series: Series of date/datetime-like values.
            format_str: Optional ``strftime`` pattern; when falsy the parsed
                datetimes are returned as-is.

        Returns:
            The converted Series, or ``series`` unchanged if conversion raised.
        """
        return self._convert_series(series, format_str, 'datetime')

    def replace_student_names_with_initials(self, df):
        """Rename ``Student Attendance [Full Name]`` columns to use initials.

        ``df.columns`` is replaced in place and ``df`` is returned. Columns
        that do not match the pattern (including non-string labels) are kept
        unchanged. When two students share the same initials, the second and
        later ones get a numeric suffix (``JD``, ``JD2``, ...) so the column
        labels stay unique.
        """
        pattern = re.compile(r'Student Attendance \[(.+?)\]')
        times_seen = {}
        updated_columns = []
        for col in df.columns:
            match = pattern.match(col) if isinstance(col, str) else None
            if match is None:
                updated_columns.append(col)
                continue
            initials = ''.join(part[0] for part in match.group(1).split())
            times_seen[initials] = times_seen.get(initials, 0) + 1
            occurrence = times_seen[initials]
            # Duplicate labels would make per-column selection ambiguous downstream.
            label = initials if occurrence == 1 else f'{initials}{occurrence}'
            updated_columns.append(f'Student Attendance [{label}]')
        df.columns = updated_columns
        return df

    def _intervention_held_mask(self, df):
        """Return a boolean Series that is True where the intervention answer is 'yes'.

        Casting to ``str`` first keeps the ``.str`` accessor usable when the
        column is entirely blank or otherwise non-text.
        """
        answers = df[self.INTERVENTION_COLUMN].astype(str)
        return answers.str.strip().str.lower().eq('yes')

    @staticmethod
    def _percentage(count, total):
        """Return ``count`` as a rounded percentage of ``total`` (0 when ``total`` is 0)."""
        return round(count / total * 100) if total > 0 else 0

    def compute_intervention_statistics(self, df):
        """Summarise how often the intervention was held.

        Args:
            df: Session data with one row per available day and the
                ``INTERVENTION_COLUMN`` column.

        Returns:
            A one-row DataFrame with the frequency (percent), sessions held,
            sessions not held and total number of days.
        """
        total_days = len(df)
        sessions_held = self._intervention_held_mask(df).sum()
        intervention_frequency = (sessions_held / total_days) * 100 if total_days > 0 else 0
        return pd.DataFrame({
            'Intervention Frequency (%)': [round(intervention_frequency, 0)],
            'Intervention Sessions Held': [sessions_held],
            'Intervention Sessions Not Held': [total_days - sessions_held],
            'Total Number of Days Available': [total_days]
        })

    def compute_student_metrics(self, df):
        """Compute per-student attendance and engagement percentages.

        Only rows where the intervention was held are considered. A cell that
        is missing, or is not one of the three engagement strings, counts as
        an absence.

        Args:
            df: Session data with ``INTERVENTION_COLUMN`` and zero or more
                ``Student Attendance [...]`` columns.

        Returns:
            A DataFrame with one row per student. The full set of metric
            columns is present even when there are no student columns.
        """
        metric_columns = [
            'Attended ≥ 90%',
            'Engagement ≥ 80%',
            'Attendance (%)',
            'Engagement (%)',
            'Engaged (%)',
            'Partially Engaged (%)',
            'Not Engaged (%)',
            'Absent (%)',
        ]
        bucket_for_status = {
            self.ENGAGED_STR: 'Engaged',
            self.PARTIALLY_ENGAGED_STR: 'Partially Engaged',
            self.NOT_ENGAGED_STR: 'Not Engaged',
        }

        intervention_df = df[self._intervention_held_mask(df)]
        intervention_sessions_held = len(intervention_df)
        student_columns = [
            col for col in df.columns
            if isinstance(col, str) and col.startswith('Student Attendance')
        ]

        student_metrics = {}
        for col in student_columns:
            student_name = col.replace('Student Attendance [', '').replace(']', '').strip()

            engagement_counts = {
                'Engaged': 0,
                'Partially Engaged': 0,
                'Not Engaged': 0,
                'Absent': 0
            }
            for status in intervention_df[col].fillna('Absent'):
                engagement_counts[bucket_for_status.get(status, 'Absent')] += 1

            total_sessions = sum(engagement_counts.values())
            # Any recorded engagement level (even "Not Engaged") means the student was present.
            sessions_attended = total_sessions - engagement_counts['Absent']

            attendance_pct = self._percentage(sessions_attended, intervention_sessions_held)
            engaged_pct = self._percentage(engagement_counts['Engaged'], total_sessions)

            student_metrics[student_name] = {
                'Attended ≥ 90%': "Yes" if attendance_pct >= 90 else "No",
                'Engagement ≥ 80%': "Yes" if engaged_pct >= 80 else "No",
                'Attendance (%)': attendance_pct,
                # Reported under both labels; the two values are the same figure.
                'Engagement (%)': engaged_pct,
                'Engaged (%)': engaged_pct,
                'Partially Engaged (%)': self._percentage(
                    engagement_counts['Partially Engaged'], total_sessions),
                'Not Engaged (%)': self._percentage(
                    engagement_counts['Not Engaged'], total_sessions),
                'Absent (%)': self._percentage(engagement_counts['Absent'], total_sessions)
            }

        # Explicit columns keep the schema stable when no students were found.
        student_metrics_df = pd.DataFrame(list(student_metrics.values()), columns=metric_columns)
        student_metrics_df.insert(0, 'Student', list(student_metrics.keys()))
        return student_metrics_df

    def compute_average_metrics(self, student_metrics_df):
        """Return the rounded mean attendance and engagement across students.

        Args:
            student_metrics_df: Frame with ``Attendance (%)`` and
                ``Engagement (%)`` columns.

        Returns:
            ``(attendance_average, engagement_average)``; each is 0 when there
            is no data to average.
        """
        if student_metrics_df.empty:
            return 0, 0

        attendance_avg_stats = student_metrics_df['Attendance (%)'].mean()
        engagement_avg_stats = student_metrics_df['Engagement (%)'].mean()

        # round() cannot convert NaN (e.g. an all-missing column) to an integer.
        attendance_avg_stats = 0 if pd.isna(attendance_avg_stats) else round(attendance_avg_stats)
        engagement_avg_stats = 0 if pd.isna(engagement_avg_stats) else round(engagement_avg_stats)

        return attendance_avg_stats, engagement_avg_stats

    @staticmethod
    def _misses_target(row, pct_key, flag_key, threshold):
        """Return True when ``row`` falls short of ``threshold`` for one metric.

        The numeric percentage under ``pct_key`` is used when present and
        comparable; otherwise the precomputed Yes/No flag under ``flag_key``
        decides.
        """
        if pct_key in row:
            pct = row[pct_key]
            if not pd.isna(pct):
                try:
                    return bool(pct < threshold)
                except TypeError:
                    pass  # Non-numeric value: fall back to the flag below.
        return row[flag_key] == "No"

    def evaluate_student(self, row, attendance_threshold=90, engagement_threshold=80):
        """Recommend the next step for one student.

        Args:
            row: Mapping or Series holding the student's metrics. The numeric
                ``Attendance (%)`` / ``Engagement (%)`` entries are compared
                against the thresholds when available; otherwise the
                ``Attended ≥ 90%`` / ``Engagement ≥ 80%`` flags are used.
            attendance_threshold: Minimum acceptable attendance percentage.
            engagement_threshold: Minimum acceptable engagement percentage.

        Returns:
            ``"Address Attendance"``, ``"Address Engagement"`` or the general
            follow-up recommendation, checked in that order.
        """
        if self._misses_target(row, 'Attendance (%)', 'Attended ≥ 90%', attendance_threshold):
            return "Address Attendance"
        if self._misses_target(row, 'Engagement (%)', 'Engagement ≥ 80%', engagement_threshold):
            return "Address Engagement"
        return "Consider barriers, fidelity, and progress monitoring"