Update data_processor.py
Browse files
- data_processor.py (+4 -19)
data_processor.py
CHANGED
|
@@ -8,6 +8,7 @@ class DataProcessor:
|
|
| 8 |
'Did the intervention happen today?',
|
| 9 |
'Did the intervention take place today?'
|
| 10 |
]
|
|
|
|
| 11 |
ENGAGED_STR = 'Engaged'
|
| 12 |
PARTIALLY_ENGAGED_STR = 'Partially Engaged'
|
| 13 |
NOT_ENGAGED_STR = 'Not Engaged'
|
|
@@ -55,22 +56,6 @@ class DataProcessor:
|
|
| 55 |
print(f"Error converting series to datetime: {e}")
|
| 56 |
return series
|
| 57 |
|
| 58 |
-
# def replace_student_names_with_initials(self, df):
|
| 59 |
-
# updated_columns = []
|
| 60 |
-
# for col in df.columns:
|
| 61 |
-
# if col.startswith('Student Attendance'):
|
| 62 |
-
# match = re.match(r'Student Attendance \[(.+?)\]', col)
|
| 63 |
-
# if match:
|
| 64 |
-
# name = match.group(1)
|
| 65 |
-
# initials = ''.join([part[0] for part in name.split()])
|
| 66 |
-
# updated_columns.append(f'Student Attendance [{initials}]')
|
| 67 |
-
# else:
|
| 68 |
-
# updated_columns.append(col)
|
| 69 |
-
# else:
|
| 70 |
-
# updated_columns.append(col)
|
| 71 |
-
# df.columns = updated_columns
|
| 72 |
-
# return df
|
| 73 |
-
|
| 74 |
def replace_student_names_with_initials(self, df):
|
| 75 |
updated_columns = []
|
| 76 |
for col in df.columns:
|
|
@@ -112,7 +97,7 @@ class DataProcessor:
|
|
| 112 |
def compute_intervention_statistics(self, df):
|
| 113 |
intervention_column = self.get_intervention_column(df)
|
| 114 |
total_days = len(df)
|
| 115 |
-
sessions_held = df[intervention_column].str.strip().str.lower().
|
| 116 |
intervention_frequency = (sessions_held / total_days) * 100 if total_days > 0 else 0
|
| 117 |
return pd.DataFrame({
|
| 118 |
'Intervention Dosage (%)': [round(intervention_frequency, 0)],
|
|
@@ -134,7 +119,7 @@ class DataProcessor:
|
|
| 134 |
|
| 135 |
def compute_student_metrics(self, df):
|
| 136 |
intervention_column = self.get_intervention_column(df)
|
| 137 |
-
intervention_df = df[df[intervention_column].str.strip().str.lower()
|
| 138 |
intervention_sessions_held = len(intervention_df)
|
| 139 |
student_columns = [col for col in df.columns if col.startswith('Student Attendance')]
|
| 140 |
|
|
@@ -151,7 +136,7 @@ class DataProcessor:
|
|
| 151 |
] else 0)
|
| 152 |
|
| 153 |
sessions_attended = attendance_values.sum()
|
| 154 |
-
attendance_pct = (sessions_attended / intervention_sessions_held
|
| 155 |
attendance_pct = round(attendance_pct)
|
| 156 |
|
| 157 |
engagement_counts = {
|
|
|
|
| 8 |
'Did the intervention happen today?',
|
| 9 |
'Did the intervention take place today?'
|
| 10 |
]
|
| 11 |
+
YES_RESPONSES = ['yes', 'assessment day'] # Added this line
|
| 12 |
ENGAGED_STR = 'Engaged'
|
| 13 |
PARTIALLY_ENGAGED_STR = 'Partially Engaged'
|
| 14 |
NOT_ENGAGED_STR = 'Not Engaged'
|
|
|
|
| 56 |
print(f"Error converting series to datetime: {e}")
|
| 57 |
return series
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
def replace_student_names_with_initials(self, df):
|
| 60 |
updated_columns = []
|
| 61 |
for col in df.columns:
|
|
|
|
| 97 |
def compute_intervention_statistics(self, df):
|
| 98 |
intervention_column = self.get_intervention_column(df)
|
| 99 |
total_days = len(df)
|
| 100 |
+
sessions_held = df[intervention_column].str.strip().str.lower().isin(self.YES_RESPONSES).sum() # Modified line
|
| 101 |
intervention_frequency = (sessions_held / total_days) * 100 if total_days > 0 else 0
|
| 102 |
return pd.DataFrame({
|
| 103 |
'Intervention Dosage (%)': [round(intervention_frequency, 0)],
|
|
|
|
| 119 |
|
| 120 |
def compute_student_metrics(self, df):
|
| 121 |
intervention_column = self.get_intervention_column(df)
|
| 122 |
+
intervention_df = df[df[intervention_column].str.strip().str.lower().isin(self.YES_RESPONSES)] # Modified line
|
| 123 |
intervention_sessions_held = len(intervention_df)
|
| 124 |
student_columns = [col for col in df.columns if col.startswith('Student Attendance')]
|
| 125 |
|
|
|
|
| 136 |
] else 0)
|
| 137 |
|
| 138 |
sessions_attended = attendance_values.sum()
|
| 139 |
+
attendance_pct = (sessions_attended / intervention_sessions_held * 100) if intervention_sessions_held > 0 else 0
|
| 140 |
attendance_pct = round(attendance_pct)
|
| 141 |
|
| 142 |
engagement_counts = {
|