Rakshitjan committed on
Commit
06cd7ac
·
verified ·
1 Parent(s): 2e14720

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +283 -95
main.py CHANGED
@@ -1,3 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # Import necessary libraries
2
  from fastapi import FastAPI, HTTPException
3
  from pydantic import BaseModel
@@ -40,97 +229,75 @@ def get_credentials():
40
  creds = get_credentials()
41
  client = gspread.authorize(creds)
42
 
43
- # Input the paths and coaching code
44
- journal_file_path = ''
45
- panic_button_file_path = ''
46
- test_file_path = ''
47
- coachingCode = '1919'
48
-
49
- if coachingCode == '1919':
50
- journal_file_path = 'https://docs.google.com/spreadsheets/d/1EFf2lr4A10nt4RhIqxCD_fxe-l3sXH09II0TEkMmvhA/edit?usp=drive_link'
51
- panic_button_file_path = 'https://docs.google.com/spreadsheets/d/1nFZGkCvRV6qS-mhsORhX3dxI0JSge32_UwWgWKl3eyw/edit?usp=drive_link'
52
- test_file_path = 'https://docs.google.com/spreadsheets/d/13PUHySUXWtKBusjugoe7Dbsm39PwBUfG4tGLipspIx4/edit?usp=drive_link'
53
-
54
- # Step 3: Open Google Sheets using the URLs
55
- journal_file = client.open_by_url(journal_file_path).worksheet('Sheet1')
56
- panic_button_file = client.open_by_url(panic_button_file_path).worksheet('Sheet1') # Fixed missing part
57
- test_file = client.open_by_url(test_file_path).worksheet('Sheet1')
58
-
59
- # Step 4: Convert the sheets into Pandas DataFrames
60
- journal_df = pd.DataFrame(journal_file.get_all_values())
61
- panic_button_df = pd.DataFrame(panic_button_file.get_all_values())
62
- test_df = pd.DataFrame(test_file.get_all_values())
63
-
64
- # Label the columns manually since there are no headers
65
- journal_df.columns = ['user_id', 'productivity_yes_no', 'productivity_rate']
66
- panic_button_df.columns = ['user_id', 'panic_button']
67
-
68
- # Initialize a list for the merged data
69
- merged_data = []
70
-
71
- # Step 5: Group panic buttons by user_id and combine into a single comma-separated string
72
- panic_button_grouped = panic_button_df.groupby('user_id')['panic_button'].apply(lambda x: ','.join(x)).reset_index()
73
-
74
- # Merge journal and panic button data
75
- merged_journal_panic = pd.merge(journal_df, panic_button_grouped, on='user_id', how='outer')
76
-
77
- # Step 6: Process the test data
78
- test_data = []
79
- for index, row in test_df.iterrows():
80
- user_id = row[0]
81
- i = 1
82
- while i < len(row) and pd.notna(row[i]): # Process chapter and score pairs
83
- chapter = row[i].lower().strip()
84
- score = row[i + 1]
85
- if pd.notna(score):
86
- test_data.append({'user_id': user_id, 'test_chapter': chapter, 'test_score': score})
87
- i += 2
88
-
89
- # Convert the processed test data into a DataFrame
90
- test_df_processed = pd.DataFrame(test_data)
91
-
92
- # Step 7: Merge the journal+panic button data with the test data
93
- merged_data = pd.merge(merged_journal_panic, test_df_processed, on='user_id', how='outer')
94
-
95
- # Step 8: Drop rows where all data (except user_id and test_chapter) is missing
96
- merged_data_cleaned = merged_data.dropna(subset=['productivity_yes_no', 'productivity_rate', 'panic_button', 'test_chapter'], how='all')
97
-
98
- # Group the merged DataFrame by user_id
99
- df = pd.DataFrame(merged_data_cleaned)
100
-
101
- # Function to process panic button counts and test scores
102
- def process_group(group):
103
- # Panic button counts
104
- panic_button_series = group['panic_button'].dropna()
105
- panic_button_dict = panic_button_series.value_counts().to_dict()
106
 
107
- # Test scores aggregation
108
- test_scores = group[['test_chapter', 'test_score']].dropna()
109
- test_scores['test_score'] = pd.to_numeric(test_scores['test_score'], errors='coerce')
110
 
111
- # Create the test_scores_dict excluding NaN values
112
- test_scores_dict = test_scores.groupby('test_chapter')['test_score'].mean().dropna().to_dict()
 
 
 
 
113
 
114
- return pd.Series({
115
- 'productivity_yes_no': group['productivity_yes_no'].iloc[0],
116
- 'productivity_rate': group['productivity_rate'].iloc[0],
117
- 'panic_button': panic_button_dict,
118
- 'test_scores': test_scores_dict
119
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
- # Apply the group processing function
122
- merged_df = df.groupby('user_id').apply(process_group).reset_index()
123
 
124
- # Step 9: Calculate potential score
125
- # Panic button weightages
126
- academic_weights = {'BACKLOGS': -5, 'MISSED CLASSES': -4, 'NOT UNDERSTANDING': -3, 'BAD MARKS': -3, 'LACK OF MOTIVATION': -3}
127
- non_academic_weights = {'EMOTIONAL FACTORS': -3, 'PROCRASTINATE': -2, 'LOST INTEREST': -4, 'LACK OF FOCUS': -2, 'GOALS NOT ACHIEVED': -2, 'LACK OF DISCIPLINE': -2}
128
 
129
- # Max weighted panic score
130
- max_weighted_panic_score = sum([max(academic_weights.values()) * 3, max(non_academic_weights.values()) * 3])
131
 
132
- # Function to calculate potential score
133
  def calculate_potential_score(row):
 
 
 
 
134
  # Test score normalization (70% weightage)
135
  if row['test_scores']: # Check if test_scores is not empty
136
  avg_test_score = sum(row['test_scores'].values()) / len(row['test_scores'])
@@ -170,18 +337,39 @@ def calculate_potential_score(row):
170
  total_potential_score = test_score_normalized + panic_score + journal_score
171
  return total_potential_score
172
 
173
- # Apply potential score calculation to the dataframe
174
- merged_df['potential_score'] = merged_df.apply(calculate_potential_score, axis=1)
175
- merged_df['potential_score'] = merged_df['potential_score'].round(2)
 
 
 
 
 
 
176
 
177
- # Step 10: Sort by potential score
178
- sorted_df = merged_df[['user_id', 'potential_score']].sort_values(by='potential_score', ascending=False)
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
- # Step 11: Define API endpoint to get the sorted potential scores
181
- @app.get("/sorted-potential-scores")
182
- async def get_sorted_potential_scores():
183
- try:
184
- result = sorted_df.to_dict(orient="records")
185
- return {"sorted_scores": result}
186
  except Exception as e:
187
- raise HTTPException(status_code=500, detail=str(e))
 
 
 
 
 
 
 
 
 
 
1
+ # # Import necessary libraries
2
+ # from fastapi import FastAPI, HTTPException
3
+ # from pydantic import BaseModel
4
+ # import gspread
5
+ # from google.oauth2.service_account import Credentials
6
+ # import pandas as pd
7
+ # from collections import defaultdict
8
+ # import os
9
+
10
+ # # Initialize the FastAPI app
11
+ # app = FastAPI()
12
+
13
+ # # Step 1: Define a function to get Google Sheets API credentials
14
+ # def get_credentials():
15
+ # """Get Google Sheets API credentials from environment variables."""
16
+ # try:
17
+ # # Construct the service account info dictionary
18
+ # service_account_info = {
19
+ # "type": os.getenv("SERVICE_ACCOUNT_TYPE"),
20
+ # "project_id": os.getenv("PROJECT_ID"),
21
+ # "private_key_id": os.getenv("PRIVATE_KEY_ID"),
22
+ # "private_key": os.getenv("PRIVATE_KEY").replace('\\n', '\n'),
23
+ # "client_email": os.getenv("CLIENT_EMAIL"),
24
+ # "client_id": os.getenv("CLIENT_ID"),
25
+ # "auth_uri": os.getenv("AUTH_URI"),
26
+ # "token_uri": os.getenv("TOKEN_URI"),
27
+ # "auth_provider_x509_cert_url": os.getenv("AUTH_PROVIDER_X509_CERT_URL"),
28
+ # "client_x509_cert_url": os.getenv("CLIENT_X509_CERT_URL"),
29
+ # "universe_domain": os.getenv("UNIVERSE_DOMAIN")
30
+ # }
31
+ # scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
32
+ # creds = Credentials.from_service_account_info(service_account_info, scopes=scope)
33
+ # return creds
34
+
35
+ # except Exception as e:
36
+ # print(f"Error getting credentials: {e}")
37
+ # return None
38
+
39
+ # # Step 2: Authorize gspread using the credentials
40
+ # creds = get_credentials()
41
+ # client = gspread.authorize(creds)
42
+
43
+ # # Input the paths and coaching code
44
+ # journal_file_path = ''
45
+ # panic_button_file_path = ''
46
+ # test_file_path = ''
47
+ # coachingCode = '1919'
48
+
49
+ # if coachingCode == '1919':
50
+ # journal_file_path = 'https://docs.google.com/spreadsheets/d/1EFf2lr4A10nt4RhIqxCD_fxe-l3sXH09II0TEkMmvhA/edit?usp=drive_link'
51
+ # panic_button_file_path = 'https://docs.google.com/spreadsheets/d/1nFZGkCvRV6qS-mhsORhX3dxI0JSge32_UwWgWKl3eyw/edit?usp=drive_link'
52
+ # test_file_path = 'https://docs.google.com/spreadsheets/d/13PUHySUXWtKBusjugoe7Dbsm39PwBUfG4tGLipspIx4/edit?usp=drive_link'
53
+
54
+ # # Step 3: Open Google Sheets using the URLs
55
+ # journal_file = client.open_by_url(journal_file_path).worksheet('Sheet1')
56
+ # panic_button_file = client.open_by_url(panic_button_file_path).worksheet('Sheet1') # Fixed missing part
57
+ # test_file = client.open_by_url(test_file_path).worksheet('Sheet1')
58
+
59
+ # # Step 4: Convert the sheets into Pandas DataFrames
60
+ # journal_df = pd.DataFrame(journal_file.get_all_values())
61
+ # panic_button_df = pd.DataFrame(panic_button_file.get_all_values())
62
+ # test_df = pd.DataFrame(test_file.get_all_values())
63
+
64
+ # # Label the columns manually since there are no headers
65
+ # journal_df.columns = ['user_id', 'productivity_yes_no', 'productivity_rate']
66
+ # panic_button_df.columns = ['user_id', 'panic_button']
67
+
68
+ # # Initialize a list for the merged data
69
+ # merged_data = []
70
+
71
+ # # Step 5: Group panic buttons by user_id and combine into a single comma-separated string
72
+ # panic_button_grouped = panic_button_df.groupby('user_id')['panic_button'].apply(lambda x: ','.join(x)).reset_index()
73
+
74
+ # # Merge journal and panic button data
75
+ # merged_journal_panic = pd.merge(journal_df, panic_button_grouped, on='user_id', how='outer')
76
+
77
+ # # Step 6: Process the test data
78
+ # test_data = []
79
+ # for index, row in test_df.iterrows():
80
+ # user_id = row[0]
81
+ # i = 1
82
+ # while i < len(row) and pd.notna(row[i]): # Process chapter and score pairs
83
+ # chapter = row[i].lower().strip()
84
+ # score = row[i + 1]
85
+ # if pd.notna(score):
86
+ # test_data.append({'user_id': user_id, 'test_chapter': chapter, 'test_score': score})
87
+ # i += 2
88
+
89
+ # # Convert the processed test data into a DataFrame
90
+ # test_df_processed = pd.DataFrame(test_data)
91
+
92
+ # # Step 7: Merge the journal+panic button data with the test data
93
+ # merged_data = pd.merge(merged_journal_panic, test_df_processed, on='user_id', how='outer')
94
+
95
+ # # Step 8: Drop rows where all data (except user_id and test_chapter) is missing
96
+ # merged_data_cleaned = merged_data.dropna(subset=['productivity_yes_no', 'productivity_rate', 'panic_button', 'test_chapter'], how='all')
97
+
98
+ # # Group the merged DataFrame by user_id
99
+ # df = pd.DataFrame(merged_data_cleaned)
100
+
101
+ # # Function to process panic button counts and test scores
102
+ # def process_group(group):
103
+ # # Panic button counts
104
+ # panic_button_series = group['panic_button'].dropna()
105
+ # panic_button_dict = panic_button_series.value_counts().to_dict()
106
+
107
+ # # Test scores aggregation
108
+ # test_scores = group[['test_chapter', 'test_score']].dropna()
109
+ # test_scores['test_score'] = pd.to_numeric(test_scores['test_score'], errors='coerce')
110
+
111
+ # # Create the test_scores_dict excluding NaN values
112
+ # test_scores_dict = test_scores.groupby('test_chapter')['test_score'].mean().dropna().to_dict()
113
+
114
+ # return pd.Series({
115
+ # 'productivity_yes_no': group['productivity_yes_no'].iloc[0],
116
+ # 'productivity_rate': group['productivity_rate'].iloc[0],
117
+ # 'panic_button': panic_button_dict,
118
+ # 'test_scores': test_scores_dict
119
+ # })
120
+
121
+ # # Apply the group processing function
122
+ # merged_df = df.groupby('user_id').apply(process_group).reset_index()
123
+
124
+ # # Step 9: Calculate potential score
125
+ # # Panic button weightages
126
+ # academic_weights = {'BACKLOGS': -5, 'MISSED CLASSES': -4, 'NOT UNDERSTANDING': -3, 'BAD MARKS': -3, 'LACK OF MOTIVATION': -3}
127
+ # non_academic_weights = {'EMOTIONAL FACTORS': -3, 'PROCRASTINATE': -2, 'LOST INTEREST': -4, 'LACK OF FOCUS': -2, 'GOALS NOT ACHIEVED': -2, 'LACK OF DISCIPLINE': -2}
128
+
129
+ # # Max weighted panic score
130
+ # max_weighted_panic_score = sum([max(academic_weights.values()) * 3, max(non_academic_weights.values()) * 3])
131
+
132
+ # # Function to calculate potential score
133
+ # def calculate_potential_score(row):
134
+ # # Test score normalization (70% weightage)
135
+ # if row['test_scores']: # Check if test_scores is not empty
136
+ # avg_test_score = sum(row['test_scores'].values()) / len(row['test_scores'])
137
+ # test_score_normalized = (avg_test_score / 40) * 70 # Scale test score to 70
138
+ # else:
139
+ # test_score_normalized = 0 # Default value for users with no test scores
140
+
141
+ # # Panic score calculation (20% weightage)
142
+ # student_panic_score = 0
143
+ # if row['panic_button']: # Ensure panic_button is not NaN or empty
144
+ # for factor, count in row['panic_button'].items():
145
+ # if factor in academic_weights:
146
+ # student_panic_score += academic_weights[factor] * count
147
+ # elif factor in non_academic_weights:
148
+ # student_panic_score += non_academic_weights[factor] * count
149
+ # else:
150
+ # student_panic_score = 0 # Default if no panic button issues
151
+
152
+ # # Panic score normalized to 20
153
+ # panic_score = 20 * (1 - (student_panic_score / max_weighted_panic_score) if max_weighted_panic_score != 0 else 1)
154
+
155
+ # # Journal score calculation (10% weightage)
156
+ # if pd.notna(row['productivity_yes_no']) and row['productivity_yes_no'] == 'Yes':
157
+ # if pd.notna(row['productivity_rate']):
158
+ # journal_score = (float(row['productivity_rate']) / 10) * 10 # Scale journal score to 10
159
+ # else:
160
+ # journal_score = 0 # Default if productivity_rate is missing
161
+ # elif pd.notna(row['productivity_yes_no']) and row['productivity_yes_no'] == 'No':
162
+ # if pd.notna(row['productivity_rate']):
163
+ # journal_score = (float(row['productivity_rate']) / 10) * 5 # Scale journal score to 5 if "No"
164
+ # else:
165
+ # journal_score = 0 # Default if productivity_rate is missing
166
+ # else:
167
+ # journal_score = 0 # Default if productivity_yes_no is missing
168
+
169
+ # # Total score based on new weightages
170
+ # total_potential_score = test_score_normalized + panic_score + journal_score
171
+ # return total_potential_score
172
+
173
+ # # Apply potential score calculation to the dataframe
174
+ # merged_df['potential_score'] = merged_df.apply(calculate_potential_score, axis=1)
175
+ # merged_df['potential_score'] = merged_df['potential_score'].round(2)
176
+
177
+ # # Step 10: Sort by potential score
178
+ # sorted_df = merged_df[['user_id', 'potential_score']].sort_values(by='potential_score', ascending=False)
179
+
180
+ # # Step 11: Define API endpoint to get the sorted potential scores
181
+ # @app.get("/sorted-potential-scores")
182
+ # async def get_sorted_potential_scores():
183
+ # try:
184
+ # result = sorted_df.to_dict(orient="records")
185
+ # return {"sorted_scores": result}
186
+ # except Exception as e:
187
+ # raise HTTPException(status_code=500, detail=str(e))
188
+
189
+
190
  # Import necessary libraries
191
  from fastapi import FastAPI, HTTPException
192
  from pydantic import BaseModel
 
229
  creds = get_credentials()
230
  client = gspread.authorize(creds)
231
 
232
+ # Step 3: Define function to set paths based on coachingCode
233
def get_sheet_paths(coachingCode: str):
    """Look up the Google Sheets URLs registered for a coaching code.

    Args:
        coachingCode: Identifier of the coaching centre.

    Returns:
        A 3-tuple ``(journal_url, panic_button_url, test_url)``.

    Raises:
        HTTPException: 404 when the coaching code is not recognised.
    """
    # Guard clause: only '1919' is configured; reject everything else up front.
    if coachingCode != '1919':
        raise HTTPException(status_code=404, detail="Coaching code not found")

    return (
        'https://docs.google.com/spreadsheets/d/1EFf2lr4A10nt4RhIqxCD_fxe-l3sXH09II0TEkMmvhA/edit?usp=drive_link',
        'https://docs.google.com/spreadsheets/d/1nFZGkCvRV6qS-mhsORhX3dxI0JSge32_UwWgWKl3eyw/edit?usp=drive_link',
        'https://docs.google.com/spreadsheets/d/13PUHySUXWtKBusjugoe7Dbsm39PwBUfG4tGLipspIx4/edit?usp=drive_link',
    )
 
 
243
 
244
+ # Step 4: Define function to fetch and process data from Google Sheets
245
def fetch_and_process_data(journal_file_path, panic_button_file_path, test_file_path):
    """Download the three sheets and merge them into one long-format frame.

    Args:
        journal_file_path: URL of the journal spreadsheet
            (rows of user_id, productivity_yes_no, productivity_rate).
        panic_button_file_path: URL of the panic-button spreadsheet
            (rows of user_id, panic_button).
        test_file_path: URL of the test spreadsheet; each row is a user_id
            followed by alternating chapter / score cells.

    Returns:
        A DataFrame with columns ``user_id``, ``productivity_yes_no``,
        ``productivity_rate``, ``panic_button`` (comma-joined per user),
        ``test_chapter`` and ``test_score``. A user appears once per
        recorded test, or once with missing test columns.
    """
    # Each spreadsheet keeps its data on the worksheet named 'Sheet1'.
    journal_file = client.open_by_url(journal_file_path).worksheet('Sheet1')
    panic_button_file = client.open_by_url(panic_button_file_path).worksheet('Sheet1')
    test_file = client.open_by_url(test_file_path).worksheet('Sheet1')

    # The sheets have no header row, so name the columns here. Passing
    # `columns=` (rather than assigning afterwards) also keeps an empty sheet
    # from failing with a length-mismatch error.
    journal_df = pd.DataFrame(
        journal_file.get_all_values(),
        columns=['user_id', 'productivity_yes_no', 'productivity_rate'],
    )
    panic_button_df = pd.DataFrame(
        panic_button_file.get_all_values(),
        columns=['user_id', 'panic_button'],
    )

    # Collapse every panic-button press of a user into one comma-separated string.
    panic_button_grouped = (
        panic_button_df.groupby('user_id')['panic_button']
        .agg(','.join)
        .reset_index()
    )

    # Outer merge keeps users that appear in only one of the two sheets.
    merged_journal_panic = pd.merge(journal_df, panic_button_grouped, on='user_id', how='outer')

    # Unpack the wide test rows into one record per (user, chapter).
    test_data = []
    for cells in test_file.get_all_values():
        if not cells:
            continue
        user_id = cells[0]
        # zip() pairs chapter/score cells and silently drops a trailing
        # chapter that has no score cell, avoiding an out-of-range lookup.
        for chapter_cell, score_cell in zip(cells[1::2], cells[2::2]):
            chapter = str(chapter_cell).lower().strip()
            # Blank cells come back as '' (not NaN); a blank chapter marks
            # the end of this user's recorded tests.
            if not chapter:
                break
            if str(score_cell).strip() == '':
                continue
            test_data.append({'user_id': user_id, 'test_chapter': chapter, 'test_score': score_cell})

    # Explicit columns keep the merge key present even when no tests exist.
    test_df_processed = pd.DataFrame(test_data, columns=['user_id', 'test_chapter', 'test_score'])

    # Attach test records to the journal + panic-button data.
    merged_data = pd.merge(merged_journal_panic, test_df_processed, on='user_id', how='outer')

    # Drop rows that carry no journal, panic-button or test-chapter information.
    return merged_data.dropna(
        subset=['productivity_yes_no', 'productivity_rate', 'panic_button', 'test_chapter'],
        how='all',
    )
 
294
 
295
+ # Step 5: Define function to calculate potential score
296
  def calculate_potential_score(row):
297
+ academic_weights = {'BACKLOGS': -5, 'MISSED CLASSES': -4, 'NOT UNDERSTANDING': -3, 'BAD MARKS': -3, 'LACK OF MOTIVATION': -3}
298
+ non_academic_weights = {'EMOTIONAL FACTORS': -3, 'PROCRASTINATE': -2, 'LOST INTEREST': -4, 'LACK OF FOCUS': -2, 'GOALS NOT ACHIEVED': -2, 'LACK OF DISCIPLINE': -2}
299
+ max_weighted_panic_score = sum([max(academic_weights.values()) * 3, max(non_academic_weights.values()) * 3])
300
+
301
  # Test score normalization (70% weightage)
302
  if row['test_scores']: # Check if test_scores is not empty
303
  avg_test_score = sum(row['test_scores'].values()) / len(row['test_scores'])
 
337
  total_potential_score = test_score_normalized + panic_score + journal_score
338
  return total_potential_score
339
 
340
+ # Step 6: API endpoint to get sorted potential scores based on coachingCode
341
@app.get("/sorted-potential-scores/{coachingCode}")
async def get_sorted_potential_scores(coachingCode: str):
    """Return users of one coaching centre ranked by potential score.

    Args:
        coachingCode: Coaching code taken from the URL path.

    Returns:
        A list of ``{"user_id": ..., "potential_score": ...}`` records sorted
        by ``potential_score`` in descending order (empty when the sheets
        contain no usable rows).

    Raises:
        HTTPException: 404 (propagated from ``get_sheet_paths``) for an
            unknown coaching code; 500 for any other failure.
    """
    # NOTE(review): the sheet fetch below is blocking I/O inside an async
    # handler — consider a plain `def` endpoint so FastAPI runs it in a thread.
    try:
        # Resolve the spreadsheet URLs for this coaching code.
        journal_file_path, panic_button_file_path, test_file_path = get_sheet_paths(coachingCode)

        # Fetch and merge the raw sheet data.
        df = fetch_and_process_data(journal_file_path, panic_button_file_path, test_file_path)

        # Nothing to rank: grouping an empty frame would not yield the
        # columns selected below.
        if df.empty:
            return []

        # One summary row per user.
        merged_df = df.groupby('user_id').apply(process_group).reset_index()

        # Score every user and round to two decimals.
        merged_df['potential_score'] = merged_df.apply(calculate_potential_score, axis=1)
        merged_df['potential_score'] = merged_df['potential_score'].round(2)

        # Highest potential first.
        sorted_df = merged_df.sort_values('potential_score', ascending=False)

        return sorted_df[['user_id', 'potential_score']].to_dict(orient='records')

    except HTTPException:
        # Keep deliberate HTTP errors (e.g. the 404 for an unknown coaching
        # code) instead of masking them as a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error: {e}") from e
367
+
368
+ # Helper function to process group for merging test scores
369
def process_group(group):
    """Summarise all merged rows of one user into a single record.

    Args:
        group: DataFrame slice for one ``user_id`` with the columns
            ``productivity_yes_no``, ``productivity_rate``, ``panic_button``,
            ``test_chapter`` and ``test_score``.

    Returns:
        A Series with the first journal values of the group, a
        ``panic_button`` dict mapping each panic label to its occurrence
        count, and a ``test_scores`` dict mapping chapter to float score.
    """
    # The panic_button cell holds a comma-joined list of labels and is
    # repeated on every row of the user, so count labels once per distinct
    # cell value instead of once per row.
    panic_counts = {}
    for joined in group['panic_button'].dropna().unique():
        for label in str(joined).split(','):
            label = label.strip()
            if label:
                panic_counts[label] = panic_counts.get(label, 0) + 1

    # Keep only scores that parse as numbers; blank or malformed cells are
    # skipped rather than aborting the whole request. A chapter that occurs
    # more than once keeps its last score.
    test_scores = {}
    for chapter, raw_score in zip(group['test_chapter'], group['test_score']):
        if pd.isna(chapter) or pd.isna(raw_score):
            continue
        try:
            score = float(raw_score)
        except (TypeError, ValueError):
            continue
        if pd.isna(score):
            continue
        test_scores[chapter] = score

    return pd.Series({
        'productivity_yes_no': group['productivity_yes_no'].iloc[0],
        'productivity_rate': group['productivity_rate'].iloc[0],
        'panic_button': panic_counts,
        'test_scores': test_scores,
    })