Rakshitjan committed on
Commit
5141c51
·
verified ·
1 Parent(s): 29994d9

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +164 -129
main.py CHANGED
@@ -1,152 +1,187 @@
 
1
  from fastapi import FastAPI, HTTPException
 
2
  import gspread
3
  from google.oauth2.service_account import Credentials
4
- from google.auth.exceptions import GoogleAuthError
5
  import pandas as pd
6
  from collections import defaultdict
7
- from pydantic import BaseModel
8
- from fastapi.middleware.cors import CORSMiddleware
9
- import os
10
 
 
11
  app = FastAPI()
12
- app.add_middleware(
13
- CORSMiddleware,
14
- allow_origins=["*"], # You can specify domains instead of "*" to restrict access
15
- allow_credentials=True,
16
- allow_methods=["*"], # Allows all HTTP methods (POST, GET, OPTIONS, etc.)
17
- allow_headers=["*"], # Allows all headers
18
- )
19
- # Define Google Sheets API credentials function
20
  def get_credentials():
 
21
  try:
 
22
  service_account_info = {
23
- "type": os.getenv("SERVICE_ACCOUNT_TYPE"),
24
- "project_id": os.getenv("PROJECT_ID"),
25
- "private_key_id": os.getenv("PRIVATE_KEY_ID"),
26
- "private_key": os.getenv("PRIVATE_KEY").replace('\\n', '\n'),
27
- "client_email": os.getenv("CLIENT_EMAIL"),
28
- "client_id": os.getenv("CLIENT_ID"),
29
- "auth_uri": os.getenv("AUTH_URI"),
30
- "token_uri": os.getenv("TOKEN_URI"),
31
- "auth_provider_x509_cert_url": os.getenv("AUTH_PROVIDER_X509_CERT_URL"),
32
- "client_x509_cert_url": os.getenv("CLIENT_X509_CERT_URL"),
33
- "universe_domain": os.getenv("UNIVERSE_DOMAIN")
34
  }
35
  scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
36
  creds = Credentials.from_service_account_info(service_account_info, scopes=scope)
37
  return creds
 
38
  except Exception as e:
39
  print(f"Error getting credentials: {e}")
40
  return None
41
 
42
- # Initialize Google Sheets Client
43
  creds = get_credentials()
44
- if creds:
45
- client = gspread.authorize(creds)
46
- print("Client run done")
47
-
48
- # Define input model
49
- class CoachingCodeInput(BaseModel):
50
- coachingCode: str
51
-
52
- # Define the endpoint
53
- @app.post("/process/")
54
- def process_data(input_data: CoachingCodeInput):
55
- coachingCode = input_data.coachingCode
56
-
57
- # Define Google Sheet URLs based on coachingCode
58
- journal_file_path = ''
59
- panic_button_file_path = ''
60
- test_file_path = ''
61
-
62
- if coachingCode == '1919':
63
- journal_file_path = 'https://docs.google.com/spreadsheets/d/1EFf2lr4A10nt4RhIqxCD_fxe-l3sXH09II0TEkMmvhA/edit?usp=drive_link'
64
- panic_button_file_path = 'https://docs.google.com/spreadsheets/d/1nFZGkCvRV6qS-mhsORhX3dxI0JSge32_UwWgWKl3eyw/edit?usp=drive_link'
65
- test_file_path = 'https://docs.google.com/spreadsheets/d/13PUHySUXWtKBusjugoe7Dbsm39PwBUfG4tGLipspIx4/edit?usp=drive_link'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  else:
67
- raise HTTPException(status_code=404, detail="Invalid coaching code")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
 
 
 
 
 
 
69
  try:
70
- # Open the Google Sheets
71
- journal_file = client.open_by_url(journal_file_path).worksheet('Sheet1')
72
- panic_button_file = client.open_by_url(panic_button_file_path).worksheet('Sheet1')
73
- test_file = client.open_by_url(test_file_path).worksheet('Sheet1')
74
- print("Google sheet open")
75
-
76
- # Step 1: Read the Google Sheets into DataFrames
77
- journal_df = pd.DataFrame(journal_file.get_all_values())
78
- panic_button_df = pd.DataFrame(panic_button_file.get_all_values())
79
- test_df = pd.DataFrame(test_file.get_all_values())
80
- print("Google sheet read")
81
-
82
- # Label the columns manually
83
- journal_df.columns = ['user_id', 'productivity_yes_no', 'productivity_rate']
84
- panic_button_df.columns = ['user_id', 'panic_button']
85
- print("Journal data processed")
86
- # Step 2: Merge Journal and Panic Button data
87
- panic_button_grouped = panic_button_df.groupby('user_id')['panic_button'].apply(lambda x: ','.join(x)).reset_index()
88
- merged_journal_panic = pd.merge(journal_df, panic_button_grouped, on='user_id', how='outer')
89
- print("Panic data processed")
90
- # Step 3: Process Test data
91
- test_data = []
92
- for index, row in test_df.iterrows():
93
- user_id = row[0]
94
- i = 1
95
- while i < len(row) and pd.notna(row[i]):
96
- chapter = row[i].lower().strip()
97
- score = row[i + 1]
98
- if pd.notna(score):
99
- test_data.append({'user_id': user_id, 'test_chapter': chapter, 'test_score': score})
100
- i += 2
101
-
102
- test_df_processed = pd.DataFrame(test_data)
103
- print("test data processed")
104
-
105
- # Step 4: Merge all data
106
- merged_data = pd.merge(merged_journal_panic, test_df_processed, on='user_id', how='outer')
107
- merged_data_cleaned = merged_data.dropna(subset=['productivity_yes_no', 'productivity_rate', 'panic_button', 'test_chapter'], how='all')
108
- print("all data merged")
109
- # Step 5: Process Data
110
- df = pd.DataFrame(merged_data_cleaned)
111
- academic_weights = {'BACKLOGS': -5, 'MISSED CLASSES': -4, 'NOT UNDERSTANDING': -3, 'BAD MARKS': -3, 'LACK OF MOTIVATION': -3}
112
- non_academic_weights = {'EMOTIONAL FACTORS': -3, 'PROCRASTINATE': -2, 'LOST INTEREST': -4, 'LACK OF FOCUS': -2, 'GOALS NOT ACHIEVED': -2, 'LACK OF DISCIPLINE': -2}
113
- max_weighted_panic_score = sum([max(academic_weights.values()) * 3, max(non_academic_weights.values()) * 3])
114
- print("step 5 : data processing done")
115
- def calculate_potential_score(row):
116
- test_score_normalized = 0
117
- if row['test_scores']:
118
- avg_test_score = sum(row['test_scores'].values()) / len(row['test_scores'])
119
- test_score_normalized = (avg_test_score / 40) * 70
120
-
121
- student_panic_score = 0
122
- if row['panic_button']:
123
- for factor, count in row['panic_button'].items():
124
- if factor in academic_weights:
125
- student_panic_score += academic_weights[factor] * count
126
- elif factor in non_academic_weights:
127
- student_panic_score += non_academic_weights[factor] * count
128
-
129
- panic_score = 20 * (1 - (student_panic_score / max_weighted_panic_score)) if max_weighted_panic_score != 0 else 1
130
- journal_score = (float(row['productivity_rate']) / 10) * 10 if pd.notna(row['productivity_rate']) else 0
131
-
132
- total_potential_score = test_score_normalized + panic_score + journal_score
133
- return total_potential_score
134
-
135
- merged_df = df.groupby('user_id').apply(lambda group: pd.Series({
136
- 'potential_score': calculate_potential_score(group)
137
- })).reset_index()
138
- print("step 6 : data merged_df")
139
-
140
- merged_df['potential_score'] = merged_df['potential_score'].round(2)
141
- sorted_df = merged_df[['user_id', 'potential_score']].sort_values(by='potential_score', ascending=False)
142
-
143
- # Return the result as JSON
144
- return sorted_df.to_dict(orient='records')
145
-
146
- except GoogleAuthError as e:
147
- raise HTTPException(status_code=500, detail=f"Authentication failed: {str(e)}")
148
  except Exception as e:
149
- raise HTTPException(status_code=500, detail=f"Error processing data: {str(e)}")
150
-
151
- # To run the app:
152
- # uvicorn filename:app --reload
 
1
+ # Import necessary libraries
2
  from fastapi import FastAPI, HTTPException
3
+ from pydantic import BaseModel
4
  import gspread
5
  from google.oauth2.service_account import Credentials
 
6
  import pandas as pd
7
  from collections import defaultdict
8
+ from google.colab import userdata
 
 
9
 
10
+ # Initialize the FastAPI app
11
  app = FastAPI()
12
+
13
# Step 1: Define a function to get Google Sheets API credentials
def get_credentials():
    """Build Google Sheets API service-account credentials.

    Secrets are read from Colab's ``userdata`` store when running inside
    Google Colab, and fall back to environment variables otherwise, so the
    same code works both in a notebook and in a server deployment.

    Returns:
        google.oauth2.service_account.Credentials on success, or ``None``
        if any secret is missing or credential construction fails.
    """
    import os

    def _secret(name):
        # Prefer Colab userdata; outside Colab (ImportError / no secret)
        # fall back to the process environment.
        try:
            from google.colab import userdata
            value = userdata.get(name)
        except Exception:
            value = None
        return value if value is not None else os.getenv(name)

    try:
        # Guard against a missing key: None.replace() would raise.
        private_key = _secret("PRIVATE_KEY") or ""
        service_account_info = {
            "type": _secret("SERVICE_ACCOUNT_TYPE"),
            "project_id": _secret("PROJECT_ID"),
            "private_key_id": _secret("PRIVATE_KEY_ID"),
            # Stored keys usually contain literal "\n" sequences; restore
            # real newlines so the PEM parses.
            "private_key": private_key.replace('\\n', '\n'),
            "client_email": _secret("CLIENT_EMAIL"),
            "client_id": _secret("CLIENT_ID"),
            "auth_uri": _secret("AUTH_URI"),
            "token_uri": _secret("TOKEN_URI"),
            "auth_provider_x509_cert_url": _secret("AUTH_PROVIDER_X509_CERT_URL"),
            "client_x509_cert_url": _secret("CLIENT_X509_CERT_URL"),
            "universe_domain": _secret("UNIVERSE_DOMAIN"),
        }
        scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
        creds = Credentials.from_service_account_info(service_account_info, scopes=scope)
        return creds
    except Exception as e:
        # Log and signal failure to the caller rather than raising at
        # import time; callers must handle a None return.
        print(f"Error getting credentials: {e}")
        return None
38
 
39
# Step 2: Authorize gspread using the credentials
creds = get_credentials()
# NOTE(review): get_credentials() returns None on failure; gspread.authorize
# would then fail here, at import time — confirm secrets are configured
# before deploying.
client = gspread.authorize(creds)

# Input the paths and coaching code.
# The sheet URLs are filled in below based on the coaching code.
journal_file_path = ''
panic_button_file_path = ''
test_file_path = ''
coachingCode = '1919'  # hard-coded batch/cohort selector

if coachingCode == '1919':
    journal_file_path = 'https://docs.google.com/spreadsheets/d/1EFf2lr4A10nt4RhIqxCD_fxe-l3sXH09II0TEkMmvhA/edit?usp=drive_link'
    panic_button_file_path = 'https://docs.google.com/spreadsheets/d/1nFZGkCvRV6qS-mhsORhX3dxI0JSge32_UwWgWKl3eyw/edit?usp=drive_link'
    test_file_path = 'https://docs.google.com/spreadsheets/d/13PUHySUXWtKBusjugoe7Dbsm39PwBUfG4tGLipspIx4/edit?usp=drive_link'

# Step 3: Open Google Sheets using the URLs.
# NOTE(review): all of this network I/O runs at module import, not per
# request — the API serves a snapshot taken at startup.
journal_file = client.open_by_url(journal_file_path).worksheet('Sheet1')
panic_button_file = client.open_by_url(panic_button_file_path).worksheet('Sheet1')
test_file = client.open_by_url(test_file_path).worksheet('Sheet1')

# Step 4: Convert the sheets into Pandas DataFrames
journal_df = pd.DataFrame(journal_file.get_all_values())
panic_button_df = pd.DataFrame(panic_button_file.get_all_values())
test_df = pd.DataFrame(test_file.get_all_values())

# Label the columns manually since the sheets have no header rows.
journal_df.columns = ['user_id', 'productivity_yes_no', 'productivity_rate']
panic_button_df.columns = ['user_id', 'panic_button']

# Initialize a list for the merged data (rebound to a DataFrame below).
merged_data = []

# Step 5: Group panic buttons by user_id and combine each user's entries
# into a single comma-separated string.
panic_button_grouped = panic_button_df.groupby('user_id')['panic_button'].apply(lambda x: ','.join(x)).reset_index()

# Merge journal and panic button data; outer join keeps users that appear
# in only one of the two sheets.
merged_journal_panic = pd.merge(journal_df, panic_button_grouped, on='user_id', how='outer')

# Step 6: Process the test data.
# Each row is: user_id, then alternating (chapter, score) pairs of
# variable length; stop at the first empty cell.
test_data = []
for index, row in test_df.iterrows():
    user_id = row[0]
    i = 1
    while i < len(row) and pd.notna(row[i]):  # walk chapter/score pairs
        chapter = row[i].lower().strip()
        score = row[i + 1]
        if pd.notna(score):
            test_data.append({'user_id': user_id, 'test_chapter': chapter, 'test_score': score})
        i += 2

# Convert the processed test data into a long-format DataFrame
# (one row per user/chapter).
test_df_processed = pd.DataFrame(test_data)

# Step 7: Merge the journal+panic button data with the test data
merged_data = pd.merge(merged_journal_panic, test_df_processed, on='user_id', how='outer')

# Step 8: Drop rows where ALL of these columns are missing
# (how='all' keeps rows with at least one value).
merged_data_cleaned = merged_data.dropna(subset=['productivity_yes_no', 'productivity_rate', 'panic_button', 'test_chapter'], how='all')

# Working copy used by the per-user grouping below.
df = pd.DataFrame(merged_data_cleaned)
100
+
101
# Function to process panic button counts and test scores
def process_group(group):
    """Collapse one user's rows into a single summary Series.

    Expects a DataFrame slice with columns: productivity_yes_no,
    productivity_rate, panic_button, test_chapter, test_score.

    Returns:
        pd.Series with the first journal values, a dict counting each
        distinct panic_button value, and a dict of mean test score per
        chapter.
    """
    # Count occurrences of each panic-button value (NaNs excluded).
    # NOTE(review): upstream joins each user's buttons into one
    # comma-separated string, so keys here may be joined strings rather
    # than single factors — verify against calculate_potential_score.
    panic_button_dict = group['panic_button'].dropna().value_counts().to_dict()

    # Copy before mutating: assigning into a bare column selection of a
    # slice triggers pandas' chained-assignment warning and may silently
    # fail to write.
    test_scores = group[['test_chapter', 'test_score']].dropna().copy()
    test_scores['test_score'] = pd.to_numeric(test_scores['test_score'], errors='coerce')

    # Mean score per chapter, dropping chapters whose scores were all
    # non-numeric (coerced to NaN above).
    test_scores_dict = test_scores.groupby('test_chapter')['test_score'].mean().dropna().to_dict()

    return pd.Series({
        'productivity_yes_no': group['productivity_yes_no'].iloc[0],
        'productivity_rate': group['productivity_rate'].iloc[0],
        'panic_button': panic_button_dict,
        'test_scores': test_scores_dict
    })
120
+
121
# Collapse each user's rows into one summary row
# (panic counts + per-chapter mean test scores).
merged_df = df.groupby('user_id').apply(process_group).reset_index()

# Step 9: Calculate potential score
# Panic button weightages: all negative, so each press lowers the score.
academic_weights = {'BACKLOGS': -5, 'MISSED CLASSES': -4, 'NOT UNDERSTANDING': -3, 'BAD MARKS': -3, 'LACK OF MOTIVATION': -3}
non_academic_weights = {'EMOTIONAL FACTORS': -3, 'PROCRASTINATE': -2, 'LOST INTEREST': -4, 'LACK OF FOCUS': -2, 'GOALS NOT ACHIEVED': -2, 'LACK OF DISCIPLINE': -2}

# Normalization denominator for the panic component.
# NOTE(review): max() over all-negative weights picks the *least* negative
# values, giving -15 here; if the intent was the worst case, min() was
# probably meant — confirm the scoring design.
max_weighted_panic_score = sum([max(academic_weights.values()) * 3, max(non_academic_weights.values()) * 3])
131
+
132
# Function to calculate potential score
def calculate_potential_score(row, academic=None, non_academic=None, max_panic=None):
    """Compute one user's potential score (tests 70 / panic 20 / journal 10).

    Args:
        row: mapping with keys 'test_scores' (dict chapter -> numeric
            score), 'panic_button' (dict factor -> count),
            'productivity_yes_no' and 'productivity_rate'.
        academic: optional override for the module-level academic_weights.
        non_academic: optional override for non_academic_weights.
        max_panic: optional override for max_weighted_panic_score.

    Returns:
        float total potential score.
    """
    # Resolve weight tables: fall back to the module-level globals so the
    # original call signature keeps working unchanged.
    aw = academic_weights if academic is None else academic
    naw = non_academic_weights if non_academic is None else non_academic
    max_w = max_weighted_panic_score if max_panic is None else max_panic

    # Test component (70% weightage): mean chapter score scaled from a
    # maximum of 40 to a maximum of 70.
    if row['test_scores']:
        avg_test_score = sum(row['test_scores'].values()) / len(row['test_scores'])
        test_score_normalized = (avg_test_score / 40) * 70
    else:
        test_score_normalized = 0  # no tests recorded for this user

    # Panic component (20% weightage): weighted sum of panic-button
    # presses. Academic weights take priority over non-academic when a
    # factor appears in both tables.
    student_panic_score = 0
    if row['panic_button']:
        for factor, count in row['panic_button'].items():
            if factor in aw:
                student_panic_score += aw[factor] * count
            elif factor in naw:
                student_panic_score += naw[factor] * count

    # Normalize to 20; with a zero denominator the ratio is treated as 0,
    # which yields the same score (20) as the original expression.
    ratio = (student_panic_score / max_w) if max_w != 0 else 0
    panic_score = 20 * (1 - ratio)

    # Journal component: full 10-point weight for 'Yes', half weight (5)
    # for 'No', zero when either field is missing.
    yes_no = row['productivity_yes_no']
    if pd.notna(yes_no) and yes_no in ('Yes', 'No') and pd.notna(row['productivity_rate']):
        scale = 10 if yes_no == 'Yes' else 5
        journal_score = (float(row['productivity_rate']) / 10) * scale
    else:
        journal_score = 0

    # Total score based on the three weighted components.
    return test_score_normalized + panic_score + journal_score
172
+
173
# Apply potential score calculation to every user row (axis=1 => per row)
merged_df['potential_score'] = merged_df.apply(calculate_potential_score, axis=1)
# Round to 2 decimals for presentation in the API response
merged_df['potential_score'] = merged_df['potential_score'].round(2)

# Step 10: Sort users by potential score, highest first
sorted_df = merged_df[['user_id', 'potential_score']].sort_values(by='potential_score', ascending=False)
179
+
180
# Step 11: Define API endpoint to get the sorted potential scores
@app.get("/sorted-potential-scores")
async def get_sorted_potential_scores():
    """Return every user's potential score, highest first."""
    try:
        records = sorted_df.to_dict(orient="records")
    except Exception as exc:
        # Surface any failure as a 500 with the underlying message.
        raise HTTPException(status_code=500, detail=str(exc))
    return {"sorted_scores": records}