Spaces:

SstudizeSA
/

b2boutliner

Sleeping

App Files Files Community

Rakshitjan committed on Oct 6, 2024

Commit

a42cd71

verified ·

1 Parent(s): d06e14a

Update main.py

Browse files

Files changed (1) hide show

main.py +282 -93

main.py CHANGED Viewed

@@ -1,5 +1,194 @@
 # Import necessary libraries
-from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 import gspread
 from google.oauth2.service_account import Credentials
@@ -40,88 +229,18 @@ def get_credentials():
 creds = get_credentials()
 client = gspread.authorize(creds)
-# Input the paths and coaching code
-journal_file_path = ''
-panic_button_file_path = ''
-test_file_path = ''
-coachingCode = '1919'
-if coachingCode == '1919':
-    journal_file_path = 'https://docs.google.com/spreadsheets/d/1EFf2lr4A10nt4RhIqxCD_fxe-l3sXH09II0TEkMmvhA/edit?usp=drive_link'
-    panic_button_file_path = 'https://docs.google.com/spreadsheets/d/1nFZGkCvRV6qS-mhsORhX3dxI0JSge32_UwWgWKl3eyw/edit?usp=drive_link'
-    test_file_path = 'https://docs.google.com/spreadsheets/d/13PUHySUXWtKBusjugoe7Dbsm39PwBUfG4tGLipspIx4/edit?usp=drive_link'
-# Step 3: Open Google Sheets using the URLs
-journal_file = client.open_by_url(journal_file_path).worksheet('Sheet1')
-panic_button_file = client.open_by_url(panic_button_file_path).worksheet('Sheet1')  # Fixed missing part
-test_file = client.open_by_url(test_file_path).worksheet('Sheet1')
-# Step 4: Convert the sheets into Pandas DataFrames
-journal_df = pd.DataFrame(journal_file.get_all_values())
-panic_button_df = pd.DataFrame(panic_button_file.get_all_values())
-test_df = pd.DataFrame(test_file.get_all_values())
-# Label the columns manually since there are no headers
-journal_df.columns = ['user_id', 'productivity_yes_no', 'productivity_rate']
-panic_button_df.columns = ['user_id', 'panic_button']
-# Initialize a list for the merged data
-merged_data = []
-# Step 5: Group panic buttons by user_id and combine into a single comma-separated string
-panic_button_grouped = panic_button_df.groupby('user_id')['panic_button'].apply(lambda x: ','.join(x)).reset_index()
-# Merge journal and panic button data
-merged_journal_panic = pd.merge(journal_df, panic_button_grouped, on='user_id', how='outer')
-# Step 6: Process the test data
-test_data = []
-for index, row in test_df.iterrows():
-    user_id = row[0]
-    i = 1
-    while i < len(row) and pd.notna(row[i]):  # Process chapter and score pairs
-        chapter = row[i].lower().strip()
-        score = row[i + 1]
-        if pd.notna(score):
-            test_data.append({'user_id': user_id, 'test_chapter': chapter, 'test_score': score})
-        i += 2
-# Convert the processed test data into a DataFrame
-test_df_processed = pd.DataFrame(test_data)
-# Step 7: Merge the journal+panic button data with the test data
-merged_data = pd.merge(merged_journal_panic, test_df_processed, on='user_id', how='outer')
-# Step 8: Drop rows where all data (except user_id and test_chapter) is missing
-merged_data_cleaned = merged_data.dropna(subset=['productivity_yes_no', 'productivity_rate', 'panic_button', 'test_chapter'], how='all')
-# Group the merged DataFrame by user_id
-df = pd.DataFrame(merged_data_cleaned)
-# Function to process panic button counts and test scores
-def process_group(group):
-    # Panic button counts
-    panic_button_series = group['panic_button'].dropna()
-    panic_button_dict = panic_button_series.value_counts().to_dict()
-    # Test scores aggregation
-    test_scores = group[['test_chapter', 'test_score']].dropna()
-    test_scores['test_score'] = pd.to_numeric(test_scores['test_score'], errors='coerce')
-    # Create the test_scores_dict excluding NaN values
-    test_scores_dict = test_scores.groupby('test_chapter')['test_score'].mean().dropna().to_dict()
-    return pd.Series({
-        'productivity_yes_no': group['productivity_yes_no'].iloc[0],
-        'productivity_rate': group['productivity_rate'].iloc[0],
-        'panic_button': panic_button_dict,
-        'test_scores': test_scores_dict
-    })
-# Apply the group processing function
-merged_df = df.groupby('user_id').apply(process_group).reset_index()
-# Step 9: Calculate potential score
 # Panic button weightages
 academic_weights = {'BACKLOGS': -5, 'MISSED CLASSES': -4, 'NOT UNDERSTANDING': -3, 'BAD MARKS': -3, 'LACK OF MOTIVATION': -3}
 non_academic_weights = {'EMOTIONAL FACTORS': -3, 'PROCRASTINATE': -2, 'LOST INTEREST': -4, 'LACK OF FOCUS': -2, 'GOALS NOT ACHIEVED': -2, 'LACK OF DISCIPLINE': -2}
@@ -170,20 +289,90 @@ def calculate_potential_score(row):
     total_potential_score = test_score_normalized + panic_score + journal_score
     return total_potential_score
-# Apply potential score calculation to the dataframe
-merged_df['potential_score'] = merged_df.apply(calculate_potential_score, axis=1)
-merged_df['potential_score'] = merged_df['potential_score'].round(2)
-# Step 10: Sort by potential score
-sorted_df = merged_df[['user_id', 'potential_score']].sort_values(by='potential_score', ascending=False)
 # Step 11: Define API endpoint to get the sorted potential scores
 @app.get("/sorted-potential-scores")
-async def get_sorted_potential_scores():
     try:
         result = sorted_df.to_dict(orient="records")
         return {"sorted_scores": result}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

+# # Import necessary libraries
+# from fastapi import FastAPI, HTTPException
+# from pydantic import BaseModel
+# import gspread
+# from google.oauth2.service_account import Credentials
+# import pandas as pd
+# from collections import defaultdict
+# import os
+# # Initialize the FastAPI app
+# app = FastAPI()
+# # Step 1: Define a function to get Google Sheets API credentials
+# def get_credentials():
+#     """Get Google Sheets API credentials from environment variables."""
+#     try:
+#         # Construct the service account info dictionary
+#         service_account_info = {
+#             "type": os.getenv("SERVICE_ACCOUNT_TYPE"),
+#             "project_id": os.getenv("PROJECT_ID"),
+#             "private_key_id": os.getenv("PRIVATE_KEY_ID"),
+#             "private_key": os.getenv("PRIVATE_KEY").replace('\\n', '\n'),
+#             "client_email": os.getenv("CLIENT_EMAIL"),
+#             "client_id": os.getenv("CLIENT_ID"),
+#             "auth_uri": os.getenv("AUTH_URI"),
+#             "token_uri": os.getenv("TOKEN_URI"),
+#             "auth_provider_x509_cert_url": os.getenv("AUTH_PROVIDER_X509_CERT_URL"),
+#             "client_x509_cert_url": os.getenv("CLIENT_X509_CERT_URL"),
+#             "universe_domain": os.getenv("UNIVERSE_DOMAIN")
+#         }
+#         scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
+#         creds = Credentials.from_service_account_info(service_account_info, scopes=scope)
+#         return creds
+#     except Exception as e:
+#         print(f"Error getting credentials: {e}")
+#         return None
+# # Step 2: Authorize gspread using the credentials
+# creds = get_credentials()
+# client = gspread.authorize(creds)
+# # Input the paths and coaching code
+# journal_file_path = ''
+# panic_button_file_path = ''
+# test_file_path = ''
+# coachingCode = '1919'
+# if coachingCode == '1919':
+#     journal_file_path = 'https://docs.google.com/spreadsheets/d/1EFf2lr4A10nt4RhIqxCD_fxe-l3sXH09II0TEkMmvhA/edit?usp=drive_link'
+#     panic_button_file_path = 'https://docs.google.com/spreadsheets/d/1nFZGkCvRV6qS-mhsORhX3dxI0JSge32_UwWgWKl3eyw/edit?usp=drive_link'
+#     test_file_path = 'https://docs.google.com/spreadsheets/d/13PUHySUXWtKBusjugoe7Dbsm39PwBUfG4tGLipspIx4/edit?usp=drive_link'
+# # Step 3: Open Google Sheets using the URLs
+# journal_file = client.open_by_url(journal_file_path).worksheet('Sheet1')
+# panic_button_file = client.open_by_url(panic_button_file_path).worksheet('Sheet1')  # Fixed missing part
+# test_file = client.open_by_url(test_file_path).worksheet('Sheet1')
+# # Step 4: Convert the sheets into Pandas DataFrames
+# journal_df = pd.DataFrame(journal_file.get_all_values())
+# panic_button_df = pd.DataFrame(panic_button_file.get_all_values())
+# test_df = pd.DataFrame(test_file.get_all_values())
+# # Label the columns manually since there are no headers
+# journal_df.columns = ['user_id', 'productivity_yes_no', 'productivity_rate']
+# panic_button_df.columns = ['user_id', 'panic_button']
+# # Initialize a list for the merged data
+# merged_data = []
+# # Step 5: Group panic buttons by user_id and combine into a single comma-separated string
+# panic_button_grouped = panic_button_df.groupby('user_id')['panic_button'].apply(lambda x: ','.join(x)).reset_index()
+# # Merge journal and panic button data
+# merged_journal_panic = pd.merge(journal_df, panic_button_grouped, on='user_id', how='outer')
+# # Step 6: Process the test data
+# test_data = []
+# for index, row in test_df.iterrows():
+#     user_id = row[0]
+#     i = 1
+#     while i < len(row) and pd.notna(row[i]):  # Process chapter and score pairs
+#         chapter = row[i].lower().strip()
+#         score = row[i + 1]
+#         if pd.notna(score):
+#             test_data.append({'user_id': user_id, 'test_chapter': chapter, 'test_score': score})
+#         i += 2
+# # Convert the processed test data into a DataFrame
+# test_df_processed = pd.DataFrame(test_data)
+# # Step 7: Merge the journal+panic button data with the test data
+# merged_data = pd.merge(merged_journal_panic, test_df_processed, on='user_id', how='outer')
+# # Step 8: Drop rows where all data (except user_id and test_chapter) is missing
+# merged_data_cleaned = merged_data.dropna(subset=['productivity_yes_no', 'productivity_rate', 'panic_button', 'test_chapter'], how='all')
+# # Group the merged DataFrame by user_id
+# df = pd.DataFrame(merged_data_cleaned)
+# # Function to process panic button counts and test scores
+# def process_group(group):
+#     # Panic button counts
+#     panic_button_series = group['panic_button'].dropna()
+#     panic_button_dict = panic_button_series.value_counts().to_dict()
+#     # Test scores aggregation
+#     test_scores = group[['test_chapter', 'test_score']].dropna()
+#     test_scores['test_score'] = pd.to_numeric(test_scores['test_score'], errors='coerce')
+#     # Create the test_scores_dict excluding NaN values
+#     test_scores_dict = test_scores.groupby('test_chapter')['test_score'].mean().dropna().to_dict()
+#     return pd.Series({
+#         'productivity_yes_no': group['productivity_yes_no'].iloc[0],
+#         'productivity_rate': group['productivity_rate'].iloc[0],
+#         'panic_button': panic_button_dict,
+#         'test_scores': test_scores_dict
+#     })
+# # Apply the group processing function
+# merged_df = df.groupby('user_id').apply(process_group).reset_index()
+# # Step 9: Calculate potential score
+# # Panic button weightages
+# academic_weights = {'BACKLOGS': -5, 'MISSED CLASSES': -4, 'NOT UNDERSTANDING': -3, 'BAD MARKS': -3, 'LACK OF MOTIVATION': -3}
+# non_academic_weights = {'EMOTIONAL FACTORS': -3, 'PROCRASTINATE': -2, 'LOST INTEREST': -4, 'LACK OF FOCUS': -2, 'GOALS NOT ACHIEVED': -2, 'LACK OF DISCIPLINE': -2}
+# # Max weighted panic score
+# max_weighted_panic_score = sum([max(academic_weights.values()) * 3, max(non_academic_weights.values()) * 3])
+# # Function to calculate potential score
+# def calculate_potential_score(row):
+#     # Test score normalization (70% weightage)
+#     if row['test_scores']:  # Check if test_scores is not empty
+#         avg_test_score = sum(row['test_scores'].values()) / len(row['test_scores'])
+#         test_score_normalized = (avg_test_score / 40) * 70  # Scale test score to 70
+#     else:
+#         test_score_normalized = 0  # Default value for users with no test scores
+#     # Panic score calculation (20% weightage)
+#     student_panic_score = 0
+#     if row['panic_button']:  # Ensure panic_button is not NaN or empty
+#         for factor, count in row['panic_button'].items():
+#             if factor in academic_weights:
+#                 student_panic_score += academic_weights[factor] * count
+#             elif factor in non_academic_weights:
+#                 student_panic_score += non_academic_weights[factor] * count
+#     else:
+#         student_panic_score = 0  # Default if no panic button issues
+#     # Panic score normalized to 20
+#     panic_score = 20 * (1 - (student_panic_score / max_weighted_panic_score) if max_weighted_panic_score != 0 else 1)
+#     # Journal score calculation (10% weightage)
+#     if pd.notna(row['productivity_yes_no']) and row['productivity_yes_no'] == 'Yes':
+#         if pd.notna(row['productivity_rate']):
+#             journal_score = (float(row['productivity_rate']) / 10) * 10  # Scale journal score to 10
+#         else:
+#             journal_score = 0  # Default if productivity_rate is missing
+#     elif pd.notna(row['productivity_yes_no']) and row['productivity_yes_no'] == 'No':
+#         if pd.notna(row['productivity_rate']):
+#             journal_score = (float(row['productivity_rate']) / 10) * 5  # Scale journal score to 5 if "No"
+#         else:
+#             journal_score = 0  # Default if productivity_rate is missing
+#     else:
+#         journal_score = 0  # Default if productivity_yes_no is missing
+#     # Total score based on new weightages
+#     total_potential_score = test_score_normalized + panic_score + journal_score
+#     return total_potential_score
+# # Apply potential score calculation to the dataframe
+# merged_df['potential_score'] = merged_df.apply(calculate_potential_score, axis=1)
+# merged_df['potential_score'] = merged_df['potential_score'].round(2)
+# # Step 10: Sort by potential score
+# sorted_df = merged_df[['user_id', 'potential_score']].sort_values(by='potential_score', ascending=False)
+# # Step 11: Define API endpoint to get the sorted potential scores
+# @app.get("/sorted-potential-scores")
+# async def get_sorted_potential_scores():
+#     try:
+#         result = sorted_df.to_dict(orient="records")
+#         return {"sorted_scores": result}
+#     except Exception as e:
+#         raise HTTPException(status_code=500, detail=str(e))
 # Import necessary libraries
+from fastapi import FastAPI, HTTPException, Query
 from pydantic import BaseModel
 import gspread
 from google.oauth2.service_account import Credentials
 creds = get_credentials()
 client = gspread.authorize(creds)
+# Function to get file paths based on coaching code
+def get_file_paths(coaching_code):
+    """Look up the Google Sheets URLs that belong to a coaching code.
+
+    Returns a dict with the keys 'journal', 'panic_button' and 'test' for a
+    known coaching code, or None when the code is not recognised.
+    """
+    # Guard clause: only '1919' is configured so far.
+    # Add more coaching codes and their corresponding file paths here
+    if coaching_code != '1919':
+        return None
+    journal_url = 'https://docs.google.com/spreadsheets/d/1EFf2lr4A10nt4RhIqxCD_fxe-l3sXH09II0TEkMmvhA/edit?usp=drive_link'
+    panic_button_url = 'https://docs.google.com/spreadsheets/d/1nFZGkCvRV6qS-mhsORhX3dxI0JSge32_UwWgWKl3eyw/edit?usp=drive_link'
+    test_url = 'https://docs.google.com/spreadsheets/d/13PUHySUXWtKBusjugoe7Dbsm39PwBUfG4tGLipspIx4/edit?usp=drive_link'
+    return {
+        'journal': journal_url,
+        'panic_button': panic_button_url,
+        'test': test_url,
+    }
 # Panic button weightages
 academic_weights = {'BACKLOGS': -5, 'MISSED CLASSES': -4, 'NOT UNDERSTANDING': -3, 'BAD MARKS': -3, 'LACK OF MOTIVATION': -3}
 non_academic_weights = {'EMOTIONAL FACTORS': -3, 'PROCRASTINATE': -2, 'LOST INTEREST': -4, 'LACK OF FOCUS': -2, 'GOALS NOT ACHIEVED': -2, 'LACK OF DISCIPLINE': -2}
     total_potential_score = test_score_normalized + panic_score + journal_score
     return total_potential_score
 # Step 11: Define API endpoint to get the sorted potential scores
 @app.get("/sorted-potential-scores")
+async def get_sorted_potential_scores(coaching_code: str = Query(..., description="Coaching code to determine file paths")):
+    """Return each student's potential score for one coaching code, highest first.
+
+    The journal, panic-button and test sheets registered for `coaching_code`
+    are read from Google Sheets on every request, merged per user_id, scored
+    with calculate_potential_score and sorted in descending order.
+
+    Returns:
+        {"sorted_scores": [{"user_id": ..., "potential_score": ...}, ...]}
+
+    Raises:
+        HTTPException 400: the coaching code has no registered sheets.
+        HTTPException 500: any other failure while reading or processing.
+
+    NOTE(review): the gspread calls below look synchronous, so they would
+    block the event loop inside this `async def` - confirm and consider a
+    plain `def` endpoint.
+    """
     try:
+        file_paths = get_file_paths(coaching_code)
+        if not file_paths:
+            raise HTTPException(status_code=400, detail="Invalid coaching code")
+        # Open Google Sheets using the URLs
+        journal_file = client.open_by_url(file_paths['journal']).worksheet('Sheet1')
+        panic_button_file = client.open_by_url(file_paths['panic_button']).worksheet('Sheet1')
+        test_file = client.open_by_url(file_paths['test']).worksheet('Sheet1')
+        # Convert the sheets into Pandas DataFrames
+        journal_df = pd.DataFrame(journal_file.get_all_values())
+        panic_button_df = pd.DataFrame(panic_button_file.get_all_values())
+        test_df = pd.DataFrame(test_file.get_all_values())
+        # Label the columns manually since there are no headers
+        journal_df.columns = ['user_id', 'productivity_yes_no', 'productivity_rate']
+        panic_button_df.columns = ['user_id', 'panic_button']
+        # Group panic buttons by user_id and combine into a single comma-separated string
+        panic_button_grouped = panic_button_df.groupby('user_id')['panic_button'].apply(lambda x: ','.join(x)).reset_index()
+        # Merge journal and panic button data
+        merged_journal_panic = pd.merge(journal_df, panic_button_grouped, on='user_id', how='outer')
+        # Process the test data: each row is user_id followed by (chapter, score) pairs
+        test_data = []
+        for _, row in test_df.iterrows():
+            user_id = row[0]
+            i = 1
+            # Require a full (chapter, score) pair so a trailing chapter cell
+            # without a score column cannot index past the end of the row.
+            while i + 1 < len(row) and pd.notna(row[i]):
+                chapter = row[i].lower().strip()
+                score = row[i + 1]
+                if pd.notna(score):
+                    test_data.append({'user_id': user_id, 'test_chapter': chapter, 'test_score': score})
+                i += 2
+        # Name the columns explicitly so an empty test sheet still yields a
+        # frame with 'user_id' and the merge below does not fail.
+        test_df_processed = pd.DataFrame(test_data, columns=['user_id', 'test_chapter', 'test_score'])
+        # Merge the journal+panic button data with the test data
+        merged_data = pd.merge(merged_journal_panic, test_df_processed, on='user_id', how='outer')
+        # Drop rows where journal, panic-button and test-chapter data are all missing
+        merged_data_cleaned = merged_data.dropna(subset=['productivity_yes_no', 'productivity_rate', 'panic_button', 'test_chapter'], how='all')
+        # Function to process panic button counts and test scores
+        def process_group(group):
+            """Collapse all merged rows of one user into a single summary row."""
+            # Every row of a user carries the same comma-joined panic string,
+            # repeated once per merged journal/test row. Read it once and split
+            # it back into individual factors, so the counts reflect button
+            # presses and the keys can match the weight tables.
+            panic_button_dict = {}
+            panic_button_series = group['panic_button'].dropna()
+            if not panic_button_series.empty:
+                for factor in panic_button_series.iloc[0].split(','):
+                    factor = factor.strip()
+                    if factor:
+                        panic_button_dict[factor] = panic_button_dict.get(factor, 0) + 1
+            # Test scores aggregation (copy so the assignment below does not
+            # write into a slice of the group)
+            test_scores = group[['test_chapter', 'test_score']].dropna().copy()
+            test_scores['test_score'] = pd.to_numeric(test_scores['test_score'], errors='coerce')
+            # Create the test_scores_dict excluding NaN values
+            test_scores_dict = test_scores.groupby('test_chapter')['test_score'].mean().dropna().to_dict()
+            return pd.Series({
+                'productivity_yes_no': group['productivity_yes_no'].iloc[0],
+                'productivity_rate': group['productivity_rate'].iloc[0],
+                'panic_button': panic_button_dict,
+                'test_scores': test_scores_dict
+            })
+        # Apply the group processing function
+        merged_df = merged_data_cleaned.groupby('user_id').apply(process_group).reset_index()
+        # Calculate potential scores and sort
+        merged_df['potential_score'] = merged_df.apply(calculate_potential_score, axis=1)
+        merged_df['potential_score'] = merged_df['potential_score'].round(2)
+        sorted_df = merged_df[['user_id', 'potential_score']].sort_values(by='potential_score', ascending=False)
         result = sorted_df.to_dict(orient="records")
         return {"sorted_scores": result}
+    except HTTPException:
+        # Let deliberate HTTP errors (such as the 400 above) through unchanged
+        # instead of re-wrapping them as a 500.
+        raise
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))