Rakshitjan committed on
Commit
5141c51
·
verified ·
1 Parent(s): 29994d9

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +164 -129
main.py CHANGED
@@ -1,152 +1,187 @@
 
1
  from fastapi import FastAPI, HTTPException
 
2
  import gspread
3
  from google.oauth2.service_account import Credentials
4
- from google.auth.exceptions import GoogleAuthError
5
  import pandas as pd
6
  from collections import defaultdict
7
- from pydantic import BaseModel
8
- from fastapi.middleware.cors import CORSMiddleware
9
- import os
10
 
 
11
  app = FastAPI()
12
- app.add_middleware(
13
- CORSMiddleware,
14
- allow_origins=["*"], # You can specify domains instead of "*" to restrict access
15
- allow_credentials=True,
16
- allow_methods=["*"], # Allows all HTTP methods (POST, GET, OPTIONS, etc.)
17
- allow_headers=["*"], # Allows all headers
18
- )
19
- # Define Google Sheets API credentials function
20
  def get_credentials():
 
21
  try:
 
22
  service_account_info = {
23
- "type": os.getenv("SERVICE_ACCOUNT_TYPE"),
24
- "project_id": os.getenv("PROJECT_ID"),
25
- "private_key_id": os.getenv("PRIVATE_KEY_ID"),
26
- "private_key": os.getenv("PRIVATE_KEY").replace('\\n', '\n'),
27
- "client_email": os.getenv("CLIENT_EMAIL"),
28
- "client_id": os.getenv("CLIENT_ID"),
29
- "auth_uri": os.getenv("AUTH_URI"),
30
- "token_uri": os.getenv("TOKEN_URI"),
31
- "auth_provider_x509_cert_url": os.getenv("AUTH_PROVIDER_X509_CERT_URL"),
32
- "client_x509_cert_url": os.getenv("CLIENT_X509_CERT_URL"),
33
- "universe_domain": os.getenv("UNIVERSE_DOMAIN")
34
  }
35
  scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
36
  creds = Credentials.from_service_account_info(service_account_info, scopes=scope)
37
  return creds
 
38
  except Exception as e:
39
  print(f"Error getting credentials: {e}")
40
  return None
41
 
42
- # Initialize Google Sheets Client
43
  creds = get_credentials()
44
- if creds:
45
- client = gspread.authorize(creds)
46
- print("Client run done")
47
-
48
- # Define input model
49
- class CoachingCodeInput(BaseModel):
50
- coachingCode: str
51
-
52
- # Define the endpoint
53
- @app.post("/process/")
54
- def process_data(input_data: CoachingCodeInput):
55
- coachingCode = input_data.coachingCode
56
-
57
- # Define Google Sheet URLs based on coachingCode
58
- journal_file_path = ''
59
- panic_button_file_path = ''
60
- test_file_path = ''
61
-
62
- if coachingCode == '1919':
63
- journal_file_path = 'https://docs.google.com/spreadsheets/d/1EFf2lr4A10nt4RhIqxCD_fxe-l3sXH09II0TEkMmvhA/edit?usp=drive_link'
64
- panic_button_file_path = 'https://docs.google.com/spreadsheets/d/1nFZGkCvRV6qS-mhsORhX3dxI0JSge32_UwWgWKl3eyw/edit?usp=drive_link'
65
- test_file_path = 'https://docs.google.com/spreadsheets/d/13PUHySUXWtKBusjugoe7Dbsm39PwBUfG4tGLipspIx4/edit?usp=drive_link'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  else:
67
- raise HTTPException(status_code=404, detail="Invalid coaching code")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
 
 
 
 
 
 
69
  try:
70
- # Open the Google Sheets
71
- journal_file = client.open_by_url(journal_file_path).worksheet('Sheet1')
72
- panic_button_file = client.open_by_url(panic_button_file_path).worksheet('Sheet1')
73
- test_file = client.open_by_url(test_file_path).worksheet('Sheet1')
74
- print("Google sheet open")
75
-
76
- # Step 1: Read the Google Sheets into DataFrames
77
- journal_df = pd.DataFrame(journal_file.get_all_values())
78
- panic_button_df = pd.DataFrame(panic_button_file.get_all_values())
79
- test_df = pd.DataFrame(test_file.get_all_values())
80
- print("Google sheet read")
81
-
82
- # Label the columns manually
83
- journal_df.columns = ['user_id', 'productivity_yes_no', 'productivity_rate']
84
- panic_button_df.columns = ['user_id', 'panic_button']
85
- print("Journal data processed")
86
- # Step 2: Merge Journal and Panic Button data
87
- panic_button_grouped = panic_button_df.groupby('user_id')['panic_button'].apply(lambda x: ','.join(x)).reset_index()
88
- merged_journal_panic = pd.merge(journal_df, panic_button_grouped, on='user_id', how='outer')
89
- print("Panic data processed")
90
- # Step 3: Process Test data
91
- test_data = []
92
- for index, row in test_df.iterrows():
93
- user_id = row[0]
94
- i = 1
95
- while i < len(row) and pd.notna(row[i]):
96
- chapter = row[i].lower().strip()
97
- score = row[i + 1]
98
- if pd.notna(score):
99
- test_data.append({'user_id': user_id, 'test_chapter': chapter, 'test_score': score})
100
- i += 2
101
-
102
- test_df_processed = pd.DataFrame(test_data)
103
- print("test data processed")
104
-
105
- # Step 4: Merge all data
106
- merged_data = pd.merge(merged_journal_panic, test_df_processed, on='user_id', how='outer')
107
- merged_data_cleaned = merged_data.dropna(subset=['productivity_yes_no', 'productivity_rate', 'panic_button', 'test_chapter'], how='all')
108
- print("all data merged")
109
- # Step 5: Process Data
110
- df = pd.DataFrame(merged_data_cleaned)
111
- academic_weights = {'BACKLOGS': -5, 'MISSED CLASSES': -4, 'NOT UNDERSTANDING': -3, 'BAD MARKS': -3, 'LACK OF MOTIVATION': -3}
112
- non_academic_weights = {'EMOTIONAL FACTORS': -3, 'PROCRASTINATE': -2, 'LOST INTEREST': -4, 'LACK OF FOCUS': -2, 'GOALS NOT ACHIEVED': -2, 'LACK OF DISCIPLINE': -2}
113
- max_weighted_panic_score = sum([max(academic_weights.values()) * 3, max(non_academic_weights.values()) * 3])
114
- print("step 5 : data processing done")
115
- def calculate_potential_score(row):
116
- test_score_normalized = 0
117
- if row['test_scores']:
118
- avg_test_score = sum(row['test_scores'].values()) / len(row['test_scores'])
119
- test_score_normalized = (avg_test_score / 40) * 70
120
-
121
- student_panic_score = 0
122
- if row['panic_button']:
123
- for factor, count in row['panic_button'].items():
124
- if factor in academic_weights:
125
- student_panic_score += academic_weights[factor] * count
126
- elif factor in non_academic_weights:
127
- student_panic_score += non_academic_weights[factor] * count
128
-
129
- panic_score = 20 * (1 - (student_panic_score / max_weighted_panic_score)) if max_weighted_panic_score != 0 else 1
130
- journal_score = (float(row['productivity_rate']) / 10) * 10 if pd.notna(row['productivity_rate']) else 0
131
-
132
- total_potential_score = test_score_normalized + panic_score + journal_score
133
- return total_potential_score
134
-
135
- merged_df = df.groupby('user_id').apply(lambda group: pd.Series({
136
- 'potential_score': calculate_potential_score(group)
137
- })).reset_index()
138
- print("step 6 : data merged_df")
139
-
140
- merged_df['potential_score'] = merged_df['potential_score'].round(2)
141
- sorted_df = merged_df[['user_id', 'potential_score']].sort_values(by='potential_score', ascending=False)
142
-
143
- # Return the result as JSON
144
- return sorted_df.to_dict(orient='records')
145
-
146
- except GoogleAuthError as e:
147
- raise HTTPException(status_code=500, detail=f"Authentication failed: {str(e)}")
148
  except Exception as e:
149
- raise HTTPException(status_code=500, detail=f"Error processing data: {str(e)}")
150
-
151
- # To run the app:
152
- # uvicorn filename:app --reload
 
1
+ # Import necessary libraries
2
  from fastapi import FastAPI, HTTPException
3
+ from pydantic import BaseModel
4
  import gspread
5
  from google.oauth2.service_account import Credentials
 
6
  import pandas as pd
7
  from collections import defaultdict
8
+ from google.colab import userdata
 
 
9
 
10
+ # Initialize the FastAPI app
11
  app = FastAPI()
12
+
13
# Step 1: Define a function to get Google Sheets API credentials
def get_credentials():
    """Build Google Sheets API service-account credentials.

    Secrets are read from Colab's ``userdata`` store when running inside
    Google Colab, and fall back to environment variables otherwise, so the
    same code works both in a notebook and in a server deployment.

    Returns:
        google.oauth2.service_account.Credentials on success, or ``None``
        if any secret is missing or credential construction fails.
    """
    import os

    def _secret(name):
        # Prefer Colab userdata; outside Colab (ImportError / no secret)
        # fall back to the process environment.
        try:
            from google.colab import userdata
            value = userdata.get(name)
        except Exception:
            value = None
        return value if value is not None else os.getenv(name)

    try:
        # Guard against a missing key: None.replace() would raise.
        private_key = _secret("PRIVATE_KEY") or ""
        service_account_info = {
            "type": _secret("SERVICE_ACCOUNT_TYPE"),
            "project_id": _secret("PROJECT_ID"),
            "private_key_id": _secret("PRIVATE_KEY_ID"),
            # Stored keys usually contain literal "\n" sequences; restore
            # real newlines so the PEM parses.
            "private_key": private_key.replace('\\n', '\n'),
            "client_email": _secret("CLIENT_EMAIL"),
            "client_id": _secret("CLIENT_ID"),
            "auth_uri": _secret("AUTH_URI"),
            "token_uri": _secret("TOKEN_URI"),
            "auth_provider_x509_cert_url": _secret("AUTH_PROVIDER_X509_CERT_URL"),
            "client_x509_cert_url": _secret("CLIENT_X509_CERT_URL"),
            "universe_domain": _secret("UNIVERSE_DOMAIN"),
        }
        scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
        creds = Credentials.from_service_account_info(service_account_info, scopes=scope)
        return creds
    except Exception as e:
        # Log and signal failure to the caller rather than raising at
        # import time; callers must handle a None return.
        print(f"Error getting credentials: {e}")
        return None
38
 
39
# Step 2: Authorize gspread using the credentials
creds = get_credentials()
# NOTE(review): get_credentials() returns None on failure; gspread.authorize
# would then fail here, at import time — confirm secrets are configured
# before deploying.
client = gspread.authorize(creds)

# Input the paths and coaching code.
# The sheet URLs are filled in below based on the coaching code.
journal_file_path = ''
panic_button_file_path = ''
test_file_path = ''
coachingCode = '1919'  # hard-coded batch/cohort selector

if coachingCode == '1919':
    journal_file_path = 'https://docs.google.com/spreadsheets/d/1EFf2lr4A10nt4RhIqxCD_fxe-l3sXH09II0TEkMmvhA/edit?usp=drive_link'
    panic_button_file_path = 'https://docs.google.com/spreadsheets/d/1nFZGkCvRV6qS-mhsORhX3dxI0JSge32_UwWgWKl3eyw/edit?usp=drive_link'
    test_file_path = 'https://docs.google.com/spreadsheets/d/13PUHySUXWtKBusjugoe7Dbsm39PwBUfG4tGLipspIx4/edit?usp=drive_link'

# Step 3: Open Google Sheets using the URLs.
# NOTE(review): all of this network I/O runs at module import, not per
# request — the API serves a snapshot taken at startup.
journal_file = client.open_by_url(journal_file_path).worksheet('Sheet1')
panic_button_file = client.open_by_url(panic_button_file_path).worksheet('Sheet1')
test_file = client.open_by_url(test_file_path).worksheet('Sheet1')

# Step 4: Convert the sheets into Pandas DataFrames
journal_df = pd.DataFrame(journal_file.get_all_values())
panic_button_df = pd.DataFrame(panic_button_file.get_all_values())
test_df = pd.DataFrame(test_file.get_all_values())

# Label the columns manually since the sheets have no header rows.
journal_df.columns = ['user_id', 'productivity_yes_no', 'productivity_rate']
panic_button_df.columns = ['user_id', 'panic_button']

# Initialize a list for the merged data (rebound to a DataFrame below).
merged_data = []

# Step 5: Group panic buttons by user_id and combine each user's entries
# into a single comma-separated string.
panic_button_grouped = panic_button_df.groupby('user_id')['panic_button'].apply(lambda x: ','.join(x)).reset_index()

# Merge journal and panic button data; outer join keeps users that appear
# in only one of the two sheets.
merged_journal_panic = pd.merge(journal_df, panic_button_grouped, on='user_id', how='outer')

# Step 6: Process the test data.
# Each row is: user_id, then alternating (chapter, score) pairs of
# variable length; stop at the first empty cell.
test_data = []
for index, row in test_df.iterrows():
    user_id = row[0]
    i = 1
    while i < len(row) and pd.notna(row[i]):  # walk chapter/score pairs
        chapter = row[i].lower().strip()
        score = row[i + 1]
        if pd.notna(score):
            test_data.append({'user_id': user_id, 'test_chapter': chapter, 'test_score': score})
        i += 2

# Convert the processed test data into a long-format DataFrame
# (one row per user/chapter).
test_df_processed = pd.DataFrame(test_data)

# Step 7: Merge the journal+panic button data with the test data
merged_data = pd.merge(merged_journal_panic, test_df_processed, on='user_id', how='outer')

# Step 8: Drop rows where ALL of these columns are missing
# (how='all' keeps rows with at least one value).
merged_data_cleaned = merged_data.dropna(subset=['productivity_yes_no', 'productivity_rate', 'panic_button', 'test_chapter'], how='all')

# Working copy used by the per-user grouping below.
df = pd.DataFrame(merged_data_cleaned)
100
+
101
# Function to process panic button counts and test scores
def process_group(group):
    """Collapse one user's rows into a single summary Series.

    Expects a DataFrame slice with columns: productivity_yes_no,
    productivity_rate, panic_button, test_chapter, test_score.

    Returns:
        pd.Series with the first journal values, a dict counting each
        distinct panic_button value, and a dict of mean test score per
        chapter.
    """
    # Count occurrences of each panic-button value (NaNs excluded).
    # NOTE(review): upstream joins each user's buttons into one
    # comma-separated string, so keys here may be joined strings rather
    # than single factors — verify against calculate_potential_score.
    panic_button_dict = group['panic_button'].dropna().value_counts().to_dict()

    # Copy before mutating: assigning into a bare column selection of a
    # slice triggers pandas' chained-assignment warning and may silently
    # fail to write.
    test_scores = group[['test_chapter', 'test_score']].dropna().copy()
    test_scores['test_score'] = pd.to_numeric(test_scores['test_score'], errors='coerce')

    # Mean score per chapter, dropping chapters whose scores were all
    # non-numeric (coerced to NaN above).
    test_scores_dict = test_scores.groupby('test_chapter')['test_score'].mean().dropna().to_dict()

    return pd.Series({
        'productivity_yes_no': group['productivity_yes_no'].iloc[0],
        'productivity_rate': group['productivity_rate'].iloc[0],
        'panic_button': panic_button_dict,
        'test_scores': test_scores_dict
    })
120
+
121
# Collapse each user's rows into one summary row
# (panic counts + per-chapter mean test scores).
merged_df = df.groupby('user_id').apply(process_group).reset_index()

# Step 9: Calculate potential score
# Panic button weightages: all negative, so each press lowers the score.
academic_weights = {'BACKLOGS': -5, 'MISSED CLASSES': -4, 'NOT UNDERSTANDING': -3, 'BAD MARKS': -3, 'LACK OF MOTIVATION': -3}
non_academic_weights = {'EMOTIONAL FACTORS': -3, 'PROCRASTINATE': -2, 'LOST INTEREST': -4, 'LACK OF FOCUS': -2, 'GOALS NOT ACHIEVED': -2, 'LACK OF DISCIPLINE': -2}

# Normalization denominator for the panic component.
# NOTE(review): max() over all-negative weights picks the *least* negative
# values, giving -15 here; if the intent was the worst case, min() was
# probably meant — confirm the scoring design.
max_weighted_panic_score = sum([max(academic_weights.values()) * 3, max(non_academic_weights.values()) * 3])
131
+
132
# Function to calculate potential score
def calculate_potential_score(row, academic=None, non_academic=None, max_panic=None):
    """Compute one user's potential score (tests 70 / panic 20 / journal 10).

    Args:
        row: mapping with keys 'test_scores' (dict chapter -> numeric
            score), 'panic_button' (dict factor -> count),
            'productivity_yes_no' and 'productivity_rate'.
        academic: optional override for the module-level academic_weights.
        non_academic: optional override for non_academic_weights.
        max_panic: optional override for max_weighted_panic_score.

    Returns:
        float total potential score.
    """
    # Resolve weight tables: fall back to the module-level globals so the
    # original call signature keeps working unchanged.
    aw = academic_weights if academic is None else academic
    naw = non_academic_weights if non_academic is None else non_academic
    max_w = max_weighted_panic_score if max_panic is None else max_panic

    # Test component (70% weightage): mean chapter score scaled from a
    # maximum of 40 to a maximum of 70.
    if row['test_scores']:
        avg_test_score = sum(row['test_scores'].values()) / len(row['test_scores'])
        test_score_normalized = (avg_test_score / 40) * 70
    else:
        test_score_normalized = 0  # no tests recorded for this user

    # Panic component (20% weightage): weighted sum of panic-button
    # presses. Academic weights take priority over non-academic when a
    # factor appears in both tables.
    student_panic_score = 0
    if row['panic_button']:
        for factor, count in row['panic_button'].items():
            if factor in aw:
                student_panic_score += aw[factor] * count
            elif factor in naw:
                student_panic_score += naw[factor] * count

    # Normalize to 20; with a zero denominator the ratio is treated as 0,
    # which yields the same score (20) as the original expression.
    ratio = (student_panic_score / max_w) if max_w != 0 else 0
    panic_score = 20 * (1 - ratio)

    # Journal component: full 10-point weight for 'Yes', half weight (5)
    # for 'No', zero when either field is missing.
    yes_no = row['productivity_yes_no']
    if pd.notna(yes_no) and yes_no in ('Yes', 'No') and pd.notna(row['productivity_rate']):
        scale = 10 if yes_no == 'Yes' else 5
        journal_score = (float(row['productivity_rate']) / 10) * scale
    else:
        journal_score = 0

    # Total score based on the three weighted components.
    return test_score_normalized + panic_score + journal_score
172
+
173
# Apply potential score calculation to every user row (axis=1 => per row)
merged_df['potential_score'] = merged_df.apply(calculate_potential_score, axis=1)
# Round to 2 decimals for presentation in the API response
merged_df['potential_score'] = merged_df['potential_score'].round(2)

# Step 10: Sort users by potential score, highest first
sorted_df = merged_df[['user_id', 'potential_score']].sort_values(by='potential_score', ascending=False)
179
+
180
# Step 11: Define API endpoint to get the sorted potential scores
@app.get("/sorted-potential-scores")
async def get_sorted_potential_scores():
    """Return every user's potential score, highest first."""
    try:
        records = sorted_df.to_dict(orient="records")
    except Exception as exc:
        # Surface any failure as a 500 with the underlying message.
        raise HTTPException(status_code=500, detail=str(exc))
    return {"sorted_scores": records}