Spaces:

logeswari
/

cap_backend

Sleeping

App Files Files Community

logeswari committed on Mar 20, 2025

Commit

efcc6b0

1 Parent(s): fd38566

commit the changes

Browse files

Files changed (19) hide show

__pycache__/config.cpython-313.pyc +0 -0
__pycache__/database.cpython-313.pyc +0 -0
__pycache__/main.cpython-313.pyc +0 -0
__pycache__/schemas.cpython-313.pyc +0 -0
database.py +9 -1
main.py +6 -2
models/__pycache__/schemas.cpython-313.pyc +0 -0
models/label_encoder.pkl +3 -0
models/performance_model.pkl +3 -0
models/retention_model.pkl +3 -0
models/satisfaction_model.pkl +3 -0
models/train_models.py +68 -0
models/training_model.pkl +3 -0
requirements.txt +2 -0
routes/__pycache__/analytics.cpython-313.pyc +0 -0
routes/analytics.py +144 -25
models/schemas.py → schemas.py +22 -0
utils/__pycache__/load_models.cpython-313.pyc +0 -0
utils/load_models.py +14 -0

__pycache__/config.cpython-313.pyc ADDED Viewed

Binary file (374 Bytes). View file

__pycache__/database.cpython-313.pyc CHANGED Viewed

Binary files a/__pycache__/database.cpython-313.pyc and b/__pycache__/database.cpython-313.pyc differ

__pycache__/main.cpython-313.pyc CHANGED Viewed

Binary files a/__pycache__/main.cpython-313.pyc and b/__pycache__/main.cpython-313.pyc differ

__pycache__/schemas.cpython-313.pyc ADDED Viewed

Binary file (1.88 kB). View file

database.py CHANGED Viewed

@@ -1,9 +1,15 @@
 import os
 from dotenv import load_dotenv
 from supabase import create_client, Client
 from dotenv import load_dotenv
 load_dotenv(dotenv_path=".env")  # ✅ Explicitly load the .env file
-from config import SUPABASE_URL, SUPABASE_KEY
 # Load environment variables
@@ -15,3 +21,5 @@ SUPABASE_KEY = os.getenv("SUPABASE_KEY")
 # Initialize Supabase client
 supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

+import sys
+import os
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
 import os
 from dotenv import load_dotenv
 from supabase import create_client, Client
 from dotenv import load_dotenv
 load_dotenv(dotenv_path=".env")  # ✅ Explicitly load the .env file
+from cap_backend.config import SUPABASE_URL, SUPABASE_KEY
 # Load environment variables
 # Initialize Supabase client
 supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

main.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from fastapi import FastAPI
-from routes import analytics
 app = FastAPI()
@@ -9,4 +9,8 @@ app.include_router(analytics.router)
 @app.get("/")
 def home():
-    return {"message": "HR Analytics API is running"}

 from fastapi import FastAPI
+from cap_backend.routes import analytics
 app = FastAPI()
 @app.get("/")
 def home():
+    return {"message": "HR Analytics API is running"}
+def home():
+    return

models/__pycache__/schemas.cpython-313.pyc ADDED Viewed

Binary file (1.89 kB). View file

models/label_encoder.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:33c5b189b3ee4e2892e03ce6ac61395c9f8c99dde50f0875a121ac3934f54d40
+size 548

models/performance_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:863f3f32e36dc3d7dea16c5ba3e2d5865de646da571cd1835b6025b7c0bc45fe
+size 1000

models/retention_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:18d3de0838ba22e2188993f2402d00875fe80912eb45084c36a4208a32eb4ae5
+size 1394281

models/satisfaction_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9d90eb58e370a9bc92631aeefaa0f4da05798703791340a2e378b8e3fc26e4ca
+size 960

models/train_models.py ADDED Viewed

	@@ -0,0 +1,68 @@

+# app/models/train_models.py
+import sys
+import os
+# Add backend directory to PYTHONPATH
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
+import pandas as pd
+from sklearn.linear_model import LinearRegression
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.preprocessing import LabelEncoder
+import joblib
+import os
+# Fetch data from Supabase
+from database import supabase
+response = supabase.table("HR_analysis").select("*").execute()
+df = pd.DataFrame(response.data) if response.data else pd.DataFrame()
+# Encode categorical data
+label_enc = LabelEncoder()
+df['Performance Score'] = label_enc.fit_transform(df['Performance Score'])
+df['EmployeeStatus'] = label_enc.fit_transform(df['EmployeeStatus'])
+df['Training Outcome'] = label_enc.fit_transform(df['Training Outcome'])
+df['Training Type'] = label_enc.fit_transform(df['Training Type'])
+# Save label encoder
+joblib.dump(label_enc, 'models/label_encoder.pkl')
+# Prepare training data
+X_satisfaction = df[['Engagement Score', 'Work-Life Balance Score', 'Performance Score']]
+y_satisfaction = df['Satisfaction Score']
+X_performance = df[['Satisfaction Score', 'Engagement Score', 'Training Duration(Days)', 'Training Cost']]
+y_performance = df['Current Employee Rating']
+X_retention = df[['Satisfaction Score', 'Engagement Score', 'Performance Score']]
+y_retention = df['EmployeeStatus']
+X_training = df[['Training Type', 'Training Duration(Days)', 'Training Cost']]
+y_training = df['Training Outcome']
+# Train and Save Models
+print("Training models...")
+# Linear Regression Models
+satisfaction_model = LinearRegression()
+satisfaction_model.fit(X_satisfaction, y_satisfaction)
+joblib.dump(satisfaction_model, 'models/satisfaction_model.pkl')
+performance_model = LinearRegression()
+performance_model.fit(X_performance, y_performance)
+joblib.dump(performance_model, 'models/performance_model.pkl')
+# Classification Models
+retention_model = RandomForestClassifier()
+retention_model.fit(X_retention, y_retention)
+joblib.dump(retention_model, 'models/retention_model.pkl')
+training_model = RandomForestClassifier()
+training_model.fit(X_training, y_training)
+joblib.dump(training_model, 'models/training_model.pkl')
+print("✅ Models trained and saved successfully!")

models/training_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aabc910c5f2e537f8ee7eb47c3ebc96c385c926772a0cfb5de77cb14467ecd2d
+size 25943001

requirements.txt CHANGED Viewed

@@ -3,3 +3,5 @@ uvicorn
 pandas
 supabase
 python-dotenv

 pandas
 supabase
 python-dotenv
+scikit-learn
+joblib

routes/__pycache__/analytics.cpython-313.pyc ADDED Viewed

Binary file (14.1 kB). View file

routes/analytics.py CHANGED Viewed

@@ -1,13 +1,19 @@
 from fastapi import APIRouter, HTTPException, Query
 import pandas as pd
-from database import supabase
 from dotenv import load_dotenv
 router = APIRouter()
 # Fetch data from Supabase
 try:
-    response = supabase.table("HR analysis").select("*").execute()
     data = pd.DataFrame(response.data) if response.data else pd.DataFrame()
 except Exception as e:
     print(f"Error fetching data: {e}")
@@ -30,12 +36,20 @@ if 'Performance Score' in data.columns:
 @router.get("/satisfaction-analysis")
 def satisfaction_analysis(department: str = Query(None, description="Filter by department")):
     try:
         if "DepartmentType" not in data.columns or "Satisfaction Score" not in data.columns:
             raise HTTPException(status_code=500, detail="Required columns missing in dataset")
         filtered_data = data.copy()
         if department:
             department = department.strip().title()
             filtered_data = filtered_data[filtered_data["DepartmentType"].str.strip().str.title() == department]
@@ -50,6 +64,12 @@ def satisfaction_analysis(department: str = Query(None, description="Filter by d
 @router.get("/department-performance")
 def department_performance():
     try:
         result = data.groupby("DepartmentType")[["Performance Score", "Current Employee Rating"]].mean().reset_index()
         return result.to_dict(orient="records")
@@ -58,9 +78,17 @@ def department_performance():
 @router.get("/training-analytics")
 def training_analytics(program_name: str = Query(None, description="Filter by training program name")):
     try:
         filtered_data = data if program_name is None else data[data["Training Program Name"] == program_name]
         if filtered_data.empty:
             return []
@@ -69,10 +97,14 @@ def training_analytics(program_name: str = Query(None, description="Filter by tr
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @router.get("/engagement-performance")
 def engagement_performance():
     try:
         correlation = data[['Engagement Score', 'Performance Score']].corr().iloc[0, 1]
         return {"correlation_coefficient": correlation}
@@ -81,6 +113,12 @@ def engagement_performance():
 @router.get("/cost-benefit-analysis")
 def cost_benefit_analysis():
     try:
         result = data.groupby("DepartmentType").apply(lambda x: x['Performance Score'].mean() / x['Training Cost'].sum()).reset_index(name="ROI")
         return result.to_dict(orient="records")
@@ -89,6 +127,12 @@ def cost_benefit_analysis():
 @router.get("/training-effectiveness")
 def training_effectiveness():
     try:
         result = data.groupby("Training Program Name")["Performance Score"].mean().reset_index()
         return result.to_dict(orient="records")
@@ -97,46 +141,121 @@ def training_effectiveness():
 @router.get("/diversity-inclusion")
 def diversity_dashboard():
-    try:
-        if "DepartmentType" not in data.columns or "GenderCode" not in data.columns:
-            raise HTTPException(status_code=500, detail="Required columns missing in dataset")
-        # Compute gender distribution by department
         diversity_metrics = data.groupby("DepartmentType")["GenderCode"].value_counts(normalize=True).unstack(fill_value=0).reset_index()
         return diversity_metrics.to_dict(orient="records")
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @router.get("/work-life-balance")
 def worklife_balance_impact():
     try:
         correlation = data[['Work-Life Balance Score', 'Performance Score']].corr().iloc[0, 1]
-        return {"correlation_coefficient": round(correlation, 3)}  # Return as a JSON object
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @router.get("/career-development")
 def career_development(employee_id: str = Query(None, description="Filter by Employee ID")):
     try:
-        if "Employee ID" not in data.columns or "StartDate" not in data.columns:
-            raise HTTPException(status_code=500, detail="Required columns missing in dataset")
-        # Print available Employee IDs for debugging
-        print("Available Employee IDs:", data["Employee ID"].unique())
-        filtered_data = data.copy()
-        if employee_id:
-            employee_id = employee_id.strip()  # Remove leading/trailing spaces
-            filtered_data = filtered_data[filtered_data["Employee ID"].astype(str) == employee_id]
-        if filtered_data.empty:
-            return []  # Return an empty list if no matching records
-        career_progress = filtered_data.groupby("Employee ID")["StartDate"].count().reset_index(name="Career Movements")
-        return career_progress.to_dict(orient="records")
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

 from fastapi import APIRouter, HTTPException, Query
 import pandas as pd
+from cap_backend.database import supabase
 from dotenv import load_dotenv
+from cap_backend.schemas import (
+    SatisfactionRequest, PerformanceRequest, RetentionRequest, TrainingRequest
+)
+from cap_backend.utils.load_models import (
+    satisfaction_model, performance_model, retention_model, training_model, label_enc
+)
 router = APIRouter()
 # Fetch data from Supabase
 try:
+    response = supabase.table("HR_analysis").select("*").execute()
     data = pd.DataFrame(response.data) if response.data else pd.DataFrame()
 except Exception as e:
     print(f"Error fetching data: {e}")
 @router.get("/satisfaction-analysis")
 def satisfaction_analysis(department: str = Query(None, description="Filter by department")):
+    """
+    Get average satisfaction score for each department.
+    Args:
+        department (str, optional): Filter by department name.
+    Returns:
+        list: A list of average satisfaction scores per department.
+    """
     try:
         if "DepartmentType" not in data.columns or "Satisfaction Score" not in data.columns:
             raise HTTPException(status_code=500, detail="Required columns missing in dataset")
         filtered_data = data.copy()
         if department:
             department = department.strip().title()
             filtered_data = filtered_data[filtered_data["DepartmentType"].str.strip().str.title() == department]
 @router.get("/department-performance")
 def department_performance():
+    """
+    Get average performance score and employee rating by department.
+    Returns:
+        list: A list of average scores per department.
+    """
     try:
         result = data.groupby("DepartmentType")[["Performance Score", "Current Employee Rating"]].mean().reset_index()
         return result.to_dict(orient="records")
 @router.get("/training-analytics")
 def training_analytics(program_name: str = Query(None, description="Filter by training program name")):
+    """
+    Get training program analytics.
+    Args:
+        program_name (str, optional): Filter by training program name.
+    Returns:
+        list: Training completion rates per program.
+    """
     try:
         filtered_data = data if program_name is None else data[data["Training Program Name"] == program_name]
         if filtered_data.empty:
             return []
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @router.get("/engagement-performance")
 def engagement_performance():
+    """
+    Get correlation between engagement score and performance score.
+    Returns:
+        dict: Correlation coefficient.
+    """
     try:
         correlation = data[['Engagement Score', 'Performance Score']].corr().iloc[0, 1]
         return {"correlation_coefficient": correlation}
 @router.get("/cost-benefit-analysis")
 def cost_benefit_analysis():
+    """
+    Calculate Return on Investment (ROI) for training programs.
+    Returns:
+        list: ROI per department.
+    """
     try:
         result = data.groupby("DepartmentType").apply(lambda x: x['Performance Score'].mean() / x['Training Cost'].sum()).reset_index(name="ROI")
         return result.to_dict(orient="records")
 @router.get("/training-effectiveness")
 def training_effectiveness():
+    """
+    Get average performance score after training.
+    Returns:
+        list: Average performance score per training program.
+    """
     try:
         result = data.groupby("Training Program Name")["Performance Score"].mean().reset_index()
         return result.to_dict(orient="records")
 @router.get("/diversity-inclusion")
 def diversity_dashboard():
+    """
+    Get gender diversity breakdown by department.
+    Returns:
+        list: One record per DepartmentType holding, for each GenderCode value,
+        its share within that department as a 0-1 fraction (not a percentage);
+        codes absent from a department are filled with 0.
+    Raises:
+        HTTPException: 500 carrying the underlying error message on any failure.
+    """
+    try:
         diversity_metrics = data.groupby("DepartmentType")["GenderCode"].value_counts(normalize=True).unstack(fill_value=0).reset_index()
         return diversity_metrics.to_dict(orient="records")
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @router.get("/work-life-balance")
 def worklife_balance_impact():
+    """
+    Get correlation between work-life balance score and performance score.
+    Returns:
+        dict: Correlation coefficient between work-life balance and performance.
+    """
     try:
+        if "Work-Life Balance Score" not in data.columns or "Performance Score" not in data.columns:
+            raise HTTPException(status_code=500, detail="Required columns missing in dataset")
         correlation = data[['Work-Life Balance Score', 'Performance Score']].corr().iloc[0, 1]
+        return {"correlation_coefficient": round(correlation, 3)}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @router.get("/career-development")
 def career_development(employee_id: str = Query(None, description="Filter by Employee ID")):
+    """
+    Get career development data.
+    Args:
+        employee_id (str, optional): Filter by employee ID.
+    Returns:
+        list: Career movements per employee.
+    """
     try:
+        filtered_data = data if employee_id is None else data[data["Employee ID"] == employee_id]
+        career_progress = filtered_data.groupby("Employee ID")["StartDate"].count().reset_index(name="Career Movements")
+        return career_progress.to_dict(orient="records")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+# ✅ Prediction Endpoints
+@router.post('/predict/satisfaction')
+def predict_satisfaction(data: SatisfactionRequest):
+    """
+    Predict employee satisfaction score.
+    Args:
+        data (SatisfactionRequest): Satisfaction model inputs. Note that this
+            parameter shadows the module-level ``data`` DataFrame inside the
+            function.
+    Returns:
+        dict: ``{'satisfaction_score': value}`` with the model's prediction.
+    Raises:
+        HTTPException: 500 carrying the underlying error message on any failure.
+    """
+    try:
+        # Feature order must match the columns used in models/train_models.py:
+        # Engagement Score, Work-Life Balance Score, Performance Score.
+        # The training script label-encodes 'Performance Score' before fitting,
+        # so performance_score is expected to be that integer code.
+        prediction = satisfaction_model.predict([[data.engagement_score, data.work_life_balance_score, data.performance_score]])
+        return {'satisfaction_score': prediction[0]}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@router.post('/predict/performance')
+def predict_performance(data: PerformanceRequest):
+    """
+    Predict employee performance score.
+    Args:
+        data (PerformanceRequest): Performance model inputs.
+    Returns:
+        dict: ``{'performance_score': value}``. The model behind this endpoint
+        is fit on 'Current Employee Rating' in models/train_models.py, so the
+        value is a predicted rating.
+    Raises:
+        HTTPException: 500 carrying the underlying error message on any failure.
+    """
+    try:
+        # Feature order must match the training columns: Satisfaction Score,
+        # Engagement Score, Training Duration(Days), Training Cost.
+        prediction = performance_model.predict([[data.satisfaction_score, data.engagement_score, data.training_duration, data.training_cost]])
+        return {'performance_score': prediction[0]}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@router.post('/predict/retention')
+def predict_retention(data: RetentionRequest):
+    """
+    Predict employee retention risk.
+    Args:
+        data (RetentionRequest): Retention model inputs.
+    Returns:
+        dict: ``{'retention_risk': label}`` where the label is the decoded
+        class predicted by the retention model.
+    Raises:
+        HTTPException: 500 carrying the underlying error message on any failure.
+    """
+    try:
+        # Feature order must match the training columns: Satisfaction Score,
+        # Engagement Score, Performance Score (label-encoded during training).
+        prediction = retention_model.predict([[data.satisfaction_score, data.engagement_score, data.performance_score]])
+        # NOTE(review): models/train_models.py refits a single LabelEncoder on four
+        # columns and saves it after the last fit ('Training Type'), so label_enc
+        # appears to hold Training Type classes rather than EmployeeStatus ones.
+        # Confirm the decoded label is the intended one.
+        result = label_enc.inverse_transform(prediction)
+        return {'retention_risk': result[0]}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@router.post('/predict/training')
+def predict_training(data: TrainingRequest):
+    """
+    Predict training success.
+    Args:
+        data (TrainingRequest): Training model inputs. ``training_type`` is the
+            integer code of the training type, since the training script
+            label-encodes 'Training Type' before fitting.
+    Returns:
+        dict: ``{'training_success': label}`` where the label is the decoded
+        class predicted by the training-outcome model.
+    Raises:
+        HTTPException: 500 carrying the underlying error message on any failure.
+    """
+    try:
+        # Feature order must match the training columns: Training Type,
+        # Training Duration(Days), Training Cost.
+        prediction = training_model.predict([[data.training_type, data.training_duration, data.training_cost]])
+        # NOTE(review): label_enc is saved after its last fit on 'Training Type' in
+        # models/train_models.py, while this model predicts 'Training Outcome'
+        # codes. Confirm the decoded label is the intended one.
+        result = label_enc.inverse_transform(prediction)
+        return {'training_success': result[0]}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

models/schemas.py → schemas.py RENAMED Viewed

@@ -8,3 +8,25 @@ class HRAnalysis(BaseModel):
     Performance_Score: Optional[int]
     Training_Program_Name: Optional[str]
     Training_Outcome: Optional[str]

     Performance_Score: Optional[int]
     Training_Program_Name: Optional[str]
     Training_Outcome: Optional[str]
+class SatisfactionRequest(BaseModel):
+    """Request body for POST /predict/satisfaction."""
+    # Fields are passed to the satisfaction model in this order.
+    engagement_score: int
+    work_life_balance_score: int
+    # Presumably the label-encoded performance category used at training time; confirm with clients.
+    performance_score: int
+class PerformanceRequest(BaseModel):
+    """Request body for POST /predict/performance."""
+    # Fields are passed to the performance model in this order.
+    satisfaction_score: int
+    engagement_score: int
+    # Corresponds to the 'Training Duration(Days)' training column.
+    training_duration: int
+    training_cost: float
+class RetentionRequest(BaseModel):
+    """Request body for POST /predict/retention."""
+    # Fields are passed to the retention model in this order.
+    satisfaction_score: int
+    engagement_score: int
+    # Presumably the label-encoded performance category used at training time; confirm with clients.
+    performance_score: int
+class TrainingRequest(BaseModel):
+    """Request body for POST /predict/training."""
+    # Integer code of the training type (the training script label-encodes 'Training Type').
+    training_type: int
+    # Corresponds to the 'Training Duration(Days)' training column.
+    training_duration: int
+    training_cost: float

utils/__pycache__/load_models.cpython-313.pyc ADDED Viewed

Binary file (1.18 kB). View file

utils/load_models.py ADDED Viewed

	@@ -0,0 +1,14 @@

+# app/utils/load_models.py
+# Loads the trained models and the label encoder once, at import time, and
+# exposes them as module-level names for the route handlers to import.
+import joblib
+import os
+# The .pkl artifacts live in the sibling ``models`` directory, resolved relative
+# to this file rather than the current working directory.
+models_path = os.path.join(os.path.dirname(__file__), '../models')
+# Load models
+# NOTE(review): joblib.load unpickles arbitrary objects, so these files must come
+# from a trusted source (the project's own training script).
+# NOTE(review): the .pkl files are tracked with Git LFS in this commit; if only the
+# pointer files are checked out, these loads will presumably fail at import.
+# Confirm the deployment pulls LFS objects.
+satisfaction_model = joblib.load(os.path.join(models_path, 'satisfaction_model.pkl'))
+performance_model = joblib.load(os.path.join(models_path, 'performance_model.pkl'))
+retention_model = joblib.load(os.path.join(models_path, 'retention_model.pkl'))
+training_model = joblib.load(os.path.join(models_path, 'training_model.pkl'))
+# Load label encoder
+label_enc = joblib.load(os.path.join(models_path, 'label_encoder.pkl'))