logeswari committed on
Commit
efcc6b0
·
1 Parent(s): fd38566

commit the changes

Browse files
__pycache__/config.cpython-313.pyc ADDED
Binary file (374 Bytes). View file
 
__pycache__/database.cpython-313.pyc CHANGED
Binary files a/__pycache__/database.cpython-313.pyc and b/__pycache__/database.cpython-313.pyc differ
 
__pycache__/main.cpython-313.pyc CHANGED
Binary files a/__pycache__/main.cpython-313.pyc and b/__pycache__/main.cpython-313.pyc differ
 
__pycache__/schemas.cpython-313.pyc ADDED
Binary file (1.88 kB). View file
 
database.py CHANGED
@@ -1,9 +1,15 @@
 
 
 
 
 
 
1
  import os
2
  from dotenv import load_dotenv
3
  from supabase import create_client, Client
4
  from dotenv import load_dotenv
5
  load_dotenv(dotenv_path=".env") # ✅ Explicitly load the .env file
6
- from config import SUPABASE_URL, SUPABASE_KEY
7
 
8
 
9
  # Load environment variables
@@ -15,3 +21,5 @@ SUPABASE_KEY = os.getenv("SUPABASE_KEY")
15
 
16
  # Initialize Supabase client
17
  supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
 
 
 
1
+ import sys
2
+ import os
3
+
4
+
5
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
6
+
7
  import os
8
  from dotenv import load_dotenv
9
  from supabase import create_client, Client
10
  from dotenv import load_dotenv
11
  load_dotenv(dotenv_path=".env") # ✅ Explicitly load the .env file
12
+ from cap_backend.config import SUPABASE_URL, SUPABASE_KEY
13
 
14
 
15
  # Load environment variables
 
21
 
22
  # Initialize Supabase client
23
  supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
24
+
25
+
main.py CHANGED
@@ -1,5 +1,5 @@
1
  from fastapi import FastAPI
2
- from routes import analytics
3
 
4
 
5
  app = FastAPI()
@@ -9,4 +9,8 @@ app.include_router(analytics.router)
9
 
10
  @app.get("/")
11
  def home():
12
- return {"message": "HR Analytics API is running"}
 
 
 
 
 
1
  from fastapi import FastAPI
2
+ from cap_backend.routes import analytics
3
 
4
 
5
  app = FastAPI()
 
9
 
10
  @app.get("/")
11
  def home():
12
+ return {"message": "HR Analytics API is running"}
13
+
14
+
15
+ def home():
16
+ return
models/__pycache__/schemas.cpython-313.pyc ADDED
Binary file (1.89 kB). View file
 
models/label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33c5b189b3ee4e2892e03ce6ac61395c9f8c99dde50f0875a121ac3934f54d40
3
+ size 548
models/performance_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:863f3f32e36dc3d7dea16c5ba3e2d5865de646da571cd1835b6025b7c0bc45fe
3
+ size 1000
models/retention_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18d3de0838ba22e2188993f2402d00875fe80912eb45084c36a4208a32eb4ae5
3
+ size 1394281
models/satisfaction_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d90eb58e370a9bc92631aeefaa0f4da05798703791340a2e378b8e3fc26e4ca
3
+ size 960
models/train_models.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/models/train_models.py
2
+ import sys
3
+ import os
4
+
5
+ # Add backend directory to PYTHONPATH
6
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
7
+
8
+
9
+
10
+ import pandas as pd
11
+ from sklearn.linear_model import LinearRegression
12
+ from sklearn.ensemble import RandomForestClassifier
13
+ from sklearn.preprocessing import LabelEncoder
14
+ import joblib
15
+ import os
16
+
17
+ # Fetch data from Supabase
18
+ from database import supabase
19
+
20
+
21
+ response = supabase.table("HR_analysis").select("*").execute()
22
+ df = pd.DataFrame(response.data) if response.data else pd.DataFrame()
23
+
24
+ # Encode categorical data
25
+ label_enc = LabelEncoder()
26
+ df['Performance Score'] = label_enc.fit_transform(df['Performance Score'])
27
+ df['EmployeeStatus'] = label_enc.fit_transform(df['EmployeeStatus'])
28
+ df['Training Outcome'] = label_enc.fit_transform(df['Training Outcome'])
29
+ df['Training Type'] = label_enc.fit_transform(df['Training Type'])
30
+
31
+ # Save label encoder
32
+ joblib.dump(label_enc, 'models/label_encoder.pkl')
33
+
34
+ # Prepare training data
35
+ X_satisfaction = df[['Engagement Score', 'Work-Life Balance Score', 'Performance Score']]
36
+ y_satisfaction = df['Satisfaction Score']
37
+
38
+ X_performance = df[['Satisfaction Score', 'Engagement Score', 'Training Duration(Days)', 'Training Cost']]
39
+ y_performance = df['Current Employee Rating']
40
+
41
+ X_retention = df[['Satisfaction Score', 'Engagement Score', 'Performance Score']]
42
+ y_retention = df['EmployeeStatus']
43
+
44
+ X_training = df[['Training Type', 'Training Duration(Days)', 'Training Cost']]
45
+ y_training = df['Training Outcome']
46
+
47
+ # Train and Save Models
48
+ print("Training models...")
49
+
50
+ # Linear Regression Models
51
+ satisfaction_model = LinearRegression()
52
+ satisfaction_model.fit(X_satisfaction, y_satisfaction)
53
+ joblib.dump(satisfaction_model, 'models/satisfaction_model.pkl')
54
+
55
+ performance_model = LinearRegression()
56
+ performance_model.fit(X_performance, y_performance)
57
+ joblib.dump(performance_model, 'models/performance_model.pkl')
58
+
59
+ # Classification Models
60
+ retention_model = RandomForestClassifier()
61
+ retention_model.fit(X_retention, y_retention)
62
+ joblib.dump(retention_model, 'models/retention_model.pkl')
63
+
64
+ training_model = RandomForestClassifier()
65
+ training_model.fit(X_training, y_training)
66
+ joblib.dump(training_model, 'models/training_model.pkl')
67
+
68
+ print("✅ Models trained and saved successfully!")
models/training_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aabc910c5f2e537f8ee7eb47c3ebc96c385c926772a0cfb5de77cb14467ecd2d
3
+ size 25943001
requirements.txt CHANGED
@@ -3,3 +3,5 @@ uvicorn
3
  pandas
4
  supabase
5
  python-dotenv
 
 
 
3
  pandas
4
  supabase
5
  python-dotenv
6
+ scikit-learn
7
+ joblib
routes/__pycache__/analytics.cpython-313.pyc ADDED
Binary file (14.1 kB). View file
 
routes/analytics.py CHANGED
@@ -1,13 +1,19 @@
1
  from fastapi import APIRouter, HTTPException, Query
2
  import pandas as pd
3
- from database import supabase
4
  from dotenv import load_dotenv
 
 
 
 
 
 
5
 
6
  router = APIRouter()
7
 
8
  # Fetch data from Supabase
9
  try:
10
- response = supabase.table("HR analysis").select("*").execute()
11
  data = pd.DataFrame(response.data) if response.data else pd.DataFrame()
12
  except Exception as e:
13
  print(f"Error fetching data: {e}")
@@ -30,12 +36,20 @@ if 'Performance Score' in data.columns:
30
 
31
  @router.get("/satisfaction-analysis")
32
  def satisfaction_analysis(department: str = Query(None, description="Filter by department")):
 
 
 
 
 
 
 
 
 
33
  try:
34
  if "DepartmentType" not in data.columns or "Satisfaction Score" not in data.columns:
35
  raise HTTPException(status_code=500, detail="Required columns missing in dataset")
36
 
37
  filtered_data = data.copy()
38
-
39
  if department:
40
  department = department.strip().title()
41
  filtered_data = filtered_data[filtered_data["DepartmentType"].str.strip().str.title() == department]
@@ -50,6 +64,12 @@ def satisfaction_analysis(department: str = Query(None, description="Filter by d
50
 
51
  @router.get("/department-performance")
52
  def department_performance():
 
 
 
 
 
 
53
  try:
54
  result = data.groupby("DepartmentType")[["Performance Score", "Current Employee Rating"]].mean().reset_index()
55
  return result.to_dict(orient="records")
@@ -58,9 +78,17 @@ def department_performance():
58
 
59
  @router.get("/training-analytics")
60
  def training_analytics(program_name: str = Query(None, description="Filter by training program name")):
 
 
 
 
 
 
 
 
 
61
  try:
62
  filtered_data = data if program_name is None else data[data["Training Program Name"] == program_name]
63
-
64
  if filtered_data.empty:
65
  return []
66
 
@@ -69,10 +97,14 @@ def training_analytics(program_name: str = Query(None, description="Filter by tr
69
  except Exception as e:
70
  raise HTTPException(status_code=500, detail=str(e))
71
 
72
-
73
-
74
  @router.get("/engagement-performance")
75
  def engagement_performance():
 
 
 
 
 
 
76
  try:
77
  correlation = data[['Engagement Score', 'Performance Score']].corr().iloc[0, 1]
78
  return {"correlation_coefficient": correlation}
@@ -81,6 +113,12 @@ def engagement_performance():
81
 
82
  @router.get("/cost-benefit-analysis")
83
  def cost_benefit_analysis():
 
 
 
 
 
 
84
  try:
85
  result = data.groupby("DepartmentType").apply(lambda x: x['Performance Score'].mean() / x['Training Cost'].sum()).reset_index(name="ROI")
86
  return result.to_dict(orient="records")
@@ -89,6 +127,12 @@ def cost_benefit_analysis():
89
 
90
  @router.get("/training-effectiveness")
91
  def training_effectiveness():
 
 
 
 
 
 
92
  try:
93
  result = data.groupby("Training Program Name")["Performance Score"].mean().reset_index()
94
  return result.to_dict(orient="records")
@@ -97,46 +141,121 @@ def training_effectiveness():
97
 
98
  @router.get("/diversity-inclusion")
99
  def diversity_dashboard():
100
- try:
101
- if "DepartmentType" not in data.columns or "GenderCode" not in data.columns:
102
- raise HTTPException(status_code=500, detail="Required columns missing in dataset")
103
 
104
- # Compute gender distribution by department
 
 
 
105
  diversity_metrics = data.groupby("DepartmentType")["GenderCode"].value_counts(normalize=True).unstack(fill_value=0).reset_index()
106
-
107
  return diversity_metrics.to_dict(orient="records")
108
  except Exception as e:
109
  raise HTTPException(status_code=500, detail=str(e))
110
 
111
-
112
  @router.get("/work-life-balance")
113
  def worklife_balance_impact():
 
 
 
 
 
 
114
  try:
 
 
 
115
  correlation = data[['Work-Life Balance Score', 'Performance Score']].corr().iloc[0, 1]
116
- return {"correlation_coefficient": round(correlation, 3)} # Return as a JSON object
117
  except Exception as e:
118
  raise HTTPException(status_code=500, detail=str(e))
119
 
120
 
121
  @router.get("/career-development")
122
  def career_development(employee_id: str = Query(None, description="Filter by Employee ID")):
 
 
 
 
 
 
 
 
 
123
  try:
124
- if "Employee ID" not in data.columns or "StartDate" not in data.columns:
125
- raise HTTPException(status_code=500, detail="Required columns missing in dataset")
 
 
 
126
 
127
- # Print available Employee IDs for debugging
128
- print("Available Employee IDs:", data["Employee ID"].unique())
 
 
 
129
 
130
- filtered_data = data.copy()
 
 
 
 
 
 
 
 
 
 
131
 
132
- if employee_id:
133
- employee_id = employee_id.strip() # Remove leading/trailing spaces
134
- filtered_data = filtered_data[filtered_data["Employee ID"].astype(str) == employee_id]
 
135
 
136
- if filtered_data.empty:
137
- return [] # Return an empty list if no matching records
138
 
139
- career_progress = filtered_data.groupby("Employee ID")["StartDate"].count().reset_index(name="Career Movements")
140
- return career_progress.to_dict(orient="records")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  except Exception as e:
142
  raise HTTPException(status_code=500, detail=str(e))
 
1
  from fastapi import APIRouter, HTTPException, Query
2
  import pandas as pd
3
+ from cap_backend.database import supabase
4
  from dotenv import load_dotenv
5
+ from cap_backend.schemas import (
6
+ SatisfactionRequest, PerformanceRequest, RetentionRequest, TrainingRequest
7
+ )
8
+ from cap_backend.utils.load_models import (
9
+ satisfaction_model, performance_model, retention_model, training_model, label_enc
10
+ )
11
 
12
  router = APIRouter()
13
 
14
  # Fetch data from Supabase
15
  try:
16
+ response = supabase.table("HR_analysis").select("*").execute()
17
  data = pd.DataFrame(response.data) if response.data else pd.DataFrame()
18
  except Exception as e:
19
  print(f"Error fetching data: {e}")
 
36
 
37
  @router.get("/satisfaction-analysis")
38
  def satisfaction_analysis(department: str = Query(None, description="Filter by department")):
39
+ """
40
+ Get average satisfaction score for each department.
41
+
42
+ Args:
43
+ department (str, optional): Filter by department name.
44
+
45
+ Returns:
46
+ list: A list of average satisfaction scores per department.
47
+ """
48
  try:
49
  if "DepartmentType" not in data.columns or "Satisfaction Score" not in data.columns:
50
  raise HTTPException(status_code=500, detail="Required columns missing in dataset")
51
 
52
  filtered_data = data.copy()
 
53
  if department:
54
  department = department.strip().title()
55
  filtered_data = filtered_data[filtered_data["DepartmentType"].str.strip().str.title() == department]
 
64
 
65
  @router.get("/department-performance")
66
  def department_performance():
67
+ """
68
+ Get average performance score and employee rating by department.
69
+
70
+ Returns:
71
+ list: A list of average scores per department.
72
+ """
73
  try:
74
  result = data.groupby("DepartmentType")[["Performance Score", "Current Employee Rating"]].mean().reset_index()
75
  return result.to_dict(orient="records")
 
78
 
79
  @router.get("/training-analytics")
80
  def training_analytics(program_name: str = Query(None, description="Filter by training program name")):
81
+ """
82
+ Get training program analytics.
83
+
84
+ Args:
85
+ program_name (str, optional): Filter by training program name.
86
+
87
+ Returns:
88
+ list: Training completion rates per program.
89
+ """
90
  try:
91
  filtered_data = data if program_name is None else data[data["Training Program Name"] == program_name]
 
92
  if filtered_data.empty:
93
  return []
94
 
 
97
  except Exception as e:
98
  raise HTTPException(status_code=500, detail=str(e))
99
 
 
 
100
  @router.get("/engagement-performance")
101
  def engagement_performance():
102
+ """
103
+ Get correlation between engagement score and performance score.
104
+
105
+ Returns:
106
+ dict: Correlation coefficient.
107
+ """
108
  try:
109
  correlation = data[['Engagement Score', 'Performance Score']].corr().iloc[0, 1]
110
  return {"correlation_coefficient": correlation}
 
113
 
114
  @router.get("/cost-benefit-analysis")
115
  def cost_benefit_analysis():
116
+ """
117
+ Calculate Return on Investment (ROI) for training programs.
118
+
119
+ Returns:
120
+ list: ROI per department.
121
+ """
122
  try:
123
  result = data.groupby("DepartmentType").apply(lambda x: x['Performance Score'].mean() / x['Training Cost'].sum()).reset_index(name="ROI")
124
  return result.to_dict(orient="records")
 
127
 
128
  @router.get("/training-effectiveness")
129
  def training_effectiveness():
130
+ """
131
+ Get average performance score after training.
132
+
133
+ Returns:
134
+ list: Average performance score per training program.
135
+ """
136
  try:
137
  result = data.groupby("Training Program Name")["Performance Score"].mean().reset_index()
138
  return result.to_dict(orient="records")
 
141
 
142
  @router.get("/diversity-inclusion")
143
  def diversity_dashboard():
144
+ """
145
+ Get gender diversity breakdown by department.
 
146
 
147
+ Returns:
148
+ list: Percentage distribution of genders per department.
149
+ """
150
+ try:
151
  diversity_metrics = data.groupby("DepartmentType")["GenderCode"].value_counts(normalize=True).unstack(fill_value=0).reset_index()
 
152
  return diversity_metrics.to_dict(orient="records")
153
  except Exception as e:
154
  raise HTTPException(status_code=500, detail=str(e))
155
 
 
156
  @router.get("/work-life-balance")
157
  def worklife_balance_impact():
158
+ """
159
+ Get correlation between work-life balance score and performance score.
160
+
161
+ Returns:
162
+ dict: Correlation coefficient between work-life balance and performance.
163
+ """
164
  try:
165
+ if "Work-Life Balance Score" not in data.columns or "Performance Score" not in data.columns:
166
+ raise HTTPException(status_code=500, detail="Required columns missing in dataset")
167
+
168
  correlation = data[['Work-Life Balance Score', 'Performance Score']].corr().iloc[0, 1]
169
+ return {"correlation_coefficient": round(correlation, 3)}
170
  except Exception as e:
171
  raise HTTPException(status_code=500, detail=str(e))
172
 
173
 
174
  @router.get("/career-development")
175
  def career_development(employee_id: str = Query(None, description="Filter by Employee ID")):
176
+ """
177
+ Get career development data.
178
+
179
+ Args:
180
+ employee_id (str, optional): Filter by employee ID.
181
+
182
+ Returns:
183
+ list: Career movements per employee.
184
+ """
185
  try:
186
+ filtered_data = data if employee_id is None else data[data["Employee ID"] == employee_id]
187
+ career_progress = filtered_data.groupby("Employee ID")["StartDate"].count().reset_index(name="Career Movements")
188
+ return career_progress.to_dict(orient="records")
189
+ except Exception as e:
190
+ raise HTTPException(status_code=500, detail=str(e))
191
 
192
+ # ✅ Prediction Endpoints
193
+ @router.post('/predict/satisfaction')
194
+ def predict_satisfaction(data: SatisfactionRequest):
195
+ """
196
+ Predict employee satisfaction score.
197
 
198
+ Args:
199
+ data (SatisfactionRequest): Satisfaction model inputs.
200
+
201
+ Returns:
202
+ dict: Predicted satisfaction score.
203
+ """
204
+ try:
205
+ prediction = satisfaction_model.predict([[data.engagement_score, data.work_life_balance_score, data.performance_score]])
206
+ return {'satisfaction_score': prediction[0]}
207
+ except Exception as e:
208
+ raise HTTPException(status_code=500, detail=str(e))
209
 
210
+ @router.post('/predict/performance')
211
+ def predict_performance(data: PerformanceRequest):
212
+ """
213
+ Predict employee performance score.
214
 
215
+ Args:
216
+ data (PerformanceRequest): Performance model inputs.
217
 
218
+ Returns:
219
+ dict: Predicted performance score.
220
+ """
221
+ try:
222
+ prediction = performance_model.predict([[data.satisfaction_score, data.engagement_score, data.training_duration, data.training_cost]])
223
+ return {'performance_score': prediction[0]}
224
+ except Exception as e:
225
+ raise HTTPException(status_code=500, detail=str(e))
226
+
227
+ @router.post('/predict/retention')
228
+ def predict_retention(data: RetentionRequest):
229
+ """
230
+ Predict employee retention risk.
231
+
232
+ Args:
233
+ data (RetentionRequest): Retention model inputs.
234
+
235
+ Returns:
236
+ dict: Predicted retention risk.
237
+ """
238
+ try:
239
+ prediction = retention_model.predict([[data.satisfaction_score, data.engagement_score, data.performance_score]])
240
+ result = label_enc.inverse_transform(prediction)
241
+ return {'retention_risk': result[0]}
242
+ except Exception as e:
243
+ raise HTTPException(status_code=500, detail=str(e))
244
+
245
+ @router.post('/predict/training')
246
+ def predict_training(data: TrainingRequest):
247
+ """
248
+ Predict training success.
249
+
250
+ Args:
251
+ data (TrainingRequest): Training model inputs.
252
+
253
+ Returns:
254
+ dict: Predicted training success.
255
+ """
256
+ try:
257
+ prediction = training_model.predict([[data.training_type, data.training_duration, data.training_cost]])
258
+ result = label_enc.inverse_transform(prediction)
259
+ return {'training_success': result[0]}
260
  except Exception as e:
261
  raise HTTPException(status_code=500, detail=str(e))
models/schemas.py → schemas.py RENAMED
@@ -8,3 +8,25 @@ class HRAnalysis(BaseModel):
8
  Performance_Score: Optional[int]
9
  Training_Program_Name: Optional[str]
10
  Training_Outcome: Optional[str]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  Performance_Score: Optional[int]
9
  Training_Program_Name: Optional[str]
10
  Training_Outcome: Optional[str]
11
+
12
+
13
+ class SatisfactionRequest(BaseModel):
14
+ engagement_score: int
15
+ work_life_balance_score: int
16
+ performance_score: int
17
+
18
+ class PerformanceRequest(BaseModel):
19
+ satisfaction_score: int
20
+ engagement_score: int
21
+ training_duration: int
22
+ training_cost: float
23
+
24
+ class RetentionRequest(BaseModel):
25
+ satisfaction_score: int
26
+ engagement_score: int
27
+ performance_score: int
28
+
29
+ class TrainingRequest(BaseModel):
30
+ training_type: int
31
+ training_duration: int
32
+ training_cost: float
utils/__pycache__/load_models.cpython-313.pyc ADDED
Binary file (1.18 kB). View file
 
utils/load_models.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/utils/load_models.py
2
+ import joblib
3
+ import os
4
+
5
+ models_path = os.path.join(os.path.dirname(__file__), '../models')
6
+
7
+ # Load models
8
+ satisfaction_model = joblib.load(os.path.join(models_path, 'satisfaction_model.pkl'))
9
+ performance_model = joblib.load(os.path.join(models_path, 'performance_model.pkl'))
10
+ retention_model = joblib.load(os.path.join(models_path, 'retention_model.pkl'))
11
+ training_model = joblib.load(os.path.join(models_path, 'training_model.pkl'))
12
+
13
+ # Load label encoder
14
+ label_enc = joblib.load(os.path.join(models_path, 'label_encoder.pkl'))