agentsay committed on
Commit
85393ff
·
verified ·
1 Parent(s): db8b5c5

Update modelLoanAPI.py

Browse files
Files changed (1) hide show
  1. modelLoanAPI.py +30 -259
modelLoanAPI.py CHANGED
@@ -1,279 +1,50 @@
1
- # ```python
2
  from fastapi import FastAPI, HTTPException
3
  from pydantic import BaseModel
4
  import pandas as pd
5
- import numpy as np
6
- from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
7
- from sklearn.preprocessing import LabelEncoder, StandardScaler
8
- from sklearn.metrics import accuracy_score
9
- import matplotlib.pyplot as plt
10
- import json
11
- import base64
12
- from io import BytesIO
13
- import warnings
14
-
15
- warnings.filterwarnings("ignore")
16
 
17
  app = FastAPI()
18
 
 
19
class WorkerIdRequest(BaseModel):
    """POST body for /predict_worker_earnings/: identifies the worker to score."""
    worker_id: int
21
 
 
 
 
 
 
 
 
 
 
 
 
22
@app.post("/predict_worker_earnings/")
async def predict_worker_earnings(request: WorkerIdRequest):
    """Forecast daily earnings for one worker and build a microfinance profile.

    Per request, trains a RandomForestClassifier (does the worker have a job on
    a given day?) and a RandomForestRegressor (wage on working days) on the
    first 80% of the worker's time series, then evaluates on the last 20%.

    Returns a dict with keys: worker_id, classification_metrics (accuracy,
    mae, mape), worker_profile, and plot (base64 PNG data URI).

    Raises:
        HTTPException 400: fewer than 2 rows for the worker (cannot split).
        HTTPException 404: unknown worker_id, or no working days to regress on.
        HTTPException 500: data-loading, preprocessing, or modelling failure.
    """
    try:
        worker_id = request.worker_id

        # Result skeleton returned to the caller.
        results = {
            'worker_id': worker_id,
            'classification_metrics': {},
            'worker_profile': {},
            'plot': ''
        }

        # Load data
        try:
            df = pd.read_csv('/app/extended_worker_dataset.csv')
        except FileNotFoundError:
            # Fixed: message previously cited /app/data/... but the read above
            # uses /app/extended_worker_dataset.csv.
            raise HTTPException(status_code=500, detail="CSV file not found at /app/extended_worker_dataset.csv")
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error reading CSV file: {str(e)}")

        # Filter for one worker_id
        df = df[df['worker_id'] == worker_id].copy()
        if df.empty:
            raise HTTPException(status_code=404, detail=f"No data found for worker_id {worker_id}")

        # Data preprocessing
        try:
            df['timestamp'] = pd.to_datetime(df['timestamp'])
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error converting timestamp: {str(e)}")

        # Binary target: 1 on days the worker held any job.
        df['has_job'] = (df['job_type'] != "No Job").astype(int)

        # Winsorize wages into [500, 90th percentile] to tame outliers.
        try:
            wage_cap = df[df['contracted_wage'] > 0]['contracted_wage'].quantile(0.90)
            if np.isnan(wage_cap) or wage_cap <= 500:
                raise ValueError("Invalid wage cap calculated")
            df['contracted_wage'] = df['contracted_wage'].clip(lower=500, upper=wage_cap)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error processing wage data: {str(e)}")

        # Encode job_type
        le = LabelEncoder()
        try:
            df['job_type_encoded'] = le.fit_transform(df['job_type'])
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error encoding job_type: {str(e)}")

        # Chronological 80/20 split (rows are per-day observations).
        if len(df) < 2:
            raise HTTPException(status_code=400, detail="Insufficient data points for training and testing")
        split_point = int(len(df) * 0.8)
        train_df = df.iloc[:split_point].copy()
        test_df = df.iloc[split_point:].copy()

        # Scale features on the training window only (no leakage into test).
        scaler = StandardScaler()
        try:
            train_df[['job_type_scaled', 'years_exp_scaled']] = scaler.fit_transform(
                train_df[['job_type_encoded', 'years_of_experience']]
            )
            train_df['job_exp_interaction'] = train_df['job_type_scaled'] * train_df['years_exp_scaled']
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error scaling features: {str(e)}")

        # Calendar features for both windows.
        for subset in [train_df, test_df]:
            subset['dayofweek'] = subset['timestamp'].dt.dayofweek
            subset['month'] = subset['timestamp'].dt.month
            subset['year'] = subset['timestamp'].dt.year
            subset['dayofyear'] = subset['timestamp'].dt.dayofyear
            subset['is_weekend'] = subset['dayofweek'].isin([5, 6]).astype(int)

        # Train classifier (job / no-job per day).
        X_train_class = train_df[['dayofweek', 'month', 'year', 'dayofyear',
                                  'is_weekend', 'job_type_encoded', 'feedback_score',
                                  'years_of_experience']]
        y_train_class = train_df['has_job']

        try:
            classifier = RandomForestClassifier(
                n_estimators=500, max_depth=12, min_samples_split=5, random_state=42
            )
            classifier.fit(X_train_class, y_train_class)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error training classifier: {str(e)}")

        # Train regressor on working days only.
        train_df_reg = train_df[train_df['has_job'] == 1].copy()
        if train_df_reg.empty:
            raise HTTPException(status_code=404, detail="No data available for regression (all has_job == 0)")

        X_train_reg = train_df_reg[['dayofweek', 'month', 'year', 'dayofyear',
                                    'is_weekend', 'job_type_scaled', 'feedback_score',
                                    'years_exp_scaled', 'job_exp_interaction']]
        y_train_reg = train_df_reg['contracted_wage']

        try:
            regressor = RandomForestRegressor(
                n_estimators=300, max_depth=10, min_samples_split=4, random_state=42
            )
            regressor.fit(X_train_reg, y_train_reg)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error training regressor: {str(e)}")

        # Prepare future dataframe (the held-out test window).
        future_df = test_df[['timestamp', 'job_type', 'job_type_encoded',
                             'feedback_score', 'years_of_experience']].rename(columns={'timestamp': 'ds'})

        future_df['dayofweek'] = future_df['ds'].dt.dayofweek
        future_df['month'] = future_df['ds'].dt.month
        future_df['year'] = future_df['ds'].dt.year
        future_df['dayofyear'] = future_df['ds'].dt.dayofyear
        future_df['is_weekend'] = future_df['dayofweek'].isin([5, 6]).astype(int)

        try:
            future_df[['job_type_scaled', 'years_exp_scaled']] = scaler.transform(
                future_df[['job_type_encoded', 'years_of_experience']]
            )
            future_df['job_exp_interaction'] = future_df['job_type_scaled'] * future_df['years_exp_scaled']
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error transforming future dataframe: {str(e)}")

        # Predict job/no-job
        try:
            future_df['has_job_predicted'] = classifier.predict(
                future_df[['dayofweek', 'month', 'year', 'dayofyear',
                           'is_weekend', 'job_type_encoded', 'feedback_score',
                           'years_of_experience']]
            )
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error predicting has_job: {str(e)}")

        # Evaluate classifier accuracy (future_df is derived from test_df
        # without reordering, so rows align positionally).
        test_df['has_job'] = (test_df['job_type'] != "No Job").astype(int)
        try:
            acc = accuracy_score(test_df['has_job'], future_df['has_job_predicted'])
            results['classification_metrics']['accuracy'] = round(acc * 100, 2)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error calculating accuracy: {str(e)}")

        # Predict wages
        try:
            future_df['yhat'] = regressor.predict(
                future_df[['dayofweek', 'month', 'year', 'dayofyear',
                           'is_weekend', 'job_type_scaled', 'feedback_score',
                           'years_exp_scaled', 'job_exp_interaction']]
            )
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error predicting wages: {str(e)}")

        # Zero out wages on predicted no-job days; cap at the winsorized max.
        final_forecast_df = future_df.copy()
        final_forecast_df['yhat'] = np.where(final_forecast_df['has_job_predicted'] == 0, 0, final_forecast_df['yhat'])
        final_forecast_df['yhat'] = np.minimum(final_forecast_df['yhat'], wage_cap)

        # Uncertainty intervals from the spread of per-tree predictions.
        # (Removed an unused `predictions = regressor.predict(X_train_reg)`.)
        try:
            std_dev = np.std([tree.predict(X_train_reg) for tree in regressor.estimators_], axis=0)
            future_df['yhat_lower'] = np.maximum(final_forecast_df['yhat'] - 1.96 * std_dev.mean(), 0)
            future_df['yhat_upper'] = final_forecast_df['yhat'] + 1.96 * std_dev.mean()
            final_forecast_df['yhat_lower'] = np.where(final_forecast_df['has_job_predicted'] == 0, 0, future_df['yhat_lower'])
            final_forecast_df['yhat_upper'] = np.where(final_forecast_df['has_job_predicted'] == 0, 0, future_df['yhat_upper'])
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error calculating uncertainty intervals: {str(e)}")

        # Evaluation: earnings-weighted MAE/MAPE over days with actual pay.
        try:
            comparison_df = pd.merge(
                test_df[['timestamp', 'contracted_wage']].rename(columns={'timestamp': 'ds', 'contracted_wage': 'y'}),
                final_forecast_df[['ds', 'yhat', 'yhat_lower', 'yhat_upper']], on='ds'
            )

            valid_comparison_df = comparison_df[comparison_df['y'] > 0]
            if not valid_comparison_df.empty:
                weights = valid_comparison_df['y'] / valid_comparison_df['y'].mean()
                mae = np.average([abs(a - p) for a, p in zip(valid_comparison_df['y'], valid_comparison_df['yhat'])], weights=weights)
                mape = np.average([abs((a - p) / a) * 100 for a, p in zip(valid_comparison_df['y'], valid_comparison_df['yhat'])], weights=weights)
            else:
                mae = np.nan
                mape = np.nan

            results['classification_metrics']['mae'] = round(mae, 2) if not np.isnan(mae) else None
            results['classification_metrics']['mape'] = round(mape, 2) if not np.isnan(mape) else None
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error evaluating predictions: {str(e)}")

        # Plot actual vs forecast, returned as a base64 data-URI PNG.
        try:
            plt.figure(figsize=(12, 6))
            plt.plot(comparison_df['ds'], comparison_df['y'], 'o-', label='Actual Values', markersize=4)
            plt.plot(comparison_df['ds'], comparison_df['yhat'], '-', label='Forecasted Values')
            plt.fill_between(comparison_df['ds'], comparison_df['yhat_lower'], comparison_df['yhat_upper'],
                             color='gray', alpha=0.2, label='Uncertainty Interval')
            plt.title('Actual vs. Forecasted Daily Earnings (Last 20% of Dataset)')
            plt.xlabel('Date')
            plt.ylabel('Contracted Wage')
            plt.legend()
            plt.grid(True)
            plt.xticks(rotation=45)
            plt.tight_layout()

            buffer = BytesIO()
            plt.savefig(buffer, format='png')
            buffer.seek(0)
            plot_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
            results['plot'] = f'data:image/png;base64,{plot_base64}'
            plt.close()
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error generating plot: {str(e)}")

        # Worker profile for microfinance scoring.
        try:
            worker_data = df.copy()

            avg_daily_earning = worker_data[worker_data['contracted_wage'] > 0]['contracted_wage'].mean()
            avg_monthly_earning = avg_daily_earning * 30 if not np.isnan(avg_daily_earning) else 0

            job_distribution = worker_data['job_type'].value_counts(normalize=True) * 100

            avg_feedback = worker_data['feedback_score'].mean()

            # Share of days with any job (everything except "No Job").
            workholic_index = job_distribution.drop(labels=['No Job'], errors='ignore').sum() / 100

            if avg_daily_earning > 0:
                # Coefficient of variation of working-day earnings.
                earning_stability = worker_data[worker_data['contracted_wage'] > 0]['contracted_wage'].std() / avg_daily_earning
            else:
                earning_stability = np.nan

            results['worker_profile'] = {
                'average_daily_earning': round(avg_daily_earning, 2) if not np.isnan(avg_daily_earning) else None,
                'estimated_monthly_earning': round(avg_monthly_earning, 2) if not np.isnan(avg_monthly_earning) else None,
                'job_distribution': job_distribution.round(2).to_dict(),
                'average_feedback_score': round(avg_feedback, 2) if not np.isnan(avg_feedback) else None,
                'workholic_index': round(workholic_index, 2) if not np.isnan(workholic_index) else None,
                'earning_stability': round(earning_stability, 2) if not np.isnan(earning_stability) else None
            }
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error generating worker profile: {str(e)}")

        def convert_to_serializable(obj):
            # json.dumps default= hook: unwrap numpy scalars/arrays to
            # plain Python so the response is JSON-serializable.
            if isinstance(obj, np.floating):
                return float(obj)
            if isinstance(obj, np.integer):
                return int(obj)
            if isinstance(obj, np.ndarray):
                return obj.tolist()
            return obj

        return json.loads(json.dumps(results, default=convert_to_serializable))

    except HTTPException:
        # Fixed: HTTPException subclasses Exception, so the generic handler
        # below used to re-wrap deliberate 400/404 responses as 500s.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing request: {str(e)}")
275
-
276
if __name__ == "__main__":
    # Development entry point: run the API directly with uvicorn on all
    # interfaces, port 8000 (in production a process manager runs the app).
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
279
- # ```
 
 
1
  from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel
3
  import pandas as pd
4
+ import traceback
 
 
 
 
 
 
 
 
 
 
5
 
6
  app = FastAPI()
7
 
8
# Request model
class WorkerIdRequest(BaseModel):
    """POST body for /predict_worker_earnings/: identifies the worker to score."""
    worker_id: int
11
 
12
# Load dataset once on startup
CSV_PATH = "/app/extended_worker_dataset.csv"  # adjust if you use /app/data

try:
    df = pd.read_csv(CSV_PATH)
except Exception as e:
    print("==== ERROR LOADING CSV ====")
    print(str(e))
    traceback.print_exc()
    # Chain the original exception so the real cause (missing file, bad
    # encoding, malformed CSV) survives in the startup traceback.
    raise RuntimeError(f"Failed to load dataset from {CSV_PATH}: {str(e)}") from e
23
@app.post("/predict_worker_earnings/")
async def predict_worker_earnings(request: WorkerIdRequest):
    """Predict earnings for a single worker.

    Looks the worker up in the module-level dataframe and returns a simple
    heuristic estimate: base salary plus 500 per year of experience
    (placeholder for a real ML model).

    Raises:
        HTTPException 404: worker_id not present in the dataset.
        HTTPException 500: any unexpected failure while computing.
    """
    try:
        worker_id = request.worker_id

        # Ensure worker exists
        if worker_id not in df['worker_id'].values:
            raise HTTPException(status_code=404, detail=f"Worker ID {worker_id} not found")

        # Dummy earnings calculation (replace with ML model later)
        worker_data = df[df['worker_id'] == worker_id].iloc[0]
        base_salary = worker_data.get("base_salary", 10000)
        experience_years = worker_data.get("experience_years", 1)

        # Series.get only covers a missing *column*; a present column with a
        # NaN cell would otherwise propagate NaN into the prediction.
        if pd.isna(base_salary):
            base_salary = 10000
        if pd.isna(experience_years):
            experience_years = 1

        predicted_earnings = base_salary + (experience_years * 500)

        # Cast to native Python types: values read from a pandas row are
        # numpy scalars (int64/float64), which the JSON encoder rejects.
        return {
            "worker_id": int(worker_id),
            "predicted_earnings": float(predicted_earnings)
        }

    except HTTPException:
        raise  # let FastAPI handle cleanly

    except Exception as e:
        print("==== SERVER ERROR ====")
        print(str(e))
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=f"Error processing request: {str(e)}")