agentbaba committed on
Commit
0fb85a0
·
verified ·
1 Parent(s): c316e15

Upload data_pipeline/inference.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. data_pipeline/inference.py +76 -7
data_pipeline/inference.py CHANGED
@@ -57,15 +57,34 @@ class CopilotModels:
57
  if self._loaded:
58
  return
59
  print("Loading Copilot models...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- self.failure_model = joblib.load(ARTIFACTS / "failure_predictor.pkl")
62
- self.scaler = joblib.load(ARTIFACTS / "feature_scaler.pkl")
63
- self.style_model = joblib.load(ARTIFACTS / "work_style_classifier.pkl")
64
- self.style_encoder = joblib.load(ARTIFACTS / "work_style_label_encoder.pkl")
65
- self.distraction_model = joblib.load(ARTIFACTS / "distraction_scorer.pkl")
66
 
67
- with open(FEATURE_JSON) as f:
68
- self._features = json.load(f)
 
 
69
 
70
  self._load_rag()
71
  self._loaded = True
@@ -134,6 +153,21 @@ class CopilotModels:
134
  "study_hours_weekly": 20,
135
  }
136
  row = {**DEFAULTS, **user_data}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  X = np.array([[row.get(f, DEFAULTS.get(f, 0)) for f in self.failure_features]])
138
  X_scaled = self.scaler.transform(X)
139
 
@@ -163,6 +197,28 @@ class CopilotModels:
163
  "deadline_days_remaining": 3,
164
  }
165
  row = {**DEFAULTS, **user_data}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  X = np.array([[row.get(f, DEFAULTS.get(f, 0)) for f in self.style_features]])
167
  pred = self.style_model.predict(X)[0]
168
  proba = self.style_model.predict_proba(X)[0]
@@ -187,6 +243,19 @@ class CopilotModels:
187
  "focus_score": 0.65,
188
  }
189
  row = {**DEFAULTS, **user_data}
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  X = np.array([[row.get(f, DEFAULTS.get(f, 0)) for f in self.distraction_features]])
191
  score = float(np.clip(self.distraction_model.predict(X)[0], 0, 1))
192
 
 
57
  if self._loaded:
58
  return
59
  print("Loading Copilot models...")
60
+ self.failure_model = None
61
+ self.scaler = None
62
+ self.style_model = None
63
+ self.style_encoder = None
64
+ self.distraction_model = None
65
+ self._features = {
66
+ "failure_predictor": {"features": []},
67
+ "work_style_classifier": {"features": []},
68
+ "distraction_scorer": {"features": []},
69
+ }
70
+ self._collection = None
71
+ self._embedder = None
72
+ self._ml_available = False
73
+
74
+ try:
75
+ self.failure_model = joblib.load(ARTIFACTS / "failure_predictor.pkl")
76
+ self.scaler = joblib.load(ARTIFACTS / "feature_scaler.pkl")
77
+ self.style_model = joblib.load(ARTIFACTS / "work_style_classifier.pkl")
78
+ self.style_encoder = joblib.load(ARTIFACTS / "work_style_label_encoder.pkl")
79
+ self.distraction_model = joblib.load(ARTIFACTS / "distraction_scorer.pkl")
80
 
81
+ with open(FEATURE_JSON) as f:
82
+ self._features = json.load(f)
 
 
 
83
 
84
+ self._ml_available = True
85
+ print("Loaded ML artifacts.")
86
+ except Exception as e:
87
+ print(f"Warning: ML artifacts unavailable, using heuristic fallbacks. Error: {e}")
88
 
89
  self._load_rag()
90
  self._loaded = True
 
153
  "study_hours_weekly": 20,
154
  }
155
  row = {**DEFAULTS, **user_data}
156
+ if not self._ml_available or self.failure_model is None or self.scaler is None or not self.failure_features:
157
+ stress = float(row["stress_level"]) / 10.0
158
+ distractions = min(float(row["distraction_events"]) / 20.0, 1.0)
159
+ deadline_pressure = max(0.0, 1.0 - min(float(row["deadline_days_remaining"]) / 3.0, 1.0))
160
+ motivation = 1.0 - min(float(row["motivation_level"]) / 10.0, 1.0)
161
+ risk_score = max(0.0, min(1.0, 0.35 * stress + 0.25 * distractions + 0.25 * deadline_pressure + 0.15 * motivation))
162
+ return {
163
+ "failure_probability": round(risk_score, 4),
164
+ "risk_level": (
165
+ "high" if risk_score >= 0.65 else
166
+ "medium" if risk_score >= 0.40 else
167
+ "low"
168
+ ),
169
+ "should_intervene": risk_score >= 0.65,
170
+ }
171
  X = np.array([[row.get(f, DEFAULTS.get(f, 0)) for f in self.failure_features]])
172
  X_scaled = self.scaler.transform(X)
173
 
 
197
  "deadline_days_remaining": 3,
198
  }
199
  row = {**DEFAULTS, **user_data}
200
+ if not self._ml_available or self.style_model is None or self.style_encoder is None or not self.style_features:
201
+ stress = float(row["stress_level"])
202
+ distraction_events = float(row["distraction_events"])
203
+ completion = float(row["previous_completion_rate"])
204
+ if stress <= 4 and completion >= 0.8:
205
+ label = "turtle"
206
+ confidence = 0.72
207
+ elif distraction_events >= 8 and stress >= 6:
208
+ label = "hare"
209
+ confidence = 0.68
210
+ else:
211
+ label = "hybrid"
212
+ confidence = 0.64
213
+ return {
214
+ "work_style": label,
215
+ "confidence": confidence,
216
+ "scores": {
217
+ "turtle": 0.2 if label != "turtle" else confidence,
218
+ "hare": 0.2 if label != "hare" else confidence,
219
+ "hybrid": 0.2 if label != "hybrid" else confidence,
220
+ },
221
+ }
222
  X = np.array([[row.get(f, DEFAULTS.get(f, 0)) for f in self.style_features]])
223
  pred = self.style_model.predict(X)[0]
224
  proba = self.style_model.predict_proba(X)[0]
 
243
  "focus_score": 0.65,
244
  }
245
  row = {**DEFAULTS, **user_data}
246
+ if not self._ml_available or self.distraction_model is None or not self.distraction_features:
247
+ distractions = min(float(row["distraction_events"]) / 20.0, 1.0)
248
+ social = min(float(row["social_media_minutes_before"]) / 120.0, 1.0)
249
+ focus = 1.0 - min(max(float(row["focus_score"]), 0.0), 1.0)
250
+ score = max(0.0, min(1.0, 0.45 * distractions + 0.35 * social + 0.20 * focus))
251
+ return {
252
+ "distraction_score": round(score, 4),
253
+ "level": (
254
+ "high" if score >= 0.65 else
255
+ "medium" if score >= 0.35 else
256
+ "low"
257
+ ),
258
+ }
259
  X = np.array([[row.get(f, DEFAULTS.get(f, 0)) for f in self.distraction_features]])
260
  score = float(np.clip(self.distraction_model.predict(X)[0], 0, 1))
261