Pulastya B committed on
Commit
4a3a3e8
·
1 Parent(s): 6f57124

Fix: Better SBERT error handling + suppress invalid hand-off warnings

Browse files
src/orchestrator.py CHANGED
@@ -1316,7 +1316,7 @@ You receive quality reports from EDA agent and deliver clean data to modeling ag
1316
  Dictionary with hand-off details
1317
  """
1318
  if target_agent not in self.specialist_agents:
1319
- print(f"⚠️ Invalid hand-off target: {target_agent}")
1320
  return {"success": False, "error": "Invalid target agent"}
1321
 
1322
  # Update active agent
@@ -1999,6 +1999,11 @@ You receive quality reports from EDA agent and deliver clean data to modeling ag
1999
  models_val = arguments.pop("models")
2000
  print(f" βœ“ Stripped invalid parameter 'models': {models_val}")
2001
  print(f" ℹ️ train_baseline_models trains all baseline models automatically")
 
 
 
 
 
2002
 
2003
  if tool_name == "generate_model_report":
2004
  # LLM uses 'file_path' instead of 'test_data_path'
@@ -3480,25 +3485,57 @@ You receive quality reports from EDA agent and deliver clean data to modeling ag
3480
  print(f"⚠️ INVALID TOOL NAME: '{tool_name}' (original: {tool_call.function.name})")
3481
  print(f" Available tools: {', '.join(list(self.tool_functions.keys())[:10])}...")
3482
 
3483
- # Try fuzzy matching to recover
3484
- from difflib import get_close_matches
3485
- close_matches = get_close_matches(tool_name, self.tool_functions.keys(), n=1, cutoff=0.6)
3486
- if close_matches:
3487
- tool_name = close_matches[0]
3488
- print(f" βœ“ Recovered using fuzzy match: {tool_name}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3489
  else:
3490
- print(f" ❌ Cannot recover tool name, skipping")
3491
- messages.append({
3492
- "role": "tool",
3493
- "tool_call_id": tool_call_id,
3494
- "name": "invalid_tool",
3495
- "content": json.dumps({
3496
- "error": f"Invalid tool: {tool_call.function.name}",
3497
- "message": "Tool does not exist in registry. Available tools can be found in the tools list.",
3498
- "hint": "Check spelling and use exact tool names from the tools registry."
 
 
 
 
 
 
 
 
3499
  })
3500
- })
3501
- continue
3502
 
3503
  # CRITICAL FIX 3: Check for corrupted tool names (length check)
3504
  if len(str(tool_call.function.name)) > 100:
 
1316
  Dictionary with hand-off details
1317
  """
1318
  if target_agent not in self.specialist_agents:
1319
+ # Silently skip invalid hand-off targets (common during workflow transitions)
1320
  return {"success": False, "error": "Invalid target agent"}
1321
 
1322
  # Update active agent
 
1999
  models_val = arguments.pop("models")
2000
  print(f" βœ“ Stripped invalid parameter 'models': {models_val}")
2001
  print(f" ℹ️ train_baseline_models trains all baseline models automatically")
2002
+ # LLM often adds 'feature_columns' parameter that doesn't exist
2003
+ if "feature_columns" in arguments:
2004
+ feature_cols = arguments.pop("feature_columns")
2005
+ print(f" βœ“ Stripped invalid parameter 'feature_columns': {feature_cols}")
2006
+ print(f" ℹ️ train_baseline_models uses all numeric columns automatically")
2007
 
2008
  if tool_name == "generate_model_report":
2009
  # LLM uses 'file_path' instead of 'test_data_path'
 
3485
  print(f"⚠️ INVALID TOOL NAME: '{tool_name}' (original: {tool_call.function.name})")
3486
  print(f" Available tools: {', '.join(list(self.tool_functions.keys())[:10])}...")
3487
 
3488
+ # Explicit mappings for common LLM hallucinations
3489
+ tool_name_mappings = {
3490
+ "drop_columns": "execute_python_code", # No drop_columns tool, use code
3491
+ "select_columns": "execute_python_code", # No select_columns tool, use code
3492
+ "rename_columns": "execute_python_code", # No rename_columns tool, use code
3493
+ "encode_categorical_variables": "encode_categorical",
3494
+ "train_model": "train_baseline_models",
3495
+ "train_models": "train_baseline_models",
3496
+ "baseline_models": "train_baseline_models",
3497
+ "tune_hyperparameters": "hyperparameter_tuning",
3498
+ "hyperparameter_search": "hyperparameter_tuning",
3499
+ }
3500
+
3501
+ if tool_name in tool_name_mappings:
3502
+ mapped_tool = tool_name_mappings[tool_name]
3503
+ if mapped_tool == "execute_python_code":
3504
+ print(f" βœ“ Tool '{tool_name}' not available - LLM should use execute_python_code instead")
3505
+ # Skip and let LLM handle with code
3506
+ messages.append({
3507
+ "role": "tool",
3508
+ "tool_call_id": tool_call_id,
3509
+ "name": tool_name,
3510
+ "content": json.dumps({
3511
+ "error": f"Tool '{tool_name}' does not exist",
3512
+ "hint": "Use execute_python_code with pandas to perform this operation. Example: df.drop(columns=['col1', 'col2'])"
3513
+ })
3514
+ })
3515
+ continue
3516
+ else:
3517
+ tool_name = mapped_tool
3518
+ print(f" βœ“ Mapped to: {tool_name}")
3519
  else:
3520
+ # Try fuzzy matching to recover
3521
+ from difflib import get_close_matches
3522
+ close_matches = get_close_matches(tool_name, self.tool_functions.keys(), n=1, cutoff=0.6)
3523
+ if close_matches:
3524
+ tool_name = close_matches[0]
3525
+ print(f" βœ“ Recovered using fuzzy match: {tool_name}")
3526
+ else:
3527
+ print(f" ❌ Cannot recover tool name, skipping")
3528
+ messages.append({
3529
+ "role": "tool",
3530
+ "tool_call_id": tool_call_id,
3531
+ "name": "invalid_tool",
3532
+ "content": json.dumps({
3533
+ "error": f"Invalid tool: {tool_call.function.name}",
3534
+ "message": "Tool does not exist in registry. Available tools can be found in the tools list.",
3535
+ "hint": "Check spelling and use exact tool names from the tools registry."
3536
+ })
3537
  })
3538
+ continue
 
3539
 
3540
  # CRITICAL FIX 3: Check for corrupted tool names (length check)
3541
  if len(str(tool_call.function.name)) > 100:
src/tools/model_training.py CHANGED
@@ -128,6 +128,12 @@ def train_baseline_models(file_path: str, target_col: str,
128
  }
129
 
130
  # Train models based on task type
 
 
 
 
 
 
131
  if task_type == "classification":
132
  models = {
133
  "logistic_regression": LogisticRegression(max_iter=1000, random_state=random_state),
@@ -137,10 +143,17 @@ def train_baseline_models(file_path: str, target_col: str,
137
  "catboost": CatBoostClassifier(iterations=100, random_state=random_state, verbose=0, allow_writing_files=False)
138
  }
139
 
140
- for model_name, model in models.items():
141
  try:
142
  # Train
 
 
 
 
143
  model.fit(X_train, y_train)
 
 
 
144
 
145
  # Predict
146
  y_pred_train = model.predict(X_train)
@@ -206,10 +219,18 @@ def train_baseline_models(file_path: str, target_col: str,
206
  "catboost": CatBoostRegressor(iterations=100, random_state=random_state, verbose=0, allow_writing_files=False)
207
  }
208
 
209
- for model_name, model in models.items():
210
  try:
211
  # Train
 
 
 
 
 
212
  model.fit(X_train, y_train)
 
 
 
213
 
214
  # Predict
215
  y_pred_train = model.predict(X_train)
@@ -358,6 +379,20 @@ def train_baseline_models(file_path: str, target_col: str,
358
  else:
359
  results["visualization_generated"] = False
360
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  return results
362
 
363
 
 
128
  }
129
 
130
  # Train models based on task type
131
+ import sys
132
+ print(f"\nπŸš€ Training {6 if task_type == 'classification' else 6} baseline models...", flush=True)
133
+ print(f" πŸ“Š Training set: {len(X_train):,} samples Γ— {X_train.shape[1]} features", flush=True)
134
+ print(f" πŸ“Š Test set: {len(X_test):,} samples", flush=True)
135
+ sys.stdout.flush()
136
+
137
  if task_type == "classification":
138
  models = {
139
  "logistic_regression": LogisticRegression(max_iter=1000, random_state=random_state),
 
143
  "catboost": CatBoostClassifier(iterations=100, random_state=random_state, verbose=0, allow_writing_files=False)
144
  }
145
 
146
+ for idx, (model_name, model) in enumerate(models.items(), 1):
147
  try:
148
  # Train
149
+ print(f"\n [{idx}/{len(models)}] Training {model_name}...", flush=True)
150
+ sys.stdout.flush()
151
+ import time
152
+ start_time = time.time()
153
  model.fit(X_train, y_train)
154
+ elapsed = time.time() - start_time
155
+ print(f" βœ“ {model_name} trained in {elapsed:.1f}s", flush=True)
156
+ sys.stdout.flush()
157
 
158
  # Predict
159
  y_pred_train = model.predict(X_train)
 
219
  "catboost": CatBoostRegressor(iterations=100, random_state=random_state, verbose=0, allow_writing_files=False)
220
  }
221
 
222
+ for idx, (model_name, model) in enumerate(models.items(), 1):
223
  try:
224
  # Train
225
+ import sys
226
+ print(f"\n [{idx}/{len(models)}] Training {model_name}...", flush=True)
227
+ sys.stdout.flush()
228
+ import time
229
+ start_time = time.time()
230
  model.fit(X_train, y_train)
231
+ elapsed = time.time() - start_time
232
+ print(f" βœ“ {model_name} trained in {elapsed:.1f}s", flush=True)
233
+ sys.stdout.flush()
234
 
235
  # Predict
236
  y_pred_train = model.predict(X_train)
 
379
  else:
380
  results["visualization_generated"] = False
381
 
382
+ # Print final summary
383
+ print(f"\n{'='*60}")
384
+ print(f"βœ… TRAINING COMPLETE")
385
+ print(f"{'='*60}")
386
+ print(f"πŸ“Š Best Model: {best_model_name}")
387
+ if task_type == "regression":
388
+ print(f"πŸ“ˆ Test RΒ²: {best_score:.4f}")
389
+ print(f"πŸ“‰ Test RMSE: {results['models'][best_model_name]['test_metrics']['rmse']:.4f}")
390
+ else:
391
+ print(f"πŸ“ˆ Test F1: {best_score:.4f}")
392
+ print(f"πŸ“‰ Test Accuracy: {results['models'][best_model_name]['test_metrics']['accuracy']:.4f}")
393
+ print(f"πŸ’Ύ Model saved: {results['best_model']['model_path']}")
394
+ print(f"{'='*60}\\n")
395
+
396
  return results
397
 
398
 
src/utils/semantic_layer.py CHANGED
@@ -56,7 +56,8 @@ class SemanticLayer:
56
  if self.enabled:
57
  try:
58
  print(f"🧠 Loading SBERT model: {model_name}...")
59
- self.model = SentenceTransformer(model_name)
 
60
  # Use GPU if available
61
  if torch.cuda.is_available():
62
  self.model = self.model.to('cuda')
@@ -65,6 +66,7 @@ class SemanticLayer:
65
  print("βœ… SBERT loaded on CPU")
66
  except Exception as e:
67
  print(f"⚠️ Failed to load SBERT model: {e}")
 
68
  self.enabled = False
69
  else:
70
  print("⚠️ SBERT semantic layer disabled (missing dependencies)")
 
56
  if self.enabled:
57
  try:
58
  print(f"🧠 Loading SBERT model: {model_name}...")
59
+ # Try loading with trust_remote_code for better compatibility
60
+ self.model = SentenceTransformer(model_name, trust_remote_code=True)
61
  # Use GPU if available
62
  if torch.cuda.is_available():
63
  self.model = self.model.to('cuda')
 
66
  print("βœ… SBERT loaded on CPU")
67
  except Exception as e:
68
  print(f"⚠️ Failed to load SBERT model: {e}")
69
+ print(f" Falling back to keyword-based routing (semantic features disabled)")
70
  self.enabled = False
71
  else:
72
  print("⚠️ SBERT semantic layer disabled (missing dependencies)")