Spaces:
Running
Running
Pulastya B committed on
Commit ·
4a3a3e8
1
Parent(s): 6f57124
Fix: Better SBERT error handling + suppress invalid hand-off warnings
Browse files
- src/orchestrator.py +55 -18
- src/tools/model_training.py +37 -2
- src/utils/semantic_layer.py +3 -1
src/orchestrator.py
CHANGED
|
@@ -1316,7 +1316,7 @@ You receive quality reports from EDA agent and deliver clean data to modeling ag
|
|
| 1316 |
Dictionary with hand-off details
|
| 1317 |
"""
|
| 1318 |
if target_agent not in self.specialist_agents:
|
| 1319 |
-
|
| 1320 |
return {"success": False, "error": "Invalid target agent"}
|
| 1321 |
|
| 1322 |
# Update active agent
|
|
@@ -1999,6 +1999,11 @@ You receive quality reports from EDA agent and deliver clean data to modeling ag
|
|
| 1999 |
models_val = arguments.pop("models")
|
| 2000 |
print(f" β Stripped invalid parameter 'models': {models_val}")
|
| 2001 |
print(f" ℹ️ train_baseline_models trains all baseline models automatically")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2002 |
|
| 2003 |
if tool_name == "generate_model_report":
|
| 2004 |
# LLM uses 'file_path' instead of 'test_data_path'
|
|
@@ -3480,25 +3485,57 @@ You receive quality reports from EDA agent and deliver clean data to modeling ag
|
|
| 3480 |
print(f"⚠️ INVALID TOOL NAME: '{tool_name}' (original: {tool_call.function.name})")
|
| 3481 |
print(f" Available tools: {', '.join(list(self.tool_functions.keys())[:10])}...")
|
| 3482 |
|
| 3483 |
-
#
|
| 3484 |
-
|
| 3485 |
-
|
| 3486 |
-
|
| 3487 |
-
|
| 3488 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3489 |
else:
|
| 3490 |
-
|
| 3491 |
-
|
| 3492 |
-
|
| 3493 |
-
|
| 3494 |
-
|
| 3495 |
-
"
|
| 3496 |
-
|
| 3497 |
-
|
| 3498 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3499 |
})
|
| 3500 |
-
|
| 3501 |
-
continue
|
| 3502 |
|
| 3503 |
# CRITICAL FIX 3: Check for corrupted tool names (length check)
|
| 3504 |
if len(str(tool_call.function.name)) > 100:
|
|
|
|
| 1316 |
Dictionary with hand-off details
|
| 1317 |
"""
|
| 1318 |
if target_agent not in self.specialist_agents:
|
| 1319 |
+
# Silently skip invalid hand-off targets (common during workflow transitions)
|
| 1320 |
return {"success": False, "error": "Invalid target agent"}
|
| 1321 |
|
| 1322 |
# Update active agent
|
|
|
|
| 1999 |
models_val = arguments.pop("models")
|
| 2000 |
print(f" β Stripped invalid parameter 'models': {models_val}")
|
| 2001 |
print(f" ℹ️ train_baseline_models trains all baseline models automatically")
|
| 2002 |
+
# LLM often adds 'feature_columns' parameter that doesn't exist
|
| 2003 |
+
if "feature_columns" in arguments:
|
| 2004 |
+
feature_cols = arguments.pop("feature_columns")
|
| 2005 |
+
print(f" β Stripped invalid parameter 'feature_columns': {feature_cols}")
|
| 2006 |
+
print(f" ℹ️ train_baseline_models uses all numeric columns automatically")
|
| 2007 |
|
| 2008 |
if tool_name == "generate_model_report":
|
| 2009 |
# LLM uses 'file_path' instead of 'test_data_path'
|
|
|
|
| 3485 |
print(f"⚠️ INVALID TOOL NAME: '{tool_name}' (original: {tool_call.function.name})")
|
| 3486 |
print(f" Available tools: {', '.join(list(self.tool_functions.keys())[:10])}...")
|
| 3487 |
|
| 3488 |
+
# Explicit mappings for common LLM hallucinations
|
| 3489 |
+
tool_name_mappings = {
|
| 3490 |
+
"drop_columns": "execute_python_code", # No drop_columns tool, use code
|
| 3491 |
+
"select_columns": "execute_python_code", # No select_columns tool, use code
|
| 3492 |
+
"rename_columns": "execute_python_code", # No rename_columns tool, use code
|
| 3493 |
+
"encode_categorical_variables": "encode_categorical",
|
| 3494 |
+
"train_model": "train_baseline_models",
|
| 3495 |
+
"train_models": "train_baseline_models",
|
| 3496 |
+
"baseline_models": "train_baseline_models",
|
| 3497 |
+
"tune_hyperparameters": "hyperparameter_tuning",
|
| 3498 |
+
"hyperparameter_search": "hyperparameter_tuning",
|
| 3499 |
+
}
|
| 3500 |
+
|
| 3501 |
+
if tool_name in tool_name_mappings:
|
| 3502 |
+
mapped_tool = tool_name_mappings[tool_name]
|
| 3503 |
+
if mapped_tool == "execute_python_code":
|
| 3504 |
+
print(f" β Tool '{tool_name}' not available - LLM should use execute_python_code instead")
|
| 3505 |
+
# Skip and let LLM handle with code
|
| 3506 |
+
messages.append({
|
| 3507 |
+
"role": "tool",
|
| 3508 |
+
"tool_call_id": tool_call_id,
|
| 3509 |
+
"name": tool_name,
|
| 3510 |
+
"content": json.dumps({
|
| 3511 |
+
"error": f"Tool '{tool_name}' does not exist",
|
| 3512 |
+
"hint": "Use execute_python_code with pandas to perform this operation. Example: df.drop(columns=['col1', 'col2'])"
|
| 3513 |
+
})
|
| 3514 |
+
})
|
| 3515 |
+
continue
|
| 3516 |
+
else:
|
| 3517 |
+
tool_name = mapped_tool
|
| 3518 |
+
print(f" β Mapped to: {tool_name}")
|
| 3519 |
else:
|
| 3520 |
+
# Try fuzzy matching to recover
|
| 3521 |
+
from difflib import get_close_matches
|
| 3522 |
+
close_matches = get_close_matches(tool_name, self.tool_functions.keys(), n=1, cutoff=0.6)
|
| 3523 |
+
if close_matches:
|
| 3524 |
+
tool_name = close_matches[0]
|
| 3525 |
+
print(f" β Recovered using fuzzy match: {tool_name}")
|
| 3526 |
+
else:
|
| 3527 |
+
print(f" β Cannot recover tool name, skipping")
|
| 3528 |
+
messages.append({
|
| 3529 |
+
"role": "tool",
|
| 3530 |
+
"tool_call_id": tool_call_id,
|
| 3531 |
+
"name": "invalid_tool",
|
| 3532 |
+
"content": json.dumps({
|
| 3533 |
+
"error": f"Invalid tool: {tool_call.function.name}",
|
| 3534 |
+
"message": "Tool does not exist in registry. Available tools can be found in the tools list.",
|
| 3535 |
+
"hint": "Check spelling and use exact tool names from the tools registry."
|
| 3536 |
+
})
|
| 3537 |
})
|
| 3538 |
+
continue
|
|
|
|
| 3539 |
|
| 3540 |
# CRITICAL FIX 3: Check for corrupted tool names (length check)
|
| 3541 |
if len(str(tool_call.function.name)) > 100:
|
src/tools/model_training.py
CHANGED
|
@@ -128,6 +128,12 @@ def train_baseline_models(file_path: str, target_col: str,
|
|
| 128 |
}
|
| 129 |
|
| 130 |
# Train models based on task type
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
if task_type == "classification":
|
| 132 |
models = {
|
| 133 |
"logistic_regression": LogisticRegression(max_iter=1000, random_state=random_state),
|
|
@@ -137,10 +143,17 @@ def train_baseline_models(file_path: str, target_col: str,
|
|
| 137 |
"catboost": CatBoostClassifier(iterations=100, random_state=random_state, verbose=0, allow_writing_files=False)
|
| 138 |
}
|
| 139 |
|
| 140 |
-
for model_name, model in models.items():
|
| 141 |
try:
|
| 142 |
# Train
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
model.fit(X_train, y_train)
|
|
|
|
|
|
|
|
|
|
| 144 |
|
| 145 |
# Predict
|
| 146 |
y_pred_train = model.predict(X_train)
|
|
@@ -206,10 +219,18 @@ def train_baseline_models(file_path: str, target_col: str,
|
|
| 206 |
"catboost": CatBoostRegressor(iterations=100, random_state=random_state, verbose=0, allow_writing_files=False)
|
| 207 |
}
|
| 208 |
|
| 209 |
-
for model_name, model in models.items():
|
| 210 |
try:
|
| 211 |
# Train
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
model.fit(X_train, y_train)
|
|
|
|
|
|
|
|
|
|
| 213 |
|
| 214 |
# Predict
|
| 215 |
y_pred_train = model.predict(X_train)
|
|
@@ -358,6 +379,20 @@ def train_baseline_models(file_path: str, target_col: str,
|
|
| 358 |
else:
|
| 359 |
results["visualization_generated"] = False
|
| 360 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
return results
|
| 362 |
|
| 363 |
|
|
|
|
| 128 |
}
|
| 129 |
|
| 130 |
# Train models based on task type
|
| 131 |
+
import sys
|
| 132 |
+
print(f"\nπ Training {6 if task_type == 'classification' else 6} baseline models...", flush=True)
|
| 133 |
+
print(f" π Training set: {len(X_train):,} samples × {X_train.shape[1]} features", flush=True)
|
| 134 |
+
print(f" π Test set: {len(X_test):,} samples", flush=True)
|
| 135 |
+
sys.stdout.flush()
|
| 136 |
+
|
| 137 |
if task_type == "classification":
|
| 138 |
models = {
|
| 139 |
"logistic_regression": LogisticRegression(max_iter=1000, random_state=random_state),
|
|
|
|
| 143 |
"catboost": CatBoostClassifier(iterations=100, random_state=random_state, verbose=0, allow_writing_files=False)
|
| 144 |
}
|
| 145 |
|
| 146 |
+
for idx, (model_name, model) in enumerate(models.items(), 1):
|
| 147 |
try:
|
| 148 |
# Train
|
| 149 |
+
print(f"\n [{idx}/{len(models)}] Training {model_name}...", flush=True)
|
| 150 |
+
sys.stdout.flush()
|
| 151 |
+
import time
|
| 152 |
+
start_time = time.time()
|
| 153 |
model.fit(X_train, y_train)
|
| 154 |
+
elapsed = time.time() - start_time
|
| 155 |
+
print(f" β {model_name} trained in {elapsed:.1f}s", flush=True)
|
| 156 |
+
sys.stdout.flush()
|
| 157 |
|
| 158 |
# Predict
|
| 159 |
y_pred_train = model.predict(X_train)
|
|
|
|
| 219 |
"catboost": CatBoostRegressor(iterations=100, random_state=random_state, verbose=0, allow_writing_files=False)
|
| 220 |
}
|
| 221 |
|
| 222 |
+
for idx, (model_name, model) in enumerate(models.items(), 1):
|
| 223 |
try:
|
| 224 |
# Train
|
| 225 |
+
import sys
|
| 226 |
+
print(f"\n [{idx}/{len(models)}] Training {model_name}...", flush=True)
|
| 227 |
+
sys.stdout.flush()
|
| 228 |
+
import time
|
| 229 |
+
start_time = time.time()
|
| 230 |
model.fit(X_train, y_train)
|
| 231 |
+
elapsed = time.time() - start_time
|
| 232 |
+
print(f" β {model_name} trained in {elapsed:.1f}s", flush=True)
|
| 233 |
+
sys.stdout.flush()
|
| 234 |
|
| 235 |
# Predict
|
| 236 |
y_pred_train = model.predict(X_train)
|
|
|
|
| 379 |
else:
|
| 380 |
results["visualization_generated"] = False
|
| 381 |
|
| 382 |
+
# Print final summary
|
| 383 |
+
print(f"\n{'='*60}")
|
| 384 |
+
print(f"✅ TRAINING COMPLETE")
|
| 385 |
+
print(f"{'='*60}")
|
| 386 |
+
print(f"π Best Model: {best_model_name}")
|
| 387 |
+
if task_type == "regression":
|
| 388 |
+
print(f"π Test R²: {best_score:.4f}")
|
| 389 |
+
print(f"π Test RMSE: {results['models'][best_model_name]['test_metrics']['rmse']:.4f}")
|
| 390 |
+
else:
|
| 391 |
+
print(f"π Test F1: {best_score:.4f}")
|
| 392 |
+
print(f"π Test Accuracy: {results['models'][best_model_name]['test_metrics']['accuracy']:.4f}")
|
| 393 |
+
print(f"💾 Model saved: {results['best_model']['model_path']}")
|
| 394 |
+
print(f"{'='*60}\\n")
|
| 395 |
+
|
| 396 |
return results
|
| 397 |
|
| 398 |
|
src/utils/semantic_layer.py
CHANGED
|
@@ -56,7 +56,8 @@ class SemanticLayer:
|
|
| 56 |
if self.enabled:
|
| 57 |
try:
|
| 58 |
print(f"🧠 Loading SBERT model: {model_name}...")
|
| 59 |
-
|
|
|
|
| 60 |
# Use GPU if available
|
| 61 |
if torch.cuda.is_available():
|
| 62 |
self.model = self.model.to('cuda')
|
|
@@ -65,6 +66,7 @@ class SemanticLayer:
|
|
| 65 |
print("✅ SBERT loaded on CPU")
|
| 66 |
except Exception as e:
|
| 67 |
print(f"⚠️ Failed to load SBERT model: {e}")
|
|
|
|
| 68 |
self.enabled = False
|
| 69 |
else:
|
| 70 |
print("⚠️ SBERT semantic layer disabled (missing dependencies)")
|
|
|
|
| 56 |
if self.enabled:
|
| 57 |
try:
|
| 58 |
print(f"🧠 Loading SBERT model: {model_name}...")
|
| 59 |
+
# Try loading with trust_remote_code for better compatibility
|
| 60 |
+
self.model = SentenceTransformer(model_name, trust_remote_code=True)
|
| 61 |
# Use GPU if available
|
| 62 |
if torch.cuda.is_available():
|
| 63 |
self.model = self.model.to('cuda')
|
|
|
|
| 66 |
print("✅ SBERT loaded on CPU")
|
| 67 |
except Exception as e:
|
| 68 |
print(f"⚠️ Failed to load SBERT model: {e}")
|
| 69 |
+
print(f" Falling back to keyword-based routing (semantic features disabled)")
|
| 70 |
self.enabled = False
|
| 71 |
else:
|
| 72 |
print("⚠️ SBERT semantic layer disabled (missing dependencies)")
|