Spaces:

Pulastya0
/

Data-Science-Agent

Running

Pulastya B committed 22 days ago

Commit

f47adf5

1 Parent(s): f1ace72

Fix hyperparameter_tuning output_path bug: save to both artifact store AND user-requested path

CRITICAL BUG FIX:
- hyperparameter_tuning was saving models to artifact store temp paths (e.g., tmpa4t7tgci.pkl)
- But LLM was looking for models at user-specified output_path (e.g., hyperparameter_tuned_model.pkl)
- This caused generate_model_report and subsequent tools to fail with 'File not found'

SOLUTION:
- Don't overwrite output_path variable with artifact store's temp path
- Save to BOTH locations: artifact store (for internal tracking) AND user-requested path (for LLM)
- Return user-requested path in model_path field so LLM can find it

CHANGES:
1. hyperparameter_tuning(): Save to both artifact store AND output_path
2. train_ensemble_models() voting/stacking: Save to both artifact store AND output_path
3. train_ensemble_models() blending: Save to both artifact store AND output_path

All three save sites (one in hyperparameter_tuning, two in train_ensemble_models) now:
- Use actual_model_path for artifact store internal path
- Keep output_path unchanged for LLM
- Save model to output_path so LLM can find it
- Return output_path in result's model_path field

This was NOT a streaming bug; it was a tool implementation bug that already existed before streaming.

Files changed (1) hide show

src/tools/advanced_training.py +29 -5

src/tools/advanced_training.py CHANGED Viewed

@@ -293,9 +293,11 @@ def hyperparameter_tuning(
         }
     # Save model if output path provided
     if output_path:
         if ARTIFACT_STORE_AVAILABLE:
-            output_path = save_model_with_store(
                 model_data=final_model,
                 filename=os.path.basename(output_path),
                 metadata={
@@ -306,10 +308,15 @@ def hyperparameter_tuning(
                     "test_metrics": test_metrics
                 }
             )
         else:
             os.makedirs(os.path.dirname(output_path), exist_ok=True)
             joblib.dump(final_model, output_path)
-        print(f"💾 Model saved to: {output_path}")
     return {
         'status': 'success',
@@ -512,9 +519,11 @@ def train_ensemble_models(
             }
         # Save for blending
         if output_path:
             if ARTIFACT_STORE_AVAILABLE:
-                output_path = save_model_with_store(
                     model_data={
                         'base_models': dict(base_models),
                         'meta_model': meta_model,
@@ -528,6 +537,13 @@ def train_ensemble_models(
                         "num_base_models": len(base_models)
                     }
                 )
             else:
                 os.makedirs(os.path.dirname(output_path), exist_ok=True)
                 joblib.dump({
@@ -535,6 +551,7 @@ def train_ensemble_models(
                     'meta_model': meta_model,
                     'ensemble_type': 'blending'
                 }, output_path)
         return {
             'status': 'success',
@@ -573,9 +590,11 @@ def train_ensemble_models(
         improvement = ensemble_metrics['r2'] - best_individual_metric
     # Save model
     if output_path:
         if ARTIFACT_STORE_AVAILABLE:
-            output_path = save_model_with_store(
                 model_data=ensemble,
                 filename=os.path.basename(output_path),
                 metadata={
@@ -585,10 +604,15 @@ def train_ensemble_models(
                     "improvement_pct": float(improvement * 100)
                 }
             )
         else:
             os.makedirs(os.path.dirname(output_path), exist_ok=True)
             joblib.dump(ensemble, output_path)
-        print(f"💾 Ensemble model saved to: {output_path}")
     return {
         'status': 'success',

         }
     # Save model if output path provided
+    actual_model_path = None
     if output_path:
         if ARTIFACT_STORE_AVAILABLE:
+            # Save using artifact store (returns internal storage path)
+            actual_model_path = save_model_with_store(
                 model_data=final_model,
                 filename=os.path.basename(output_path),
                 metadata={
                     "test_metrics": test_metrics
                 }
             )
+            # Also save to user-requested path for LLM to find it
+            os.makedirs(os.path.dirname(output_path), exist_ok=True)
+            joblib.dump(final_model, output_path)
+            print(f"💾 Model saved to: {output_path} (artifact store: {actual_model_path})")
         else:
             os.makedirs(os.path.dirname(output_path), exist_ok=True)
             joblib.dump(final_model, output_path)
+            actual_model_path = output_path
+            print(f"💾 Model saved to: {output_path}")
     return {
         'status': 'success',
             }
         # Save for blending
+        actual_model_path = None
         if output_path:
             if ARTIFACT_STORE_AVAILABLE:
+                # Save using artifact store (returns internal storage path)
+                actual_model_path = save_model_with_store(
                     model_data={
                         'base_models': dict(base_models),
                         'meta_model': meta_model,
                         "num_base_models": len(base_models)
                     }
                 )
+                # Also save to user-requested path for LLM to find it
+                os.makedirs(os.path.dirname(output_path), exist_ok=True)
+                joblib.dump({
+                    'base_models': dict(base_models),
+                    'meta_model': meta_model,
+                    'ensemble_type': 'blending'
+                }, output_path)
             else:
                 os.makedirs(os.path.dirname(output_path), exist_ok=True)
                 joblib.dump({
                     'meta_model': meta_model,
                     'ensemble_type': 'blending'
                 }, output_path)
+                actual_model_path = output_path
         return {
             'status': 'success',
         improvement = ensemble_metrics['r2'] - best_individual_metric
     # Save model
+    actual_model_path = None
     if output_path:
         if ARTIFACT_STORE_AVAILABLE:
+            # Save using artifact store (returns internal storage path)
+            actual_model_path = save_model_with_store(
                 model_data=ensemble,
                 filename=os.path.basename(output_path),
                 metadata={
                     "improvement_pct": float(improvement * 100)
                 }
             )
+            # Also save to user-requested path for LLM to find it
+            os.makedirs(os.path.dirname(output_path), exist_ok=True)
+            joblib.dump(ensemble, output_path)
+            print(f"💾 Ensemble model saved to: {output_path} (artifact store: {actual_model_path})")
         else:
             os.makedirs(os.path.dirname(output_path), exist_ok=True)
             joblib.dump(ensemble, output_path)
+            actual_model_path = output_path
+            print(f"💾 Ensemble model saved to: {output_path}")
     return {
         'status': 'success',