Spaces:
Running
Running
Pulastya B
committed on
Commit
·
5fb3ea4
1
Parent(s):
230b136
Fix syntax error: Proper try-except indentation in compression function
Browse files
- Fixed indentation of all elif/else blocks inside try block
- Removed duplicate elif statement
- Moved return statement inside try block before except clause
- All compression logic now properly wrapped in defensive try-except
This fixes the SyntaxError that prevented the container from starting.
- src/orchestrator.py +92 -92
src/orchestrator.py
CHANGED
|
@@ -1134,103 +1134,103 @@ You are a DOER. Complete workflows based on user intent."""
|
|
| 1134 |
"success": True,
|
| 1135 |
"tool": tool_name
|
| 1136 |
}
|
| 1137 |
-
|
| 1138 |
-
# Tool-specific compression rules
|
| 1139 |
-
if tool_name == "profile_dataset":
|
| 1140 |
-
# Original: ~5K tokens with full stats
|
| 1141 |
-
# Compressed: ~200 tokens with key metrics
|
| 1142 |
-
r = result.get("result", {})
|
| 1143 |
-
compressed["summary"] = {
|
| 1144 |
-
"rows": r.get("num_rows"),
|
| 1145 |
-
"cols": r.get("num_columns"),
|
| 1146 |
-
"missing_pct": r.get("missing_percentage"),
|
| 1147 |
-
"numeric_cols": len(r.get("numeric_columns", [])),
|
| 1148 |
-
"categorical_cols": len(r.get("categorical_columns", [])),
|
| 1149 |
-
"file_size_mb": round(r.get("memory_usage_mb", 0), 1),
|
| 1150 |
-
"key_columns": list(r.get("columns", {}).keys())[:5] # First 5 columns only
|
| 1151 |
-
}
|
| 1152 |
-
compressed["next_steps"] = ["clean_missing_values", "detect_data_quality_issues"]
|
| 1153 |
|
| 1154 |
-
|
| 1155 |
-
|
| 1156 |
-
|
| 1157 |
-
|
| 1158 |
-
|
| 1159 |
-
"
|
| 1160 |
-
|
| 1161 |
-
|
| 1162 |
-
|
| 1163 |
-
|
| 1164 |
-
|
| 1165 |
-
|
| 1166 |
-
|
| 1167 |
-
|
| 1168 |
-
"
|
| 1169 |
-
|
| 1170 |
-
|
| 1171 |
-
|
| 1172 |
-
|
| 1173 |
-
|
| 1174 |
-
|
| 1175 |
-
|
| 1176 |
-
|
| 1177 |
-
|
| 1178 |
-
|
| 1179 |
-
|
| 1180 |
-
|
| 1181 |
-
|
| 1182 |
-
|
| 1183 |
-
|
| 1184 |
-
|
| 1185 |
-
|
| 1186 |
-
|
| 1187 |
-
|
| 1188 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1189 |
else:
|
| 1190 |
-
|
| 1191 |
-
|
| 1192 |
-
|
| 1193 |
-
|
| 1194 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1195 |
else:
|
| 1196 |
-
|
| 1197 |
-
|
| 1198 |
-
|
| 1199 |
-
|
| 1200 |
-
|
| 1201 |
-
|
| 1202 |
-
|
| 1203 |
-
|
| 1204 |
-
|
| 1205 |
-
|
| 1206 |
-
|
| 1207 |
-
|
| 1208 |
-
elif tool_name == "hyperparameter_tuning":
|
| 1209 |
-
r = result.get("result", {})
|
| 1210 |
-
compressed["summary"] = {
|
| 1211 |
-
"best_params": r.get("best_params", {}),
|
| 1212 |
-
"best_score": round(r.get("best_score", 0), 4),
|
| 1213 |
-
"model_type": r.get("model_type"),
|
| 1214 |
-
"trials_completed": r.get("n_trials")
|
| 1215 |
-
}
|
| 1216 |
-
compressed["next_steps"] = ["perform_cross_validation", "generate_model_performance_plots"]
|
| 1217 |
|
| 1218 |
-
else:
|
| 1219 |
-
# Generic compression: Keep only key fields
|
| 1220 |
-
r = result.get("result", {})
|
| 1221 |
-
if isinstance(r, dict):
|
| 1222 |
-
# Extract key fields (common patterns)
|
| 1223 |
-
key_fields = {}
|
| 1224 |
-
for key in ["output_path", "output_file", "status", "message", "success"]:
|
| 1225 |
-
if key in r:
|
| 1226 |
-
key_fields[key] = r[key]
|
| 1227 |
-
compressed["summary"] = key_fields or {"result": "completed"}
|
| 1228 |
-
else:
|
| 1229 |
-
compressed["summary"] = {"result": str(r)[:200] if r else "completed"}
|
| 1230 |
-
compressed["next_steps"] = ["Continue workflow"]
|
| 1231 |
-
|
| 1232 |
return compressed
|
| 1233 |
-
|
| 1234 |
except Exception as e:
|
| 1235 |
# If compression fails, return minimal safe result
|
| 1236 |
print(f"⚠️ Compression failed for {tool_name}: {str(e)}")
|
|
|
|
| 1134 |
"success": True,
|
| 1135 |
"tool": tool_name
|
| 1136 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1137 |
|
| 1138 |
+
# Tool-specific compression rules
|
| 1139 |
+
if tool_name == "profile_dataset":
|
| 1140 |
+
# Original: ~5K tokens with full stats
|
| 1141 |
+
# Compressed: ~200 tokens with key metrics
|
| 1142 |
+
r = result.get("result", {})
|
| 1143 |
+
compressed["summary"] = {
|
| 1144 |
+
"rows": r.get("num_rows"),
|
| 1145 |
+
"cols": r.get("num_columns"),
|
| 1146 |
+
"missing_pct": r.get("missing_percentage"),
|
| 1147 |
+
"numeric_cols": len(r.get("numeric_columns", [])),
|
| 1148 |
+
"categorical_cols": len(r.get("categorical_columns", [])),
|
| 1149 |
+
"file_size_mb": round(r.get("memory_usage_mb", 0), 1),
|
| 1150 |
+
"key_columns": list(r.get("columns", {}).keys())[:5] # First 5 columns only
|
| 1151 |
+
}
|
| 1152 |
+
compressed["next_steps"] = ["clean_missing_values", "detect_data_quality_issues"]
|
| 1153 |
+
|
| 1154 |
+
elif tool_name == "detect_data_quality_issues":
|
| 1155 |
+
r = result.get("result", {})
|
| 1156 |
+
compressed["summary"] = {
|
| 1157 |
+
"total_issues": r.get("total_issues", 0),
|
| 1158 |
+
"critical_issues": r.get("critical_issues", 0),
|
| 1159 |
+
"missing_data": r.get("has_missing"),
|
| 1160 |
+
"outliers": r.get("has_outliers"),
|
| 1161 |
+
"duplicates": r.get("has_duplicates")
|
| 1162 |
+
}
|
| 1163 |
+
compressed["next_steps"] = ["clean_missing_values", "handle_outliers"]
|
| 1164 |
+
|
| 1165 |
+
elif tool_name in ["clean_missing_values", "handle_outliers", "encode_categorical"]:
|
| 1166 |
+
r = result.get("result", {})
|
| 1167 |
+
compressed["summary"] = {
|
| 1168 |
+
"output_file": r.get("output_file", r.get("output_path")),
|
| 1169 |
+
"rows_processed": r.get("rows_after", r.get("num_rows")),
|
| 1170 |
+
"changes_made": bool(r.get("changes", {}) or r.get("imputed_columns"))
|
| 1171 |
+
}
|
| 1172 |
+
compressed["next_steps"] = ["Use this file for next step"]
|
| 1173 |
+
|
| 1174 |
+
elif tool_name == "train_baseline_models":
|
| 1175 |
+
r = result.get("result", {})
|
| 1176 |
+
models = r.get("models", [])
|
| 1177 |
+
if models and isinstance(models, list) and len(models) > 0:
|
| 1178 |
+
# Filter to only dict entries (defensive)
|
| 1179 |
+
valid_models = [m for m in models if isinstance(m, dict) and "test_score" in m]
|
| 1180 |
+
if valid_models:
|
| 1181 |
+
best = max(valid_models, key=lambda m: m.get("test_score", 0))
|
| 1182 |
+
compressed["summary"] = {
|
| 1183 |
+
"best_model": best.get("model"),
|
| 1184 |
+
"test_score": round(best.get("test_score", 0), 4),
|
| 1185 |
+
"train_score": round(best.get("train_score", 0), 4),
|
| 1186 |
+
"task_type": r.get("task_type"),
|
| 1187 |
+
"models_trained": len(valid_models)
|
| 1188 |
+
}
|
| 1189 |
+
else:
|
| 1190 |
+
# Fallback if no valid models
|
| 1191 |
+
compressed["summary"] = {
|
| 1192 |
+
"task_type": r.get("task_type"),
|
| 1193 |
+
"status": "No valid models trained"
|
| 1194 |
+
}
|
| 1195 |
else:
|
| 1196 |
+
compressed["summary"] = {"status": "No models found"}
|
| 1197 |
+
compressed["next_steps"] = ["hyperparameter_tuning", "generate_combined_eda_report"]
|
| 1198 |
+
|
| 1199 |
+
elif tool_name in ["generate_plotly_dashboard", "generate_ydata_profiling_report", "generate_combined_eda_report"]:
|
| 1200 |
+
r = result.get("result", {})
|
| 1201 |
+
compressed["summary"] = {
|
| 1202 |
+
"report_path": r.get("report_path", r.get("output_path")),
|
| 1203 |
+
"report_type": tool_name,
|
| 1204 |
+
"success": True
|
| 1205 |
+
}
|
| 1206 |
+
compressed["next_steps"] = ["Report ready for viewing"]
|
| 1207 |
+
|
| 1208 |
+
elif tool_name == "hyperparameter_tuning":
|
| 1209 |
+
r = result.get("result", {})
|
| 1210 |
+
compressed["summary"] = {
|
| 1211 |
+
"best_params": r.get("best_params", {}),
|
| 1212 |
+
"best_score": round(r.get("best_score", 0), 4),
|
| 1213 |
+
"model_type": r.get("model_type"),
|
| 1214 |
+
"trials_completed": r.get("n_trials")
|
| 1215 |
+
}
|
| 1216 |
+
compressed["next_steps"] = ["perform_cross_validation", "generate_model_performance_plots"]
|
| 1217 |
+
|
| 1218 |
else:
|
| 1219 |
+
# Generic compression: Keep only key fields
|
| 1220 |
+
r = result.get("result", {})
|
| 1221 |
+
if isinstance(r, dict):
|
| 1222 |
+
# Extract key fields (common patterns)
|
| 1223 |
+
key_fields = {}
|
| 1224 |
+
for key in ["output_path", "output_file", "status", "message", "success"]:
|
| 1225 |
+
if key in r:
|
| 1226 |
+
key_fields[key] = r[key]
|
| 1227 |
+
compressed["summary"] = key_fields or {"result": "completed"}
|
| 1228 |
+
else:
|
| 1229 |
+
compressed["summary"] = {"result": str(r)[:200] if r else "completed"}
|
| 1230 |
+
compressed["next_steps"] = ["Continue workflow"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1231 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1232 |
return compressed
|
| 1233 |
+
|
| 1234 |
except Exception as e:
|
| 1235 |
# If compression fails, return minimal safe result
|
| 1236 |
print(f"⚠️ Compression failed for {tool_name}: {str(e)}")
|