Pulastya B committed on
Commit
5fb3ea4
·
1 Parent(s): 230b136

Fix syntax error: Proper try-except indentation in compression function

Browse files

- Fixed indentation of all elif/else blocks inside try block
- Removed duplicate elif statement
- Moved return statement inside try block before except clause
- All compression logic now properly wrapped in defensive try-except

This fixes SyntaxError that prevented container from starting.

Files changed (1) hide show
  1. src/orchestrator.py +92 -92
src/orchestrator.py CHANGED
@@ -1134,103 +1134,103 @@ You are a DOER. Complete workflows based on user intent."""
1134
  "success": True,
1135
  "tool": tool_name
1136
  }
1137
-
1138
- # Tool-specific compression rules
1139
- if tool_name == "profile_dataset":
1140
- # Original: ~5K tokens with full stats
1141
- # Compressed: ~200 tokens with key metrics
1142
- r = result.get("result", {})
1143
- compressed["summary"] = {
1144
- "rows": r.get("num_rows"),
1145
- "cols": r.get("num_columns"),
1146
- "missing_pct": r.get("missing_percentage"),
1147
- "numeric_cols": len(r.get("numeric_columns", [])),
1148
- "categorical_cols": len(r.get("categorical_columns", [])),
1149
- "file_size_mb": round(r.get("memory_usage_mb", 0), 1),
1150
- "key_columns": list(r.get("columns", {}).keys())[:5] # First 5 columns only
1151
- }
1152
- compressed["next_steps"] = ["clean_missing_values", "detect_data_quality_issues"]
1153
 
1154
- elif tool_name == "detect_data_quality_issues":
1155
- r = result.get("result", {})
1156
- compressed["summary"] = {
1157
- "total_issues": r.get("total_issues", 0),
1158
- "critical_issues": r.get("critical_issues", 0),
1159
- "missing_data": r.get("has_missing"),
1160
- "outliers": r.get("has_outliers"),
1161
- "duplicates": r.get("has_duplicates")
1162
- }
1163
- compressed["next_steps"] = ["clean_missing_values", "handle_outliers"]
1164
-
1165
- elif tool_name in ["clean_missing_values", "handle_outliers", "encode_categorical"]:
1166
- r = result.get("result", {})
1167
- compressed["summary"] = {
1168
- "output_file": r.get("output_file", r.get("output_path")),
1169
- "rows_processed": r.get("rows_after", r.get("num_rows")),
1170
- "changes_made": bool(r.get("changes", {}) or r.get("imputed_columns"))
1171
- }
1172
- compressed["next_steps"] = ["Use this file for next step"]
1173
-
1174
- elif tool_name == "train_baseline_models":
1175
- r = result.get("result", {})
1176
- models = r.get("models", [])
1177
- if models and isinstance(models, list) and len(models) > 0:
1178
- # Filter to only dict entries (defensive)
1179
- valid_models = [m for m in models if isinstance(m, dict) and "test_score" in m]
1180
- if valid_models:
1181
- best = max(valid_models, key=lambda m: m.get("test_score", 0))
1182
- compressed["summary"] = {
1183
- "best_model": best.get("model"),
1184
- "test_score": round(best.get("test_score", 0), 4),
1185
- "train_score": round(best.get("train_score", 0), 4),
1186
- "task_type": r.get("task_type"),
1187
- "models_trained": len(valid_models)
1188
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1189
  else:
1190
- # Fallback if no valid models
1191
- compressed["summary"] = {
1192
- "task_type": r.get("task_type"),
1193
- "status": "No valid models trained"
1194
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1195
  else:
1196
- compressed["summary"] = {"status": "No models found"}
1197
- compressed["next_steps"] = ["hyperparameter_tuning", "generate_combined_eda_report"]
1198
-
1199
- elif tool_name in ["generate_plotly_dashboard", "generate_ydata_profiling_report", "generate_combined_eda_report"]:
1200
- r = result.get("result", {})
1201
- compressed["summary"] = {
1202
- "report_path": r.get("report_path", r.get("output_path")),
1203
- "report_type": tool_name,
1204
- "success": True
1205
- }
1206
- compressed["next_steps"] = ["Report ready for viewing"]
1207
-
1208
- elif tool_name == "hyperparameter_tuning":
1209
- r = result.get("result", {})
1210
- compressed["summary"] = {
1211
- "best_params": r.get("best_params", {}),
1212
- "best_score": round(r.get("best_score", 0), 4),
1213
- "model_type": r.get("model_type"),
1214
- "trials_completed": r.get("n_trials")
1215
- }
1216
- compressed["next_steps"] = ["perform_cross_validation", "generate_model_performance_plots"]
1217
 
1218
- else:
1219
- # Generic compression: Keep only key fields
1220
- r = result.get("result", {})
1221
- if isinstance(r, dict):
1222
- # Extract key fields (common patterns)
1223
- key_fields = {}
1224
- for key in ["output_path", "output_file", "status", "message", "success"]:
1225
- if key in r:
1226
- key_fields[key] = r[key]
1227
- compressed["summary"] = key_fields or {"result": "completed"}
1228
- else:
1229
- compressed["summary"] = {"result": str(r)[:200] if r else "completed"}
1230
- compressed["next_steps"] = ["Continue workflow"]
1231
-
1232
  return compressed
1233
-
1234
  except Exception as e:
1235
  # If compression fails, return minimal safe result
1236
  print(f"⚠️ Compression failed for {tool_name}: {str(e)}")
 
1134
  "success": True,
1135
  "tool": tool_name
1136
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1137
 
1138
+ # Tool-specific compression rules
1139
+ if tool_name == "profile_dataset":
1140
+ # Original: ~5K tokens with full stats
1141
+ # Compressed: ~200 tokens with key metrics
1142
+ r = result.get("result", {})
1143
+ compressed["summary"] = {
1144
+ "rows": r.get("num_rows"),
1145
+ "cols": r.get("num_columns"),
1146
+ "missing_pct": r.get("missing_percentage"),
1147
+ "numeric_cols": len(r.get("numeric_columns", [])),
1148
+ "categorical_cols": len(r.get("categorical_columns", [])),
1149
+ "file_size_mb": round(r.get("memory_usage_mb", 0), 1),
1150
+ "key_columns": list(r.get("columns", {}).keys())[:5] # First 5 columns only
1151
+ }
1152
+ compressed["next_steps"] = ["clean_missing_values", "detect_data_quality_issues"]
1153
+
1154
+ elif tool_name == "detect_data_quality_issues":
1155
+ r = result.get("result", {})
1156
+ compressed["summary"] = {
1157
+ "total_issues": r.get("total_issues", 0),
1158
+ "critical_issues": r.get("critical_issues", 0),
1159
+ "missing_data": r.get("has_missing"),
1160
+ "outliers": r.get("has_outliers"),
1161
+ "duplicates": r.get("has_duplicates")
1162
+ }
1163
+ compressed["next_steps"] = ["clean_missing_values", "handle_outliers"]
1164
+
1165
+ elif tool_name in ["clean_missing_values", "handle_outliers", "encode_categorical"]:
1166
+ r = result.get("result", {})
1167
+ compressed["summary"] = {
1168
+ "output_file": r.get("output_file", r.get("output_path")),
1169
+ "rows_processed": r.get("rows_after", r.get("num_rows")),
1170
+ "changes_made": bool(r.get("changes", {}) or r.get("imputed_columns"))
1171
+ }
1172
+ compressed["next_steps"] = ["Use this file for next step"]
1173
+
1174
+ elif tool_name == "train_baseline_models":
1175
+ r = result.get("result", {})
1176
+ models = r.get("models", [])
1177
+ if models and isinstance(models, list) and len(models) > 0:
1178
+ # Filter to only dict entries (defensive)
1179
+ valid_models = [m for m in models if isinstance(m, dict) and "test_score" in m]
1180
+ if valid_models:
1181
+ best = max(valid_models, key=lambda m: m.get("test_score", 0))
1182
+ compressed["summary"] = {
1183
+ "best_model": best.get("model"),
1184
+ "test_score": round(best.get("test_score", 0), 4),
1185
+ "train_score": round(best.get("train_score", 0), 4),
1186
+ "task_type": r.get("task_type"),
1187
+ "models_trained": len(valid_models)
1188
+ }
1189
+ else:
1190
+ # Fallback if no valid models
1191
+ compressed["summary"] = {
1192
+ "task_type": r.get("task_type"),
1193
+ "status": "No valid models trained"
1194
+ }
1195
  else:
1196
+ compressed["summary"] = {"status": "No models found"}
1197
+ compressed["next_steps"] = ["hyperparameter_tuning", "generate_combined_eda_report"]
1198
+
1199
+ elif tool_name in ["generate_plotly_dashboard", "generate_ydata_profiling_report", "generate_combined_eda_report"]:
1200
+ r = result.get("result", {})
1201
+ compressed["summary"] = {
1202
+ "report_path": r.get("report_path", r.get("output_path")),
1203
+ "report_type": tool_name,
1204
+ "success": True
1205
+ }
1206
+ compressed["next_steps"] = ["Report ready for viewing"]
1207
+
1208
+ elif tool_name == "hyperparameter_tuning":
1209
+ r = result.get("result", {})
1210
+ compressed["summary"] = {
1211
+ "best_params": r.get("best_params", {}),
1212
+ "best_score": round(r.get("best_score", 0), 4),
1213
+ "model_type": r.get("model_type"),
1214
+ "trials_completed": r.get("n_trials")
1215
+ }
1216
+ compressed["next_steps"] = ["perform_cross_validation", "generate_model_performance_plots"]
1217
+
1218
  else:
1219
+ # Generic compression: Keep only key fields
1220
+ r = result.get("result", {})
1221
+ if isinstance(r, dict):
1222
+ # Extract key fields (common patterns)
1223
+ key_fields = {}
1224
+ for key in ["output_path", "output_file", "status", "message", "success"]:
1225
+ if key in r:
1226
+ key_fields[key] = r[key]
1227
+ compressed["summary"] = key_fields or {"result": "completed"}
1228
+ else:
1229
+ compressed["summary"] = {"result": str(r)[:200] if r else "completed"}
1230
+ compressed["next_steps"] = ["Continue workflow"]
 
 
 
 
 
 
 
 
 
1231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1232
  return compressed
1233
+
1234
  except Exception as e:
1235
  # If compression fails, return minimal safe result
1236
  print(f"⚠️ Compression failed for {tool_name}: {str(e)}")