Pulastya B committed on
Commit
74cd041
·
1 Parent(s): f35ddc4

fix: Correct indentation in _compress_tool_result method

Browse files
Files changed (1) hide show
  1. src/orchestrator.py +106 -106
src/orchestrator.py CHANGED
@@ -1095,118 +1095,118 @@ You are a DOER. Complete workflows based on user intent."""
1095
  return compressed
1096
 
1097
  def _compress_tool_result(self, tool_name: str, result: Dict[str, Any]) -> Dict[str, Any]:
1098
- """
1099
- Compress tool results for small context models (production-grade approach).
1100
-
1101
- Keep only:
1102
- - Status (success/failure)
1103
- - Key metrics (5-10 most important numbers)
1104
- - File paths created
1105
- - Next action hints
1106
-
1107
- Full results stored in workflow_history and session memory.
1108
- LLM doesn't need verbose output - only decision-making info.
1109
-
1110
- Args:
1111
- tool_name: Name of the tool executed
1112
- result: Full tool result dict
1113
 
1114
- Returns:
1115
- Compressed result dict (typically 100-500 tokens vs 5K-10K)
1116
- """
1117
- if not result.get("success", True):
1118
- # Keep full error info (critical for debugging)
1119
- return result
1120
-
1121
- compressed = {
1122
- "success": True,
1123
- "tool": tool_name
1124
- }
1125
-
1126
- # Tool-specific compression rules
1127
- if tool_name == "profile_dataset":
1128
- # Original: ~5K tokens with full stats
1129
- # Compressed: ~200 tokens with key metrics
1130
- r = result.get("result", {})
1131
- compressed["summary"] = {
1132
- "rows": r.get("num_rows"),
1133
- "cols": r.get("num_columns"),
1134
- "missing_pct": r.get("missing_percentage"),
1135
- "numeric_cols": len(r.get("numeric_columns", [])),
1136
- "categorical_cols": len(r.get("categorical_columns", [])),
1137
- "file_size_mb": round(r.get("memory_usage_mb", 0), 1),
1138
- "key_columns": list(r.get("columns", {}).keys())[:5] # First 5 columns only
1139
- }
1140
- compressed["next_steps"] = ["clean_missing_values", "detect_data_quality_issues"]
1141
-
1142
- elif tool_name == "detect_data_quality_issues":
1143
- r = result.get("result", {})
1144
- compressed["summary"] = {
1145
- "total_issues": r.get("total_issues", 0),
1146
- "critical_issues": r.get("critical_issues", 0),
1147
- "missing_data": r.get("has_missing"),
1148
- "outliers": r.get("has_outliers"),
1149
- "duplicates": r.get("has_duplicates")
1150
- }
1151
- compressed["next_steps"] = ["clean_missing_values", "handle_outliers"]
1152
-
1153
- elif tool_name in ["clean_missing_values", "handle_outliers", "encode_categorical"]:
1154
- r = result.get("result", {})
1155
- compressed["summary"] = {
1156
- "output_file": r.get("output_file", r.get("output_path")),
1157
- "rows_processed": r.get("rows_after", r.get("num_rows")),
1158
- "changes_made": bool(r.get("changes", {}) or r.get("imputed_columns"))
1159
  }
1160
- compressed["next_steps"] = ["Use this file for next step"]
1161
 
1162
- elif tool_name == "train_baseline_models":
1163
- r = result.get("result", {})
1164
- models = r.get("models", [])
1165
- if models:
1166
- best = max(models, key=lambda m: m.get("test_score", 0))
1167
  compressed["summary"] = {
1168
- "best_model": best.get("model"),
1169
- "test_score": round(best.get("test_score", 0), 4),
1170
- "train_score": round(best.get("train_score", 0), 4),
1171
- "task_type": r.get("task_type"),
1172
- "models_trained": len(models)
 
 
1173
  }
1174
- compressed["next_steps"] = ["hyperparameter_tuning", "generate_combined_eda_report"]
1175
-
1176
- elif tool_name in ["generate_plotly_dashboard", "generate_ydata_profiling_report", "generate_combined_eda_report"]:
1177
- r = result.get("result", {})
1178
- compressed["summary"] = {
1179
- "report_path": r.get("report_path", r.get("output_path")),
1180
- "report_type": tool_name,
1181
- "success": True
1182
- }
1183
- compressed["next_steps"] = ["Report ready for viewing"]
1184
-
1185
- elif tool_name == "hyperparameter_tuning":
1186
- r = result.get("result", {})
1187
- compressed["summary"] = {
1188
- "best_params": r.get("best_params", {}),
1189
- "best_score": round(r.get("best_score", 0), 4),
1190
- "model_type": r.get("model_type"),
1191
- "trials_completed": r.get("n_trials")
1192
- }
1193
- compressed["next_steps"] = ["perform_cross_validation", "generate_model_performance_plots"]
1194
-
1195
- else:
1196
- # Generic compression: Keep only key fields
1197
- r = result.get("result", {})
1198
- if isinstance(r, dict):
1199
- # Extract key fields (common patterns)
1200
- key_fields = {}
1201
- for key in ["output_path", "output_file", "status", "message", "success"]:
1202
- if key in r:
1203
- key_fields[key] = r[key]
1204
- compressed["summary"] = key_fields or {"result": "completed"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1205
  else:
1206
- compressed["summary"] = {"result": str(r)[:200] if r else "completed"}
1207
- compressed["next_steps"] = ["Continue workflow"]
1208
-
1209
- return compressed
 
 
 
 
 
 
 
 
 
 
1210
 
1211
 
1212
  def _parse_text_tool_calls(self, text_response: str) -> List[Dict[str, Any]]:
 
1095
  return compressed
1096
 
1097
  def _compress_tool_result(self, tool_name: str, result: Dict[str, Any]) -> Dict[str, Any]:
1098
+ """
1099
+ Compress tool results for small context models (production-grade approach).
 
 
 
 
 
 
 
 
 
 
 
 
 
1100
 
1101
+ Keep only:
1102
+ - Status (success/failure)
1103
+ - Key metrics (5-10 most important numbers)
1104
+ - File paths created
1105
+ - Next action hints
1106
+
1107
+ Full results stored in workflow_history and session memory.
1108
+ LLM doesn't need verbose output - only decision-making info.
1109
+
1110
+ Args:
1111
+ tool_name: Name of the tool executed
1112
+ result: Full tool result dict
1113
+
1114
+ Returns:
1115
+ Compressed result dict (typically 100-500 tokens vs 5K-10K)
1116
+ """
1117
+ if not result.get("success", True):
1118
+ # Keep full error info (critical for debugging)
1119
+ return result
1120
+
1121
+ compressed = {
1122
+ "success": True,
1123
+ "tool": tool_name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1124
  }
 
1125
 
1126
+ # Tool-specific compression rules
1127
+ if tool_name == "profile_dataset":
1128
+ # Original: ~5K tokens with full stats
1129
+ # Compressed: ~200 tokens with key metrics
1130
+ r = result.get("result", {})
1131
  compressed["summary"] = {
1132
+ "rows": r.get("num_rows"),
1133
+ "cols": r.get("num_columns"),
1134
+ "missing_pct": r.get("missing_percentage"),
1135
+ "numeric_cols": len(r.get("numeric_columns", [])),
1136
+ "categorical_cols": len(r.get("categorical_columns", [])),
1137
+ "file_size_mb": round(r.get("memory_usage_mb", 0), 1),
1138
+ "key_columns": list(r.get("columns", {}).keys())[:5] # First 5 columns only
1139
  }
1140
+ compressed["next_steps"] = ["clean_missing_values", "detect_data_quality_issues"]
1141
+
1142
+ elif tool_name == "detect_data_quality_issues":
1143
+ r = result.get("result", {})
1144
+ compressed["summary"] = {
1145
+ "total_issues": r.get("total_issues", 0),
1146
+ "critical_issues": r.get("critical_issues", 0),
1147
+ "missing_data": r.get("has_missing"),
1148
+ "outliers": r.get("has_outliers"),
1149
+ "duplicates": r.get("has_duplicates")
1150
+ }
1151
+ compressed["next_steps"] = ["clean_missing_values", "handle_outliers"]
1152
+
1153
+ elif tool_name in ["clean_missing_values", "handle_outliers", "encode_categorical"]:
1154
+ r = result.get("result", {})
1155
+ compressed["summary"] = {
1156
+ "output_file": r.get("output_file", r.get("output_path")),
1157
+ "rows_processed": r.get("rows_after", r.get("num_rows")),
1158
+ "changes_made": bool(r.get("changes", {}) or r.get("imputed_columns"))
1159
+ }
1160
+ compressed["next_steps"] = ["Use this file for next step"]
1161
+
1162
+ elif tool_name == "train_baseline_models":
1163
+ r = result.get("result", {})
1164
+ models = r.get("models", [])
1165
+ if models:
1166
+ best = max(models, key=lambda m: m.get("test_score", 0))
1167
+ compressed["summary"] = {
1168
+ "best_model": best.get("model"),
1169
+ "test_score": round(best.get("test_score", 0), 4),
1170
+ "train_score": round(best.get("train_score", 0), 4),
1171
+ "task_type": r.get("task_type"),
1172
+ "models_trained": len(models)
1173
+ }
1174
+ compressed["next_steps"] = ["hyperparameter_tuning", "generate_combined_eda_report"]
1175
+
1176
+ elif tool_name in ["generate_plotly_dashboard", "generate_ydata_profiling_report", "generate_combined_eda_report"]:
1177
+ r = result.get("result", {})
1178
+ compressed["summary"] = {
1179
+ "report_path": r.get("report_path", r.get("output_path")),
1180
+ "report_type": tool_name,
1181
+ "success": True
1182
+ }
1183
+ compressed["next_steps"] = ["Report ready for viewing"]
1184
+
1185
+ elif tool_name == "hyperparameter_tuning":
1186
+ r = result.get("result", {})
1187
+ compressed["summary"] = {
1188
+ "best_params": r.get("best_params", {}),
1189
+ "best_score": round(r.get("best_score", 0), 4),
1190
+ "model_type": r.get("model_type"),
1191
+ "trials_completed": r.get("n_trials")
1192
+ }
1193
+ compressed["next_steps"] = ["perform_cross_validation", "generate_model_performance_plots"]
1194
+
1195
  else:
1196
+ # Generic compression: Keep only key fields
1197
+ r = result.get("result", {})
1198
+ if isinstance(r, dict):
1199
+ # Extract key fields (common patterns)
1200
+ key_fields = {}
1201
+ for key in ["output_path", "output_file", "status", "message", "success"]:
1202
+ if key in r:
1203
+ key_fields[key] = r[key]
1204
+ compressed["summary"] = key_fields or {"result": "completed"}
1205
+ else:
1206
+ compressed["summary"] = {"result": str(r)[:200] if r else "completed"}
1207
+ compressed["next_steps"] = ["Continue workflow"]
1208
+
1209
+ return compressed
1210
 
1211
 
1212
  def _parse_text_tool_calls(self, text_response: str) -> List[Dict[str, Any]]: