Spaces:
Running
Running
Pulastya B
committed on
Commit
·
74cd041
1
Parent(s):
f35ddc4
fix: Correct indentation in _compress_tool_result method
Browse files- src/orchestrator.py +106 -106
src/orchestrator.py
CHANGED
|
@@ -1095,118 +1095,118 @@ You are a DOER. Complete workflows based on user intent."""
|
|
| 1095 |
return compressed
|
| 1096 |
|
| 1097 |
def _compress_tool_result(self, tool_name: str, result: Dict[str, Any]) -> Dict[str, Any]:
|
| 1098 |
-
|
| 1099 |
-
|
| 1100 |
-
|
| 1101 |
-
Keep only:
|
| 1102 |
-
- Status (success/failure)
|
| 1103 |
-
- Key metrics (5-10 most important numbers)
|
| 1104 |
-
- File paths created
|
| 1105 |
-
- Next action hints
|
| 1106 |
-
|
| 1107 |
-
Full results stored in workflow_history and session memory.
|
| 1108 |
-
LLM doesn't need verbose output - only decision-making info.
|
| 1109 |
-
|
| 1110 |
-
Args:
|
| 1111 |
-
tool_name: Name of the tool executed
|
| 1112 |
-
result: Full tool result dict
|
| 1113 |
|
| 1114 |
-
|
| 1115 |
-
|
| 1116 |
-
|
| 1117 |
-
|
| 1118 |
-
|
| 1119 |
-
|
| 1120 |
-
|
| 1121 |
-
|
| 1122 |
-
|
| 1123 |
-
|
| 1124 |
-
|
| 1125 |
-
|
| 1126 |
-
|
| 1127 |
-
|
| 1128 |
-
|
| 1129 |
-
|
| 1130 |
-
|
| 1131 |
-
|
| 1132 |
-
|
| 1133 |
-
|
| 1134 |
-
|
| 1135 |
-
"
|
| 1136 |
-
"
|
| 1137 |
-
"file_size_mb": round(r.get("memory_usage_mb", 0), 1),
|
| 1138 |
-
"key_columns": list(r.get("columns", {}).keys())[:5] # First 5 columns only
|
| 1139 |
-
}
|
| 1140 |
-
compressed["next_steps"] = ["clean_missing_values", "detect_data_quality_issues"]
|
| 1141 |
-
|
| 1142 |
-
elif tool_name == "detect_data_quality_issues":
|
| 1143 |
-
r = result.get("result", {})
|
| 1144 |
-
compressed["summary"] = {
|
| 1145 |
-
"total_issues": r.get("total_issues", 0),
|
| 1146 |
-
"critical_issues": r.get("critical_issues", 0),
|
| 1147 |
-
"missing_data": r.get("has_missing"),
|
| 1148 |
-
"outliers": r.get("has_outliers"),
|
| 1149 |
-
"duplicates": r.get("has_duplicates")
|
| 1150 |
-
}
|
| 1151 |
-
compressed["next_steps"] = ["clean_missing_values", "handle_outliers"]
|
| 1152 |
-
|
| 1153 |
-
elif tool_name in ["clean_missing_values", "handle_outliers", "encode_categorical"]:
|
| 1154 |
-
r = result.get("result", {})
|
| 1155 |
-
compressed["summary"] = {
|
| 1156 |
-
"output_file": r.get("output_file", r.get("output_path")),
|
| 1157 |
-
"rows_processed": r.get("rows_after", r.get("num_rows")),
|
| 1158 |
-
"changes_made": bool(r.get("changes", {}) or r.get("imputed_columns"))
|
| 1159 |
}
|
| 1160 |
-
compressed["next_steps"] = ["Use this file for next step"]
|
| 1161 |
|
| 1162 |
-
|
| 1163 |
-
|
| 1164 |
-
|
| 1165 |
-
|
| 1166 |
-
|
| 1167 |
compressed["summary"] = {
|
| 1168 |
-
"
|
| 1169 |
-
"
|
| 1170 |
-
"
|
| 1171 |
-
"
|
| 1172 |
-
"
|
|
|
|
|
|
|
| 1173 |
}
|
| 1174 |
-
|
| 1175 |
-
|
| 1176 |
-
|
| 1177 |
-
|
| 1178 |
-
|
| 1179 |
-
|
| 1180 |
-
|
| 1181 |
-
|
| 1182 |
-
|
| 1183 |
-
|
| 1184 |
-
|
| 1185 |
-
|
| 1186 |
-
|
| 1187 |
-
|
| 1188 |
-
|
| 1189 |
-
"
|
| 1190 |
-
|
| 1191 |
-
|
| 1192 |
-
|
| 1193 |
-
|
| 1194 |
-
|
| 1195 |
-
|
| 1196 |
-
|
| 1197 |
-
|
| 1198 |
-
|
| 1199 |
-
|
| 1200 |
-
|
| 1201 |
-
|
| 1202 |
-
|
| 1203 |
-
|
| 1204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1205 |
else:
|
| 1206 |
-
|
| 1207 |
-
|
| 1208 |
-
|
| 1209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1210 |
|
| 1211 |
|
| 1212 |
def _parse_text_tool_calls(self, text_response: str) -> List[Dict[str, Any]]:
|
|
|
|
| 1095 |
return compressed
|
| 1096 |
|
| 1097 |
def _compress_tool_result(self, tool_name: str, result: Dict[str, Any]) -> Dict[str, Any]:
    """
    Compress tool results for small context models (production-grade approach).

    Keep only:
    - Status (success/failure)
    - Key metrics (5-10 most important numbers)
    - File paths created
    - Next action hints

    Full results stored in workflow_history and session memory.
    LLM doesn't need verbose output - only decision-making info.

    A result whose ``success`` flag is falsy is returned unchanged (same
    object) so that no error detail is lost. A missing, ``None`` or
    non-dict ``result["result"]`` payload is treated as empty by the
    tool-specific branches instead of raising.

    Args:
        tool_name: Name of the tool executed
        result: Full tool result dict

    Returns:
        Compressed result dict (typically 100-500 tokens vs 5K-10K) with
        the keys ``success``, ``tool``, ``summary`` and ``next_steps``.
    """
    if not result.get("success", True):
        # Keep full error info (critical for debugging)
        return result

    def _rounded(value: Any, digits: int) -> Any:
        """Round *value*; return None when it cannot be rounded (e.g. None)."""
        try:
            return round(value, digits)
        except TypeError:
            return None

    def _test_score(model: Dict[str, Any]) -> Any:
        """Sort key for picking the best model.

        A missing score counts as 0 (the historical default); an explicit
        None ranks last so it can never be compared against a number.
        """
        score = model.get("test_score", 0)
        return float("-inf") if score is None else score

    raw = result.get("result", {})
    # The tool-specific branches read fields by key. Treat anything that is
    # not a dict (None, str, list, ...) as an empty payload so they degrade
    # to None/0 values rather than raising AttributeError.
    payload = raw if isinstance(raw, dict) else {}

    compressed: Dict[str, Any] = {
        "success": True,
        "tool": tool_name,
    }

    # Tool-specific compression rules
    if tool_name == "profile_dataset":
        # Original: ~5K tokens with full stats
        # Compressed: ~200 tokens with key metrics
        columns = payload.get("columns") or {}
        if not isinstance(columns, (dict, list, tuple)):
            columns = {}
        compressed["summary"] = {
            "rows": payload.get("num_rows"),
            "cols": payload.get("num_columns"),
            "missing_pct": payload.get("missing_percentage"),
            "numeric_cols": len(payload.get("numeric_columns") or []),
            "categorical_cols": len(payload.get("categorical_columns") or []),
            "file_size_mb": _rounded(payload.get("memory_usage_mb", 0), 1),
            # First 5 columns only; list() yields the keys of a dict and the
            # items of a list, so both payload shapes are accepted.
            "key_columns": list(columns)[:5],
        }
        compressed["next_steps"] = ["clean_missing_values", "detect_data_quality_issues"]

    elif tool_name == "detect_data_quality_issues":
        compressed["summary"] = {
            "total_issues": payload.get("total_issues", 0),
            "critical_issues": payload.get("critical_issues", 0),
            "missing_data": payload.get("has_missing"),
            "outliers": payload.get("has_outliers"),
            "duplicates": payload.get("has_duplicates"),
        }
        compressed["next_steps"] = ["clean_missing_values", "handle_outliers"]

    elif tool_name in ["clean_missing_values", "handle_outliers", "encode_categorical"]:
        compressed["summary"] = {
            "output_file": payload.get("output_file", payload.get("output_path")),
            "rows_processed": payload.get("rows_after", payload.get("num_rows")),
            "changes_made": bool(payload.get("changes", {}) or payload.get("imputed_columns")),
        }
        compressed["next_steps"] = ["Use this file for next step"]

    elif tool_name == "train_baseline_models":
        models = payload.get("models") or []
        if not isinstance(models, (list, tuple)):
            models = []
        # Only dict entries carry scores; anything else is ignored when
        # choosing the best model but still counted in models_trained.
        candidates = [m for m in models if isinstance(m, dict)]
        if candidates:
            best = max(candidates, key=_test_score)
            compressed["summary"] = {
                "best_model": best.get("model"),
                "test_score": _rounded(best.get("test_score", 0), 4),
                "train_score": _rounded(best.get("train_score", 0), 4),
                "task_type": payload.get("task_type"),
                "models_trained": len(models),
            }
        else:
            # Always emit a summary so every branch produces the same keys.
            compressed["summary"] = {
                "task_type": payload.get("task_type"),
                "models_trained": len(models),
            }
        compressed["next_steps"] = ["hyperparameter_tuning", "generate_combined_eda_report"]

    elif tool_name in ["generate_plotly_dashboard", "generate_ydata_profiling_report", "generate_combined_eda_report"]:
        compressed["summary"] = {
            "report_path": payload.get("report_path", payload.get("output_path")),
            "report_type": tool_name,
            "success": True,
        }
        compressed["next_steps"] = ["Report ready for viewing"]

    elif tool_name == "hyperparameter_tuning":
        compressed["summary"] = {
            "best_params": payload.get("best_params", {}),
            "best_score": _rounded(payload.get("best_score", 0), 4),
            "model_type": payload.get("model_type"),
            "trials_completed": payload.get("n_trials"),
        }
        compressed["next_steps"] = ["perform_cross_validation", "generate_model_performance_plots"]

    else:
        # Generic compression: Keep only key fields
        if isinstance(raw, dict):
            # Extract key fields (common patterns)
            wanted = ("output_path", "output_file", "status", "message", "success")
            key_fields = {key: raw[key] for key in wanted if key in raw}
            compressed["summary"] = key_fields or {"result": "completed"}
        else:
            # Non-dict payloads are stringified and capped at 200 characters.
            compressed["summary"] = {"result": str(raw)[:200] if raw else "completed"}
        compressed["next_steps"] = ["Continue workflow"]

    return compressed
|
| 1210 |
|
| 1211 |
|
| 1212 |
def _parse_text_tool_calls(self, text_response: str) -> List[Dict[str, Any]]:
|