Spaces:
Running
Running
Pulastya B commited on
Commit ·
6b731f7
1
Parent(s): fea1598
Made Major Changes to the Overall User Experience by adding a pipeline visualizer
Browse files- FRRONTEEEND/components/ChatInterface.tsx +124 -18
- FRRONTEEEND/components/PipelineView.tsx +348 -0
- src/orchestrator.py +601 -0
- src/reasoning/__init__.py +15 -0
- src/reasoning/evaluator.py +267 -0
- src/reasoning/findings.py +302 -0
- src/reasoning/reasoner.py +344 -0
- src/reasoning/synthesizer.py +195 -0
- src/routing/__init__.py +12 -0
- src/routing/intent_classifier.py +249 -0
FRRONTEEEND/components/ChatInterface.tsx
CHANGED
|
@@ -9,6 +9,7 @@ import remarkGfm from 'remark-gfm';
|
|
| 9 |
import { useAuth } from '../lib/AuthContext';
|
| 10 |
import { trackQuery, incrementSessionQueries, getHuggingFaceStatus } from '../lib/supabase';
|
| 11 |
import { SettingsModal } from './SettingsModal';
|
|
|
|
| 12 |
|
| 13 |
// HuggingFace logo SVG component for the export button
|
| 14 |
const HuggingFaceLogo = ({ className = "w-4 h-4" }: { className?: string }) => (
|
|
@@ -214,6 +215,12 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
|
|
| 214 |
const processedAnalysisRef = useRef<Set<string>>(new Set()); // Track processed analysis_complete events
|
| 215 |
const [sseReconnectTrigger, setSseReconnectTrigger] = useState(0); // Force SSE reconnection for follow-up queries
|
| 216 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
// Auth context for user tracking
|
| 218 |
const { user, isAuthenticated, dbSessionId, signOut } = useAuth();
|
| 219 |
|
|
@@ -349,17 +356,115 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
|
|
| 349 |
console.log(`🤖 Agent assigned: ${data.agent}`);
|
| 350 |
} else if (data.type === 'tool_executing') {
|
| 351 |
setCurrentStep(data.message || `🔧 Executing: ${data.tool}`);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
} else if (data.type === 'tool_completed') {
|
| 353 |
setCurrentStep(data.message || `✓ Completed: ${data.tool}`);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
} else if (data.type === 'tool_failed') {
|
| 355 |
setCurrentStep(data.message || `❌ Failed: ${data.tool}`);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
} else if (data.type === 'token_update') {
|
| 357 |
// Optional: Display token budget updates
|
| 358 |
console.log('💰 Token update:', data.message);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
} else if (data.type === 'analysis_failed') {
|
| 360 |
console.log('❌ Analysis failed', data);
|
| 361 |
setIsTyping(false);
|
| 362 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
// Show error message to user - add to sessions
|
| 364 |
setSessions(prev => prev.map(s => {
|
| 365 |
if (s.id === activeSessionId) {
|
|
@@ -382,6 +487,11 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
|
|
| 382 |
console.log('✅ Analysis completed', data.result);
|
| 383 |
setIsTyping(false);
|
| 384 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
// Create a unique key based on actual workflow content to prevent duplicates
|
| 386 |
// Use the last tool executed + summary hash for uniqueness
|
| 387 |
const lastTool = data.result?.workflow_history?.[data.result.workflow_history.length - 1]?.tool || 'unknown';
|
|
@@ -539,6 +649,12 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
|
|
| 539 |
|
| 540 |
// Show loading indicator immediately (for UI feedback)
|
| 541 |
setIsTyping(true);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 542 |
|
| 543 |
try {
|
| 544 |
|
|
@@ -1197,23 +1313,13 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
|
|
| 1197 |
))
|
| 1198 |
)}
|
| 1199 |
{isTyping && (
|
| 1200 |
-
<
|
| 1201 |
-
|
| 1202 |
-
|
| 1203 |
-
|
| 1204 |
-
|
| 1205 |
-
|
| 1206 |
-
|
| 1207 |
-
<span className="w-1.5 h-1.5 bg-emerald-500 rounded-full animate-bounce [animation-delay:-0.3s]"></span>
|
| 1208 |
-
<span className="w-1.5 h-1.5 bg-emerald-500 rounded-full animate-bounce [animation-delay:-0.15s]"></span>
|
| 1209 |
-
<span className="w-1.5 h-1.5 bg-emerald-500 rounded-full animate-bounce"></span>
|
| 1210 |
-
</div>
|
| 1211 |
-
<span className="text-sm text-white/60">
|
| 1212 |
-
{currentStep || '🔧 Starting analysis...'}
|
| 1213 |
-
</span>
|
| 1214 |
-
</div>
|
| 1215 |
-
</div>
|
| 1216 |
-
</div>
|
| 1217 |
)}
|
| 1218 |
</div>
|
| 1219 |
|
|
@@ -1394,7 +1500,7 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
|
|
| 1394 |
const allPlots: Array<{title: string, url: string, type?: string}> = [];
|
| 1395 |
const allReports: Array<{name: string, path: string}> = [];
|
| 1396 |
const allDataFiles: string[] = [];
|
| 1397 |
-
const baselineModels = ['xgboost', 'random_forest', '
|
| 1398 |
const foundModels = new Set<string>();
|
| 1399 |
|
| 1400 |
activeSession.messages.forEach(msg => {
|
|
|
|
| 9 |
import { useAuth } from '../lib/AuthContext';
|
| 10 |
import { trackQuery, incrementSessionQueries, getHuggingFaceStatus } from '../lib/supabase';
|
| 11 |
import { SettingsModal } from './SettingsModal';
|
| 12 |
+
import { PipelineView, PipelineStep } from './PipelineView';
|
| 13 |
|
| 14 |
// HuggingFace logo SVG component for the export button
|
| 15 |
const HuggingFaceLogo = ({ className = "w-4 h-4" }: { className?: string }) => (
|
|
|
|
| 215 |
const processedAnalysisRef = useRef<Set<string>>(new Set()); // Track processed analysis_complete events
|
| 216 |
const [sseReconnectTrigger, setSseReconnectTrigger] = useState(0); // Force SSE reconnection for follow-up queries
|
| 217 |
|
| 218 |
+
// Pipeline visualization state (reasoning loop)
|
| 219 |
+
const [pipelineSteps, setPipelineSteps] = useState<PipelineStep[]>([]);
|
| 220 |
+
const [pipelineMode, setPipelineMode] = useState<string | null>(null);
|
| 221 |
+
const [pipelineHypotheses, setPipelineHypotheses] = useState<string[]>([]);
|
| 222 |
+
const pipelineStepCounterRef = useRef(0); // Unique step ID counter
|
| 223 |
+
|
| 224 |
// Auth context for user tracking
|
| 225 |
const { user, isAuthenticated, dbSessionId, signOut } = useAuth();
|
| 226 |
|
|
|
|
| 356 |
console.log(`🤖 Agent assigned: ${data.agent}`);
|
| 357 |
} else if (data.type === 'tool_executing') {
|
| 358 |
setCurrentStep(data.message || `🔧 Executing: ${data.tool}`);
|
| 359 |
+
// Add pipeline step if in reasoning mode
|
| 360 |
+
if (pipelineMode) {
|
| 361 |
+
const stepId = `act-${++pipelineStepCounterRef.current}`;
|
| 362 |
+
setPipelineSteps(prev => [...prev, {
|
| 363 |
+
id: stepId,
|
| 364 |
+
type: 'act',
|
| 365 |
+
status: 'active',
|
| 366 |
+
title: `Executing: ${data.tool}`,
|
| 367 |
+
subtitle: data.message || '',
|
| 368 |
+
tool: data.tool,
|
| 369 |
+
timestamp: new Date()
|
| 370 |
+
}]);
|
| 371 |
+
}
|
| 372 |
} else if (data.type === 'tool_completed') {
|
| 373 |
setCurrentStep(data.message || `✓ Completed: ${data.tool}`);
|
| 374 |
+
// Update pipeline step status
|
| 375 |
+
if (pipelineMode) {
|
| 376 |
+
setPipelineSteps(prev => prev.map(s =>
|
| 377 |
+
s.type === 'act' && s.status === 'active' ? { ...s, status: 'completed' as const } : s
|
| 378 |
+
));
|
| 379 |
+
}
|
| 380 |
} else if (data.type === 'tool_failed') {
|
| 381 |
setCurrentStep(data.message || `❌ Failed: ${data.tool}`);
|
| 382 |
+
// Update pipeline step status
|
| 383 |
+
if (pipelineMode) {
|
| 384 |
+
setPipelineSteps(prev => prev.map(s =>
|
| 385 |
+
s.type === 'act' && s.status === 'active' ? { ...s, status: 'failed' as const, subtitle: data.message || 'Tool failed' } : s
|
| 386 |
+
));
|
| 387 |
+
}
|
| 388 |
} else if (data.type === 'token_update') {
|
| 389 |
// Optional: Display token budget updates
|
| 390 |
console.log('💰 Token update:', data.message);
|
| 391 |
+
} else if (data.type === 'intent_classified') {
|
| 392 |
+
// 🎯 Reasoning Loop: Intent classification result
|
| 393 |
+
console.log(`🎯 Intent: ${data.mode} (${Math.round(data.confidence * 100)}%)`);
|
| 394 |
+
setPipelineMode(data.mode);
|
| 395 |
+
const stepId = `intent-${++pipelineStepCounterRef.current}`;
|
| 396 |
+
setPipelineSteps(prev => [...prev, {
|
| 397 |
+
id: stepId,
|
| 398 |
+
type: 'intent',
|
| 399 |
+
status: 'completed',
|
| 400 |
+
title: `Intent: ${data.mode.charAt(0).toUpperCase() + data.mode.slice(1)}`,
|
| 401 |
+
subtitle: data.sub_intent || data.reasoning,
|
| 402 |
+
detail: data.reasoning,
|
| 403 |
+
confidence: data.confidence,
|
| 404 |
+
timestamp: new Date()
|
| 405 |
+
}]);
|
| 406 |
+
} else if (data.type === 'reasoning_mode') {
|
| 407 |
+
// 🧠 Reasoning Loop activated
|
| 408 |
+
console.log(`🧠 Reasoning mode: ${data.mode}`);
|
| 409 |
+
setPipelineMode(data.mode);
|
| 410 |
+
setCurrentStep(data.message || `🧠 Reasoning Loop (${data.mode})`);
|
| 411 |
+
} else if (data.type === 'hypotheses_generated') {
|
| 412 |
+
// 💡 Exploratory mode: hypotheses generated
|
| 413 |
+
console.log(`💡 ${data.count} hypotheses generated`);
|
| 414 |
+
setPipelineHypotheses(data.hypotheses || []);
|
| 415 |
+
const stepId = `hyp-${++pipelineStepCounterRef.current}`;
|
| 416 |
+
setPipelineSteps(prev => [...prev, {
|
| 417 |
+
id: stepId,
|
| 418 |
+
type: 'hypothesis',
|
| 419 |
+
status: 'completed',
|
| 420 |
+
title: `${data.count} Hypotheses Generated`,
|
| 421 |
+
subtitle: data.hypotheses?.[0] || '',
|
| 422 |
+
detail: (data.hypotheses || []).map((h: string, i: number) => `${i + 1}. ${h}`).join('\n'),
|
| 423 |
+
timestamp: new Date()
|
| 424 |
+
}]);
|
| 425 |
+
} else if (data.type === 'reasoning_step') {
|
| 426 |
+
// 🤔 Reasoning step: LLM decided next action
|
| 427 |
+
console.log(`🤔 Iteration ${data.iteration}: ${data.tool}`);
|
| 428 |
+
// Mark previous "reason" steps as completed
|
| 429 |
+
setPipelineSteps(prev => prev.map(s =>
|
| 430 |
+
s.type === 'reason' && s.status === 'active' ? { ...s, status: 'completed' as const } : s
|
| 431 |
+
));
|
| 432 |
+
const stepId = `reason-${++pipelineStepCounterRef.current}`;
|
| 433 |
+
setPipelineSteps(prev => [...prev, {
|
| 434 |
+
id: stepId,
|
| 435 |
+
type: 'reason',
|
| 436 |
+
status: 'completed',
|
| 437 |
+
title: `Reason → ${data.tool}`,
|
| 438 |
+
subtitle: data.hypothesis || '',
|
| 439 |
+
detail: data.reasoning,
|
| 440 |
+
iteration: data.iteration,
|
| 441 |
+
tool: data.tool,
|
| 442 |
+
timestamp: new Date()
|
| 443 |
+
}]);
|
| 444 |
+
} else if (data.type === 'finding_discovered') {
|
| 445 |
+
// 🔬 Finding from evaluation step
|
| 446 |
+
console.log(`🔬 Finding (confidence: ${Math.round(data.confidence * 100)}%)`);
|
| 447 |
+
const stepId = `finding-${++pipelineStepCounterRef.current}`;
|
| 448 |
+
setPipelineSteps(prev => [...prev, {
|
| 449 |
+
id: stepId,
|
| 450 |
+
type: 'finding',
|
| 451 |
+
status: 'completed',
|
| 452 |
+
title: data.answered ? '✓ Question Answered' : 'Finding Discovered',
|
| 453 |
+
subtitle: data.interpretation?.substring(0, 100) || '',
|
| 454 |
+
detail: data.interpretation,
|
| 455 |
+
confidence: data.confidence,
|
| 456 |
+
iteration: data.iteration,
|
| 457 |
+
timestamp: new Date()
|
| 458 |
+
}]);
|
| 459 |
} else if (data.type === 'analysis_failed') {
|
| 460 |
console.log('❌ Analysis failed', data);
|
| 461 |
setIsTyping(false);
|
| 462 |
|
| 463 |
+
// Reset pipeline state
|
| 464 |
+
setPipelineSteps([]);
|
| 465 |
+
setPipelineMode(null);
|
| 466 |
+
setPipelineHypotheses([]);
|
| 467 |
+
|
| 468 |
// Show error message to user - add to sessions
|
| 469 |
setSessions(prev => prev.map(s => {
|
| 470 |
if (s.id === activeSessionId) {
|
|
|
|
| 487 |
console.log('✅ Analysis completed', data.result);
|
| 488 |
setIsTyping(false);
|
| 489 |
|
| 490 |
+
// Reset pipeline state
|
| 491 |
+
setPipelineSteps([]);
|
| 492 |
+
setPipelineMode(null);
|
| 493 |
+
setPipelineHypotheses([]);
|
| 494 |
+
|
| 495 |
// Create a unique key based on actual workflow content to prevent duplicates
|
| 496 |
// Use the last tool executed + summary hash for uniqueness
|
| 497 |
const lastTool = data.result?.workflow_history?.[data.result.workflow_history.length - 1]?.tool || 'unknown';
|
|
|
|
| 649 |
|
| 650 |
// Show loading indicator immediately (for UI feedback)
|
| 651 |
setIsTyping(true);
|
| 652 |
+
|
| 653 |
+
// Reset pipeline state for new analysis
|
| 654 |
+
setPipelineSteps([]);
|
| 655 |
+
setPipelineMode(null);
|
| 656 |
+
setPipelineHypotheses([]);
|
| 657 |
+
pipelineStepCounterRef.current = 0;
|
| 658 |
|
| 659 |
try {
|
| 660 |
|
|
|
|
| 1313 |
))
|
| 1314 |
)}
|
| 1315 |
{isTyping && (
|
| 1316 |
+
<PipelineView
|
| 1317 |
+
steps={pipelineSteps}
|
| 1318 |
+
mode={pipelineMode}
|
| 1319 |
+
currentStep={currentStep}
|
| 1320 |
+
isActive={isTyping}
|
| 1321 |
+
hypotheses={pipelineHypotheses}
|
| 1322 |
+
/>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1323 |
)}
|
| 1324 |
</div>
|
| 1325 |
|
|
|
|
| 1500 |
const allPlots: Array<{title: string, url: string, type?: string}> = [];
|
| 1501 |
const allReports: Array<{name: string, path: string}> = [];
|
| 1502 |
const allDataFiles: string[] = [];
|
| 1503 |
+
const baselineModels = ['xgboost', 'random_forest', 'lightgbm', 'ridge', 'lasso'];
|
| 1504 |
const foundModels = new Set<string>();
|
| 1505 |
|
| 1506 |
activeSession.messages.forEach(msg => {
|
FRRONTEEEND/components/PipelineView.tsx
ADDED
|
@@ -0,0 +1,348 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import React from 'react';
|
| 2 |
+
import { motion, AnimatePresence } from 'framer-motion';
|
| 3 |
+
import {
|
| 4 |
+
Brain, Zap, BarChart3, CheckCircle2, XCircle,
|
| 5 |
+
Loader2, ChevronDown, ChevronUp, Lightbulb,
|
| 6 |
+
Search, FlaskConical, FileText, Target, ArrowRight
|
| 7 |
+
} from 'lucide-react';
|
| 8 |
+
import { cn } from '../lib/utils';
|
| 9 |
+
|
| 10 |
+
// ─── Types ───────────────────────────────────────────────────
|
| 11 |
+
|
| 12 |
+
export interface PipelineStep {
|
| 13 |
+
id: string;
|
| 14 |
+
type: 'intent' | 'hypothesis' | 'reason' | 'act' | 'evaluate' | 'finding' | 'synthesize';
|
| 15 |
+
status: 'pending' | 'active' | 'completed' | 'failed';
|
| 16 |
+
title: string;
|
| 17 |
+
subtitle?: string;
|
| 18 |
+
detail?: string; // Extended info (shown on expand)
|
| 19 |
+
confidence?: number; // 0-1
|
| 20 |
+
timestamp?: Date;
|
| 21 |
+
tool?: string;
|
| 22 |
+
iteration?: number;
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
interface PipelineViewProps {
|
| 26 |
+
steps: PipelineStep[];
|
| 27 |
+
mode: string | null; // "direct" | "investigative" | "exploratory" | null
|
| 28 |
+
currentStep: string; // Existing currentStep string from ChatInterface
|
| 29 |
+
isActive: boolean; // Whether analysis is running
|
| 30 |
+
hypotheses?: string[];
|
| 31 |
+
className?: string;
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
// ─── Icons per step type ─────────────────────────────────────
|
| 35 |
+
|
| 36 |
+
const stepIcons: Record<PipelineStep['type'], React.ElementType> = {
|
| 37 |
+
intent: Target,
|
| 38 |
+
hypothesis: Lightbulb,
|
| 39 |
+
reason: Brain,
|
| 40 |
+
act: Zap,
|
| 41 |
+
evaluate: Search,
|
| 42 |
+
finding: FlaskConical,
|
| 43 |
+
synthesize: FileText,
|
| 44 |
+
};
|
| 45 |
+
|
| 46 |
+
const stepColors: Record<PipelineStep['type'], string> = {
|
| 47 |
+
intent: 'text-violet-400 bg-violet-500/10 border-violet-500/20',
|
| 48 |
+
hypothesis: 'text-amber-400 bg-amber-500/10 border-amber-500/20',
|
| 49 |
+
reason: 'text-cyan-400 bg-cyan-500/10 border-cyan-500/20',
|
| 50 |
+
act: 'text-emerald-400 bg-emerald-500/10 border-emerald-500/20',
|
| 51 |
+
evaluate: 'text-blue-400 bg-blue-500/10 border-blue-500/20',
|
| 52 |
+
finding: 'text-pink-400 bg-pink-500/10 border-pink-500/20',
|
| 53 |
+
synthesize: 'text-orange-400 bg-orange-500/10 border-orange-500/20',
|
| 54 |
+
};
|
| 55 |
+
|
| 56 |
+
const statusDotColors: Record<PipelineStep['status'], string> = {
|
| 57 |
+
pending: 'bg-white/20',
|
| 58 |
+
active: 'bg-emerald-500',
|
| 59 |
+
completed: 'bg-emerald-500',
|
| 60 |
+
failed: 'bg-red-500',
|
| 61 |
+
};
|
| 62 |
+
|
| 63 |
+
// ─── Confidence Bar ──────────────────────────────────────────
|
| 64 |
+
|
| 65 |
+
const ConfidenceBar: React.FC<{ value: number }> = ({ value }) => (
|
| 66 |
+
<div className="flex items-center gap-2 mt-1">
|
| 67 |
+
<div className="flex-1 h-1 bg-white/5 rounded-full overflow-hidden">
|
| 68 |
+
<motion.div
|
| 69 |
+
className={cn(
|
| 70 |
+
"h-full rounded-full",
|
| 71 |
+
value >= 0.7 ? "bg-emerald-500" : value >= 0.4 ? "bg-amber-500" : "bg-red-400"
|
| 72 |
+
)}
|
| 73 |
+
initial={{ width: 0 }}
|
| 74 |
+
animate={{ width: `${Math.round(value * 100)}%` }}
|
| 75 |
+
transition={{ duration: 0.6, ease: "easeOut" }}
|
| 76 |
+
/>
|
| 77 |
+
</div>
|
| 78 |
+
<span className="text-[10px] font-mono text-white/30 w-8 text-right">
|
| 79 |
+
{Math.round(value * 100)}%
|
| 80 |
+
</span>
|
| 81 |
+
</div>
|
| 82 |
+
);
|
| 83 |
+
|
| 84 |
+
// ─── Mode Badge ──────────────────────────────────────────────
|
| 85 |
+
|
| 86 |
+
const ModeBadge: React.FC<{ mode: string }> = ({ mode }) => {
|
| 87 |
+
const config: Record<string, { label: string; color: string; icon: React.ElementType }> = {
|
| 88 |
+
direct: { label: 'Direct', color: 'bg-emerald-500/10 text-emerald-400 border-emerald-500/20', icon: Zap },
|
| 89 |
+
investigative: { label: 'Investigative', color: 'bg-cyan-500/10 text-cyan-400 border-cyan-500/20', icon: Search },
|
| 90 |
+
exploratory: { label: 'Exploratory', color: 'bg-violet-500/10 text-violet-400 border-violet-500/20', icon: FlaskConical },
|
| 91 |
+
};
|
| 92 |
+
const { label, color, icon: Icon } = config[mode] || config.direct;
|
| 93 |
+
|
| 94 |
+
return (
|
| 95 |
+
<span className={cn("inline-flex items-center gap-1.5 px-2 py-0.5 text-[10px] font-medium rounded-full border", color)}>
|
| 96 |
+
<Icon className="w-3 h-3" />
|
| 97 |
+
{label} Mode
|
| 98 |
+
</span>
|
| 99 |
+
);
|
| 100 |
+
};
|
| 101 |
+
|
| 102 |
+
// ─── Single Step Row ─────────────────────────────────────────
|
| 103 |
+
|
| 104 |
+
const StepRow: React.FC<{ step: PipelineStep; isLast: boolean }> = ({ step, isLast }) => {
|
| 105 |
+
const [expanded, setExpanded] = React.useState(false);
|
| 106 |
+
const Icon = stepIcons[step.type] || Zap;
|
| 107 |
+
const colorClass = stepColors[step.type] || stepColors.act;
|
| 108 |
+
const isActive = step.status === 'active';
|
| 109 |
+
const isCompleted = step.status === 'completed';
|
| 110 |
+
const isFailed = step.status === 'failed';
|
| 111 |
+
|
| 112 |
+
return (
|
| 113 |
+
<div className="relative">
|
| 114 |
+
{/* Connector line */}
|
| 115 |
+
{!isLast && (
|
| 116 |
+
<div className={cn(
|
| 117 |
+
"absolute left-4 top-10 w-px h-[calc(100%-16px)]",
|
| 118 |
+
isCompleted ? "bg-emerald-500/30" : "bg-white/5"
|
| 119 |
+
)} />
|
| 120 |
+
)}
|
| 121 |
+
|
| 122 |
+
<motion.div
|
| 123 |
+
initial={{ opacity: 0, x: -12 }}
|
| 124 |
+
animate={{ opacity: 1, x: 0 }}
|
| 125 |
+
transition={{ duration: 0.3 }}
|
| 126 |
+
className={cn(
|
| 127 |
+
"relative flex items-start gap-3 p-2 rounded-lg cursor-pointer transition-colors",
|
| 128 |
+
isActive && "bg-white/[0.03]",
|
| 129 |
+
expanded && "bg-white/[0.02]"
|
| 130 |
+
)}
|
| 131 |
+
onClick={() => step.detail && setExpanded(!expanded)}
|
| 132 |
+
>
|
| 133 |
+
{/* Icon circle */}
|
| 134 |
+
<div className={cn(
|
| 135 |
+
"w-8 h-8 rounded-lg flex items-center justify-center shrink-0 border",
|
| 136 |
+
colorClass,
|
| 137 |
+
isActive && "animate-pulse"
|
| 138 |
+
)}>
|
| 139 |
+
{isActive ? (
|
| 140 |
+
<Loader2 className="w-4 h-4 animate-spin" />
|
| 141 |
+
) : isCompleted ? (
|
| 142 |
+
<CheckCircle2 className="w-4 h-4 text-emerald-400" />
|
| 143 |
+
) : isFailed ? (
|
| 144 |
+
<XCircle className="w-4 h-4 text-red-400" />
|
| 145 |
+
) : (
|
| 146 |
+
<Icon className="w-4 h-4" />
|
| 147 |
+
)}
|
| 148 |
+
</div>
|
| 149 |
+
|
| 150 |
+
{/* Content */}
|
| 151 |
+
<div className="flex-1 min-w-0">
|
| 152 |
+
<div className="flex items-center gap-2">
|
| 153 |
+
<span className={cn(
|
| 154 |
+
"text-xs font-medium truncate",
|
| 155 |
+
isActive ? "text-white" : isCompleted ? "text-white/70" : "text-white/40"
|
| 156 |
+
)}>
|
| 157 |
+
{step.title}
|
| 158 |
+
</span>
|
| 159 |
+
{step.iteration && (
|
| 160 |
+
<span className="text-[10px] font-mono text-white/20 shrink-0">
|
| 161 |
+
#{step.iteration}
|
| 162 |
+
</span>
|
| 163 |
+
)}
|
| 164 |
+
{step.detail && (
|
| 165 |
+
expanded
|
| 166 |
+
? <ChevronUp className="w-3 h-3 text-white/20 shrink-0" />
|
| 167 |
+
: <ChevronDown className="w-3 h-3 text-white/20 shrink-0" />
|
| 168 |
+
)}
|
| 169 |
+
</div>
|
| 170 |
+
|
| 171 |
+
{step.subtitle && (
|
| 172 |
+
<p className="text-[11px] text-white/30 mt-0.5 truncate">
|
| 173 |
+
{step.subtitle}
|
| 174 |
+
</p>
|
| 175 |
+
)}
|
| 176 |
+
|
| 177 |
+
{step.confidence !== undefined && step.confidence > 0 && (
|
| 178 |
+
<ConfidenceBar value={step.confidence} />
|
| 179 |
+
)}
|
| 180 |
+
</div>
|
| 181 |
+
|
| 182 |
+
{/* Status dot */}
|
| 183 |
+
<div className={cn(
|
| 184 |
+
"w-2 h-2 rounded-full shrink-0 mt-2",
|
| 185 |
+
statusDotColors[step.status]
|
| 186 |
+
)} />
|
| 187 |
+
</motion.div>
|
| 188 |
+
|
| 189 |
+
{/* Expanded detail */}
|
| 190 |
+
<AnimatePresence>
|
| 191 |
+
{expanded && step.detail && (
|
| 192 |
+
<motion.div
|
| 193 |
+
initial={{ height: 0, opacity: 0 }}
|
| 194 |
+
animate={{ height: 'auto', opacity: 1 }}
|
| 195 |
+
exit={{ height: 0, opacity: 0 }}
|
| 196 |
+
transition={{ duration: 0.2 }}
|
| 197 |
+
className="overflow-hidden"
|
| 198 |
+
>
|
| 199 |
+
<div className="ml-11 mr-2 mb-2 p-2 rounded-lg bg-white/[0.02] border border-white/5">
|
| 200 |
+
<p className="text-[11px] text-white/40 leading-relaxed whitespace-pre-wrap">
|
| 201 |
+
{step.detail}
|
| 202 |
+
</p>
|
| 203 |
+
</div>
|
| 204 |
+
</motion.div>
|
| 205 |
+
)}
|
| 206 |
+
</AnimatePresence>
|
| 207 |
+
</div>
|
| 208 |
+
);
|
| 209 |
+
};
|
| 210 |
+
|
| 211 |
+
// ─── Hypotheses Panel ────────────────────────────────────────
|
| 212 |
+
|
| 213 |
+
const HypothesesPanel: React.FC<{ hypotheses: string[] }> = ({ hypotheses }) => {
|
| 214 |
+
const [collapsed, setCollapsed] = React.useState(false);
|
| 215 |
+
|
| 216 |
+
if (!hypotheses.length) return null;
|
| 217 |
+
|
| 218 |
+
return (
|
| 219 |
+
<div className="mb-3">
|
| 220 |
+
<button
|
| 221 |
+
onClick={() => setCollapsed(!collapsed)}
|
| 222 |
+
className="flex items-center gap-1.5 text-[10px] font-medium text-amber-400/70 hover:text-amber-400 transition-colors mb-1.5"
|
| 223 |
+
>
|
| 224 |
+
<Lightbulb className="w-3 h-3" />
|
| 225 |
+
<span>{hypotheses.length} Hypotheses</span>
|
| 226 |
+
{collapsed ? <ChevronDown className="w-3 h-3" /> : <ChevronUp className="w-3 h-3" />}
|
| 227 |
+
</button>
|
| 228 |
+
<AnimatePresence>
|
| 229 |
+
{!collapsed && (
|
| 230 |
+
<motion.div
|
| 231 |
+
initial={{ height: 0, opacity: 0 }}
|
| 232 |
+
animate={{ height: 'auto', opacity: 1 }}
|
| 233 |
+
exit={{ height: 0, opacity: 0 }}
|
| 234 |
+
className="overflow-hidden"
|
| 235 |
+
>
|
| 236 |
+
<div className="space-y-1 ml-4">
|
| 237 |
+
{hypotheses.map((h, i) => (
|
| 238 |
+
<div key={i} className="flex items-start gap-1.5">
|
| 239 |
+
<ArrowRight className="w-3 h-3 text-amber-500/30 mt-0.5 shrink-0" />
|
| 240 |
+
<span className="text-[11px] text-white/30">{h}</span>
|
| 241 |
+
</div>
|
| 242 |
+
))}
|
| 243 |
+
</div>
|
| 244 |
+
</motion.div>
|
| 245 |
+
)}
|
| 246 |
+
</AnimatePresence>
|
| 247 |
+
</div>
|
| 248 |
+
);
|
| 249 |
+
};
|
| 250 |
+
|
| 251 |
+
// ─── Main Pipeline View ──────────────────────────────────────
|
| 252 |
+
|
| 253 |
+
export const PipelineView: React.FC<PipelineViewProps> = ({
|
| 254 |
+
steps,
|
| 255 |
+
mode,
|
| 256 |
+
currentStep,
|
| 257 |
+
isActive,
|
| 258 |
+
hypotheses = [],
|
| 259 |
+
className
|
| 260 |
+
}) => {
|
| 261 |
+
// If no steps yet and not in reasoning mode, show the simple fallback
|
| 262 |
+
if (!steps.length && !mode) {
|
| 263 |
+
return (
|
| 264 |
+
<div className={cn("flex gap-4", className)}>
|
| 265 |
+
<div className="w-8 h-8 rounded-lg flex items-center justify-center shrink-0 bg-white/5 border border-white/10">
|
| 266 |
+
<Loader2 className="w-4 h-4 text-indigo-400 animate-spin" />
|
| 267 |
+
</div>
|
| 268 |
+
<div className="bg-white/[0.03] p-4 rounded-2xl border border-white/5">
|
| 269 |
+
<div className="flex items-center gap-3">
|
| 270 |
+
<div className="flex gap-1">
|
| 271 |
+
<span className="w-1.5 h-1.5 bg-emerald-500 rounded-full animate-bounce [animation-delay:-0.3s]" />
|
| 272 |
+
<span className="w-1.5 h-1.5 bg-emerald-500 rounded-full animate-bounce [animation-delay:-0.15s]" />
|
| 273 |
+
<span className="w-1.5 h-1.5 bg-emerald-500 rounded-full animate-bounce" />
|
| 274 |
+
</div>
|
| 275 |
+
<span className="text-sm text-white/60">
|
| 276 |
+
{currentStep || '🔧 Starting analysis...'}
|
| 277 |
+
</span>
|
| 278 |
+
</div>
|
| 279 |
+
</div>
|
| 280 |
+
</div>
|
| 281 |
+
);
|
| 282 |
+
}
|
| 283 |
+
|
| 284 |
+
// Count completed steps
|
| 285 |
+
const completedCount = steps.filter(s => s.status === 'completed').length;
|
| 286 |
+
const totalCount = steps.length;
|
| 287 |
+
const progressPct = totalCount > 0 ? (completedCount / totalCount) * 100 : 0;
|
| 288 |
+
|
| 289 |
+
return (
|
| 290 |
+
<div className={cn("flex gap-4", className)}>
|
| 291 |
+
{/* Bot avatar */}
|
| 292 |
+
<div className="w-8 h-8 rounded-lg flex items-center justify-center shrink-0 bg-white/5 border border-white/10">
|
| 293 |
+
<Brain className="w-4 h-4 text-cyan-400" />
|
| 294 |
+
</div>
|
| 295 |
+
|
| 296 |
+
{/* Pipeline card */}
|
| 297 |
+
<div className="flex-1 bg-white/[0.03] p-4 rounded-2xl border border-white/5 max-w-lg">
|
| 298 |
+
{/* Header */}
|
| 299 |
+
<div className="flex items-center justify-between mb-3">
|
| 300 |
+
<div className="flex items-center gap-2">
|
| 301 |
+
<span className="text-xs font-semibold text-white/80">Reasoning Pipeline</span>
|
| 302 |
+
{mode && <ModeBadge mode={mode} />}
|
| 303 |
+
</div>
|
| 304 |
+
{isActive && (
|
| 305 |
+
<div className="flex items-center gap-1.5 text-[10px] text-emerald-400">
|
| 306 |
+
<Loader2 className="w-3 h-3 animate-spin" />
|
| 307 |
+
<span>Running</span>
|
| 308 |
+
</div>
|
| 309 |
+
)}
|
| 310 |
+
</div>
|
| 311 |
+
|
| 312 |
+
{/* Progress bar */}
|
| 313 |
+
<div className="h-1 bg-white/5 rounded-full overflow-hidden mb-3">
|
| 314 |
+
<motion.div
|
| 315 |
+
className="h-full bg-gradient-to-r from-cyan-500 to-emerald-500 rounded-full"
|
| 316 |
+
initial={{ width: 0 }}
|
| 317 |
+
animate={{ width: `${progressPct}%` }}
|
| 318 |
+
transition={{ duration: 0.4, ease: "easeOut" }}
|
| 319 |
+
/>
|
| 320 |
+
</div>
|
| 321 |
+
|
| 322 |
+
{/* Hypotheses (exploratory mode) */}
|
| 323 |
+
{hypotheses.length > 0 && <HypothesesPanel hypotheses={hypotheses} />}
|
| 324 |
+
|
| 325 |
+
{/* Steps timeline */}
|
| 326 |
+
<div className="space-y-0.5 max-h-[320px] overflow-y-auto pr-1 scrollbar-thin scrollbar-thumb-white/5">
|
| 327 |
+
{steps.map((step, i) => (
|
| 328 |
+
<StepRow key={step.id} step={step} isLast={i === steps.length - 1} />
|
| 329 |
+
))}
|
| 330 |
+
</div>
|
| 331 |
+
|
| 332 |
+
{/* Footer summary */}
|
| 333 |
+
{!isActive && completedCount > 0 && (
|
| 334 |
+
<div className="mt-3 pt-2 border-t border-white/5 flex items-center justify-between">
|
| 335 |
+
<span className="text-[10px] text-white/20">
|
| 336 |
+
{completedCount} step{completedCount !== 1 ? 's' : ''} completed
|
| 337 |
+
</span>
|
| 338 |
+
<span className="text-[10px] text-white/20 font-mono">
|
| 339 |
+
{steps.filter(s => s.type === 'finding').length} finding{steps.filter(s => s.type === 'finding').length !== 1 ? 's' : ''}
|
| 340 |
+
</span>
|
| 341 |
+
</div>
|
| 342 |
+
)}
|
| 343 |
+
</div>
|
| 344 |
+
</div>
|
| 345 |
+
);
|
| 346 |
+
};
|
| 347 |
+
|
| 348 |
+
export default PipelineView;
|
src/orchestrator.py
CHANGED
|
@@ -21,6 +21,11 @@ from .tools.tools_registry import TOOLS, get_all_tool_names, get_tools_by_catego
|
|
| 21 |
from .tools.agent_tool_mapping import (get_tools_for_agent, filter_tools_by_names,
|
| 22 |
get_agent_description, suggest_next_agent)
|
| 23 |
from .reasoning.reasoning_trace import get_reasoning_trace, reset_reasoning_trace
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
from .session_memory import SessionMemory
|
| 25 |
from .session_store import SessionStore
|
| 26 |
from .workflow_state import WorkflowState
|
|
@@ -2898,6 +2903,526 @@ You receive quality reports from EDA agent and deliver clean data to modeling ag
|
|
| 2898 |
"task_type": result_data.get("task_type")
|
| 2899 |
})
|
| 2900 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2901 |
def analyze(self, file_path: str, task_description: str,
|
| 2902 |
target_col: Optional[str] = None,
|
| 2903 |
use_cache: bool = True,
|
|
@@ -3032,6 +3557,82 @@ You receive quality reports from EDA agent and deliver clean data to modeling ag
|
|
| 3032 |
print("✓ Using cached results")
|
| 3033 |
return cached
|
| 3034 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3035 |
# Build initial messages
|
| 3036 |
# Use dynamic prompts for small context models
|
| 3037 |
if self.use_compact_prompts:
|
|
|
|
| 21 |
from .tools.agent_tool_mapping import (get_tools_for_agent, filter_tools_by_names,
|
| 22 |
get_agent_description, suggest_next_agent)
|
| 23 |
from .reasoning.reasoning_trace import get_reasoning_trace, reset_reasoning_trace
|
| 24 |
+
from .reasoning.findings import FindingsAccumulator, Finding
|
| 25 |
+
from .reasoning.reasoner import Reasoner, ReasoningOutput
|
| 26 |
+
from .reasoning.evaluator import Evaluator, EvaluationOutput
|
| 27 |
+
from .reasoning.synthesizer import Synthesizer
|
| 28 |
+
from .routing.intent_classifier import IntentClassifier, IntentResult
|
| 29 |
from .session_memory import SessionMemory
|
| 30 |
from .session_store import SessionStore
|
| 31 |
from .workflow_state import WorkflowState
|
|
|
|
| 2903 |
"task_type": result_data.get("task_type")
|
| 2904 |
})
|
| 2905 |
|
| 2906 |
+
# ═══════════════════════════════════════════════════════════════════════════
|
| 2907 |
+
# REASONING LOOP INFRASTRUCTURE
|
| 2908 |
+
# Three new methods that power the hypothesis-driven analysis mode:
|
| 2909 |
+
# _llm_text_call → Provider-agnostic text LLM call (no tool schemas)
|
| 2910 |
+
# _get_tools_description → Lightweight text description of available tools
|
| 2911 |
+
# _run_reasoning_loop → The core Reason → Act → Evaluate → Loop/Stop cycle
|
| 2912 |
+
# ═══════════════════════════════════════════════════════════════════════════
|
| 2913 |
+
|
| 2914 |
+
def _llm_text_call(self, system_prompt: str, user_prompt: str, max_tokens: int = 2048) -> str:
|
| 2915 |
+
"""
|
| 2916 |
+
Simple text-only LLM call (no tool schemas).
|
| 2917 |
+
|
| 2918 |
+
Used by Reasoner, Evaluator, and Synthesizer for lightweight
|
| 2919 |
+
reasoning calls. Much cheaper than full tool-calling API calls.
|
| 2920 |
+
|
| 2921 |
+
Args:
|
| 2922 |
+
system_prompt: System prompt for the LLM
|
| 2923 |
+
user_prompt: User prompt for the LLM
|
| 2924 |
+
max_tokens: Maximum response tokens
|
| 2925 |
+
|
| 2926 |
+
Returns:
|
| 2927 |
+
Plain text response from the LLM
|
| 2928 |
+
"""
|
| 2929 |
+
messages = [
|
| 2930 |
+
{"role": "system", "content": system_prompt},
|
| 2931 |
+
{"role": "user", "content": user_prompt}
|
| 2932 |
+
]
|
| 2933 |
+
|
| 2934 |
+
# Rate limiting
|
| 2935 |
+
if self.min_api_call_interval > 0:
|
| 2936 |
+
time_since_last_call = time.time() - self.last_api_call_time
|
| 2937 |
+
if time_since_last_call < self.min_api_call_interval:
|
| 2938 |
+
wait_time = self.min_api_call_interval - time_since_last_call
|
| 2939 |
+
time.sleep(wait_time)
|
| 2940 |
+
|
| 2941 |
+
try:
|
| 2942 |
+
if self.provider == "mistral":
|
| 2943 |
+
if hasattr(self.mistral_client, 'chat') and hasattr(self.mistral_client.chat, 'complete'):
|
| 2944 |
+
response = self.mistral_client.chat.complete(
|
| 2945 |
+
model=self.model,
|
| 2946 |
+
messages=messages,
|
| 2947 |
+
temperature=0.1,
|
| 2948 |
+
max_tokens=max_tokens
|
| 2949 |
+
)
|
| 2950 |
+
else:
|
| 2951 |
+
response = self.mistral_client.chat(
|
| 2952 |
+
model=self.model,
|
| 2953 |
+
messages=messages,
|
| 2954 |
+
temperature=0.1,
|
| 2955 |
+
max_tokens=max_tokens
|
| 2956 |
+
)
|
| 2957 |
+
self.api_calls_made += 1
|
| 2958 |
+
self.last_api_call_time = time.time()
|
| 2959 |
+
|
| 2960 |
+
if hasattr(response, 'usage') and response.usage:
|
| 2961 |
+
self.tokens_this_minute += response.usage.total_tokens
|
| 2962 |
+
|
| 2963 |
+
return self._extract_content_text(response.choices[0].message.content)
|
| 2964 |
+
|
| 2965 |
+
elif self.provider == "groq":
|
| 2966 |
+
response = self.groq_client.chat.completions.create(
|
| 2967 |
+
model=self.model,
|
| 2968 |
+
messages=messages,
|
| 2969 |
+
temperature=0.1,
|
| 2970 |
+
max_tokens=max_tokens
|
| 2971 |
+
)
|
| 2972 |
+
self.api_calls_made += 1
|
| 2973 |
+
self.last_api_call_time = time.time()
|
| 2974 |
+
|
| 2975 |
+
if hasattr(response, 'usage') and response.usage:
|
| 2976 |
+
self.tokens_this_minute += response.usage.total_tokens
|
| 2977 |
+
|
| 2978 |
+
return self._extract_content_text(response.choices[0].message.content)
|
| 2979 |
+
|
| 2980 |
+
elif self.provider == "gemini":
|
| 2981 |
+
full_prompt = f"{system_prompt}\n\n{user_prompt}"
|
| 2982 |
+
response = self.gemini_model.generate_content(
|
| 2983 |
+
full_prompt,
|
| 2984 |
+
generation_config={
|
| 2985 |
+
"temperature": 0.1,
|
| 2986 |
+
"max_output_tokens": max_tokens
|
| 2987 |
+
}
|
| 2988 |
+
)
|
| 2989 |
+
self.api_calls_made += 1
|
| 2990 |
+
self.last_api_call_time = time.time()
|
| 2991 |
+
return response.text
|
| 2992 |
+
|
| 2993 |
+
else:
|
| 2994 |
+
raise ValueError(f"Unsupported provider: {self.provider}")
|
| 2995 |
+
|
| 2996 |
+
except Exception as e:
|
| 2997 |
+
error_str = str(e)
|
| 2998 |
+
# Handle rate limits
|
| 2999 |
+
if "429" in error_str or "rate_limit" in error_str.lower():
|
| 3000 |
+
print(f"⏳ Rate limit in reasoning call, waiting 10s...")
|
| 3001 |
+
time.sleep(10)
|
| 3002 |
+
return self._llm_text_call(system_prompt, user_prompt, max_tokens)
|
| 3003 |
+
raise
|
| 3004 |
+
|
| 3005 |
+
def _get_tools_description(self, tool_names: Optional[List[str]] = None) -> str:
|
| 3006 |
+
"""
|
| 3007 |
+
Build a lightweight text description of available tools.
|
| 3008 |
+
|
| 3009 |
+
Used in Reasoner prompts instead of sending full JSON tool schemas.
|
| 3010 |
+
This is much more token-efficient than the OpenAI tools format.
|
| 3011 |
+
|
| 3012 |
+
Args:
|
| 3013 |
+
tool_names: Optional list of tool names to include (None = all tools)
|
| 3014 |
+
|
| 3015 |
+
Returns:
|
| 3016 |
+
Formatted text like:
|
| 3017 |
+
- profile_dataset(file_path): Profile a dataset to understand structure
|
| 3018 |
+
- analyze_correlations(file_path, target_col): Analyze column correlations
|
| 3019 |
+
...
|
| 3020 |
+
"""
|
| 3021 |
+
import inspect
|
| 3022 |
+
|
| 3023 |
+
lines = []
|
| 3024 |
+
tool_map = self.tool_functions
|
| 3025 |
+
|
| 3026 |
+
# Filter to specific tools if requested
|
| 3027 |
+
if tool_names:
|
| 3028 |
+
tool_map = {k: v for k, v in tool_map.items() if k in tool_names}
|
| 3029 |
+
|
| 3030 |
+
for name, func in sorted(tool_map.items()):
|
| 3031 |
+
# Get function signature
|
| 3032 |
+
try:
|
| 3033 |
+
sig = inspect.signature(func)
|
| 3034 |
+
params = []
|
| 3035 |
+
for param_name, param in sig.parameters.items():
|
| 3036 |
+
if param_name in ("kwargs", "args"):
|
| 3037 |
+
continue
|
| 3038 |
+
if param.default is inspect.Parameter.empty:
|
| 3039 |
+
params.append(param_name)
|
| 3040 |
+
else:
|
| 3041 |
+
params.append(f"{param_name}=...")
|
| 3042 |
+
params_str = ", ".join(params[:5]) # Max 5 params shown
|
| 3043 |
+
if len(sig.parameters) > 5:
|
| 3044 |
+
params_str += ", ..."
|
| 3045 |
+
except (ValueError, TypeError):
|
| 3046 |
+
params_str = "..."
|
| 3047 |
+
|
| 3048 |
+
# Get first line of docstring
|
| 3049 |
+
doc = (func.__doc__ or "").strip().split("\n")[0][:100]
|
| 3050 |
+
|
| 3051 |
+
lines.append(f"- {name}({params_str}): {doc}")
|
| 3052 |
+
|
| 3053 |
+
return "\n".join(lines)
|
| 3054 |
+
|
| 3055 |
+
def _run_reasoning_loop(
|
| 3056 |
+
self,
|
| 3057 |
+
question: str,
|
| 3058 |
+
file_path: str,
|
| 3059 |
+
dataset_info: Dict[str, Any],
|
| 3060 |
+
target_col: Optional[str] = None,
|
| 3061 |
+
mode: str = "investigative",
|
| 3062 |
+
max_iterations: int = 7,
|
| 3063 |
+
tool_names: Optional[List[str]] = None
|
| 3064 |
+
) -> Dict[str, Any]:
|
| 3065 |
+
"""
|
| 3066 |
+
Run the Reasoning Loop: Reason → Act → Evaluate → Loop/Stop → Synthesize.
|
| 3067 |
+
|
| 3068 |
+
This is the core of the hypothesis-driven analysis mode.
|
| 3069 |
+
Instead of a pipeline, the agent:
|
| 3070 |
+
1. REASONS about what to investigate next
|
| 3071 |
+
2. ACTS (executes one tool)
|
| 3072 |
+
3. EVALUATES the result
|
| 3073 |
+
4. Decides to LOOP (investigate more) or STOP
|
| 3074 |
+
5. SYNTHESIZES all findings into a coherent answer
|
| 3075 |
+
|
| 3076 |
+
Args:
|
| 3077 |
+
question: User's question or "Analyze this data"
|
| 3078 |
+
file_path: Path to the dataset
|
| 3079 |
+
dataset_info: Schema info from local extraction
|
| 3080 |
+
target_col: Optional target column
|
| 3081 |
+
mode: "investigative" or "exploratory"
|
| 3082 |
+
max_iterations: Max reasoning iterations (default 7)
|
| 3083 |
+
tool_names: Optional subset of tools to use
|
| 3084 |
+
|
| 3085 |
+
Returns:
|
| 3086 |
+
Dict with status, summary, findings, workflow_history, etc.
|
| 3087 |
+
"""
|
| 3088 |
+
start_time = time.time()
|
| 3089 |
+
|
| 3090 |
+
# Initialize reasoning components (pass our LLM caller)
|
| 3091 |
+
reasoner = Reasoner(llm_caller=self._llm_text_call)
|
| 3092 |
+
evaluator = Evaluator(llm_caller=self._llm_text_call)
|
| 3093 |
+
synthesizer = Synthesizer(llm_caller=self._llm_text_call)
|
| 3094 |
+
findings = FindingsAccumulator(question=question, mode=mode)
|
| 3095 |
+
|
| 3096 |
+
# Get tools description for the reasoner
|
| 3097 |
+
tools_desc = self._get_tools_description(tool_names)
|
| 3098 |
+
|
| 3099 |
+
# Track for API response
|
| 3100 |
+
workflow_history = []
|
| 3101 |
+
current_file = file_path # Tracks the latest output file
|
| 3102 |
+
|
| 3103 |
+
# Emit mode info for UI
|
| 3104 |
+
if hasattr(self, 'session') and self.session:
|
| 3105 |
+
progress_manager.emit(self.session.session_id, {
|
| 3106 |
+
'type': 'reasoning_mode',
|
| 3107 |
+
'mode': mode,
|
| 3108 |
+
'message': f"🧠 Reasoning Loop activated ({mode} mode)",
|
| 3109 |
+
'question': question
|
| 3110 |
+
})
|
| 3111 |
+
|
| 3112 |
+
print(f"\n{'='*60}")
|
| 3113 |
+
print(f"🧠 REASONING LOOP ({mode.upper()} mode)")
|
| 3114 |
+
print(f" Question: {question}")
|
| 3115 |
+
print(f" Max iterations: {max_iterations}")
|
| 3116 |
+
print(f"{'='*60}")
|
| 3117 |
+
|
| 3118 |
+
# ── EXPLORATORY MODE: Generate hypotheses first ──
|
| 3119 |
+
if mode == "exploratory":
|
| 3120 |
+
print(f"\n🔬 Generating hypotheses from data profile...")
|
| 3121 |
+
|
| 3122 |
+
# Profile the dataset first if not already done
|
| 3123 |
+
profile_result = self._execute_tool("profile_dataset", {"file_path": file_path})
|
| 3124 |
+
profile_summary = ""
|
| 3125 |
+
if profile_result.get("success", True):
|
| 3126 |
+
profile_summary = json.dumps(
|
| 3127 |
+
self._compress_tool_result("profile_dataset",
|
| 3128 |
+
self._make_json_serializable(profile_result)),
|
| 3129 |
+
default=str
|
| 3130 |
+
)[:2000]
|
| 3131 |
+
|
| 3132 |
+
workflow_history.append({
|
| 3133 |
+
"iteration": 0,
|
| 3134 |
+
"tool": "profile_dataset",
|
| 3135 |
+
"arguments": {"file_path": file_path},
|
| 3136 |
+
"result": profile_result
|
| 3137 |
+
})
|
| 3138 |
+
self._update_workflow_state("profile_dataset", profile_result)
|
| 3139 |
+
|
| 3140 |
+
# Generate hypotheses
|
| 3141 |
+
hypotheses = reasoner.generate_hypotheses(
|
| 3142 |
+
dataset_info=dataset_info,
|
| 3143 |
+
file_path=file_path,
|
| 3144 |
+
target_col=target_col,
|
| 3145 |
+
profile_summary=profile_summary
|
| 3146 |
+
)
|
| 3147 |
+
|
| 3148 |
+
print(f" Generated {len(hypotheses)} hypotheses:")
|
| 3149 |
+
for i, h in enumerate(hypotheses):
|
| 3150 |
+
text = h.get("text", str(h))
|
| 3151 |
+
priority = h.get("priority", 0.5)
|
| 3152 |
+
findings.add_hypothesis(text, priority=priority, source_iteration=0)
|
| 3153 |
+
print(f" {i+1}. [{priority:.1f}] {text}")
|
| 3154 |
+
|
| 3155 |
+
# Emit hypothesis info
|
| 3156 |
+
if hasattr(self, 'session') and self.session:
|
| 3157 |
+
progress_manager.emit(self.session.session_id, {
|
| 3158 |
+
'type': 'hypotheses_generated',
|
| 3159 |
+
'hypotheses': [h.get("text", str(h)) for h in hypotheses],
|
| 3160 |
+
'count': len(hypotheses)
|
| 3161 |
+
})
|
| 3162 |
+
|
| 3163 |
+
# ── MAIN REASONING LOOP ──
|
| 3164 |
+
for iteration in range(1, max_iterations + 1):
|
| 3165 |
+
print(f"\n── Iteration {iteration}/{max_iterations} ──")
|
| 3166 |
+
|
| 3167 |
+
# STEP 1: REASON - What should we investigate next?
|
| 3168 |
+
print(f"🤔 REASON: Deciding next action...")
|
| 3169 |
+
|
| 3170 |
+
reasoning_output = reasoner.reason(
|
| 3171 |
+
question=question,
|
| 3172 |
+
dataset_info=dataset_info,
|
| 3173 |
+
findings=findings,
|
| 3174 |
+
available_tools=tools_desc,
|
| 3175 |
+
file_path=current_file,
|
| 3176 |
+
target_col=target_col
|
| 3177 |
+
)
|
| 3178 |
+
|
| 3179 |
+
print(f" Status: {reasoning_output.status}")
|
| 3180 |
+
print(f" Reasoning: {reasoning_output.reasoning}")
|
| 3181 |
+
|
| 3182 |
+
# Check if done
|
| 3183 |
+
if reasoning_output.status == "done":
|
| 3184 |
+
print(f"✅ Reasoner says: DONE (confidence: {reasoning_output.confidence:.0%})")
|
| 3185 |
+
print(f" Reason: {reasoning_output.reasoning}")
|
| 3186 |
+
break
|
| 3187 |
+
|
| 3188 |
+
tool_name = reasoning_output.tool_name
|
| 3189 |
+
tool_args = reasoning_output.arguments
|
| 3190 |
+
hypothesis = reasoning_output.hypothesis
|
| 3191 |
+
|
| 3192 |
+
if not tool_name or tool_name not in self.tool_functions:
|
| 3193 |
+
print(f"⚠️ Invalid tool: {tool_name}, skipping iteration")
|
| 3194 |
+
continue
|
| 3195 |
+
|
| 3196 |
+
print(f" Tool: {tool_name}")
|
| 3197 |
+
print(f" Hypothesis: {hypothesis}")
|
| 3198 |
+
|
| 3199 |
+
# Emit reasoning step for UI
|
| 3200 |
+
if hasattr(self, 'session') and self.session:
|
| 3201 |
+
progress_manager.emit(self.session.session_id, {
|
| 3202 |
+
'type': 'reasoning_step',
|
| 3203 |
+
'iteration': iteration,
|
| 3204 |
+
'tool': tool_name,
|
| 3205 |
+
'hypothesis': hypothesis,
|
| 3206 |
+
'reasoning': reasoning_output.reasoning
|
| 3207 |
+
})
|
| 3208 |
+
|
| 3209 |
+
# STEP 2: ACT - Execute the tool
|
| 3210 |
+
print(f"⚡ ACT: Executing {tool_name}...")
|
| 3211 |
+
|
| 3212 |
+
# Emit tool execution event
|
| 3213 |
+
if hasattr(self, 'session') and self.session:
|
| 3214 |
+
progress_manager.emit(self.session.session_id, {
|
| 3215 |
+
'type': 'tool_executing',
|
| 3216 |
+
'tool': tool_name,
|
| 3217 |
+
'message': f"🔧 Executing: {tool_name}",
|
| 3218 |
+
'arguments': tool_args
|
| 3219 |
+
})
|
| 3220 |
+
|
| 3221 |
+
tool_result = self._execute_tool(tool_name, tool_args)
|
| 3222 |
+
|
| 3223 |
+
# Track output file for next iteration
|
| 3224 |
+
if tool_result.get("success", True):
|
| 3225 |
+
result_data = tool_result.get("result", {})
|
| 3226 |
+
if isinstance(result_data, dict):
|
| 3227 |
+
new_file = result_data.get("output_file") or result_data.get("output_path")
|
| 3228 |
+
if new_file:
|
| 3229 |
+
current_file = new_file
|
| 3230 |
+
|
| 3231 |
+
# Emit success
|
| 3232 |
+
if hasattr(self, 'session') and self.session:
|
| 3233 |
+
progress_manager.emit(self.session.session_id, {
|
| 3234 |
+
'type': 'tool_completed',
|
| 3235 |
+
'tool': tool_name,
|
| 3236 |
+
'message': f"✓ Completed: {tool_name}"
|
| 3237 |
+
})
|
| 3238 |
+
print(f" ✓ Tool completed successfully")
|
| 3239 |
+
else:
|
| 3240 |
+
error_msg = tool_result.get("error", "Unknown error")
|
| 3241 |
+
print(f" ❌ Tool failed: {error_msg}")
|
| 3242 |
+
if hasattr(self, 'session') and self.session:
|
| 3243 |
+
progress_manager.emit(self.session.session_id, {
|
| 3244 |
+
'type': 'tool_failed',
|
| 3245 |
+
'tool': tool_name,
|
| 3246 |
+
'message': f"❌ FAILED: {tool_name}",
|
| 3247 |
+
'error': error_msg
|
| 3248 |
+
})
|
| 3249 |
+
|
| 3250 |
+
# Track in workflow history
|
| 3251 |
+
workflow_history.append({
|
| 3252 |
+
"iteration": iteration,
|
| 3253 |
+
"tool": tool_name,
|
| 3254 |
+
"arguments": tool_args,
|
| 3255 |
+
"result": tool_result
|
| 3256 |
+
})
|
| 3257 |
+
|
| 3258 |
+
# Update workflow state
|
| 3259 |
+
self._update_workflow_state(tool_name, tool_result)
|
| 3260 |
+
|
| 3261 |
+
# Checkpoint
|
| 3262 |
+
if tool_result.get("success", True):
|
| 3263 |
+
session_id = self.http_session_key or "default"
|
| 3264 |
+
self.recovery_manager.checkpoint_manager.save_checkpoint(
|
| 3265 |
+
session_id=session_id,
|
| 3266 |
+
workflow_state={
|
| 3267 |
+
'iteration': iteration,
|
| 3268 |
+
'workflow_history': workflow_history,
|
| 3269 |
+
'current_file': file_path,
|
| 3270 |
+
'task_description': question,
|
| 3271 |
+
'target_col': target_col
|
| 3272 |
+
},
|
| 3273 |
+
last_tool=tool_name,
|
| 3274 |
+
iteration=iteration
|
| 3275 |
+
)
|
| 3276 |
+
|
| 3277 |
+
# STEP 3: EVALUATE - What did we learn?
|
| 3278 |
+
print(f"📊 EVALUATE: Interpreting results...")
|
| 3279 |
+
|
| 3280 |
+
evaluation = evaluator.evaluate(
|
| 3281 |
+
question=question,
|
| 3282 |
+
tool_name=tool_name,
|
| 3283 |
+
arguments=tool_args,
|
| 3284 |
+
result=tool_result,
|
| 3285 |
+
findings=findings,
|
| 3286 |
+
result_compressor=lambda tn, r: self._compress_tool_result(
|
| 3287 |
+
tn, self._make_json_serializable(r)
|
| 3288 |
+
)
|
| 3289 |
+
)
|
| 3290 |
+
|
| 3291 |
+
print(f" Interpretation: {evaluation.interpretation}")
|
| 3292 |
+
print(f" Answered: {evaluation.answered} (confidence: {evaluation.confidence:.0%})")
|
| 3293 |
+
print(f" Should stop: {evaluation.should_stop}")
|
| 3294 |
+
if evaluation.next_questions:
|
| 3295 |
+
print(f" Next questions: {evaluation.next_questions}")
|
| 3296 |
+
|
| 3297 |
+
# Build finding and add to accumulator
|
| 3298 |
+
compressed_result = json.dumps(
|
| 3299 |
+
self._compress_tool_result(tool_name, self._make_json_serializable(tool_result)),
|
| 3300 |
+
default=str
|
| 3301 |
+
)
|
| 3302 |
+
|
| 3303 |
+
finding = evaluator.build_finding(
|
| 3304 |
+
iteration=iteration,
|
| 3305 |
+
hypothesis=hypothesis,
|
| 3306 |
+
tool_name=tool_name,
|
| 3307 |
+
arguments=tool_args,
|
| 3308 |
+
result_summary=compressed_result,
|
| 3309 |
+
evaluation=evaluation
|
| 3310 |
+
)
|
| 3311 |
+
findings.add_finding(finding)
|
| 3312 |
+
|
| 3313 |
+
# Emit finding for UI
|
| 3314 |
+
if hasattr(self, 'session') and self.session:
|
| 3315 |
+
progress_manager.emit(self.session.session_id, {
|
| 3316 |
+
'type': 'finding_discovered',
|
| 3317 |
+
'iteration': iteration,
|
| 3318 |
+
'interpretation': evaluation.interpretation,
|
| 3319 |
+
'confidence': evaluation.confidence,
|
| 3320 |
+
'answered': evaluation.answered
|
| 3321 |
+
})
|
| 3322 |
+
|
| 3323 |
+
# Check if we should stop
|
| 3324 |
+
if evaluation.should_stop:
|
| 3325 |
+
print(f"\n✅ Evaluator says: STOP (confidence: {evaluation.confidence:.0%})")
|
| 3326 |
+
break
|
| 3327 |
+
|
| 3328 |
+
# ── STEP 4: SYNTHESIZE - Build the final answer ──
|
| 3329 |
+
print(f"\n{'='*60}")
|
| 3330 |
+
print(f"📝 SYNTHESIZE: Building final answer from {len(findings.findings)} findings...")
|
| 3331 |
+
print(f"{'='*60}")
|
| 3332 |
+
|
| 3333 |
+
# Collect artifacts from workflow history
|
| 3334 |
+
artifacts = self._collect_artifacts(workflow_history)
|
| 3335 |
+
|
| 3336 |
+
# Generate synthesis
|
| 3337 |
+
if mode == "exploratory":
|
| 3338 |
+
summary_text = synthesizer.synthesize_exploratory(
|
| 3339 |
+
findings=findings,
|
| 3340 |
+
artifacts=artifacts
|
| 3341 |
+
)
|
| 3342 |
+
else:
|
| 3343 |
+
summary_text = synthesizer.synthesize(
|
| 3344 |
+
findings=findings,
|
| 3345 |
+
artifacts=artifacts
|
| 3346 |
+
)
|
| 3347 |
+
|
| 3348 |
+
# Also generate enhanced summary for plots/metrics extraction
|
| 3349 |
+
try:
|
| 3350 |
+
enhanced = self._generate_enhanced_summary(
|
| 3351 |
+
workflow_history, summary_text, question
|
| 3352 |
+
)
|
| 3353 |
+
plots_data = enhanced.get("plots", [])
|
| 3354 |
+
metrics_data = enhanced.get("metrics", {})
|
| 3355 |
+
artifacts_data = enhanced.get("artifacts", {})
|
| 3356 |
+
except Exception as e:
|
| 3357 |
+
print(f"⚠️ Enhanced summary generation failed: {e}")
|
| 3358 |
+
plots_data = []
|
| 3359 |
+
metrics_data = {}
|
| 3360 |
+
artifacts_data = {}
|
| 3361 |
+
|
| 3362 |
+
# Save to session
|
| 3363 |
+
if self.session:
|
| 3364 |
+
self.session.add_conversation(question, summary_text)
|
| 3365 |
+
self.session_store.save(self.session)
|
| 3366 |
+
|
| 3367 |
+
result = {
|
| 3368 |
+
"status": "success",
|
| 3369 |
+
"summary": summary_text,
|
| 3370 |
+
"metrics": metrics_data,
|
| 3371 |
+
"artifacts": artifacts_data,
|
| 3372 |
+
"plots": plots_data,
|
| 3373 |
+
"workflow_history": workflow_history,
|
| 3374 |
+
"findings": findings.to_dict(),
|
| 3375 |
+
"reasoning_trace": self.reasoning_trace.get_trace(),
|
| 3376 |
+
"reasoning_summary": self.reasoning_trace.get_trace_summary(),
|
| 3377 |
+
"execution_mode": mode,
|
| 3378 |
+
"iterations": findings.iteration_count,
|
| 3379 |
+
"api_calls": self.api_calls_made,
|
| 3380 |
+
"execution_time": round(time.time() - start_time, 2)
|
| 3381 |
+
}
|
| 3382 |
+
|
| 3383 |
+
print(f"\n✅ Reasoning loop completed in {result['execution_time']}s")
|
| 3384 |
+
print(f" Iterations: {findings.iteration_count}")
|
| 3385 |
+
print(f" Tools used: {', '.join(findings.tools_used)}")
|
| 3386 |
+
print(f" API calls: {self.api_calls_made}")
|
| 3387 |
+
|
| 3388 |
+
return result
|
| 3389 |
+
|
| 3390 |
+
def _collect_artifacts(self, workflow_history: List[Dict]) -> Dict[str, Any]:
|
| 3391 |
+
"""Collect plots, files, and other artifacts from workflow history."""
|
| 3392 |
+
plots = []
|
| 3393 |
+
files = []
|
| 3394 |
+
|
| 3395 |
+
for step in workflow_history:
|
| 3396 |
+
result = step.get("result", {})
|
| 3397 |
+
if not isinstance(result, dict):
|
| 3398 |
+
continue
|
| 3399 |
+
|
| 3400 |
+
result_data = result.get("result", result)
|
| 3401 |
+
if isinstance(result_data, dict):
|
| 3402 |
+
# Collect output files
|
| 3403 |
+
for key in ["output_file", "output_path", "report_path"]:
|
| 3404 |
+
if key in result_data and result_data[key]:
|
| 3405 |
+
files.append(result_data[key])
|
| 3406 |
+
|
| 3407 |
+
# Collect plots
|
| 3408 |
+
if "plots" in result_data:
|
| 3409 |
+
for plot in result_data["plots"]:
|
| 3410 |
+
if isinstance(plot, dict):
|
| 3411 |
+
plots.append(plot)
|
| 3412 |
+
elif isinstance(plot, str):
|
| 3413 |
+
plots.append({"path": plot, "title": step.get("tool", "Plot")})
|
| 3414 |
+
|
| 3415 |
+
# Check for HTML files (interactive plots)
|
| 3416 |
+
for key in ["html_path", "dashboard_path"]:
|
| 3417 |
+
if key in result_data and result_data[key]:
|
| 3418 |
+
plots.append({
|
| 3419 |
+
"path": result_data[key],
|
| 3420 |
+
"title": step.get("tool", "Interactive Plot"),
|
| 3421 |
+
"type": "html"
|
| 3422 |
+
})
|
| 3423 |
+
|
| 3424 |
+
return {"plots": plots, "files": files}
|
| 3425 |
+
|
| 3426 |
def analyze(self, file_path: str, task_description: str,
|
| 3427 |
target_col: Optional[str] = None,
|
| 3428 |
use_cache: bool = True,
|
|
|
|
| 3557 |
print("✓ Using cached results")
|
| 3558 |
return cached
|
| 3559 |
|
| 3560 |
+
# ═══════════════════════════════════════════════════════════════════════
|
| 3561 |
+
# 🧠 INTENT CLASSIFICATION → MODE SELECTION
|
| 3562 |
+
# Classify the user's request into one of three execution modes:
|
| 3563 |
+
# DIRECT: "Make a scatter plot" → existing pipeline
|
| 3564 |
+
# INVESTIGATIVE: "Why are customers churning?" → reasoning loop
|
| 3565 |
+
# EXPLORATORY: "Analyze this data" → hypothesis-driven loop
|
| 3566 |
+
# ═══════════════════════════════════════════════════════════════════════
|
| 3567 |
+
intent_classifier = IntentClassifier()
|
| 3568 |
+
intent_result = intent_classifier.classify(
|
| 3569 |
+
query=task_description,
|
| 3570 |
+
dataset_info=schema_info if 'error' not in schema_info else None,
|
| 3571 |
+
has_target_col=bool(target_col)
|
| 3572 |
+
)
|
| 3573 |
+
|
| 3574 |
+
print(f"\n🎯 Intent Classification:")
|
| 3575 |
+
print(f" Mode: {intent_result.mode.upper()}")
|
| 3576 |
+
print(f" Confidence: {intent_result.confidence:.0%}")
|
| 3577 |
+
print(f" Reasoning: {intent_result.reasoning}")
|
| 3578 |
+
print(f" Sub-intent: {intent_result.sub_intent}")
|
| 3579 |
+
|
| 3580 |
+
# Emit intent info for UI
|
| 3581 |
+
if hasattr(self, 'session') and self.session:
|
| 3582 |
+
progress_manager.emit(self.session.session_id, {
|
| 3583 |
+
'type': 'intent_classified',
|
| 3584 |
+
'mode': intent_result.mode,
|
| 3585 |
+
'confidence': intent_result.confidence,
|
| 3586 |
+
'reasoning': intent_result.reasoning,
|
| 3587 |
+
'sub_intent': intent_result.sub_intent
|
| 3588 |
+
})
|
| 3589 |
+
|
| 3590 |
+
# 📝 Record intent classification in reasoning trace
|
| 3591 |
+
self.reasoning_trace.trace_history.append({
|
| 3592 |
+
"type": "intent_classification",
|
| 3593 |
+
"query": task_description,
|
| 3594 |
+
"mode": intent_result.mode,
|
| 3595 |
+
"confidence": intent_result.confidence,
|
| 3596 |
+
"reasoning": intent_result.reasoning,
|
| 3597 |
+
"sub_intent": intent_result.sub_intent
|
| 3598 |
+
})
|
| 3599 |
+
|
| 3600 |
+
# ═══════════════════════════════════════════════════════════════════════
|
| 3601 |
+
# 🧠 REASONING LOOP PATH (Investigative / Exploratory modes)
|
| 3602 |
+
# ═══════════════════════════════════════════════════════════════════════
|
| 3603 |
+
if intent_result.mode in ("investigative", "exploratory"):
|
| 3604 |
+
print(f"\n🧠 Routing to REASONING LOOP ({intent_result.mode} mode)")
|
| 3605 |
+
|
| 3606 |
+
# Determine iteration count based on mode and reasoning effort
|
| 3607 |
+
if intent_result.mode == "exploratory":
|
| 3608 |
+
loop_max = min(max_iterations, 8) # Exploratory gets more iterations
|
| 3609 |
+
else:
|
| 3610 |
+
loop_max = min(max_iterations, 6) # Investigative is more focused
|
| 3611 |
+
|
| 3612 |
+
reasoning_result = self._run_reasoning_loop(
|
| 3613 |
+
question=task_description,
|
| 3614 |
+
file_path=file_path,
|
| 3615 |
+
dataset_info=schema_info if 'error' not in schema_info else {},
|
| 3616 |
+
target_col=target_col,
|
| 3617 |
+
mode=intent_result.mode,
|
| 3618 |
+
max_iterations=loop_max
|
| 3619 |
+
)
|
| 3620 |
+
|
| 3621 |
+
# Cache the result
|
| 3622 |
+
if use_cache and reasoning_result.get("status") == "success":
|
| 3623 |
+
self.cache.set(cache_key, reasoning_result, metadata={
|
| 3624 |
+
"file_path": file_path,
|
| 3625 |
+
"task": task_description,
|
| 3626 |
+
"mode": intent_result.mode
|
| 3627 |
+
})
|
| 3628 |
+
|
| 3629 |
+
return reasoning_result
|
| 3630 |
+
|
| 3631 |
+
# ═══════════════════════════════════════════════════════════════════════
|
| 3632 |
+
# 📋 DIRECT MODE PATH (existing pipeline - below is unchanged)
|
| 3633 |
+
# ═══════════════════════════════════════════════════════════════════════
|
| 3634 |
+
print(f"\n📋 Routing to DIRECT pipeline mode")
|
| 3635 |
+
|
| 3636 |
# Build initial messages
|
| 3637 |
# Use dynamic prompts for small context models
|
| 3638 |
if self.use_compact_prompts:
|
src/reasoning/__init__.py
CHANGED
|
@@ -17,7 +17,16 @@ Architecture:
|
|
| 17 |
|
| 18 |
Tool: "Here's what I found: {stats}"
|
| 19 |
Reasoning: "Based on these stats, this means..."
|
|
|
|
|
|
|
|
|
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
Usage:
|
| 22 |
from reasoning import get_reasoner
|
| 23 |
|
|
@@ -25,6 +34,12 @@ Usage:
|
|
| 25 |
result = reasoner.explain_data(
|
| 26 |
summary={"rows": 1000, "columns": 20, "missing": 50}
|
| 27 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
"""
|
| 29 |
|
| 30 |
import os
|
|
|
|
| 17 |
|
| 18 |
Tool: "Here's what I found: {stats}"
|
| 19 |
Reasoning: "Based on these stats, this means..."
|
| 20 |
+
|
| 21 |
+
Reasoning Loop (NEW):
|
| 22 |
+
REASON → ACT → EVALUATE → LOOP/STOP → SYNTHESIZE
|
| 23 |
|
| 24 |
+
Modules:
|
| 25 |
+
- findings.py: Accumulated evidence state (step tracker + decision ledger)
|
| 26 |
+
- reasoner.py: REASON step - picks next investigation action
|
| 27 |
+
- evaluator.py: EVALUATE step - interprets results, decides continue/stop
|
| 28 |
+
- synthesizer.py: SYNTHESIZE step - builds final answer from evidence
|
| 29 |
+
|
| 30 |
Usage:
|
| 31 |
from reasoning import get_reasoner
|
| 32 |
|
|
|
|
| 34 |
result = reasoner.explain_data(
|
| 35 |
summary={"rows": 1000, "columns": 20, "missing": 50}
|
| 36 |
)
|
| 37 |
+
|
| 38 |
+
# Reasoning Loop components:
|
| 39 |
+
from reasoning.findings import FindingsAccumulator
|
| 40 |
+
from reasoning.reasoner import Reasoner
|
| 41 |
+
from reasoning.evaluator import Evaluator
|
| 42 |
+
from reasoning.synthesizer import Synthesizer
|
| 43 |
"""
|
| 44 |
|
| 45 |
import os
|
src/reasoning/evaluator.py
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Evaluator Module - The EVALUATE step of the Reasoning Loop.
|
| 3 |
+
|
| 4 |
+
Interprets tool results and decides:
|
| 5 |
+
- What did we learn from this action?
|
| 6 |
+
- Does this answer the user's question?
|
| 7 |
+
- Should we continue investigating or stop?
|
| 8 |
+
- What follow-up questions emerged?
|
| 9 |
+
|
| 10 |
+
The Evaluator transforms raw tool output into understanding.
|
| 11 |
+
|
| 12 |
+
Architecture:
|
| 13 |
+
Tool Result → Evaluator.evaluate() → EvaluationOutput
|
| 14 |
+
- interpretation: natural language explanation
|
| 15 |
+
- answered: did this answer the question?
|
| 16 |
+
- confidence: how confident are we?
|
| 17 |
+
- should_stop: should the loop stop?
|
| 18 |
+
- next_questions: what to investigate next
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
import json
|
| 22 |
+
import re
|
| 23 |
+
from dataclasses import dataclass, field
|
| 24 |
+
from typing import Dict, Any, List, Optional, Callable
|
| 25 |
+
|
| 26 |
+
from .findings import Finding, FindingsAccumulator
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@dataclass
class EvaluationOutput:
    """Structured result of one EVALUATE step of the reasoning loop.

    Produced by Evaluator._parse_response() from the LLM's JSON reply and
    consumed by the loop controller (stop decision) and by
    Evaluator.build_finding() (turned into a Finding record).
    """
    interpretation: str               # What we learned from the tool result
    answered: bool                    # Does this answer the user's question?
    confidence: float                 # 0.0-1.0 confidence
    should_stop: bool                 # Should the reasoning loop stop?
    next_questions: List[str]         # Follow-up questions to investigate
    key_metric: Optional[str] = None  # Most important metric extracted
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
# System prompt for the Evaluator LLM call: forces a senior-analyst persona
# and JSON-only output (parsed by Evaluator._parse_response).
EVALUATOR_SYSTEM_PROMPT = """You are a senior data scientist interpreting analysis results.

Your job:
1. Interpret what the tool result MEANS (not just what it shows)
2. Decide if this answers the user's original question
3. Identify follow-up questions worth investigating
4. Assign confidence level to your interpretation

Be concise but insightful. Focus on:
- Statistical significance (not just numbers)
- Business implications (not just patterns)
- Confounders and caveats
- What's surprising vs expected

CRITICAL: Output ONLY valid JSON, no other text."""

# User-turn template. Placeholders are filled by Evaluator.evaluate();
# doubled braces {{ }} are literal braces in the JSON schema example.
EVALUATOR_USER_TEMPLATE = """**User's original question**: {question}

**Action taken**: {tool_name}({arguments})

**Tool result** (compressed):
{result_summary}

**What we knew before this step**:
{prior_findings}

Evaluate this result. Respond with ONLY this JSON:
{{
  "interpretation": "1-3 sentences: What does this result MEAN for answering the question?",
  "answered": true/false,
  "confidence": 0.0-1.0,
  "should_stop": true/false,
  "next_questions": ["follow-up question 1", "follow-up question 2"],
  "key_metric": "most important number or finding (optional)"
}}

Guidelines for should_stop:
- true: Question is fully answered OR we've gathered enough evidence OR no more useful actions
- false: Important aspects remain uninvestigated

Guidelines for confidence:
- 0.0-0.3: Weak evidence, need more investigation
- 0.3-0.6: Moderate evidence, some aspects unclear
- 0.6-0.8: Strong evidence, minor questions remain
- 0.8-1.0: Very strong evidence, question well answered"""
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
class Evaluator:
    """
    The EVALUATE step of the Reasoning Loop.

    Takes a tool result and interprets it in the context of
    the user's question and prior findings.

    Usage:
        evaluator = Evaluator(llm_caller=orchestrator._llm_text_call)
        evaluation = evaluator.evaluate(
            question="Why are customers churning?",
            tool_name="analyze_correlations",
            arguments={"file_path": "data.csv", "target_col": "churn"},
            result=tool_result,
            findings=findings_accumulator
        )

        if evaluation.should_stop:
            # Move to synthesis
            ...
        else:
            # Continue reasoning loop
            ...
    """

    def __init__(self, llm_caller: Callable):
        """
        Args:
            llm_caller: Function (system_prompt, user_prompt, max_tokens) -> str
        """
        self.llm_caller = llm_caller

    def evaluate(
        self,
        question: str,
        tool_name: str,
        arguments: Dict[str, Any],
        result: Dict[str, Any],
        findings: FindingsAccumulator,
        result_compressor: Optional[Callable] = None
    ) -> EvaluationOutput:
        """
        Evaluate a tool result.

        Args:
            question: User's original question
            tool_name: Name of the tool that was executed
            arguments: Tool arguments used
            result: Raw tool result dict
            findings: Accumulated findings so far
            result_compressor: Optional function (tool_name, result) -> dict
                used to compress tool results before prompting

        Returns:
            EvaluationOutput with interpretation and next steps
        """
        # Compress the result for LLM consumption
        if result_compressor:
            result_summary = json.dumps(result_compressor(tool_name, result), default=str)
        else:
            result_summary = self._default_compress(result)

        # Truncate if too long (keeps the prompt within context budget)
        if len(result_summary) > 3000:
            result_summary = result_summary[:3000] + "... [truncated]"

        # Build argument string, also capped
        args_str = json.dumps(arguments, default=str)
        if len(args_str) > 500:
            args_str = args_str[:500] + "..."

        user_prompt = EVALUATOR_USER_TEMPLATE.format(
            question=question,
            tool_name=tool_name,
            arguments=args_str,
            result_summary=result_summary,
            prior_findings=findings.get_context_for_reasoning(max_findings=3)
        )

        response_text = self.llm_caller(
            system_prompt=EVALUATOR_SYSTEM_PROMPT,
            user_prompt=user_prompt,
            max_tokens=1024
        )

        return self._parse_response(response_text, result_summary)

    def build_finding(
        self,
        iteration: int,
        hypothesis: str,
        tool_name: str,
        arguments: Dict[str, Any],
        result_summary: str,
        evaluation: "EvaluationOutput"
    ) -> Finding:
        """
        Build a Finding from a completed iteration.

        Convenience method that combines the action and evaluation
        into a single Finding for the accumulator.
        """
        return Finding(
            iteration=iteration,
            hypothesis=hypothesis,
            action=tool_name,
            arguments=arguments,
            result_summary=result_summary[:1000],  # Cap size
            interpretation=evaluation.interpretation,
            confidence=evaluation.confidence,
            answered_question=evaluation.answered,
            next_questions=evaluation.next_questions
        )

    def _parse_response(self, response_text: str, result_summary: str) -> EvaluationOutput:
        """Parse the LLM reply into an EvaluationOutput, tolerating malformed JSON.

        LLMs sometimes emit surrounding prose, string booleans ("true"),
        non-numeric confidences, or scalar next_questions — all are coerced
        defensively instead of raising.
        """
        try:
            data = json.loads(response_text.strip())
        except json.JSONDecodeError:
            # Try to extract the first balanced-looking JSON object
            json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', response_text, re.DOTALL)
            if json_match:
                try:
                    data = json.loads(json_match.group(0))
                except json.JSONDecodeError:
                    return self._fallback_evaluation(response_text, result_summary)
            else:
                return self._fallback_evaluation(response_text, result_summary)

        # json.loads may legally return a scalar or list — only dicts are usable
        if not isinstance(data, dict):
            return self._fallback_evaluation(response_text, result_summary)

        # FIX: float() on a non-numeric value previously raised; coerce safely
        try:
            confidence = float(data.get("confidence", 0.3))
        except (TypeError, ValueError):
            confidence = 0.3
        confidence = min(1.0, max(0.0, confidence))

        # Normalize next_questions to a list of strings
        next_questions = data.get("next_questions", [])
        if not isinstance(next_questions, list):
            next_questions = [str(next_questions)]

        key_metric = data.get("key_metric")

        return EvaluationOutput(
            interpretation=str(data.get("interpretation", "Result processed.")),
            answered=self._coerce_bool(data.get("answered", False)),
            confidence=confidence,
            should_stop=self._coerce_bool(data.get("should_stop", False)),
            next_questions=[str(q) for q in next_questions],
            key_metric=str(key_metric) if key_metric is not None else None
        )

    @staticmethod
    def _coerce_bool(value: Any) -> bool:
        """Interpret JSON booleans plus common string spellings ("true"/"yes"/"1")."""
        if isinstance(value, str):
            return value.strip().lower() in ("true", "yes", "1")
        return bool(value)

    def _fallback_evaluation(self, response_text: str, result_summary: str) -> EvaluationOutput:
        """Fallback when JSON parsing fails: keep the loop alive with low confidence."""
        # Use the raw response as interpretation
        interpretation = response_text.strip()[:500] if response_text else "Analysis step completed."

        return EvaluationOutput(
            interpretation=interpretation,
            answered=False,
            confidence=0.3,
            should_stop=False,
            next_questions=[],
            key_metric=None
        )

    def _default_compress(self, result: Dict[str, Any]) -> str:
        """Default compression for tool results: keep status plus known key metrics."""
        if not isinstance(result, dict):
            return str(result)[:2000]

        compressed = {}

        # Always include status
        if "success" in result:
            compressed["success"] = result["success"]
        if "error" in result:
            compressed["error"] = str(result["error"])[:300]

        # Include key result fields (allowlist of metrics the Evaluator cares about)
        result_data = result.get("result", result)
        if isinstance(result_data, dict):
            for key in ["num_rows", "num_columns", "missing_percentage", "task_type",
                        "best_model", "best_score", "models", "correlations",
                        "output_file", "output_path", "plots", "summary",
                        "total_issues", "columns_affected", "features_created",
                        "accuracy", "r2_score", "rmse", "f1_score"]:
                if key in result_data:
                    value = result_data[key]
                    # Truncate long values
                    if isinstance(value, (list, dict)):
                        compressed[key] = str(value)[:500]
                    else:
                        compressed[key] = value

        return json.dumps(compressed, default=str)
|
src/reasoning/findings.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Findings Accumulator - Core state for the Reasoning Loop.
|
| 3 |
+
|
| 4 |
+
Tracks everything discovered during investigation:
|
| 5 |
+
- Individual findings (action + result + interpretation)
|
| 6 |
+
- Hypotheses being tested
|
| 7 |
+
- Decision ledger (why each action was taken)
|
| 8 |
+
- Confidence tracking
|
| 9 |
+
|
| 10 |
+
This replaces the need for separate "step tracker" and "decision ledger" -
|
| 11 |
+
they're natural byproducts of the accumulated findings.
|
| 12 |
+
|
| 13 |
+
Architecture:
|
| 14 |
+
ReasoningLoop iteration 1: Reason → Act → Evaluate → Finding #1
|
| 15 |
+
ReasoningLoop iteration 2: Reason → Act → Evaluate → Finding #2
|
| 16 |
+
...
|
| 17 |
+
Synthesizer reads all findings → produces final answer
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
from dataclasses import dataclass, field
|
| 21 |
+
from typing import List, Dict, Any, Optional
|
| 22 |
+
from datetime import datetime
|
| 23 |
+
import json
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@dataclass
class Finding:
    """A single finding from one reasoning loop iteration.

    Records what was done (action + arguments), why (hypothesis), and what
    was concluded (interpretation + confidence), so the accumulated list of
    Findings doubles as a step tracker and a decision ledger.
    """
    iteration: int                  # Loop iteration that produced this finding
    hypothesis: str                 # What we were testing
    action: str                     # Tool name executed
    arguments: Dict[str, Any]       # Tool arguments used
    result_summary: str             # Compressed result (what tool returned)
    interpretation: str             # What we learned from this result
    confidence: float               # 0.0-1.0 confidence in this finding
    answered_question: bool         # Did this iteration answer the user's question?
    next_questions: List[str]       # Follow-up questions generated
    # Capture time; default_factory so each instance stamps its own creation time
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())

    def to_dict(self) -> Dict[str, Any]:
        # NOTE: serialized key is "answered", not "answered_question"
        return {
            "iteration": self.iteration,
            "hypothesis": self.hypothesis,
            "action": self.action,
            "arguments": self.arguments,
            "result_summary": self.result_summary,
            "interpretation": self.interpretation,
            "confidence": self.confidence,
            "answered": self.answered_question,
            "next_questions": self.next_questions,
            "timestamp": self.timestamp
        }
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
@dataclass
class Hypothesis:
    """A hypothesis being tested during exploration.

    Created either explicitly (add_hypothesis) or automatically from a
    Finding's next_questions; its status is advanced via
    FindingsAccumulator.update_hypothesis().
    """
    text: str
    status: str = "untested"  # untested, testing, supported, refuted, inconclusive
    evidence_for: List[str] = field(default_factory=list)      # supporting evidence strings
    evidence_against: List[str] = field(default_factory=list)  # contradicting evidence strings
    priority: float = 0.5     # 0.0-1.0, higher = investigate first
    source_iteration: int = 0  # Which iteration generated this hypothesis

    def to_dict(self) -> Dict[str, Any]:
        return {
            "text": self.text,
            "status": self.status,
            "evidence_for": self.evidence_for,
            "evidence_against": self.evidence_against,
            "priority": self.priority,
            "source_iteration": self.source_iteration
        }
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
class FindingsAccumulator:
    """
    Accumulates findings across the reasoning loop.

    This is the central state object that the Reasoner reads from and
    the Evaluator writes to. It serves as:
    - Step tracker (each finding records what was done)
    - Decision ledger (each finding records WHY it was done)
    - Evidence accumulator (interpretations build the answer)
    - Hypothesis manager (for exploratory analysis)

    Usage:
        findings = FindingsAccumulator(question="Why are customers churning?")

        # After each iteration:
        findings.add_finding(Finding(...))

        # For the Reasoner prompt:
        context = findings.get_context_for_reasoning()

        # For the Synthesizer:
        all_findings = findings.get_all_findings()
    """

    def __init__(self, question: str, mode: str = "investigative"):
        """
        Initialize findings accumulator.

        Args:
            question: The user's original question
            mode: "investigative" or "exploratory"
        """
        self.question = question
        self.mode = mode
        self.findings: List[Finding] = []
        self.hypotheses: List[Hypothesis] = []
        self.tools_used: List[str] = []
        self.files_produced: List[str] = []
        self.is_answered = False
        self.answer_confidence = 0.0
        self.started_at = datetime.now().isoformat()

    @property
    def iteration_count(self) -> int:
        """Number of completed iterations."""
        return len(self.findings)

    def add_finding(self, finding: Finding):
        """Add a finding from a completed iteration.

        Side effects: records the tool name, updates answer progress, and
        promotes the finding's next_questions into untested hypotheses.
        """
        self.findings.append(finding)

        if finding.action not in self.tools_used:
            self.tools_used.append(finding.action)

        # Track answer progress (confidence only ever ratchets upward)
        if finding.answered_question:
            self.is_answered = True
            self.answer_confidence = max(self.answer_confidence, finding.confidence)

        # Add new hypotheses from next_questions (deduplicated by text)
        for q in finding.next_questions:
            if not any(h.text == q for h in self.hypotheses):
                self.hypotheses.append(Hypothesis(
                    text=q,
                    status="untested",
                    priority=0.5,
                    source_iteration=finding.iteration
                ))

    def add_hypothesis(self, text: str, priority: float = 0.5, source_iteration: int = 0):
        """Add a hypothesis to test (no-op if the same text already exists)."""
        if not any(h.text == text for h in self.hypotheses):
            self.hypotheses.append(Hypothesis(
                text=text,
                status="untested",
                priority=priority,
                source_iteration=source_iteration
            ))

    def update_hypothesis(self, text: str, status: str, evidence: str, is_supporting: bool = True):
        """Update a hypothesis with new evidence. Silently ignores unknown text."""
        for h in self.hypotheses:
            if h.text == text:
                h.status = status
                if is_supporting:
                    h.evidence_for.append(evidence)
                else:
                    h.evidence_against.append(evidence)
                return

    def get_untested_hypotheses(self) -> List[Hypothesis]:
        """Get hypotheses that haven't been tested yet, sorted by priority (desc)."""
        untested = [h for h in self.hypotheses if h.status == "untested"]
        return sorted(untested, key=lambda h: h.priority, reverse=True)

    def get_last_output_file(self) -> Optional[str]:
        """Get the most recent output file from tool results.

        Scans findings newest-first; falls back to file-path-like arguments
        when the result summary doesn't expose an output file.
        """
        for finding in reversed(self.findings):
            # Check if result mentions an output file
            result = finding.result_summary
            if "output_file" in result or "output_path" in result:
                try:
                    # Try to parse as JSON
                    result_dict = json.loads(result) if isinstance(result, str) else result
                    # FIX: json.loads can return a scalar/list; .get on those
                    # raised an uncaught AttributeError before this guard
                    if isinstance(result_dict, dict):
                        return result_dict.get("output_file") or result_dict.get("output_path")
                except (json.JSONDecodeError, TypeError):
                    pass
            # Check arguments for file paths
            for key in ["file_path", "input_path"]:
                if key in finding.arguments:
                    return finding.arguments[key]
        return None

    def get_context_for_reasoning(self, max_findings: int = 5) -> str:
        """
        Build context string for the Reasoner's prompt.

        Returns a concise summary of what's been discovered so far,
        formatted for LLM consumption.

        Args:
            max_findings: Maximum number of recent findings to include
        """
        if not self.findings:
            return "No investigations completed yet. This is the first step."

        parts = []

        # Summary of what's been done
        parts.append(f"**Investigations completed**: {len(self.findings)}")
        parts.append(f"**Tools used**: {', '.join(self.tools_used)}")

        # Recent findings (most relevant for next decision)
        recent = self.findings[-max_findings:]
        parts.append("\n**Recent findings**:")
        for f in recent:
            parts.append(
                f"  Step {f.iteration}: Ran `{f.action}` to test: \"{f.hypothesis}\"\n"
                f"  → Result: {f.interpretation}\n"
                f"  → Confidence: {f.confidence:.0%}"
            )

        # Unanswered questions
        untested = self.get_untested_hypotheses()
        if untested:
            parts.append(f"\n**Open questions** ({len(untested)} remaining):")
            for h in untested[:3]:
                parts.append(f"  - {h.text} (priority: {h.priority:.1f})")

        # Overall progress
        if self.is_answered:
            parts.append(f"\n**Status**: Question partially answered (confidence: {self.answer_confidence:.0%})")
        else:
            parts.append(f"\n**Status**: Still investigating")

        return "\n".join(parts)

    def get_context_for_synthesis(self) -> str:
        """
        Build context string for the Synthesizer.

        Returns the complete investigative history with all findings
        and hypothesis statuses.
        """
        parts = []

        parts.append(f"**Original question**: {self.question}")
        parts.append(f"**Mode**: {self.mode}")
        parts.append(f"**Total iterations**: {len(self.findings)}")
        parts.append(f"**Tools used**: {', '.join(self.tools_used)}")

        # All findings in order
        parts.append("\n## Investigation Steps\n")
        for f in self.findings:
            parts.append(
                f"### Step {f.iteration}: {f.action}\n"
                f"**Hypothesis**: {f.hypothesis}\n"
                f"**Arguments**: {json.dumps(f.arguments, default=str)}\n"
                f"**Result**: {f.result_summary}\n"
                f"**Interpretation**: {f.interpretation}\n"
                f"**Confidence**: {f.confidence:.0%}\n"
            )

        # Hypothesis outcomes (for exploratory mode)
        if self.hypotheses:
            parts.append("\n## Hypothesis Outcomes\n")
            for h in self.hypotheses:
                status_emoji = {
                    "supported": "✅",
                    "refuted": "❌",
                    "inconclusive": "❓",
                    "testing": "🔄",
                    "untested": "⬜"
                }.get(h.status, "⬜")

                parts.append(f"{status_emoji} **{h.text}** → {h.status}")
                if h.evidence_for:
                    parts.append(f"  Evidence for: {'; '.join(h.evidence_for)}")
                if h.evidence_against:
                    parts.append(f"  Evidence against: {'; '.join(h.evidence_against)}")

        return "\n".join(parts)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize for API response / session storage."""
        return {
            "question": self.question,
            "mode": self.mode,
            "iteration_count": self.iteration_count,
            "is_answered": self.is_answered,
            "answer_confidence": self.answer_confidence,
            "tools_used": self.tools_used,
            "files_produced": self.files_produced,
            "findings": [f.to_dict() for f in self.findings],
            "hypotheses": [h.to_dict() for h in self.hypotheses],
            "started_at": self.started_at
        }
|
src/reasoning/reasoner.py
ADDED
|
@@ -0,0 +1,344 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Reasoner Module - The REASON step of the Reasoning Loop.
|
| 3 |
+
|
| 4 |
+
Decides what to investigate next based on:
|
| 5 |
+
- The user's original question
|
| 6 |
+
- What we've discovered so far (findings)
|
| 7 |
+
- Available tools
|
| 8 |
+
- Dataset schema
|
| 9 |
+
|
| 10 |
+
The Reasoner does NOT execute anything. It only produces a structured
|
| 11 |
+
decision about what action to take next.
|
| 12 |
+
|
| 13 |
+
Architecture:
|
| 14 |
+
Reasoner.reason() → ReasoningOutput
|
| 15 |
+
- status: "investigating" | "done"
|
| 16 |
+
- reasoning: why this action (decision ledger entry)
|
| 17 |
+
- tool_name: which tool to run
|
| 18 |
+
- arguments: tool arguments
|
| 19 |
+
- hypothesis: what we're testing
|
| 20 |
+
|
| 21 |
+
This replaces the old approach where a massive system prompt told the LLM
|
| 22 |
+
"follow steps 1-15." Instead, the Reasoner makes a strategic decision
|
| 23 |
+
each iteration based on what it's learned so far.
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
import json
|
| 27 |
+
import re
|
| 28 |
+
from dataclasses import dataclass, field
|
| 29 |
+
from typing import Dict, Any, List, Optional, Callable
|
| 30 |
+
|
| 31 |
+
from .findings import FindingsAccumulator
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
@dataclass
class ReasoningOutput:
    """Structured decision emitted by one REASON step of the loop.

    Carries the chosen action (or a terminal "done" signal), plus the
    rationale that becomes the decision-ledger entry for this iteration.
    """
    status: str                # "investigating" or "done"
    reasoning: str             # Why this action was chosen
    tool_name: Optional[str]   # Tool to execute (None if done)
    arguments: Dict[str, Any]  # Tool arguments
    hypothesis: str            # What we're testing with this action
    confidence: float = 0.0    # How confident the reasoner is (0-1)

    @classmethod
    def done(cls, reasoning: str, confidence: float = 0.8) -> "ReasoningOutput":
        """Build a terminal output: no further investigation is needed."""
        terminal_fields = {
            "status": "done",
            "reasoning": reasoning,
            "tool_name": None,
            "arguments": {},
            "hypothesis": "",
            "confidence": confidence,
        }
        return cls(**terminal_fields)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
# System prompt for the Reasoner LLM call.
# NOTE(review): original indentation inside these triple-quoted strings was
# lost in extraction; reconstructed here — whitespace is not semantically
# significant to the LLM, but confirm against the committed file.
REASONER_SYSTEM_PROMPT = """You are a senior data scientist. Your job is to decide the SINGLE MOST IMPORTANT next investigation step.

You are given:
1. The user's question
2. What has been discovered so far
3. The dataset schema
4. Available tools

Your task: Decide ONE action to take next. Be strategic:
- Start with understanding (profiling, correlations) before acting
- Test the most impactful hypothesis first
- Don't repeat actions that have already been done
- Stop when you have enough evidence to answer the question confidently

CRITICAL RULES:
- Output ONLY valid JSON, no other text
- Use EXACT tool names from the available tools list
- Use EXACT column names from the dataset schema
- The file_path argument should use the most recent output file when available
- For visualization, pick the chart type that best answers the question
- NEVER hallucinate column names - use only columns from the schema"""

# Per-iteration user prompt; filled in by Reasoner.reason(). The doubled
# braces ({{ }}) are literal braces in the rendered JSON example.
REASONER_USER_TEMPLATE = """**User's question**: {question}

**Dataset info**:
- File: {file_path}
- Rows: {num_rows:,} | Columns: {num_columns}
- Numeric columns: {numeric_columns}
- Categorical columns: {categorical_columns}
{target_info}

**Investigation so far**:
{findings_context}

**Available tools**:
{tools_description}

Decide the next action. Respond with ONLY this JSON:
{{
  "status": "investigating" or "done",
  "reasoning": "1-2 sentence explanation of why this action is needed",
  "tool_name": "exact_tool_name",
  "arguments": {{"arg1": "value1", "arg2": "value2"}},
  "hypothesis": "what we expect to learn from this action"
}}

If you have enough evidence to answer the user's question, respond:
{{
  "status": "done",
  "reasoning": "We have sufficient evidence because...",
  "tool_name": null,
  "arguments": {{}},
  "hypothesis": ""
}}"""


# System prompt for generating hypotheses (Exploratory mode)
HYPOTHESIS_SYSTEM_PROMPT = """You are a senior data scientist examining a dataset for the first time.
Given the dataset profile, generate 3-5 hypotheses worth investigating.

Focus on:
- Surprising patterns (unexpected correlations, outliers)
- Business-relevant relationships (what drives the target variable?)
- Data quality issues that could affect analysis
- Distribution anomalies

Output ONLY valid JSON array of hypotheses, ranked by priority (most interesting first)."""

# User prompt for hypothesis generation; filled in by
# Reasoner.generate_hypotheses().
HYPOTHESIS_USER_TEMPLATE = """**Dataset**: {file_path}
- Rows: {num_rows:,} | Columns: {num_columns}
- Numeric: {numeric_columns}
- Categorical: {categorical_columns}
{target_info}
{profile_summary}

Generate hypotheses as JSON:
[
  {{"text": "hypothesis description", "priority": 0.9, "suggested_tool": "tool_name"}},
  ...
]"""
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
class Reasoner:
    """
    The REASON step of the Reasoning Loop.

    Makes a strategic decision about what to investigate next,
    based on the user's question and accumulated findings.

    Usage:
        reasoner = Reasoner(llm_caller=orchestrator._llm_text_call)
        output = reasoner.reason(
            question="Why are customers churning?",
            dataset_info=schema_info,
            findings=findings_accumulator,
            available_tools=tools_description,
            file_path="data.csv"
        )

        if output.status == "investigating":
            result = execute_tool(output.tool_name, output.arguments)
        else:
            # Done investigating, synthesize answer
            ...
    """

    def __init__(self, llm_caller: Callable):
        """
        Args:
            llm_caller: Function (system_prompt, user_prompt, max_tokens) -> str
                Wraps the orchestrator's provider-specific LLM call.
        """
        self.llm_caller = llm_caller

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def reason(
        self,
        question: str,
        dataset_info: Dict[str, Any],
        findings: FindingsAccumulator,
        available_tools: str,
        file_path: str,
        target_col: Optional[str] = None
    ) -> ReasoningOutput:
        """
        Decide the next investigation step.

        Args:
            question: User's original question
            dataset_info: Dataset schema (columns, types, stats)
            findings: Accumulated findings from previous iterations
            available_tools: Text description of available tools
            file_path: Current file path (latest output or original)
            target_col: Optional target column

        Returns:
            ReasoningOutput with the next action to take
        """
        user_prompt = REASONER_USER_TEMPLATE.format(
            question=question,
            file_path=file_path,
            num_rows=dataset_info.get("num_rows", 0),
            num_columns=dataset_info.get("num_columns", 0),
            numeric_columns=self._format_columns(dataset_info.get("numeric_columns", [])),
            categorical_columns=self._format_columns(dataset_info.get("categorical_columns", [])),
            target_info=self._target_info(target_col),
            findings_context=findings.get_context_for_reasoning(),
            tools_description=available_tools
        )

        response_text = self.llm_caller(
            system_prompt=REASONER_SYSTEM_PROMPT,
            user_prompt=user_prompt,
            max_tokens=1024
        )

        return self._parse_response(response_text, file_path)

    def generate_hypotheses(
        self,
        dataset_info: Dict[str, Any],
        file_path: str,
        target_col: Optional[str] = None,
        profile_summary: str = ""
    ) -> List[Dict[str, Any]]:
        """
        Generate hypotheses for exploratory analysis.

        Called at the start of Exploratory mode to seed the
        reasoning loop with interesting questions to investigate.

        Args:
            dataset_info: Dataset schema
            file_path: Path to dataset
            target_col: Optional target column
            profile_summary: Optional profiling results summary

        Returns:
            List of hypothesis dicts with text, priority, suggested_tool
        """
        user_prompt = HYPOTHESIS_USER_TEMPLATE.format(
            file_path=file_path,
            num_rows=dataset_info.get("num_rows", 0),
            num_columns=dataset_info.get("num_columns", 0),
            numeric_columns=self._format_columns(dataset_info.get("numeric_columns", [])),
            categorical_columns=self._format_columns(dataset_info.get("categorical_columns", [])),
            target_info=self._target_info(target_col),
            profile_summary=profile_summary or "No profile available yet."
        )

        response_text = self.llm_caller(
            system_prompt=HYPOTHESIS_SYSTEM_PROMPT,
            user_prompt=user_prompt,
            max_tokens=1024
        )

        return self._parse_hypotheses(response_text)

    # ------------------------------------------------------------------
    # Prompt-building helpers (shared by reason() and generate_hypotheses())
    # ------------------------------------------------------------------

    @staticmethod
    def _format_columns(columns: List[str], limit: int = 15) -> str:
        """Render up to `limit` column names as a quoted, comma-separated list."""
        return ", ".join(f"'{c}'" for c in columns[:limit])

    @staticmethod
    def _target_info(target_col: Optional[str]) -> str:
        """Render the optional target-column line for the prompt templates."""
        return f"- Target column: '{target_col}'" if target_col else ""

    # ------------------------------------------------------------------
    # Response parsing
    # ------------------------------------------------------------------

    def _parse_response(self, response_text: str, file_path: str) -> ReasoningOutput:
        """Parse LLM response into ReasoningOutput.

        Degrades gracefully: any unparseable or non-object response falls
        back to a safe default action (profiling) instead of raising, so a
        malformed LLM reply never crashes the reasoning loop.
        """
        data = self._extract_json_object(response_text)
        if data is None:
            # Fallback: return a profiling action
            return ReasoningOutput(
                status="investigating",
                reasoning="Could not parse LLM response, defaulting to profiling",
                tool_name="profile_dataset",
                arguments={"file_path": file_path},
                hypothesis="Understanding the data structure first"
            )

        status = data.get("status", "investigating")
        tool_name = data.get("tool_name")

        # Guard against "arguments": null or a non-dict value from the LLM.
        arguments = data.get("arguments") or {}
        if not isinstance(arguments, dict):
            arguments = {}

        # Ensure file_path is in arguments if tool needs it
        if tool_name and "file_path" not in arguments and tool_name not in [
            "execute_python_code", "get_smart_summary"
        ]:
            arguments["file_path"] = file_path

        return ReasoningOutput(
            status=status,
            reasoning=data.get("reasoning", ""),
            tool_name=tool_name if status == "investigating" else None,
            arguments=arguments,
            hypothesis=data.get("hypothesis", ""),
            confidence=data.get("confidence", 0.5)
        )

    @staticmethod
    def _extract_json_object(response_text: str) -> Optional[Dict[str, Any]]:
        """Best-effort extraction of a single JSON object from LLM output.

        Tries a direct parse first, then scans for the first brace-balanced
        object embedded in markdown/prose. Returns None when nothing usable
        is found (callers handle the fallback in one place, instead of the
        previous duplicated fallback branches).
        """
        try:
            parsed = json.loads(response_text.strip())
            # A bare JSON array/string is not a valid decision object.
            return parsed if isinstance(parsed, dict) else None
        except json.JSONDecodeError:
            pass

        # Matches one object with at most one level of nested braces
        # (enough for the {"arguments": {...}} shape we request).
        json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', response_text, re.DOTALL)
        if json_match:
            try:
                parsed = json.loads(json_match.group(0))
                return parsed if isinstance(parsed, dict) else None
            except json.JSONDecodeError:
                pass
        return None

    def _parse_hypotheses(self, response_text: str) -> List[Dict[str, Any]]:
        """Parse hypothesis generation response.

        Falls back to a generic hypothesis set when the LLM output is not a
        valid JSON array, so exploratory mode always has something to do.
        """
        try:
            data = json.loads(response_text.strip())
            if isinstance(data, list):
                return data
        except json.JSONDecodeError:
            pass

        # Try to extract JSON array
        array_match = re.search(r'\[.*\]', response_text, re.DOTALL)
        if array_match:
            try:
                data = json.loads(array_match.group(0))
                if isinstance(data, list):
                    return data
            except json.JSONDecodeError:
                pass

        # Fallback: generate basic hypotheses
        return [
            {"text": "What are the key statistical properties of this dataset?", "priority": 0.9, "suggested_tool": "profile_dataset"},
            {"text": "Are there any significant correlations between variables?", "priority": 0.8, "suggested_tool": "analyze_correlations"},
            {"text": "What does the distribution of key variables look like?", "priority": 0.7, "suggested_tool": "generate_eda_plots"}
        ]
|
src/reasoning/synthesizer.py
ADDED
|
@@ -0,0 +1,195 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Synthesizer Module - The SYNTHESIZE step of the Reasoning Loop.
|
| 3 |
+
|
| 4 |
+
Takes all accumulated findings and produces a coherent, narrative answer.
|
| 5 |
+
|
| 6 |
+
Unlike the old approach (where the LLM's last response WAS the summary),
|
| 7 |
+
the Synthesizer deliberately constructs the answer from evidence:
|
| 8 |
+
- Connects findings into a coherent story
|
| 9 |
+
- Cites evidence for each claim
|
| 10 |
+
- Highlights confidence levels
|
| 11 |
+
- Notes what wasn't investigated (limitations)
|
| 12 |
+
- Produces actionable insights, not just numbers
|
| 13 |
+
|
| 14 |
+
Architecture:
|
| 15 |
+
FindingsAccumulator → Synthesizer.synthesize() → Markdown narrative
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
import json
|
| 19 |
+
from typing import Dict, Any, List, Optional, Callable
|
| 20 |
+
|
| 21 |
+
from .findings import FindingsAccumulator
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# System prompt for the final SYNTHESIZE step (question-driven mode).
SYNTHESIS_SYSTEM_PROMPT = """You are a senior data scientist writing a concise analysis report.

Given the investigation findings, synthesize a clear, evidence-based answer to the user's question.

STRUCTURE (use markdown):
1. **Executive Summary** (2-3 sentences answering the question directly)
2. **Key Findings** (bullet points with evidence references)
3. **Supporting Evidence** (specific metrics, correlations, patterns)
4. **Visualizations** (mention any plots/charts generated, with file paths)
5. **Limitations & Caveats** (what we didn't investigate, caveats)
6. **Recommendations** (actionable next steps)

RULES:
- Lead with the answer, then show evidence
- Use specific numbers (not "high correlation" but "r=0.72")
- Mention generated files/plots so user can find them
- Be honest about confidence levels
- Keep it under 500 words unless complex analysis warrants more
- Use markdown formatting (headers, bullets, bold for emphasis)"""

# User prompt template; filled in by Synthesizer.synthesize().
SYNTHESIS_USER_TEMPLATE = """**Original question**: {question}

**Investigation summary**:
{findings_context}

**Generated artifacts**:
{artifacts_summary}

Write the analysis report now. Focus on answering the question with evidence from the investigation."""
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
class Synthesizer:
    """
    The SYNTHESIZE step of the Reasoning Loop.

    Produces the final answer from accumulated evidence.

    Usage:
        synthesizer = Synthesizer(llm_caller=orchestrator._llm_text_call)
        report = synthesizer.synthesize(
            findings=findings_accumulator,
            artifacts={"plots": [...], "files": [...]}
        )
    """

    def __init__(self, llm_caller: Callable):
        """
        Args:
            llm_caller: Function (system_prompt, user_prompt, max_tokens) -> str
        """
        self.llm_caller = llm_caller

    def synthesize(
        self,
        findings: FindingsAccumulator,
        artifacts: Optional[Dict[str, Any]] = None,
        max_tokens: int = 3000
    ) -> str:
        """
        Synthesize all findings into a coherent answer.

        Args:
            findings: Accumulated findings from the reasoning loop
            artifacts: Optional dict of generated artifacts (plots, files, models)
            max_tokens: Max tokens for synthesis response

        Returns:
            Markdown-formatted analysis report
        """
        # Build artifacts summary
        artifacts_summary = self._format_artifacts(artifacts or {}, findings)

        user_prompt = SYNTHESIS_USER_TEMPLATE.format(
            question=findings.question,
            findings_context=findings.get_context_for_synthesis(),
            artifacts_summary=artifacts_summary
        )

        response = self.llm_caller(
            system_prompt=SYNTHESIS_SYSTEM_PROMPT,
            user_prompt=user_prompt,
            max_tokens=max_tokens
        )

        return response.strip()

    def synthesize_exploratory(
        self,
        findings: FindingsAccumulator,
        artifacts: Optional[Dict[str, Any]] = None,
        max_tokens: int = 3000
    ) -> str:
        """
        Synthesize findings from exploratory analysis (no specific question).

        Uses a different prompt that focuses on discovering patterns
        rather than answering a specific question.
        """
        exploratory_system = """You are a senior data scientist presenting exploratory analysis results.

The user asked for a general analysis. Present the most interesting discoveries.

STRUCTURE (use markdown):
1. **Dataset Overview** (size, structure, key characteristics)
2. **Most Interesting Discoveries** (ranked by insight value)
3. **Key Patterns & Relationships** (correlations, distributions, trends)
4. **Data Quality Notes** (missing data, outliers, issues found)
5. **Visualizations Generated** (list with descriptions)
6. **Recommended Next Steps** (what to investigate deeper)

RULES:
- Lead with the most surprising/important finding
- Use specific numbers and metrics
- Mention all generated visualizations with file paths
- Suggest actionable next analysis steps
- Keep it engaging but data-driven"""

        artifacts_summary = self._format_artifacts(artifacts or {}, findings)

        user_prompt = f"""**Analysis request**: {findings.question}

**Investigation summary**:
{findings.get_context_for_synthesis()}

**Generated artifacts**:
{artifacts_summary}

Write the exploratory analysis report."""

        response = self.llm_caller(
            system_prompt=exploratory_system,
            user_prompt=user_prompt,
            max_tokens=max_tokens
        )

        return response.strip()

    def _format_artifacts(self, artifacts: Dict[str, Any], findings: FindingsAccumulator) -> str:
        """Format artifacts for the synthesis prompt.

        Combines explicitly passed artifacts (plots/files) with any outputs
        detected in the findings history, plus the list of tools used.
        """
        parts = []

        # Plots passed in explicitly by the orchestrator.
        plots = artifacts.get("plots", [])
        if plots:
            parts.append("**Plots generated**:")
            for plot in plots:
                if isinstance(plot, dict):
                    parts.append(f" - {plot.get('title', 'Plot')}: {plot.get('url', plot.get('path', 'N/A'))}")
                else:
                    parts.append(f" - {plot}")

        # Output files passed in explicitly.
        files = artifacts.get("files", [])
        if files:
            parts.append("**Output files**:")
            for f in files:
                parts.append(f" - {f}")

        # Extract from findings history.
        # BUGFIX: result_summary may be a dict; the old checks
        # `".html" in result` / `".png" in result` only tested dict *keys*.
        # Stringify first so substring checks also see values (and still
        # work if result_summary is already a string).
        for finding in findings.findings:
            result_text = str(finding.result_summary)
            if any(marker in result_text for marker in ("output_file", "output_path", ".html", ".png")):
                parts.append(f" - Step {finding.iteration} ({finding.action}): output in result")

        # Tools used summary
        if findings.tools_used:
            parts.append(f"\n**Tools used**: {', '.join(findings.tools_used)}")

        if not parts:
            return "No artifacts generated yet."

        return "\n".join(parts)
|
src/routing/__init__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Routing Module - Intent Classification and Request Routing.
|
| 3 |
+
|
| 4 |
+
Determines how the orchestrator should handle a user request:
|
| 5 |
+
- Direct: SBERT routing → tool execution (existing pipeline)
|
| 6 |
+
- Investigative: Reasoning loop with hypothesis testing
|
| 7 |
+
- Exploratory: Auto-hypothesis generation → reasoning loop
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from .intent_classifier import IntentClassifier, IntentResult
|
| 11 |
+
|
| 12 |
+
__all__ = ["IntentClassifier", "IntentResult"]
|
src/routing/intent_classifier.py
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Intent Classifier - Determines execution mode for the Reasoning Loop.
|
| 3 |
+
|
| 4 |
+
Three execution modes:
|
| 5 |
+
1. DIRECT: "Make a scatter plot" → SBERT routing → tool → done
|
| 6 |
+
- Clear, specific command with obvious tool mapping
|
| 7 |
+
- No reasoning loop needed
|
| 8 |
+
|
| 9 |
+
2. INVESTIGATIVE: "Why are customers churning?" → reasoning loop
|
| 10 |
+
- Analytical question requiring hypothesis testing
|
| 11 |
+
- Reasoning loop drives tool selection
|
| 12 |
+
|
| 13 |
+
3. EXPLORATORY: "Analyze this data" → auto-hypothesis → reasoning loop
|
| 14 |
+
- Open-ended request with no specific question
|
| 15 |
+
- First profiles data, generates hypotheses, then investigates
|
| 16 |
+
|
| 17 |
+
The classifier uses keyword patterns + semantic features to decide.
|
| 18 |
+
This is a lightweight classification (no LLM call needed).
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
import re
|
| 22 |
+
from typing import Optional, Dict, Any, Tuple
|
| 23 |
+
from dataclasses import dataclass
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@dataclass
class IntentResult:
    """Result of intent classification.

    Returned by IntentClassifier.classify(); the orchestrator routes the
    request based on ``mode``.
    """
    mode: str  # "direct", "investigative", "exploratory"
    confidence: float  # 0.0-1.0
    reasoning: str  # Why this mode was chosen
    sub_intent: Optional[str]  # More specific intent (e.g., "visualization", "cleaning")
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# Patterns that indicate DIRECT mode (specific tool commands).
# Each entry is (regex, sub_intent); matched case-insensitively by
# IntentClassifier._match_patterns, first match wins.
DIRECT_PATTERNS = [
    # Visualization commands
    (r"\b(make|create|generate|build|show|draw|plot)\b.*(scatter|histogram|heatmap|box\s*plot|bar\s*chart|pie\s*chart|line\s*chart|dashboard|time\s*series)", "visualization"),
    (r"\b(scatter|histogram|heatmap|boxplot|bar\s*chart)\b.*\b(of|for|between|showing)\b", "visualization"),

    # Data cleaning commands
    (r"\b(clean|remove|drop|fill|impute|handle)\b.*(missing|null|nan|outlier|duplicate)", "cleaning"),
    (r"\b(fix|convert|change)\b.*(data\s*type|dtype|column\s*type)", "cleaning"),

    # Feature engineering commands
    (r"\b(create|add|extract|generate)\b.*(feature|time\s*feature|interaction|encoding)", "feature_engineering"),
    (r"\b(encode|one-hot|label\s*encode|ordinal)\b.*\b(categorical|column)", "feature_engineering"),

    # Model training commands
    (r"\b(train|build|fit|run)\b.*(model|classifier|regressor|baseline|xgboost|random\s*forest)", "training"),
    (r"\b(tune|optimize)\b.*\b(hyperparameter|model|parameter)", "training"),
    (r"\b(cross[\s-]?valid)", "training"),

    # Profiling commands
    (r"\b(profile|describe|summarize)\b.*\b(dataset|data|table|file)", "profiling"),
    (r"\b(data\s*quality|quality\s*check|check\s*quality)", "profiling"),

    # Report generation
    (r"\b(generate|create|build)\b.*\b(report|eda\s*report|profiling\s*report)", "reporting"),
]

# Patterns that indicate INVESTIGATIVE mode (analytical questions).
# Checked only after DIRECT_PATTERNS fails to match.
INVESTIGATIVE_PATTERNS = [
    # Causal / explanatory questions
    (r"\bwhy\b.*(are|is|do|does|did)\b", "causal"),
    (r"\bwhat\b.*(cause|driv|factor|reason|explain|lead)", "causal"),
    (r"\bwhat\b.*(affect|impact|influence|determine)", "causal"),

    # Relationship / correlation questions
    (r"\bhow\b.*(does|do|is|are)\b.*\b(relate|correlat|affect|impact|change|vary)", "relationship"),
    (r"\b(relationship|correlation|association)\b.*\bbetween\b", "relationship"),

    # Comparison questions
    (r"\b(differ|compar|contrast)\b.*\bbetween\b", "comparison"),
    (r"\bwhich\b.*(better|worse|higher|lower|more|less|best|worst)", "comparison"),

    # Pattern / trend questions
    (r"\b(pattern|trend|anomal|outlier|unusual|interesting)\b", "pattern"),
    (r"\bis\s+there\b.*(pattern|trend|relationship|correlation|difference)", "pattern"),

    # Prediction-oriented questions (but NOT direct "train a model" commands)
    (r"\bcan\s+(we|i|you)\b.*(predict|forecast|estimate|determine)", "predictive"),
    (r"\bwhat\b.*(predict|forecast|expect|happen)", "predictive"),

    # Segmentation / grouping questions
    (r"\b(segment|group|cluster|categori)\b", "segmentation"),
    (r"\bwhat\b.*(type|kind|group|segment)\b.*\b(customer|user|product)", "segmentation"),
]

# Patterns that indicate EXPLORATORY mode (open-ended requests).
# Anchored with ^ so they only match when the request *starts* this way.
EXPLORATORY_PATTERNS = [
    (r"^analyze\b.*\b(this|the|my)\b.*\b(data|dataset|file|csv)", "general_analysis"),
    (r"^(tell|show)\b.*\b(me|us)\b.*\b(about|everything|what)", "general_analysis"),
    (r"^(explore|investigate|examine|look\s*(at|into))\b.*\b(this|the|my)\b", "general_analysis"),
    (r"^what\b.*\b(can|do)\b.*\b(you|we)\b.*\b(find|learn|discover|see)", "general_analysis"),
    (r"^(give|provide)\b.*\b(overview|summary|insight|analysis)", "general_analysis"),
    (r"^(run|do|perform)\b.*\b(full|complete|comprehensive|end.to.end)\b.*\b(analysis|pipeline|workflow)", "full_pipeline"),
    (r"^(find|discover|uncover)\b.*\b(insight|pattern|trend|interesting)", "general_analysis"),
]
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
class IntentClassifier:
|
| 103 |
+
"""
|
| 104 |
+
Classifies user intent into one of three execution modes.
|
| 105 |
+
|
| 106 |
+
Uses pattern matching (no LLM call needed) for fast classification.
|
| 107 |
+
Falls back to heuristics when patterns don't match.
|
| 108 |
+
|
| 109 |
+
Usage:
|
| 110 |
+
classifier = IntentClassifier()
|
| 111 |
+
result = classifier.classify("Why are customers churning?")
|
| 112 |
+
# IntentResult(mode="investigative", confidence=0.9, ...)
|
| 113 |
+
|
| 114 |
+
result = classifier.classify("Make a scatter plot of age vs income")
|
| 115 |
+
# IntentResult(mode="direct", confidence=0.95, ...)
|
| 116 |
+
|
| 117 |
+
result = classifier.classify("Analyze this dataset")
|
| 118 |
+
# IntentResult(mode="exploratory", confidence=0.85, ...)
|
| 119 |
+
"""
|
| 120 |
+
|
| 121 |
+
def classify(
|
| 122 |
+
self,
|
| 123 |
+
query: str,
|
| 124 |
+
dataset_info: Optional[Dict[str, Any]] = None,
|
| 125 |
+
has_target_col: bool = False
|
| 126 |
+
) -> IntentResult:
|
| 127 |
+
"""
|
| 128 |
+
Classify user intent into execution mode.
|
| 129 |
+
|
| 130 |
+
Args:
|
| 131 |
+
query: User's natural language query
|
| 132 |
+
dataset_info: Optional dataset schema info
|
| 133 |
+
has_target_col: Whether user provided a target column
|
| 134 |
+
|
| 135 |
+
Returns:
|
| 136 |
+
IntentResult with mode, confidence, and reasoning
|
| 137 |
+
"""
|
| 138 |
+
query_lower = query.lower().strip()
|
| 139 |
+
|
| 140 |
+
# Phase 1: Check for DIRECT patterns (strongest evidence)
|
| 141 |
+
direct_match = self._match_patterns(query_lower, DIRECT_PATTERNS)
|
| 142 |
+
if direct_match:
|
| 143 |
+
pattern, sub_intent = direct_match
|
| 144 |
+
return IntentResult(
|
| 145 |
+
mode="direct",
|
| 146 |
+
confidence=0.90,
|
| 147 |
+
reasoning=f"Direct command detected: {sub_intent} (pattern: {pattern[:50]})",
|
| 148 |
+
sub_intent=sub_intent
|
| 149 |
+
)
|
| 150 |
+
|
| 151 |
+
# Phase 2: Check for INVESTIGATIVE patterns
|
| 152 |
+
invest_match = self._match_patterns(query_lower, INVESTIGATIVE_PATTERNS)
|
| 153 |
+
if invest_match:
|
| 154 |
+
pattern, sub_intent = invest_match
|
| 155 |
+
return IntentResult(
|
| 156 |
+
mode="investigative",
|
| 157 |
+
confidence=0.85,
|
| 158 |
+
reasoning=f"Analytical question detected: {sub_intent}",
|
| 159 |
+
sub_intent=sub_intent
|
| 160 |
+
)
|
| 161 |
+
|
| 162 |
+
# Phase 3: Check for EXPLORATORY patterns
|
| 163 |
+
explore_match = self._match_patterns(query_lower, EXPLORATORY_PATTERNS)
|
| 164 |
+
if explore_match:
|
| 165 |
+
pattern, sub_intent = explore_match
|
| 166 |
+
|
| 167 |
+
# Special case: "full pipeline" with target col → direct ML pipeline
|
| 168 |
+
if sub_intent == "full_pipeline" and has_target_col:
|
| 169 |
+
return IntentResult(
|
| 170 |
+
mode="direct",
|
| 171 |
+
confidence=0.85,
|
| 172 |
+
reasoning="Full ML pipeline requested with target column",
|
| 173 |
+
sub_intent="full_ml_pipeline"
|
| 174 |
+
)
|
| 175 |
+
|
| 176 |
+
return IntentResult(
|
| 177 |
+
mode="exploratory",
|
| 178 |
+
confidence=0.80,
|
| 179 |
+
reasoning=f"Open-ended analysis request: {sub_intent}",
|
| 180 |
+
sub_intent=sub_intent
|
| 181 |
+
)
|
| 182 |
+
|
| 183 |
+
# Phase 4: Heuristic fallback
|
| 184 |
+
return self._heuristic_classify(query_lower, has_target_col)
|
| 185 |
+
|
| 186 |
+
def _match_patterns(self, query: str, patterns: list) -> Optional[Tuple[str, str]]:
|
| 187 |
+
"""Try to match query against a list of (pattern, sub_intent) tuples."""
|
| 188 |
+
for pattern, sub_intent in patterns:
|
| 189 |
+
if re.search(pattern, query, re.IGNORECASE):
|
| 190 |
+
return (pattern, sub_intent)
|
| 191 |
+
return None
|
| 192 |
+
|
| 193 |
+
def _heuristic_classify(self, query: str, has_target_col: bool) -> IntentResult:
    """Fallback classification using simple heuristics.

    Applied only when no explicit pattern matched. Checks run from the
    strongest to the weakest signal and end in a low-confidence default.
    """
    question_prefixes = ("why", "how", "what", "which", "is there", "are there", "does", "do")
    ml_verbs = ("predict", "train", "model", "classify", "regression")

    # Question words → investigative
    if query.startswith(question_prefixes):
        verdict = (
            "investigative",
            0.60,
            "Query starts with question word, likely analytical",
            "general_question",
        )
    # Very short queries → likely direct commands
    elif len(query.split()) <= 5:
        verdict = (
            "direct",
            0.55,
            "Short query, likely a direct command",
            "short_command",
        )
    # Has target column + action verbs → direct ML pipeline
    elif has_target_col and any(verb in query for verb in ml_verbs):
        verdict = (
            "direct",
            0.75,
            "Target column provided with ML action verb",
            "ml_pipeline",
        )
    # Default: exploratory (safest default for data science)
    else:
        verdict = (
            "exploratory",
            0.40,
            "No strong pattern match, defaulting to exploratory analysis",
            "default",
        )

    mode, confidence, reasoning, sub_intent = verdict
    return IntentResult(
        mode=mode,
        confidence=confidence,
        reasoning=reasoning,
        sub_intent=sub_intent,
    )
| 231 |
+
|
| 232 |
+
@staticmethod
|
| 233 |
+
def is_follow_up(query: str) -> bool:
|
| 234 |
+
"""
|
| 235 |
+
Detect if this is a follow-up question (uses context from previous analysis).
|
| 236 |
+
|
| 237 |
+
Follow-ups should generally be INVESTIGATIVE (they're asking about
|
| 238 |
+
something specific in the context of previous results).
|
| 239 |
+
"""
|
| 240 |
+
follow_up_patterns = [
|
| 241 |
+
r"^(now|next|also|and|then)\b",
|
| 242 |
+
r"\b(the same|that|this|those|these)\b.*\b(data|model|result|plot|chart)",
|
| 243 |
+
r"\b(more|another|different)\b.*\b(plot|chart|analysis|model)",
|
| 244 |
+
r"\b(what about|how about|can you also)\b",
|
| 245 |
+
r"\b(using|with)\b.*\b(the same|that|this)\b",
|
| 246 |
+
]
|
| 247 |
+
|
| 248 |
+
query_lower = query.lower().strip()
|
| 249 |
+
return any(re.search(p, query_lower) for p in follow_up_patterns)
|