Pulastya B committed on
Commit
6b731f7
·
1 Parent(s): fea1598

Made Major Changes to the Overall User Experience by adding a pipeline visualizer

Browse files
FRRONTEEEND/components/ChatInterface.tsx CHANGED
@@ -9,6 +9,7 @@ import remarkGfm from 'remark-gfm';
9
  import { useAuth } from '../lib/AuthContext';
10
  import { trackQuery, incrementSessionQueries, getHuggingFaceStatus } from '../lib/supabase';
11
  import { SettingsModal } from './SettingsModal';
 
12
 
13
  // HuggingFace logo SVG component for the export button
14
  const HuggingFaceLogo = ({ className = "w-4 h-4" }: { className?: string }) => (
@@ -214,6 +215,12 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
214
  const processedAnalysisRef = useRef<Set<string>>(new Set()); // Track processed analysis_complete events
215
  const [sseReconnectTrigger, setSseReconnectTrigger] = useState(0); // Force SSE reconnection for follow-up queries
216
 
 
 
 
 
 
 
217
  // Auth context for user tracking
218
  const { user, isAuthenticated, dbSessionId, signOut } = useAuth();
219
 
@@ -349,17 +356,115 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
349
  console.log(`🤖 Agent assigned: ${data.agent}`);
350
  } else if (data.type === 'tool_executing') {
351
  setCurrentStep(data.message || `🔧 Executing: ${data.tool}`);
 
 
 
 
 
 
 
 
 
 
 
 
 
352
  } else if (data.type === 'tool_completed') {
353
  setCurrentStep(data.message || `✓ Completed: ${data.tool}`);
 
 
 
 
 
 
354
  } else if (data.type === 'tool_failed') {
355
  setCurrentStep(data.message || `❌ Failed: ${data.tool}`);
 
 
 
 
 
 
356
  } else if (data.type === 'token_update') {
357
  // Optional: Display token budget updates
358
  console.log('💰 Token update:', data.message);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
  } else if (data.type === 'analysis_failed') {
360
  console.log('❌ Analysis failed', data);
361
  setIsTyping(false);
362
 
 
 
 
 
 
363
  // Show error message to user - add to sessions
364
  setSessions(prev => prev.map(s => {
365
  if (s.id === activeSessionId) {
@@ -382,6 +487,11 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
382
  console.log('✅ Analysis completed', data.result);
383
  setIsTyping(false);
384
 
 
 
 
 
 
385
  // Create a unique key based on actual workflow content to prevent duplicates
386
  // Use the last tool executed + summary hash for uniqueness
387
  const lastTool = data.result?.workflow_history?.[data.result.workflow_history.length - 1]?.tool || 'unknown';
@@ -539,6 +649,12 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
539
 
540
  // Show loading indicator immediately (for UI feedback)
541
  setIsTyping(true);
 
 
 
 
 
 
542
 
543
  try {
544
 
@@ -1197,23 +1313,13 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
1197
  ))
1198
  )}
1199
  {isTyping && (
1200
- <div className="flex gap-4">
1201
- <div className="w-8 h-8 rounded-lg flex items-center justify-center shrink-0 bg-white/5 border border-white/10">
1202
- <Bot className="w-4 h-4 text-indigo-400" />
1203
- </div>
1204
- <div className="bg-white/[0.03] p-4 rounded-2xl border border-white/5">
1205
- <div className="flex items-center gap-3">
1206
- <div className="flex gap-1">
1207
- <span className="w-1.5 h-1.5 bg-emerald-500 rounded-full animate-bounce [animation-delay:-0.3s]"></span>
1208
- <span className="w-1.5 h-1.5 bg-emerald-500 rounded-full animate-bounce [animation-delay:-0.15s]"></span>
1209
- <span className="w-1.5 h-1.5 bg-emerald-500 rounded-full animate-bounce"></span>
1210
- </div>
1211
- <span className="text-sm text-white/60">
1212
- {currentStep || '🔧 Starting analysis...'}
1213
- </span>
1214
- </div>
1215
- </div>
1216
- </div>
1217
  )}
1218
  </div>
1219
 
@@ -1394,7 +1500,7 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
1394
  const allPlots: Array<{title: string, url: string, type?: string}> = [];
1395
  const allReports: Array<{name: string, path: string}> = [];
1396
  const allDataFiles: string[] = [];
1397
- const baselineModels = ['xgboost', 'random_forest', 'catboost', 'lightgbm', 'ridge', 'lasso'];
1398
  const foundModels = new Set<string>();
1399
 
1400
  activeSession.messages.forEach(msg => {
 
9
  import { useAuth } from '../lib/AuthContext';
10
  import { trackQuery, incrementSessionQueries, getHuggingFaceStatus } from '../lib/supabase';
11
  import { SettingsModal } from './SettingsModal';
12
+ import { PipelineView, PipelineStep } from './PipelineView';
13
 
14
  // HuggingFace logo SVG component for the export button
15
  const HuggingFaceLogo = ({ className = "w-4 h-4" }: { className?: string }) => (
 
215
  const processedAnalysisRef = useRef<Set<string>>(new Set()); // Track processed analysis_complete events
216
  const [sseReconnectTrigger, setSseReconnectTrigger] = useState(0); // Force SSE reconnection for follow-up queries
217
 
218
+ // Pipeline visualization state (reasoning loop)
219
+ const [pipelineSteps, setPipelineSteps] = useState<PipelineStep[]>([]);
220
+ const [pipelineMode, setPipelineMode] = useState<string | null>(null);
221
+ const [pipelineHypotheses, setPipelineHypotheses] = useState<string[]>([]);
222
+ const pipelineStepCounterRef = useRef(0); // Unique step ID counter
223
+
224
  // Auth context for user tracking
225
  const { user, isAuthenticated, dbSessionId, signOut } = useAuth();
226
 
 
356
  console.log(`🤖 Agent assigned: ${data.agent}`);
357
  } else if (data.type === 'tool_executing') {
358
  setCurrentStep(data.message || `🔧 Executing: ${data.tool}`);
359
+ // Add pipeline step if in reasoning mode
360
+ if (pipelineMode) {
361
+ const stepId = `act-${++pipelineStepCounterRef.current}`;
362
+ setPipelineSteps(prev => [...prev, {
363
+ id: stepId,
364
+ type: 'act',
365
+ status: 'active',
366
+ title: `Executing: ${data.tool}`,
367
+ subtitle: data.message || '',
368
+ tool: data.tool,
369
+ timestamp: new Date()
370
+ }]);
371
+ }
372
  } else if (data.type === 'tool_completed') {
373
  setCurrentStep(data.message || `✓ Completed: ${data.tool}`);
374
+ // Update pipeline step status
375
+ if (pipelineMode) {
376
+ setPipelineSteps(prev => prev.map(s =>
377
+ s.type === 'act' && s.status === 'active' ? { ...s, status: 'completed' as const } : s
378
+ ));
379
+ }
380
  } else if (data.type === 'tool_failed') {
381
  setCurrentStep(data.message || `❌ Failed: ${data.tool}`);
382
+ // Update pipeline step status
383
+ if (pipelineMode) {
384
+ setPipelineSteps(prev => prev.map(s =>
385
+ s.type === 'act' && s.status === 'active' ? { ...s, status: 'failed' as const, subtitle: data.message || 'Tool failed' } : s
386
+ ));
387
+ }
388
  } else if (data.type === 'token_update') {
389
  // Optional: Display token budget updates
390
  console.log('💰 Token update:', data.message);
391
+ } else if (data.type === 'intent_classified') {
392
+ // 🎯 Reasoning Loop: Intent classification result
393
+ console.log(`🎯 Intent: ${data.mode} (${Math.round(data.confidence * 100)}%)`);
394
+ setPipelineMode(data.mode);
395
+ const stepId = `intent-${++pipelineStepCounterRef.current}`;
396
+ setPipelineSteps(prev => [...prev, {
397
+ id: stepId,
398
+ type: 'intent',
399
+ status: 'completed',
400
+ title: `Intent: ${data.mode.charAt(0).toUpperCase() + data.mode.slice(1)}`,
401
+ subtitle: data.sub_intent || data.reasoning,
402
+ detail: data.reasoning,
403
+ confidence: data.confidence,
404
+ timestamp: new Date()
405
+ }]);
406
+ } else if (data.type === 'reasoning_mode') {
407
+ // 🧠 Reasoning Loop activated
408
+ console.log(`🧠 Reasoning mode: ${data.mode}`);
409
+ setPipelineMode(data.mode);
410
+ setCurrentStep(data.message || `🧠 Reasoning Loop (${data.mode})`);
411
+ } else if (data.type === 'hypotheses_generated') {
412
+ // 💡 Exploratory mode: hypotheses generated
413
+ console.log(`💡 ${data.count} hypotheses generated`);
414
+ setPipelineHypotheses(data.hypotheses || []);
415
+ const stepId = `hyp-${++pipelineStepCounterRef.current}`;
416
+ setPipelineSteps(prev => [...prev, {
417
+ id: stepId,
418
+ type: 'hypothesis',
419
+ status: 'completed',
420
+ title: `${data.count} Hypotheses Generated`,
421
+ subtitle: data.hypotheses?.[0] || '',
422
+ detail: (data.hypotheses || []).map((h: string, i: number) => `${i + 1}. ${h}`).join('\n'),
423
+ timestamp: new Date()
424
+ }]);
425
+ } else if (data.type === 'reasoning_step') {
426
+ // 🤔 Reasoning step: LLM decided next action
427
+ console.log(`🤔 Iteration ${data.iteration}: ${data.tool}`);
428
+ // Mark previous "reason" steps as completed
429
+ setPipelineSteps(prev => prev.map(s =>
430
+ s.type === 'reason' && s.status === 'active' ? { ...s, status: 'completed' as const } : s
431
+ ));
432
+ const stepId = `reason-${++pipelineStepCounterRef.current}`;
433
+ setPipelineSteps(prev => [...prev, {
434
+ id: stepId,
435
+ type: 'reason',
436
+ status: 'completed',
437
+ title: `Reason → ${data.tool}`,
438
+ subtitle: data.hypothesis || '',
439
+ detail: data.reasoning,
440
+ iteration: data.iteration,
441
+ tool: data.tool,
442
+ timestamp: new Date()
443
+ }]);
444
+ } else if (data.type === 'finding_discovered') {
445
+ // 🔬 Finding from evaluation step
446
+ console.log(`🔬 Finding (confidence: ${Math.round(data.confidence * 100)}%)`);
447
+ const stepId = `finding-${++pipelineStepCounterRef.current}`;
448
+ setPipelineSteps(prev => [...prev, {
449
+ id: stepId,
450
+ type: 'finding',
451
+ status: 'completed',
452
+ title: data.answered ? '✓ Question Answered' : 'Finding Discovered',
453
+ subtitle: data.interpretation?.substring(0, 100) || '',
454
+ detail: data.interpretation,
455
+ confidence: data.confidence,
456
+ iteration: data.iteration,
457
+ timestamp: new Date()
458
+ }]);
459
  } else if (data.type === 'analysis_failed') {
460
  console.log('❌ Analysis failed', data);
461
  setIsTyping(false);
462
 
463
+ // Reset pipeline state
464
+ setPipelineSteps([]);
465
+ setPipelineMode(null);
466
+ setPipelineHypotheses([]);
467
+
468
  // Show error message to user - add to sessions
469
  setSessions(prev => prev.map(s => {
470
  if (s.id === activeSessionId) {
 
487
  console.log('✅ Analysis completed', data.result);
488
  setIsTyping(false);
489
 
490
+ // Reset pipeline state
491
+ setPipelineSteps([]);
492
+ setPipelineMode(null);
493
+ setPipelineHypotheses([]);
494
+
495
  // Create a unique key based on actual workflow content to prevent duplicates
496
  // Use the last tool executed + summary hash for uniqueness
497
  const lastTool = data.result?.workflow_history?.[data.result.workflow_history.length - 1]?.tool || 'unknown';
 
649
 
650
  // Show loading indicator immediately (for UI feedback)
651
  setIsTyping(true);
652
+
653
+ // Reset pipeline state for new analysis
654
+ setPipelineSteps([]);
655
+ setPipelineMode(null);
656
+ setPipelineHypotheses([]);
657
+ pipelineStepCounterRef.current = 0;
658
 
659
  try {
660
 
 
1313
  ))
1314
  )}
1315
  {isTyping && (
1316
+ <PipelineView
1317
+ steps={pipelineSteps}
1318
+ mode={pipelineMode}
1319
+ currentStep={currentStep}
1320
+ isActive={isTyping}
1321
+ hypotheses={pipelineHypotheses}
1322
+ />
 
 
 
 
 
 
 
 
 
 
1323
  )}
1324
  </div>
1325
 
 
1500
  const allPlots: Array<{title: string, url: string, type?: string}> = [];
1501
  const allReports: Array<{name: string, path: string}> = [];
1502
  const allDataFiles: string[] = [];
1503
+ const baselineModels = ['xgboost', 'random_forest', 'lightgbm', 'ridge', 'lasso'];
1504
  const foundModels = new Set<string>();
1505
 
1506
  activeSession.messages.forEach(msg => {
FRRONTEEEND/components/PipelineView.tsx ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react';
2
+ import { motion, AnimatePresence } from 'framer-motion';
3
+ import {
4
+ Brain, Zap, BarChart3, CheckCircle2, XCircle,
5
+ Loader2, ChevronDown, ChevronUp, Lightbulb,
6
+ Search, FlaskConical, FileText, Target, ArrowRight
7
+ } from 'lucide-react';
8
+ import { cn } from '../lib/utils';
9
+
10
+ // ─── Types ───────────────────────────────────────────────────
11
+
12
+ export interface PipelineStep {
13
+ id: string;
14
+ type: 'intent' | 'hypothesis' | 'reason' | 'act' | 'evaluate' | 'finding' | 'synthesize';
15
+ status: 'pending' | 'active' | 'completed' | 'failed';
16
+ title: string;
17
+ subtitle?: string;
18
+ detail?: string; // Extended info (shown on expand)
19
+ confidence?: number; // 0-1
20
+ timestamp?: Date;
21
+ tool?: string;
22
+ iteration?: number;
23
+ }
24
+
25
+ interface PipelineViewProps {
26
+ steps: PipelineStep[];
27
+ mode: string | null; // "direct" | "investigative" | "exploratory" | null
28
+ currentStep: string; // Existing currentStep string from ChatInterface
29
+ isActive: boolean; // Whether analysis is running
30
+ hypotheses?: string[];
31
+ className?: string;
32
+ }
33
+
34
+ // ─── Icons per step type ─────────────────────────────────────
35
+
36
+ const stepIcons: Record<PipelineStep['type'], React.ElementType> = {
37
+ intent: Target,
38
+ hypothesis: Lightbulb,
39
+ reason: Brain,
40
+ act: Zap,
41
+ evaluate: Search,
42
+ finding: FlaskConical,
43
+ synthesize: FileText,
44
+ };
45
+
46
+ const stepColors: Record<PipelineStep['type'], string> = {
47
+ intent: 'text-violet-400 bg-violet-500/10 border-violet-500/20',
48
+ hypothesis: 'text-amber-400 bg-amber-500/10 border-amber-500/20',
49
+ reason: 'text-cyan-400 bg-cyan-500/10 border-cyan-500/20',
50
+ act: 'text-emerald-400 bg-emerald-500/10 border-emerald-500/20',
51
+ evaluate: 'text-blue-400 bg-blue-500/10 border-blue-500/20',
52
+ finding: 'text-pink-400 bg-pink-500/10 border-pink-500/20',
53
+ synthesize: 'text-orange-400 bg-orange-500/10 border-orange-500/20',
54
+ };
55
+
56
+ const statusDotColors: Record<PipelineStep['status'], string> = {
57
+ pending: 'bg-white/20',
58
+ active: 'bg-emerald-500',
59
+ completed: 'bg-emerald-500',
60
+ failed: 'bg-red-500',
61
+ };
62
+
63
+ // ─── Confidence Bar ──────────────────────────────────────────
64
+
65
+ const ConfidenceBar: React.FC<{ value: number }> = ({ value }) => (
66
+ <div className="flex items-center gap-2 mt-1">
67
+ <div className="flex-1 h-1 bg-white/5 rounded-full overflow-hidden">
68
+ <motion.div
69
+ className={cn(
70
+ "h-full rounded-full",
71
+ value >= 0.7 ? "bg-emerald-500" : value >= 0.4 ? "bg-amber-500" : "bg-red-400"
72
+ )}
73
+ initial={{ width: 0 }}
74
+ animate={{ width: `${Math.round(value * 100)}%` }}
75
+ transition={{ duration: 0.6, ease: "easeOut" }}
76
+ />
77
+ </div>
78
+ <span className="text-[10px] font-mono text-white/30 w-8 text-right">
79
+ {Math.round(value * 100)}%
80
+ </span>
81
+ </div>
82
+ );
83
+
84
+ // ─── Mode Badge ──────────────────────────────────────────────
85
+
86
+ const ModeBadge: React.FC<{ mode: string }> = ({ mode }) => {
87
+ const config: Record<string, { label: string; color: string; icon: React.ElementType }> = {
88
+ direct: { label: 'Direct', color: 'bg-emerald-500/10 text-emerald-400 border-emerald-500/20', icon: Zap },
89
+ investigative: { label: 'Investigative', color: 'bg-cyan-500/10 text-cyan-400 border-cyan-500/20', icon: Search },
90
+ exploratory: { label: 'Exploratory', color: 'bg-violet-500/10 text-violet-400 border-violet-500/20', icon: FlaskConical },
91
+ };
92
+ const { label, color, icon: Icon } = config[mode] || config.direct;
93
+
94
+ return (
95
+ <span className={cn("inline-flex items-center gap-1.5 px-2 py-0.5 text-[10px] font-medium rounded-full border", color)}>
96
+ <Icon className="w-3 h-3" />
97
+ {label} Mode
98
+ </span>
99
+ );
100
+ };
101
+
102
+ // ─── Single Step Row ─────────────────────────────────────────
103
+
104
+ const StepRow: React.FC<{ step: PipelineStep; isLast: boolean }> = ({ step, isLast }) => {
105
+ const [expanded, setExpanded] = React.useState(false);
106
+ const Icon = stepIcons[step.type] || Zap;
107
+ const colorClass = stepColors[step.type] || stepColors.act;
108
+ const isActive = step.status === 'active';
109
+ const isCompleted = step.status === 'completed';
110
+ const isFailed = step.status === 'failed';
111
+
112
+ return (
113
+ <div className="relative">
114
+ {/* Connector line */}
115
+ {!isLast && (
116
+ <div className={cn(
117
+ "absolute left-4 top-10 w-px h-[calc(100%-16px)]",
118
+ isCompleted ? "bg-emerald-500/30" : "bg-white/5"
119
+ )} />
120
+ )}
121
+
122
+ <motion.div
123
+ initial={{ opacity: 0, x: -12 }}
124
+ animate={{ opacity: 1, x: 0 }}
125
+ transition={{ duration: 0.3 }}
126
+ className={cn(
127
+ "relative flex items-start gap-3 p-2 rounded-lg cursor-pointer transition-colors",
128
+ isActive && "bg-white/[0.03]",
129
+ expanded && "bg-white/[0.02]"
130
+ )}
131
+ onClick={() => step.detail && setExpanded(!expanded)}
132
+ >
133
+ {/* Icon circle */}
134
+ <div className={cn(
135
+ "w-8 h-8 rounded-lg flex items-center justify-center shrink-0 border",
136
+ colorClass,
137
+ isActive && "animate-pulse"
138
+ )}>
139
+ {isActive ? (
140
+ <Loader2 className="w-4 h-4 animate-spin" />
141
+ ) : isCompleted ? (
142
+ <CheckCircle2 className="w-4 h-4 text-emerald-400" />
143
+ ) : isFailed ? (
144
+ <XCircle className="w-4 h-4 text-red-400" />
145
+ ) : (
146
+ <Icon className="w-4 h-4" />
147
+ )}
148
+ </div>
149
+
150
+ {/* Content */}
151
+ <div className="flex-1 min-w-0">
152
+ <div className="flex items-center gap-2">
153
+ <span className={cn(
154
+ "text-xs font-medium truncate",
155
+ isActive ? "text-white" : isCompleted ? "text-white/70" : "text-white/40"
156
+ )}>
157
+ {step.title}
158
+ </span>
159
+ {step.iteration && (
160
+ <span className="text-[10px] font-mono text-white/20 shrink-0">
161
+ #{step.iteration}
162
+ </span>
163
+ )}
164
+ {step.detail && (
165
+ expanded
166
+ ? <ChevronUp className="w-3 h-3 text-white/20 shrink-0" />
167
+ : <ChevronDown className="w-3 h-3 text-white/20 shrink-0" />
168
+ )}
169
+ </div>
170
+
171
+ {step.subtitle && (
172
+ <p className="text-[11px] text-white/30 mt-0.5 truncate">
173
+ {step.subtitle}
174
+ </p>
175
+ )}
176
+
177
+ {step.confidence !== undefined && step.confidence > 0 && (
178
+ <ConfidenceBar value={step.confidence} />
179
+ )}
180
+ </div>
181
+
182
+ {/* Status dot */}
183
+ <div className={cn(
184
+ "w-2 h-2 rounded-full shrink-0 mt-2",
185
+ statusDotColors[step.status]
186
+ )} />
187
+ </motion.div>
188
+
189
+ {/* Expanded detail */}
190
+ <AnimatePresence>
191
+ {expanded && step.detail && (
192
+ <motion.div
193
+ initial={{ height: 0, opacity: 0 }}
194
+ animate={{ height: 'auto', opacity: 1 }}
195
+ exit={{ height: 0, opacity: 0 }}
196
+ transition={{ duration: 0.2 }}
197
+ className="overflow-hidden"
198
+ >
199
+ <div className="ml-11 mr-2 mb-2 p-2 rounded-lg bg-white/[0.02] border border-white/5">
200
+ <p className="text-[11px] text-white/40 leading-relaxed whitespace-pre-wrap">
201
+ {step.detail}
202
+ </p>
203
+ </div>
204
+ </motion.div>
205
+ )}
206
+ </AnimatePresence>
207
+ </div>
208
+ );
209
+ };
210
+
211
+ // ─── Hypotheses Panel ────────────────────────────────────────
212
+
213
+ const HypothesesPanel: React.FC<{ hypotheses: string[] }> = ({ hypotheses }) => {
214
+ const [collapsed, setCollapsed] = React.useState(false);
215
+
216
+ if (!hypotheses.length) return null;
217
+
218
+ return (
219
+ <div className="mb-3">
220
+ <button
221
+ onClick={() => setCollapsed(!collapsed)}
222
+ className="flex items-center gap-1.5 text-[10px] font-medium text-amber-400/70 hover:text-amber-400 transition-colors mb-1.5"
223
+ >
224
+ <Lightbulb className="w-3 h-3" />
225
+ <span>{hypotheses.length} Hypotheses</span>
226
+ {collapsed ? <ChevronDown className="w-3 h-3" /> : <ChevronUp className="w-3 h-3" />}
227
+ </button>
228
+ <AnimatePresence>
229
+ {!collapsed && (
230
+ <motion.div
231
+ initial={{ height: 0, opacity: 0 }}
232
+ animate={{ height: 'auto', opacity: 1 }}
233
+ exit={{ height: 0, opacity: 0 }}
234
+ className="overflow-hidden"
235
+ >
236
+ <div className="space-y-1 ml-4">
237
+ {hypotheses.map((h, i) => (
238
+ <div key={i} className="flex items-start gap-1.5">
239
+ <ArrowRight className="w-3 h-3 text-amber-500/30 mt-0.5 shrink-0" />
240
+ <span className="text-[11px] text-white/30">{h}</span>
241
+ </div>
242
+ ))}
243
+ </div>
244
+ </motion.div>
245
+ )}
246
+ </AnimatePresence>
247
+ </div>
248
+ );
249
+ };
250
+
251
+ // ─── Main Pipeline View ──────────────────────────────────────
252
+
253
+ export const PipelineView: React.FC<PipelineViewProps> = ({
254
+ steps,
255
+ mode,
256
+ currentStep,
257
+ isActive,
258
+ hypotheses = [],
259
+ className
260
+ }) => {
261
+ // If no steps yet and not in reasoning mode, show the simple fallback
262
+ if (!steps.length && !mode) {
263
+ return (
264
+ <div className={cn("flex gap-4", className)}>
265
+ <div className="w-8 h-8 rounded-lg flex items-center justify-center shrink-0 bg-white/5 border border-white/10">
266
+ <Loader2 className="w-4 h-4 text-indigo-400 animate-spin" />
267
+ </div>
268
+ <div className="bg-white/[0.03] p-4 rounded-2xl border border-white/5">
269
+ <div className="flex items-center gap-3">
270
+ <div className="flex gap-1">
271
+ <span className="w-1.5 h-1.5 bg-emerald-500 rounded-full animate-bounce [animation-delay:-0.3s]" />
272
+ <span className="w-1.5 h-1.5 bg-emerald-500 rounded-full animate-bounce [animation-delay:-0.15s]" />
273
+ <span className="w-1.5 h-1.5 bg-emerald-500 rounded-full animate-bounce" />
274
+ </div>
275
+ <span className="text-sm text-white/60">
276
+ {currentStep || '🔧 Starting analysis...'}
277
+ </span>
278
+ </div>
279
+ </div>
280
+ </div>
281
+ );
282
+ }
283
+
284
+ // Count completed steps
285
+ const completedCount = steps.filter(s => s.status === 'completed').length;
286
+ const totalCount = steps.length;
287
+ const progressPct = totalCount > 0 ? (completedCount / totalCount) * 100 : 0;
288
+
289
+ return (
290
+ <div className={cn("flex gap-4", className)}>
291
+ {/* Bot avatar */}
292
+ <div className="w-8 h-8 rounded-lg flex items-center justify-center shrink-0 bg-white/5 border border-white/10">
293
+ <Brain className="w-4 h-4 text-cyan-400" />
294
+ </div>
295
+
296
+ {/* Pipeline card */}
297
+ <div className="flex-1 bg-white/[0.03] p-4 rounded-2xl border border-white/5 max-w-lg">
298
+ {/* Header */}
299
+ <div className="flex items-center justify-between mb-3">
300
+ <div className="flex items-center gap-2">
301
+ <span className="text-xs font-semibold text-white/80">Reasoning Pipeline</span>
302
+ {mode && <ModeBadge mode={mode} />}
303
+ </div>
304
+ {isActive && (
305
+ <div className="flex items-center gap-1.5 text-[10px] text-emerald-400">
306
+ <Loader2 className="w-3 h-3 animate-spin" />
307
+ <span>Running</span>
308
+ </div>
309
+ )}
310
+ </div>
311
+
312
+ {/* Progress bar */}
313
+ <div className="h-1 bg-white/5 rounded-full overflow-hidden mb-3">
314
+ <motion.div
315
+ className="h-full bg-gradient-to-r from-cyan-500 to-emerald-500 rounded-full"
316
+ initial={{ width: 0 }}
317
+ animate={{ width: `${progressPct}%` }}
318
+ transition={{ duration: 0.4, ease: "easeOut" }}
319
+ />
320
+ </div>
321
+
322
+ {/* Hypotheses (exploratory mode) */}
323
+ {hypotheses.length > 0 && <HypothesesPanel hypotheses={hypotheses} />}
324
+
325
+ {/* Steps timeline */}
326
+ <div className="space-y-0.5 max-h-[320px] overflow-y-auto pr-1 scrollbar-thin scrollbar-thumb-white/5">
327
+ {steps.map((step, i) => (
328
+ <StepRow key={step.id} step={step} isLast={i === steps.length - 1} />
329
+ ))}
330
+ </div>
331
+
332
+ {/* Footer summary */}
333
+ {!isActive && completedCount > 0 && (
334
+ <div className="mt-3 pt-2 border-t border-white/5 flex items-center justify-between">
335
+ <span className="text-[10px] text-white/20">
336
+ {completedCount} step{completedCount !== 1 ? 's' : ''} completed
337
+ </span>
338
+ <span className="text-[10px] text-white/20 font-mono">
339
+ {steps.filter(s => s.type === 'finding').length} finding{steps.filter(s => s.type === 'finding').length !== 1 ? 's' : ''}
340
+ </span>
341
+ </div>
342
+ )}
343
+ </div>
344
+ </div>
345
+ );
346
+ };
347
+
348
+ export default PipelineView;
src/orchestrator.py CHANGED
@@ -21,6 +21,11 @@ from .tools.tools_registry import TOOLS, get_all_tool_names, get_tools_by_catego
21
  from .tools.agent_tool_mapping import (get_tools_for_agent, filter_tools_by_names,
22
  get_agent_description, suggest_next_agent)
23
  from .reasoning.reasoning_trace import get_reasoning_trace, reset_reasoning_trace
 
 
 
 
 
24
  from .session_memory import SessionMemory
25
  from .session_store import SessionStore
26
  from .workflow_state import WorkflowState
@@ -2898,6 +2903,526 @@ You receive quality reports from EDA agent and deliver clean data to modeling ag
2898
  "task_type": result_data.get("task_type")
2899
  })
2900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2901
  def analyze(self, file_path: str, task_description: str,
2902
  target_col: Optional[str] = None,
2903
  use_cache: bool = True,
@@ -3032,6 +3557,82 @@ You receive quality reports from EDA agent and deliver clean data to modeling ag
3032
  print("✓ Using cached results")
3033
  return cached
3034
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3035
  # Build initial messages
3036
  # Use dynamic prompts for small context models
3037
  if self.use_compact_prompts:
 
21
  from .tools.agent_tool_mapping import (get_tools_for_agent, filter_tools_by_names,
22
  get_agent_description, suggest_next_agent)
23
  from .reasoning.reasoning_trace import get_reasoning_trace, reset_reasoning_trace
24
+ from .reasoning.findings import FindingsAccumulator, Finding
25
+ from .reasoning.reasoner import Reasoner, ReasoningOutput
26
+ from .reasoning.evaluator import Evaluator, EvaluationOutput
27
+ from .reasoning.synthesizer import Synthesizer
28
+ from .routing.intent_classifier import IntentClassifier, IntentResult
29
  from .session_memory import SessionMemory
30
  from .session_store import SessionStore
31
  from .workflow_state import WorkflowState
 
2903
  "task_type": result_data.get("task_type")
2904
  })
2905
 
2906
+ # ═══════════════════════════════════════════════════════════════════════════
2907
+ # REASONING LOOP INFRASTRUCTURE
2908
+ # Three new methods that power the hypothesis-driven analysis mode:
2909
+ # _llm_text_call → Provider-agnostic text LLM call (no tool schemas)
2910
+ # _get_tools_description → Lightweight text description of available tools
2911
+ # _run_reasoning_loop → The core Reason → Act → Evaluate → Loop/Stop cycle
2912
+ # ═══════════════════════════════════════════════════════════════════════════
2913
+
2914
+ def _llm_text_call(self, system_prompt: str, user_prompt: str, max_tokens: int = 2048) -> str:
2915
+ """
2916
+ Simple text-only LLM call (no tool schemas).
2917
+
2918
+ Used by Reasoner, Evaluator, and Synthesizer for lightweight
2919
+ reasoning calls. Much cheaper than full tool-calling API calls.
2920
+
2921
+ Args:
2922
+ system_prompt: System prompt for the LLM
2923
+ user_prompt: User prompt for the LLM
2924
+ max_tokens: Maximum response tokens
2925
+
2926
+ Returns:
2927
+ Plain text response from the LLM
2928
+ """
2929
+ messages = [
2930
+ {"role": "system", "content": system_prompt},
2931
+ {"role": "user", "content": user_prompt}
2932
+ ]
2933
+
2934
+ # Rate limiting
2935
+ if self.min_api_call_interval > 0:
2936
+ time_since_last_call = time.time() - self.last_api_call_time
2937
+ if time_since_last_call < self.min_api_call_interval:
2938
+ wait_time = self.min_api_call_interval - time_since_last_call
2939
+ time.sleep(wait_time)
2940
+
2941
+ try:
2942
+ if self.provider == "mistral":
2943
+ if hasattr(self.mistral_client, 'chat') and hasattr(self.mistral_client.chat, 'complete'):
2944
+ response = self.mistral_client.chat.complete(
2945
+ model=self.model,
2946
+ messages=messages,
2947
+ temperature=0.1,
2948
+ max_tokens=max_tokens
2949
+ )
2950
+ else:
2951
+ response = self.mistral_client.chat(
2952
+ model=self.model,
2953
+ messages=messages,
2954
+ temperature=0.1,
2955
+ max_tokens=max_tokens
2956
+ )
2957
+ self.api_calls_made += 1
2958
+ self.last_api_call_time = time.time()
2959
+
2960
+ if hasattr(response, 'usage') and response.usage:
2961
+ self.tokens_this_minute += response.usage.total_tokens
2962
+
2963
+ return self._extract_content_text(response.choices[0].message.content)
2964
+
2965
+ elif self.provider == "groq":
2966
+ response = self.groq_client.chat.completions.create(
2967
+ model=self.model,
2968
+ messages=messages,
2969
+ temperature=0.1,
2970
+ max_tokens=max_tokens
2971
+ )
2972
+ self.api_calls_made += 1
2973
+ self.last_api_call_time = time.time()
2974
+
2975
+ if hasattr(response, 'usage') and response.usage:
2976
+ self.tokens_this_minute += response.usage.total_tokens
2977
+
2978
+ return self._extract_content_text(response.choices[0].message.content)
2979
+
2980
+ elif self.provider == "gemini":
2981
+ full_prompt = f"{system_prompt}\n\n{user_prompt}"
2982
+ response = self.gemini_model.generate_content(
2983
+ full_prompt,
2984
+ generation_config={
2985
+ "temperature": 0.1,
2986
+ "max_output_tokens": max_tokens
2987
+ }
2988
+ )
2989
+ self.api_calls_made += 1
2990
+ self.last_api_call_time = time.time()
2991
+ return response.text
2992
+
2993
+ else:
2994
+ raise ValueError(f"Unsupported provider: {self.provider}")
2995
+
2996
+ except Exception as e:
2997
+ error_str = str(e)
2998
+ # Handle rate limits
2999
+ if "429" in error_str or "rate_limit" in error_str.lower():
3000
+ print(f"⏳ Rate limit in reasoning call, waiting 10s...")
3001
+ time.sleep(10)
3002
+ return self._llm_text_call(system_prompt, user_prompt, max_tokens)
3003
+ raise
3004
+
3005
+ def _get_tools_description(self, tool_names: Optional[List[str]] = None) -> str:
3006
+ """
3007
+ Build a lightweight text description of available tools.
3008
+
3009
+ Used in Reasoner prompts instead of sending full JSON tool schemas.
3010
+ This is much more token-efficient than the OpenAI tools format.
3011
+
3012
+ Args:
3013
+ tool_names: Optional list of tool names to include (None = all tools)
3014
+
3015
+ Returns:
3016
+ Formatted text like:
3017
+ - profile_dataset(file_path): Profile a dataset to understand structure
3018
+ - analyze_correlations(file_path, target_col): Analyze column correlations
3019
+ ...
3020
+ """
3021
+ import inspect
3022
+
3023
+ lines = []
3024
+ tool_map = self.tool_functions
3025
+
3026
+ # Filter to specific tools if requested
3027
+ if tool_names:
3028
+ tool_map = {k: v for k, v in tool_map.items() if k in tool_names}
3029
+
3030
+ for name, func in sorted(tool_map.items()):
3031
+ # Get function signature
3032
+ try:
3033
+ sig = inspect.signature(func)
3034
+ params = []
3035
+ for param_name, param in sig.parameters.items():
3036
+ if param_name in ("kwargs", "args"):
3037
+ continue
3038
+ if param.default is inspect.Parameter.empty:
3039
+ params.append(param_name)
3040
+ else:
3041
+ params.append(f"{param_name}=...")
3042
+ params_str = ", ".join(params[:5]) # Max 5 params shown
3043
+ if len(sig.parameters) > 5:
3044
+ params_str += ", ..."
3045
+ except (ValueError, TypeError):
3046
+ params_str = "..."
3047
+
3048
+ # Get first line of docstring
3049
+ doc = (func.__doc__ or "").strip().split("\n")[0][:100]
3050
+
3051
+ lines.append(f"- {name}({params_str}): {doc}")
3052
+
3053
+ return "\n".join(lines)
3054
+
3055
+ def _run_reasoning_loop(
3056
+ self,
3057
+ question: str,
3058
+ file_path: str,
3059
+ dataset_info: Dict[str, Any],
3060
+ target_col: Optional[str] = None,
3061
+ mode: str = "investigative",
3062
+ max_iterations: int = 7,
3063
+ tool_names: Optional[List[str]] = None
3064
+ ) -> Dict[str, Any]:
3065
+ """
3066
+ Run the Reasoning Loop: Reason → Act → Evaluate → Loop/Stop → Synthesize.
3067
+
3068
+ This is the core of the hypothesis-driven analysis mode.
3069
+ Instead of a pipeline, the agent:
3070
+ 1. REASONS about what to investigate next
3071
+ 2. ACTS (executes one tool)
3072
+ 3. EVALUATES the result
3073
+ 4. Decides to LOOP (investigate more) or STOP
3074
+ 5. SYNTHESIZES all findings into a coherent answer
3075
+
3076
+ Args:
3077
+ question: User's question or "Analyze this data"
3078
+ file_path: Path to the dataset
3079
+ dataset_info: Schema info from local extraction
3080
+ target_col: Optional target column
3081
+ mode: "investigative" or "exploratory"
3082
+ max_iterations: Max reasoning iterations (default 7)
3083
+ tool_names: Optional subset of tools to use
3084
+
3085
+ Returns:
3086
+ Dict with status, summary, findings, workflow_history, etc.
3087
+ """
3088
+ start_time = time.time()
3089
+
3090
+ # Initialize reasoning components (pass our LLM caller)
3091
+ reasoner = Reasoner(llm_caller=self._llm_text_call)
3092
+ evaluator = Evaluator(llm_caller=self._llm_text_call)
3093
+ synthesizer = Synthesizer(llm_caller=self._llm_text_call)
3094
+ findings = FindingsAccumulator(question=question, mode=mode)
3095
+
3096
+ # Get tools description for the reasoner
3097
+ tools_desc = self._get_tools_description(tool_names)
3098
+
3099
+ # Track for API response
3100
+ workflow_history = []
3101
+ current_file = file_path # Tracks the latest output file
3102
+
3103
+ # Emit mode info for UI
3104
+ if hasattr(self, 'session') and self.session:
3105
+ progress_manager.emit(self.session.session_id, {
3106
+ 'type': 'reasoning_mode',
3107
+ 'mode': mode,
3108
+ 'message': f"🧠 Reasoning Loop activated ({mode} mode)",
3109
+ 'question': question
3110
+ })
3111
+
3112
+ print(f"\n{'='*60}")
3113
+ print(f"🧠 REASONING LOOP ({mode.upper()} mode)")
3114
+ print(f" Question: {question}")
3115
+ print(f" Max iterations: {max_iterations}")
3116
+ print(f"{'='*60}")
3117
+
3118
+ # ── EXPLORATORY MODE: Generate hypotheses first ──
3119
+ if mode == "exploratory":
3120
+ print(f"\n🔬 Generating hypotheses from data profile...")
3121
+
3122
+ # Profile the dataset first if not already done
3123
+ profile_result = self._execute_tool("profile_dataset", {"file_path": file_path})
3124
+ profile_summary = ""
3125
+ if profile_result.get("success", True):
3126
+ profile_summary = json.dumps(
3127
+ self._compress_tool_result("profile_dataset",
3128
+ self._make_json_serializable(profile_result)),
3129
+ default=str
3130
+ )[:2000]
3131
+
3132
+ workflow_history.append({
3133
+ "iteration": 0,
3134
+ "tool": "profile_dataset",
3135
+ "arguments": {"file_path": file_path},
3136
+ "result": profile_result
3137
+ })
3138
+ self._update_workflow_state("profile_dataset", profile_result)
3139
+
3140
+ # Generate hypotheses
3141
+ hypotheses = reasoner.generate_hypotheses(
3142
+ dataset_info=dataset_info,
3143
+ file_path=file_path,
3144
+ target_col=target_col,
3145
+ profile_summary=profile_summary
3146
+ )
3147
+
3148
+ print(f" Generated {len(hypotheses)} hypotheses:")
3149
+ for i, h in enumerate(hypotheses):
3150
+ text = h.get("text", str(h))
3151
+ priority = h.get("priority", 0.5)
3152
+ findings.add_hypothesis(text, priority=priority, source_iteration=0)
3153
+ print(f" {i+1}. [{priority:.1f}] {text}")
3154
+
3155
+ # Emit hypothesis info
3156
+ if hasattr(self, 'session') and self.session:
3157
+ progress_manager.emit(self.session.session_id, {
3158
+ 'type': 'hypotheses_generated',
3159
+ 'hypotheses': [h.get("text", str(h)) for h in hypotheses],
3160
+ 'count': len(hypotheses)
3161
+ })
3162
+
3163
+ # ── MAIN REASONING LOOP ──
3164
+ for iteration in range(1, max_iterations + 1):
3165
+ print(f"\n── Iteration {iteration}/{max_iterations} ──")
3166
+
3167
+ # STEP 1: REASON - What should we investigate next?
3168
+ print(f"🤔 REASON: Deciding next action...")
3169
+
3170
+ reasoning_output = reasoner.reason(
3171
+ question=question,
3172
+ dataset_info=dataset_info,
3173
+ findings=findings,
3174
+ available_tools=tools_desc,
3175
+ file_path=current_file,
3176
+ target_col=target_col
3177
+ )
3178
+
3179
+ print(f" Status: {reasoning_output.status}")
3180
+ print(f" Reasoning: {reasoning_output.reasoning}")
3181
+
3182
+ # Check if done
3183
+ if reasoning_output.status == "done":
3184
+ print(f"✅ Reasoner says: DONE (confidence: {reasoning_output.confidence:.0%})")
3185
+ print(f" Reason: {reasoning_output.reasoning}")
3186
+ break
3187
+
3188
+ tool_name = reasoning_output.tool_name
3189
+ tool_args = reasoning_output.arguments
3190
+ hypothesis = reasoning_output.hypothesis
3191
+
3192
+ if not tool_name or tool_name not in self.tool_functions:
3193
+ print(f"⚠️ Invalid tool: {tool_name}, skipping iteration")
3194
+ continue
3195
+
3196
+ print(f" Tool: {tool_name}")
3197
+ print(f" Hypothesis: {hypothesis}")
3198
+
3199
+ # Emit reasoning step for UI
3200
+ if hasattr(self, 'session') and self.session:
3201
+ progress_manager.emit(self.session.session_id, {
3202
+ 'type': 'reasoning_step',
3203
+ 'iteration': iteration,
3204
+ 'tool': tool_name,
3205
+ 'hypothesis': hypothesis,
3206
+ 'reasoning': reasoning_output.reasoning
3207
+ })
3208
+
3209
+ # STEP 2: ACT - Execute the tool
3210
+ print(f"⚡ ACT: Executing {tool_name}...")
3211
+
3212
+ # Emit tool execution event
3213
+ if hasattr(self, 'session') and self.session:
3214
+ progress_manager.emit(self.session.session_id, {
3215
+ 'type': 'tool_executing',
3216
+ 'tool': tool_name,
3217
+ 'message': f"🔧 Executing: {tool_name}",
3218
+ 'arguments': tool_args
3219
+ })
3220
+
3221
+ tool_result = self._execute_tool(tool_name, tool_args)
3222
+
3223
+ # Track output file for next iteration
3224
+ if tool_result.get("success", True):
3225
+ result_data = tool_result.get("result", {})
3226
+ if isinstance(result_data, dict):
3227
+ new_file = result_data.get("output_file") or result_data.get("output_path")
3228
+ if new_file:
3229
+ current_file = new_file
3230
+
3231
+ # Emit success
3232
+ if hasattr(self, 'session') and self.session:
3233
+ progress_manager.emit(self.session.session_id, {
3234
+ 'type': 'tool_completed',
3235
+ 'tool': tool_name,
3236
+ 'message': f"✓ Completed: {tool_name}"
3237
+ })
3238
+ print(f" ✓ Tool completed successfully")
3239
+ else:
3240
+ error_msg = tool_result.get("error", "Unknown error")
3241
+ print(f" ❌ Tool failed: {error_msg}")
3242
+ if hasattr(self, 'session') and self.session:
3243
+ progress_manager.emit(self.session.session_id, {
3244
+ 'type': 'tool_failed',
3245
+ 'tool': tool_name,
3246
+ 'message': f"❌ FAILED: {tool_name}",
3247
+ 'error': error_msg
3248
+ })
3249
+
3250
+ # Track in workflow history
3251
+ workflow_history.append({
3252
+ "iteration": iteration,
3253
+ "tool": tool_name,
3254
+ "arguments": tool_args,
3255
+ "result": tool_result
3256
+ })
3257
+
3258
+ # Update workflow state
3259
+ self._update_workflow_state(tool_name, tool_result)
3260
+
3261
+ # Checkpoint
3262
+ if tool_result.get("success", True):
3263
+ session_id = self.http_session_key or "default"
3264
+ self.recovery_manager.checkpoint_manager.save_checkpoint(
3265
+ session_id=session_id,
3266
+ workflow_state={
3267
+ 'iteration': iteration,
3268
+ 'workflow_history': workflow_history,
3269
+ 'current_file': file_path,
3270
+ 'task_description': question,
3271
+ 'target_col': target_col
3272
+ },
3273
+ last_tool=tool_name,
3274
+ iteration=iteration
3275
+ )
3276
+
3277
+ # STEP 3: EVALUATE - What did we learn?
3278
+ print(f"📊 EVALUATE: Interpreting results...")
3279
+
3280
+ evaluation = evaluator.evaluate(
3281
+ question=question,
3282
+ tool_name=tool_name,
3283
+ arguments=tool_args,
3284
+ result=tool_result,
3285
+ findings=findings,
3286
+ result_compressor=lambda tn, r: self._compress_tool_result(
3287
+ tn, self._make_json_serializable(r)
3288
+ )
3289
+ )
3290
+
3291
+ print(f" Interpretation: {evaluation.interpretation}")
3292
+ print(f" Answered: {evaluation.answered} (confidence: {evaluation.confidence:.0%})")
3293
+ print(f" Should stop: {evaluation.should_stop}")
3294
+ if evaluation.next_questions:
3295
+ print(f" Next questions: {evaluation.next_questions}")
3296
+
3297
+ # Build finding and add to accumulator
3298
+ compressed_result = json.dumps(
3299
+ self._compress_tool_result(tool_name, self._make_json_serializable(tool_result)),
3300
+ default=str
3301
+ )
3302
+
3303
+ finding = evaluator.build_finding(
3304
+ iteration=iteration,
3305
+ hypothesis=hypothesis,
3306
+ tool_name=tool_name,
3307
+ arguments=tool_args,
3308
+ result_summary=compressed_result,
3309
+ evaluation=evaluation
3310
+ )
3311
+ findings.add_finding(finding)
3312
+
3313
+ # Emit finding for UI
3314
+ if hasattr(self, 'session') and self.session:
3315
+ progress_manager.emit(self.session.session_id, {
3316
+ 'type': 'finding_discovered',
3317
+ 'iteration': iteration,
3318
+ 'interpretation': evaluation.interpretation,
3319
+ 'confidence': evaluation.confidence,
3320
+ 'answered': evaluation.answered
3321
+ })
3322
+
3323
+ # Check if we should stop
3324
+ if evaluation.should_stop:
3325
+ print(f"\n✅ Evaluator says: STOP (confidence: {evaluation.confidence:.0%})")
3326
+ break
3327
+
3328
+ # ── STEP 4: SYNTHESIZE - Build the final answer ──
3329
+ print(f"\n{'='*60}")
3330
+ print(f"📝 SYNTHESIZE: Building final answer from {len(findings.findings)} findings...")
3331
+ print(f"{'='*60}")
3332
+
3333
+ # Collect artifacts from workflow history
3334
+ artifacts = self._collect_artifacts(workflow_history)
3335
+
3336
+ # Generate synthesis
3337
+ if mode == "exploratory":
3338
+ summary_text = synthesizer.synthesize_exploratory(
3339
+ findings=findings,
3340
+ artifacts=artifacts
3341
+ )
3342
+ else:
3343
+ summary_text = synthesizer.synthesize(
3344
+ findings=findings,
3345
+ artifacts=artifacts
3346
+ )
3347
+
3348
+ # Also generate enhanced summary for plots/metrics extraction
3349
+ try:
3350
+ enhanced = self._generate_enhanced_summary(
3351
+ workflow_history, summary_text, question
3352
+ )
3353
+ plots_data = enhanced.get("plots", [])
3354
+ metrics_data = enhanced.get("metrics", {})
3355
+ artifacts_data = enhanced.get("artifacts", {})
3356
+ except Exception as e:
3357
+ print(f"⚠️ Enhanced summary generation failed: {e}")
3358
+ plots_data = []
3359
+ metrics_data = {}
3360
+ artifacts_data = {}
3361
+
3362
+ # Save to session
3363
+ if self.session:
3364
+ self.session.add_conversation(question, summary_text)
3365
+ self.session_store.save(self.session)
3366
+
3367
+ result = {
3368
+ "status": "success",
3369
+ "summary": summary_text,
3370
+ "metrics": metrics_data,
3371
+ "artifacts": artifacts_data,
3372
+ "plots": plots_data,
3373
+ "workflow_history": workflow_history,
3374
+ "findings": findings.to_dict(),
3375
+ "reasoning_trace": self.reasoning_trace.get_trace(),
3376
+ "reasoning_summary": self.reasoning_trace.get_trace_summary(),
3377
+ "execution_mode": mode,
3378
+ "iterations": findings.iteration_count,
3379
+ "api_calls": self.api_calls_made,
3380
+ "execution_time": round(time.time() - start_time, 2)
3381
+ }
3382
+
3383
+ print(f"\n✅ Reasoning loop completed in {result['execution_time']}s")
3384
+ print(f" Iterations: {findings.iteration_count}")
3385
+ print(f" Tools used: {', '.join(findings.tools_used)}")
3386
+ print(f" API calls: {self.api_calls_made}")
3387
+
3388
+ return result
3389
+
3390
+ def _collect_artifacts(self, workflow_history: List[Dict]) -> Dict[str, Any]:
3391
+ """Collect plots, files, and other artifacts from workflow history."""
3392
+ plots = []
3393
+ files = []
3394
+
3395
+ for step in workflow_history:
3396
+ result = step.get("result", {})
3397
+ if not isinstance(result, dict):
3398
+ continue
3399
+
3400
+ result_data = result.get("result", result)
3401
+ if isinstance(result_data, dict):
3402
+ # Collect output files
3403
+ for key in ["output_file", "output_path", "report_path"]:
3404
+ if key in result_data and result_data[key]:
3405
+ files.append(result_data[key])
3406
+
3407
+ # Collect plots
3408
+ if "plots" in result_data:
3409
+ for plot in result_data["plots"]:
3410
+ if isinstance(plot, dict):
3411
+ plots.append(plot)
3412
+ elif isinstance(plot, str):
3413
+ plots.append({"path": plot, "title": step.get("tool", "Plot")})
3414
+
3415
+ # Check for HTML files (interactive plots)
3416
+ for key in ["html_path", "dashboard_path"]:
3417
+ if key in result_data and result_data[key]:
3418
+ plots.append({
3419
+ "path": result_data[key],
3420
+ "title": step.get("tool", "Interactive Plot"),
3421
+ "type": "html"
3422
+ })
3423
+
3424
+ return {"plots": plots, "files": files}
3425
+
3426
  def analyze(self, file_path: str, task_description: str,
3427
  target_col: Optional[str] = None,
3428
  use_cache: bool = True,
 
3557
  print("✓ Using cached results")
3558
  return cached
3559
 
3560
+ # ═══════════════════════════════════════════════════════════════════════
3561
+ # 🧠 INTENT CLASSIFICATION → MODE SELECTION
3562
+ # Classify the user's request into one of three execution modes:
3563
+ # DIRECT: "Make a scatter plot" → existing pipeline
3564
+ # INVESTIGATIVE: "Why are customers churning?" → reasoning loop
3565
+ # EXPLORATORY: "Analyze this data" → hypothesis-driven loop
3566
+ # ═══════════════════════════════════════════════════════════════════════
3567
+ intent_classifier = IntentClassifier()
3568
+ intent_result = intent_classifier.classify(
3569
+ query=task_description,
3570
+ dataset_info=schema_info if 'error' not in schema_info else None,
3571
+ has_target_col=bool(target_col)
3572
+ )
3573
+
3574
+ print(f"\n🎯 Intent Classification:")
3575
+ print(f" Mode: {intent_result.mode.upper()}")
3576
+ print(f" Confidence: {intent_result.confidence:.0%}")
3577
+ print(f" Reasoning: {intent_result.reasoning}")
3578
+ print(f" Sub-intent: {intent_result.sub_intent}")
3579
+
3580
+ # Emit intent info for UI
3581
+ if hasattr(self, 'session') and self.session:
3582
+ progress_manager.emit(self.session.session_id, {
3583
+ 'type': 'intent_classified',
3584
+ 'mode': intent_result.mode,
3585
+ 'confidence': intent_result.confidence,
3586
+ 'reasoning': intent_result.reasoning,
3587
+ 'sub_intent': intent_result.sub_intent
3588
+ })
3589
+
3590
+ # 📝 Record intent classification in reasoning trace
3591
+ self.reasoning_trace.trace_history.append({
3592
+ "type": "intent_classification",
3593
+ "query": task_description,
3594
+ "mode": intent_result.mode,
3595
+ "confidence": intent_result.confidence,
3596
+ "reasoning": intent_result.reasoning,
3597
+ "sub_intent": intent_result.sub_intent
3598
+ })
3599
+
3600
+ # ═══════════════════════════════════════════════════════════════════════
3601
+ # 🧠 REASONING LOOP PATH (Investigative / Exploratory modes)
3602
+ # ═══════════════════════════════════════════════════════════════════════
3603
+ if intent_result.mode in ("investigative", "exploratory"):
3604
+ print(f"\n🧠 Routing to REASONING LOOP ({intent_result.mode} mode)")
3605
+
3606
+ # Determine iteration count based on mode and reasoning effort
3607
+ if intent_result.mode == "exploratory":
3608
+ loop_max = min(max_iterations, 8) # Exploratory gets more iterations
3609
+ else:
3610
+ loop_max = min(max_iterations, 6) # Investigative is more focused
3611
+
3612
+ reasoning_result = self._run_reasoning_loop(
3613
+ question=task_description,
3614
+ file_path=file_path,
3615
+ dataset_info=schema_info if 'error' not in schema_info else {},
3616
+ target_col=target_col,
3617
+ mode=intent_result.mode,
3618
+ max_iterations=loop_max
3619
+ )
3620
+
3621
+ # Cache the result
3622
+ if use_cache and reasoning_result.get("status") == "success":
3623
+ self.cache.set(cache_key, reasoning_result, metadata={
3624
+ "file_path": file_path,
3625
+ "task": task_description,
3626
+ "mode": intent_result.mode
3627
+ })
3628
+
3629
+ return reasoning_result
3630
+
3631
+ # ═══════════════════════════════════════════════════════════════════════
3632
+ # 📋 DIRECT MODE PATH (existing pipeline - below is unchanged)
3633
+ # ═══════════════════════════════════════════════════════════════════════
3634
+ print(f"\n📋 Routing to DIRECT pipeline mode")
3635
+
3636
  # Build initial messages
3637
  # Use dynamic prompts for small context models
3638
  if self.use_compact_prompts:
src/reasoning/__init__.py CHANGED
@@ -17,7 +17,16 @@ Architecture:
17
 
18
  Tool: "Here's what I found: {stats}"
19
  Reasoning: "Based on these stats, this means..."
 
 
 
20
 
 
 
 
 
 
 
21
  Usage:
22
  from reasoning import get_reasoner
23
 
@@ -25,6 +34,12 @@ Usage:
25
  result = reasoner.explain_data(
26
  summary={"rows": 1000, "columns": 20, "missing": 50}
27
  )
 
 
 
 
 
 
28
  """
29
 
30
  import os
 
17
 
18
  Tool: "Here's what I found: {stats}"
19
  Reasoning: "Based on these stats, this means..."
20
+
21
+ Reasoning Loop (NEW):
22
+ REASON → ACT → EVALUATE → LOOP/STOP → SYNTHESIZE
23
 
24
+ Modules:
25
+ - findings.py: Accumulated evidence state (step tracker + decision ledger)
26
+ - reasoner.py: REASON step - picks next investigation action
27
+ - evaluator.py: EVALUATE step - interprets results, decides continue/stop
28
+ - synthesizer.py: SYNTHESIZE step - builds final answer from evidence
29
+
30
  Usage:
31
  from reasoning import get_reasoner
32
 
 
34
  result = reasoner.explain_data(
35
  summary={"rows": 1000, "columns": 20, "missing": 50}
36
  )
37
+
38
+ # Reasoning Loop components:
39
+ from reasoning.findings import FindingsAccumulator
40
+ from reasoning.reasoner import Reasoner
41
+ from reasoning.evaluator import Evaluator
42
+ from reasoning.synthesizer import Synthesizer
43
  """
44
 
45
  import os
src/reasoning/evaluator.py ADDED
@@ -0,0 +1,267 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Evaluator Module - The EVALUATE step of the Reasoning Loop.
3
+
4
+ Interprets tool results and decides:
5
+ - What did we learn from this action?
6
+ - Does this answer the user's question?
7
+ - Should we continue investigating or stop?
8
+ - What follow-up questions emerged?
9
+
10
+ The Evaluator transforms raw tool output into understanding.
11
+
12
+ Architecture:
13
+ Tool Result → Evaluator.evaluate() → EvaluationOutput
14
+ - interpretation: natural language explanation
15
+ - answered: did this answer the question?
16
+ - confidence: how confident are we?
17
+ - should_stop: should the loop stop?
18
+ - next_questions: what to investigate next
19
+ """
20
+
21
+ import json
22
+ import re
23
+ from dataclasses import dataclass, field
24
+ from typing import Dict, Any, List, Optional, Callable
25
+
26
+ from .findings import Finding, FindingsAccumulator
27
+
28
+
29
+ @dataclass
30
+ class EvaluationOutput:
31
+ """Output from one EVALUATE step."""
32
+ interpretation: str # What we learned from the tool result
33
+ answered: bool # Does this answer the user's question?
34
+ confidence: float # 0.0-1.0 confidence
35
+ should_stop: bool # Should the reasoning loop stop?
36
+ next_questions: List[str] # Follow-up questions to investigate
37
+ key_metric: Optional[str] = None # Most important metric extracted
38
+
39
+
40
+ EVALUATOR_SYSTEM_PROMPT = """You are a senior data scientist interpreting analysis results.
41
+
42
+ Your job:
43
+ 1. Interpret what the tool result MEANS (not just what it shows)
44
+ 2. Decide if this answers the user's original question
45
+ 3. Identify follow-up questions worth investigating
46
+ 4. Assign confidence level to your interpretation
47
+
48
+ Be concise but insightful. Focus on:
49
+ - Statistical significance (not just numbers)
50
+ - Business implications (not just patterns)
51
+ - Confounders and caveats
52
+ - What's surprising vs expected
53
+
54
+ CRITICAL: Output ONLY valid JSON, no other text."""
55
+
56
+ EVALUATOR_USER_TEMPLATE = """**User's original question**: {question}
57
+
58
+ **Action taken**: {tool_name}({arguments})
59
+
60
+ **Tool result** (compressed):
61
+ {result_summary}
62
+
63
+ **What we knew before this step**:
64
+ {prior_findings}
65
+
66
+ Evaluate this result. Respond with ONLY this JSON:
67
+ {{
68
+ "interpretation": "1-3 sentences: What does this result MEAN for answering the question?",
69
+ "answered": true/false,
70
+ "confidence": 0.0-1.0,
71
+ "should_stop": true/false,
72
+ "next_questions": ["follow-up question 1", "follow-up question 2"],
73
+ "key_metric": "most important number or finding (optional)"
74
+ }}
75
+
76
+ Guidelines for should_stop:
77
+ - true: Question is fully answered OR we've gathered enough evidence OR no more useful actions
78
+ - false: Important aspects remain uninvestigated
79
+
80
+ Guidelines for confidence:
81
+ - 0.0-0.3: Weak evidence, need more investigation
82
+ - 0.3-0.6: Moderate evidence, some aspects unclear
83
+ - 0.6-0.8: Strong evidence, minor questions remain
84
+ - 0.8-1.0: Very strong evidence, question well answered"""
85
+
86
+
87
+ class Evaluator:
88
+ """
89
+ The EVALUATE step of the Reasoning Loop.
90
+
91
+ Takes a tool result and interprets it in the context of
92
+ the user's question and prior findings.
93
+
94
+ Usage:
95
+ evaluator = Evaluator(llm_caller=orchestrator._llm_text_call)
96
+ evaluation = evaluator.evaluate(
97
+ question="Why are customers churning?",
98
+ tool_name="analyze_correlations",
99
+ arguments={"file_path": "data.csv", "target_col": "churn"},
100
+ result=tool_result,
101
+ findings=findings_accumulator
102
+ )
103
+
104
+ if evaluation.should_stop:
105
+ # Move to synthesis
106
+ ...
107
+ else:
108
+ # Continue reasoning loop
109
+ ...
110
+ """
111
+
112
+ def __init__(self, llm_caller: Callable):
113
+ """
114
+ Args:
115
+ llm_caller: Function (system_prompt, user_prompt, max_tokens) -> str
116
+ """
117
+ self.llm_caller = llm_caller
118
+
119
+ def evaluate(
120
+ self,
121
+ question: str,
122
+ tool_name: str,
123
+ arguments: Dict[str, Any],
124
+ result: Dict[str, Any],
125
+ findings: FindingsAccumulator,
126
+ result_compressor: Optional[Callable] = None
127
+ ) -> EvaluationOutput:
128
+ """
129
+ Evaluate a tool result.
130
+
131
+ Args:
132
+ question: User's original question
133
+ tool_name: Name of the tool that was executed
134
+ arguments: Tool arguments used
135
+ result: Raw tool result dict
136
+ findings: Accumulated findings so far
137
+ result_compressor: Optional function to compress tool results
138
+
139
+ Returns:
140
+ EvaluationOutput with interpretation and next steps
141
+ """
142
+ # Compress the result for LLM consumption
143
+ if result_compressor:
144
+ result_summary = json.dumps(result_compressor(tool_name, result), default=str)
145
+ else:
146
+ result_summary = self._default_compress(result)
147
+
148
+ # Truncate if too long
149
+ if len(result_summary) > 3000:
150
+ result_summary = result_summary[:3000] + "... [truncated]"
151
+
152
+ # Build argument string
153
+ args_str = json.dumps(arguments, default=str)
154
+ if len(args_str) > 500:
155
+ args_str = args_str[:500] + "..."
156
+
157
+ user_prompt = EVALUATOR_USER_TEMPLATE.format(
158
+ question=question,
159
+ tool_name=tool_name,
160
+ arguments=args_str,
161
+ result_summary=result_summary,
162
+ prior_findings=findings.get_context_for_reasoning(max_findings=3)
163
+ )
164
+
165
+ response_text = self.llm_caller(
166
+ system_prompt=EVALUATOR_SYSTEM_PROMPT,
167
+ user_prompt=user_prompt,
168
+ max_tokens=1024
169
+ )
170
+
171
+ return self._parse_response(response_text, result_summary)
172
+
173
+ def build_finding(
174
+ self,
175
+ iteration: int,
176
+ hypothesis: str,
177
+ tool_name: str,
178
+ arguments: Dict[str, Any],
179
+ result_summary: str,
180
+ evaluation: "EvaluationOutput"
181
+ ) -> Finding:
182
+ """
183
+ Build a Finding from a completed iteration.
184
+
185
+ Convenience method that combines the action and evaluation
186
+ into a single Finding for the accumulator.
187
+ """
188
+ return Finding(
189
+ iteration=iteration,
190
+ hypothesis=hypothesis,
191
+ action=tool_name,
192
+ arguments=arguments,
193
+ result_summary=result_summary[:1000], # Cap size
194
+ interpretation=evaluation.interpretation,
195
+ confidence=evaluation.confidence,
196
+ answered_question=evaluation.answered,
197
+ next_questions=evaluation.next_questions
198
+ )
199
+
200
+ def _parse_response(self, response_text: str, result_summary: str) -> EvaluationOutput:
201
+ """Parse LLM response into EvaluationOutput."""
202
+ try:
203
+ data = json.loads(response_text.strip())
204
+ except json.JSONDecodeError:
205
+ # Try to extract JSON
206
+ json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', response_text, re.DOTALL)
207
+ if json_match:
208
+ try:
209
+ data = json.loads(json_match.group(0))
210
+ except json.JSONDecodeError:
211
+ return self._fallback_evaluation(response_text, result_summary)
212
+ else:
213
+ return self._fallback_evaluation(response_text, result_summary)
214
+
215
+ return EvaluationOutput(
216
+ interpretation=data.get("interpretation", "Result processed."),
217
+ answered=data.get("answered", False),
218
+ confidence=min(1.0, max(0.0, float(data.get("confidence", 0.3)))),
219
+ should_stop=data.get("should_stop", False),
220
+ next_questions=data.get("next_questions", []),
221
+ key_metric=data.get("key_metric")
222
+ )
223
+
224
+ def _fallback_evaluation(self, response_text: str, result_summary: str) -> EvaluationOutput:
225
+ """Fallback when JSON parsing fails."""
226
+ # Use the raw response as interpretation
227
+ interpretation = response_text.strip()[:500] if response_text else "Analysis step completed."
228
+
229
+ return EvaluationOutput(
230
+ interpretation=interpretation,
231
+ answered=False,
232
+ confidence=0.3,
233
+ should_stop=False,
234
+ next_questions=[],
235
+ key_metric=None
236
+ )
237
+
238
+ def _default_compress(self, result: Dict[str, Any]) -> str:
239
+ """Default compression for tool results."""
240
+ if not isinstance(result, dict):
241
+ return str(result)[:2000]
242
+
243
+ compressed = {}
244
+
245
+ # Always include status
246
+ if "success" in result:
247
+ compressed["success"] = result["success"]
248
+ if "error" in result:
249
+ compressed["error"] = str(result["error"])[:300]
250
+
251
+ # Include key result fields
252
+ result_data = result.get("result", result)
253
+ if isinstance(result_data, dict):
254
+ for key in ["num_rows", "num_columns", "missing_percentage", "task_type",
255
+ "best_model", "best_score", "models", "correlations",
256
+ "output_file", "output_path", "plots", "summary",
257
+ "total_issues", "columns_affected", "features_created",
258
+ "accuracy", "r2_score", "rmse", "f1_score"]:
259
+ if key in result_data:
260
+ value = result_data[key]
261
+ # Truncate long values
262
+ if isinstance(value, (list, dict)):
263
+ compressed[key] = str(value)[:500]
264
+ else:
265
+ compressed[key] = value
266
+
267
+ return json.dumps(compressed, default=str)
src/reasoning/findings.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Findings Accumulator - Core state for the Reasoning Loop.
3
+
4
+ Tracks everything discovered during investigation:
5
+ - Individual findings (action + result + interpretation)
6
+ - Hypotheses being tested
7
+ - Decision ledger (why each action was taken)
8
+ - Confidence tracking
9
+
10
+ This replaces the need for separate "step tracker" and "decision ledger" -
11
+ they're natural byproducts of the accumulated findings.
12
+
13
+ Architecture:
14
+ ReasoningLoop iteration 1: Reason → Act → Evaluate → Finding #1
15
+ ReasoningLoop iteration 2: Reason → Act → Evaluate → Finding #2
16
+ ...
17
+ Synthesizer reads all findings → produces final answer
18
+ """
19
+
20
+ from dataclasses import dataclass, field
21
+ from typing import List, Dict, Any, Optional
22
+ from datetime import datetime
23
+ import json
24
+
25
+
26
@dataclass
class Finding:
    """One reasoning-loop iteration's outcome: what was tried, what came back, what it means."""
    iteration: int                  # 1-based loop iteration number
    hypothesis: str                 # what this step set out to test
    action: str                     # name of the tool that was executed
    arguments: Dict[str, Any]       # arguments passed to that tool
    result_summary: str             # compressed tool output
    interpretation: str             # what was learned from the result
    confidence: float               # 0.0-1.0 confidence in the interpretation
    answered_question: bool         # whether this step answered the user's question
    next_questions: List[str]       # follow-up questions raised by this step
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())

    def to_dict(self) -> Dict[str, Any]:
        """Serialize for API responses; note "answered" is the wire name for answered_question."""
        payload = {
            "iteration": self.iteration,
            "hypothesis": self.hypothesis,
            "action": self.action,
            "arguments": self.arguments,
            "result_summary": self.result_summary,
            "interpretation": self.interpretation,
            "confidence": self.confidence,
            "answered": self.answered_question,
            "next_questions": self.next_questions,
            "timestamp": self.timestamp,
        }
        return payload
53
+
54
+
55
@dataclass
class Hypothesis:
    """A candidate explanation tracked while exploring the data."""
    text: str
    status: str = "untested"   # untested | testing | supported | refuted | inconclusive
    evidence_for: List[str] = field(default_factory=list)
    evidence_against: List[str] = field(default_factory=list)
    priority: float = 0.5      # 0.0-1.0; higher-priority hypotheses are investigated first
    source_iteration: int = 0  # iteration that generated this hypothesis

    def to_dict(self) -> Dict[str, Any]:
        """Serialize for API responses / session storage."""
        return {
            "text": self.text,
            "status": self.status,
            "evidence_for": self.evidence_for,
            "evidence_against": self.evidence_against,
            "priority": self.priority,
            "source_iteration": self.source_iteration,
        }
74
+
75
+
76
class FindingsAccumulator:
    """
    Accumulates findings across the reasoning loop.

    This is the central state object that the Reasoner reads from and
    the Evaluator writes to. It serves as:
    - Step tracker (each finding records what was done)
    - Decision ledger (each finding records WHY it was done)
    - Evidence accumulator (interpretations build the answer)
    - Hypothesis manager (for exploratory analysis)

    Usage:
        findings = FindingsAccumulator(question="Why are customers churning?")

        # After each iteration:
        findings.add_finding(Finding(
            iteration=1,
            hypothesis="High churn correlates with low engagement",
            action="analyze_correlations",
            arguments={"file_path": "data.csv", "target_col": "churn"},
            result_summary="Found 0.72 correlation between login_frequency and churn",
            interpretation="Strong evidence: infrequent logins predict churn",
            confidence=0.8,
            answered_question=False,
            next_questions=["Is there a threshold for login frequency?"]
        ))

        # For the Reasoner prompt:
        context = findings.get_context_for_reasoning()

        # For the Synthesizer:
        synthesis_context = findings.get_context_for_synthesis()
    """

    def __init__(self, question: str, mode: str = "investigative"):
        """
        Initialize findings accumulator.

        Args:
            question: The user's original question
            mode: "investigative" or "exploratory"
        """
        self.question = question
        self.mode = mode
        self.findings: List[Finding] = []
        self.hypotheses: List[Hypothesis] = []
        # Unique tool names, in order of first use (see add_finding).
        self.tools_used: List[str] = []
        # NOTE(review): never appended to inside this class — presumably the
        # orchestrator records produced artifacts here; confirm against callers.
        self.files_produced: List[str] = []
        # Set True once any finding reports answered_question.
        self.is_answered = False
        # Highest confidence among answering findings (monotonically non-decreasing).
        self.answer_confidence = 0.0
        self.started_at = datetime.now().isoformat()

    @property
    def iteration_count(self) -> int:
        """Number of completed iterations."""
        return len(self.findings)

    def add_finding(self, finding: Finding):
        """Add a finding from a completed iteration."""
        self.findings.append(finding)

        # Record tool name once, preserving first-use order.
        if finding.action not in self.tools_used:
            self.tools_used.append(finding.action)

        # Track answer progress
        if finding.answered_question:
            self.is_answered = True
            self.answer_confidence = max(self.answer_confidence, finding.confidence)

        # Add new hypotheses from next_questions
        for q in finding.next_questions:
            # Deduplicate by exact text match against existing hypotheses.
            if not any(h.text == q for h in self.hypotheses):
                self.hypotheses.append(Hypothesis(
                    text=q,
                    status="untested",
                    priority=0.5,
                    source_iteration=finding.iteration
                ))

    def add_hypothesis(self, text: str, priority: float = 0.5, source_iteration: int = 0):
        """Add a hypothesis to test.

        Duplicates (exact text match) are silently ignored.
        """
        if not any(h.text == text for h in self.hypotheses):
            self.hypotheses.append(Hypothesis(
                text=text,
                status="untested",
                priority=priority,
                source_iteration=source_iteration
            ))

    def update_hypothesis(self, text: str, status: str, evidence: str, is_supporting: bool = True):
        """Update a hypothesis with new evidence.

        Only the first hypothesis whose text matches exactly is updated;
        unknown texts are ignored silently.
        """
        for h in self.hypotheses:
            if h.text == text:
                h.status = status
                if is_supporting:
                    h.evidence_for.append(evidence)
                else:
                    h.evidence_against.append(evidence)
                return

    def get_untested_hypotheses(self) -> List[Hypothesis]:
        """Get hypotheses that haven't been tested yet, sorted by priority (highest first)."""
        untested = [h for h in self.hypotheses if h.status == "untested"]
        return sorted(untested, key=lambda h: h.priority, reverse=True)

    def get_last_output_file(self) -> Optional[str]:
        """Get the most recent output file from tool results.

        Scans findings newest-first; prefers an output_file/output_path
        embedded in the result summary, falling back to the file_path or
        input_path the tool was called with. Returns None if nothing found.
        """
        for finding in reversed(self.findings):
            # Check if result mentions an output file
            result = finding.result_summary
            if "output_file" in result or "output_path" in result:
                try:
                    # Try to parse as JSON
                    result_dict = json.loads(result) if isinstance(result, str) else result
                    return result_dict.get("output_file") or result_dict.get("output_path")
                except (json.JSONDecodeError, TypeError):
                    pass
            # Check arguments for file paths
            for key in ["file_path", "input_path"]:
                if key in finding.arguments:
                    return finding.arguments[key]
        return None

    def get_context_for_reasoning(self, max_findings: int = 5) -> str:
        """
        Build context string for the Reasoner's prompt.

        Returns a concise summary of what's been discovered so far,
        formatted for LLM consumption.

        Args:
            max_findings: Maximum number of recent findings to include
        """
        if not self.findings:
            return "No investigations completed yet. This is the first step."

        parts = []

        # Summary of what's been done
        parts.append(f"**Investigations completed**: {len(self.findings)}")
        parts.append(f"**Tools used**: {', '.join(self.tools_used)}")

        # Recent findings (most relevant for next decision)
        recent = self.findings[-max_findings:]
        parts.append("\n**Recent findings**:")
        for f in recent:
            parts.append(
                f" Step {f.iteration}: Ran `{f.action}` to test: \"{f.hypothesis}\"\n"
                f" → Result: {f.interpretation}\n"
                f" → Confidence: {f.confidence:.0%}"
            )

        # Unanswered questions (top 3 by priority)
        untested = self.get_untested_hypotheses()
        if untested:
            parts.append(f"\n**Open questions** ({len(untested)} remaining):")
            for h in untested[:3]:
                parts.append(f" - {h.text} (priority: {h.priority:.1f})")

        # Overall progress
        if self.is_answered:
            parts.append(f"\n**Status**: Question partially answered (confidence: {self.answer_confidence:.0%})")
        else:
            parts.append(f"\n**Status**: Still investigating")

        return "\n".join(parts)

    def get_context_for_synthesis(self) -> str:
        """
        Build context string for the Synthesizer.

        Returns the complete investigative history with all findings
        and hypothesis statuses.
        """
        parts = []

        parts.append(f"**Original question**: {self.question}")
        parts.append(f"**Mode**: {self.mode}")
        parts.append(f"**Total iterations**: {len(self.findings)}")
        parts.append(f"**Tools used**: {', '.join(self.tools_used)}")

        # All findings in order
        parts.append("\n## Investigation Steps\n")
        for f in self.findings:
            parts.append(
                f"### Step {f.iteration}: {f.action}\n"
                f"**Hypothesis**: {f.hypothesis}\n"
                f"**Arguments**: {json.dumps(f.arguments, default=str)}\n"
                f"**Result**: {f.result_summary}\n"
                f"**Interpretation**: {f.interpretation}\n"
                f"**Confidence**: {f.confidence:.0%}\n"
            )

        # Hypothesis outcomes (for exploratory mode)
        if self.hypotheses:
            parts.append("\n## Hypothesis Outcomes\n")
            for h in self.hypotheses:
                # Visual status marker for the report.
                status_emoji = {
                    "supported": "✅",
                    "refuted": "❌",
                    "inconclusive": "❓",
                    "testing": "🔄",
                    "untested": "⬜"
                }.get(h.status, "⬜")

                parts.append(f"{status_emoji} **{h.text}** → {h.status}")
                if h.evidence_for:
                    parts.append(f" Evidence for: {'; '.join(h.evidence_for)}")
                if h.evidence_against:
                    parts.append(f" Evidence against: {'; '.join(h.evidence_against)}")

        return "\n".join(parts)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize for API response / session storage."""
        return {
            "question": self.question,
            "mode": self.mode,
            "iteration_count": self.iteration_count,
            "is_answered": self.is_answered,
            "answer_confidence": self.answer_confidence,
            "tools_used": self.tools_used,
            "files_produced": self.files_produced,
            "findings": [f.to_dict() for f in self.findings],
            "hypotheses": [h.to_dict() for h in self.hypotheses],
            "started_at": self.started_at
        }
src/reasoning/reasoner.py ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Reasoner Module - The REASON step of the Reasoning Loop.
3
+
4
+ Decides what to investigate next based on:
5
+ - The user's original question
6
+ - What we've discovered so far (findings)
7
+ - Available tools
8
+ - Dataset schema
9
+
10
+ The Reasoner does NOT execute anything. It only produces a structured
11
+ decision about what action to take next.
12
+
13
+ Architecture:
14
+ Reasoner.reason() → ReasoningOutput
15
+ - status: "investigating" | "done"
16
+ - reasoning: why this action (decision ledger entry)
17
+ - tool_name: which tool to run
18
+ - arguments: tool arguments
19
+ - hypothesis: what we're testing
20
+
21
+ This replaces the old approach where a massive system prompt told the LLM
22
+ "follow steps 1-15." Instead, the Reasoner makes a strategic decision
23
+ each iteration based on what it's learned so far.
24
+ """
25
+
26
+ import json
27
+ import re
28
+ from dataclasses import dataclass, field
29
+ from typing import Dict, Any, List, Optional, Callable
30
+
31
+ from .findings import FindingsAccumulator
32
+
33
+
34
@dataclass
class ReasoningOutput:
    """Structured decision produced by one REASON step."""
    status: str                # "investigating" or "done"
    reasoning: str             # justification for the chosen action
    tool_name: Optional[str]   # tool to execute; None when done
    arguments: Dict[str, Any]  # arguments for that tool
    hypothesis: str            # what this action is expected to reveal
    confidence: float = 0.0    # reasoner's confidence in the decision (0-1)

    @classmethod
    def done(cls, reasoning: str, confidence: float = 0.8) -> "ReasoningOutput":
        """Build a terminal output signalling no further investigation is needed."""
        return cls(
            status="done",
            reasoning=reasoning,
            tool_name=None,
            arguments={},
            hypothesis="",
            confidence=confidence,
        )
55
+
56
+
57
# System prompt for the Reasoner LLM call.
# Asks for exactly ONE next step as strict JSON; consumed by Reasoner._parse_response.
REASONER_SYSTEM_PROMPT = """You are a senior data scientist. Your job is to decide the SINGLE MOST IMPORTANT next investigation step.

You are given:
1. The user's question
2. What has been discovered so far
3. The dataset schema
4. Available tools

Your task: Decide ONE action to take next. Be strategic:
- Start with understanding (profiling, correlations) before acting
- Test the most impactful hypothesis first
- Don't repeat actions that have already been done
- Stop when you have enough evidence to answer the question confidently

CRITICAL RULES:
- Output ONLY valid JSON, no other text
- Use EXACT tool names from the available tools list
- Use EXACT column names from the dataset schema
- The file_path argument should use the most recent output file when available
- For visualization, pick the chart type that best answers the question
- NEVER hallucinate column names - use only columns from the schema"""

# User-turn template for the Reasoner; doubled braces {{ }} are literal braces
# in str.format output (the JSON examples shown to the LLM).
REASONER_USER_TEMPLATE = """**User's question**: {question}

**Dataset info**:
- File: {file_path}
- Rows: {num_rows:,} | Columns: {num_columns}
- Numeric columns: {numeric_columns}
- Categorical columns: {categorical_columns}
{target_info}

**Investigation so far**:
{findings_context}

**Available tools**:
{tools_description}

Decide the next action. Respond with ONLY this JSON:
{{
  "status": "investigating" or "done",
  "reasoning": "1-2 sentence explanation of why this action is needed",
  "tool_name": "exact_tool_name",
  "arguments": {{"arg1": "value1", "arg2": "value2"}},
  "hypothesis": "what we expect to learn from this action"
}}

If you have enough evidence to answer the user's question, respond:
{{
  "status": "done",
  "reasoning": "We have sufficient evidence because...",
  "tool_name": null,
  "arguments": {{}},
  "hypothesis": ""
}}"""


# System prompt for generating hypotheses (Exploratory mode).
# Output is a JSON array parsed by Reasoner._parse_hypotheses.
HYPOTHESIS_SYSTEM_PROMPT = """You are a senior data scientist examining a dataset for the first time.
Given the dataset profile, generate 3-5 hypotheses worth investigating.

Focus on:
- Surprising patterns (unexpected correlations, outliers)
- Business-relevant relationships (what drives the target variable?)
- Data quality issues that could affect analysis
- Distribution anomalies

Output ONLY valid JSON array of hypotheses, ranked by priority (most interesting first)."""

# User-turn template for hypothesis generation (Exploratory mode).
HYPOTHESIS_USER_TEMPLATE = """**Dataset**: {file_path}
- Rows: {num_rows:,} | Columns: {num_columns}
- Numeric: {numeric_columns}
- Categorical: {categorical_columns}
{target_info}
{profile_summary}

Generate hypotheses as JSON:
[
  {{"text": "hypothesis description", "priority": 0.9, "suggested_tool": "tool_name"}},
  ...
]"""
138
+
139
+
140
class Reasoner:
    """
    The REASON step of the Reasoning Loop.

    Makes a strategic decision about what to investigate next,
    based on the user's question and accumulated findings.

    Usage:
        reasoner = Reasoner(llm_caller=orchestrator._llm_text_call)
        output = reasoner.reason(
            question="Why are customers churning?",
            dataset_info=schema_info,
            findings=findings_accumulator,
            available_tools=tools_description,
            file_path="data.csv"
        )

        if output.status == "investigating":
            result = execute_tool(output.tool_name, output.arguments)
        else:
            # Done investigating, synthesize answer
            ...
    """

    # Tools that do not take a file_path argument; _parse_response must not
    # inject one for them.
    _NO_FILE_PATH_TOOLS = ("execute_python_code", "get_smart_summary")

    def __init__(self, llm_caller: Callable):
        """
        Args:
            llm_caller: Function (system_prompt, user_prompt, max_tokens) -> str
                Wraps the orchestrator's provider-specific LLM call.
        """
        self.llm_caller = llm_caller

    @staticmethod
    def _format_columns(columns: List[str], limit: int = 15) -> str:
        """Quote and comma-join up to `limit` column names for prompt templates."""
        return ", ".join([f"'{c}'" for c in columns[:limit]])

    @staticmethod
    def _format_target_info(target_col: Optional[str]) -> str:
        """Render the optional target-column line shared by both prompt templates."""
        return f"- Target column: '{target_col}'" if target_col else ""

    def reason(
        self,
        question: str,
        dataset_info: Dict[str, Any],
        findings: FindingsAccumulator,
        available_tools: str,
        file_path: str,
        target_col: Optional[str] = None
    ) -> ReasoningOutput:
        """
        Decide the next investigation step.

        Args:
            question: User's original question
            dataset_info: Dataset schema (columns, types, stats)
            findings: Accumulated findings from previous iterations
            available_tools: Text description of available tools
            file_path: Current file path (latest output or original)
            target_col: Optional target column

        Returns:
            ReasoningOutput with the next action to take
        """
        # Build the user prompt from the shared template.
        user_prompt = REASONER_USER_TEMPLATE.format(
            question=question,
            file_path=file_path,
            num_rows=dataset_info.get("num_rows", 0),
            num_columns=dataset_info.get("num_columns", 0),
            numeric_columns=self._format_columns(dataset_info.get("numeric_columns", [])),
            categorical_columns=self._format_columns(dataset_info.get("categorical_columns", [])),
            target_info=self._format_target_info(target_col),
            findings_context=findings.get_context_for_reasoning(),
            tools_description=available_tools
        )

        # Call LLM
        response_text = self.llm_caller(
            system_prompt=REASONER_SYSTEM_PROMPT,
            user_prompt=user_prompt,
            max_tokens=1024
        )

        # Parse response
        return self._parse_response(response_text, file_path)

    def generate_hypotheses(
        self,
        dataset_info: Dict[str, Any],
        file_path: str,
        target_col: Optional[str] = None,
        profile_summary: str = ""
    ) -> List[Dict[str, Any]]:
        """
        Generate hypotheses for exploratory analysis.

        Called at the start of Exploratory mode to seed the
        reasoning loop with interesting questions to investigate.

        Args:
            dataset_info: Dataset schema
            file_path: Path to dataset
            target_col: Optional target column
            profile_summary: Optional profiling results summary

        Returns:
            List of hypothesis dicts with text, priority, suggested_tool
        """
        user_prompt = HYPOTHESIS_USER_TEMPLATE.format(
            file_path=file_path,
            num_rows=dataset_info.get("num_rows", 0),
            num_columns=dataset_info.get("num_columns", 0),
            numeric_columns=self._format_columns(dataset_info.get("numeric_columns", [])),
            categorical_columns=self._format_columns(dataset_info.get("categorical_columns", [])),
            target_info=self._format_target_info(target_col),
            profile_summary=profile_summary or "No profile available yet."
        )

        response_text = self.llm_caller(
            system_prompt=HYPOTHESIS_SYSTEM_PROMPT,
            user_prompt=user_prompt,
            max_tokens=1024
        )

        return self._parse_hypotheses(response_text)

    def _parse_response(self, response_text: str, file_path: str) -> ReasoningOutput:
        """
        Parse the LLM's JSON reply into a ReasoningOutput.

        Falls back to a safe profiling action when the reply cannot be
        parsed as JSON (directly or after extracting an embedded object).
        """
        def _profiling_fallback() -> ReasoningOutput:
            # Single definition of the fallback (previously duplicated inline).
            return ReasoningOutput(
                status="investigating",
                reasoning="Could not parse LLM response, defaulting to profiling",
                tool_name="profile_dataset",
                arguments={"file_path": file_path},
                hypothesis="Understanding the data structure first"
            )

        try:
            # Try direct JSON parse
            data = json.loads(response_text.strip())
        except json.JSONDecodeError:
            # Try to extract a JSON object from markdown/text. The pattern
            # handles one level of nesting — enough for the "arguments" dict.
            json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', response_text, re.DOTALL)
            if not json_match:
                return _profiling_fallback()
            try:
                data = json.loads(json_match.group(0))
            except json.JSONDecodeError:
                return _profiling_fallback()

        status = data.get("status", "investigating")
        tool_name = data.get("tool_name")
        arguments = data.get("arguments", {})

        # Ensure file_path is in arguments if tool needs it
        if tool_name and "file_path" not in arguments and tool_name not in self._NO_FILE_PATH_TOOLS:
            arguments["file_path"] = file_path

        # Robustness: the LLM may emit confidence as a string or omit it.
        try:
            confidence = float(data.get("confidence", 0.5))
        except (TypeError, ValueError):
            confidence = 0.5

        return ReasoningOutput(
            status=status,
            reasoning=data.get("reasoning", ""),
            tool_name=tool_name if status == "investigating" else None,
            arguments=arguments,
            hypothesis=data.get("hypothesis", ""),
            confidence=confidence
        )

    def _parse_hypotheses(self, response_text: str) -> List[Dict[str, Any]]:
        """
        Parse the hypothesis-generation reply into a list of dicts.

        Tries a direct JSON parse, then extraction of an embedded JSON
        array; returns generic starter hypotheses when both fail.
        """
        try:
            data = json.loads(response_text.strip())
            if isinstance(data, list):
                return data
        except json.JSONDecodeError:
            pass

        # Try to extract a JSON array embedded in surrounding text.
        array_match = re.search(r'\[.*\]', response_text, re.DOTALL)
        if array_match:
            try:
                data = json.loads(array_match.group(0))
                if isinstance(data, list):
                    return data
            except json.JSONDecodeError:
                pass

        # Fallback: generate basic hypotheses
        return [
            {"text": "What are the key statistical properties of this dataset?", "priority": 0.9, "suggested_tool": "profile_dataset"},
            {"text": "Are there any significant correlations between variables?", "priority": 0.8, "suggested_tool": "analyze_correlations"},
            {"text": "What does the distribution of key variables look like?", "priority": 0.7, "suggested_tool": "generate_eda_plots"}
        ]
src/reasoning/synthesizer.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Synthesizer Module - The SYNTHESIZE step of the Reasoning Loop.
3
+
4
+ Takes all accumulated findings and produces a coherent, narrative answer.
5
+
6
+ Unlike the old approach (where the LLM's last response WAS the summary),
7
+ the Synthesizer deliberately constructs the answer from evidence:
8
+ - Connects findings into a coherent story
9
+ - Cites evidence for each claim
10
+ - Highlights confidence levels
11
+ - Notes what wasn't investigated (limitations)
12
+ - Produces actionable insights, not just numbers
13
+
14
+ Architecture:
15
+ FindingsAccumulator → Synthesizer.synthesize() → Markdown narrative
16
+ """
17
+
18
+ import json
19
+ from typing import Dict, Any, List, Optional, Callable
20
+
21
+ from .findings import FindingsAccumulator
22
+
23
+
24
# System prompt for the final report; used by Synthesizer.synthesize (the
# question-driven investigative path — exploratory mode uses its own prompt).
SYNTHESIS_SYSTEM_PROMPT = """You are a senior data scientist writing a concise analysis report.

Given the investigation findings, synthesize a clear, evidence-based answer to the user's question.

STRUCTURE (use markdown):
1. **Executive Summary** (2-3 sentences answering the question directly)
2. **Key Findings** (bullet points with evidence references)
3. **Supporting Evidence** (specific metrics, correlations, patterns)
4. **Visualizations** (mention any plots/charts generated, with file paths)
5. **Limitations & Caveats** (what we didn't investigate, caveats)
6. **Recommendations** (actionable next steps)

RULES:
- Lead with the answer, then show evidence
- Use specific numbers (not "high correlation" but "r=0.72")
- Mention generated files/plots so user can find them
- Be honest about confidence levels
- Keep it under 500 words unless complex analysis warrants more
- Use markdown formatting (headers, bullets, bold for emphasis)"""

# User-turn template filled from the FindingsAccumulator.
SYNTHESIS_USER_TEMPLATE = """**Original question**: {question}

**Investigation summary**:
{findings_context}

**Generated artifacts**:
{artifacts_summary}

Write the analysis report now. Focus on answering the question with evidence from the investigation."""
53
+
54
+
55
class Synthesizer:
    """
    The SYNTHESIZE step of the Reasoning Loop.

    Produces the final answer from accumulated evidence.

    Usage:
        synthesizer = Synthesizer(llm_caller=orchestrator._llm_text_call)
        report = synthesizer.synthesize(
            findings=findings_accumulator,
            artifacts={"plots": [...], "files": [...]}
        )
    """

    def __init__(self, llm_caller: Callable):
        """
        Args:
            llm_caller: Function (system_prompt, user_prompt, max_tokens) -> str
        """
        self.llm_caller = llm_caller

    def synthesize(
        self,
        findings: FindingsAccumulator,
        artifacts: Optional[Dict[str, Any]] = None,
        max_tokens: int = 3000
    ) -> str:
        """
        Synthesize all findings into a coherent answer.

        Args:
            findings: Accumulated findings from the reasoning loop
            artifacts: Optional dict of generated artifacts (plots, files, models)
            max_tokens: Max tokens for synthesis response

        Returns:
            Markdown-formatted analysis report
        """
        # Build artifacts summary
        artifacts_summary = self._format_artifacts(artifacts or {}, findings)

        user_prompt = SYNTHESIS_USER_TEMPLATE.format(
            question=findings.question,
            findings_context=findings.get_context_for_synthesis(),
            artifacts_summary=artifacts_summary
        )

        response = self.llm_caller(
            system_prompt=SYNTHESIS_SYSTEM_PROMPT,
            user_prompt=user_prompt,
            max_tokens=max_tokens
        )

        # Strip stray leading/trailing whitespace from the LLM reply.
        return response.strip()

    def synthesize_exploratory(
        self,
        findings: FindingsAccumulator,
        artifacts: Optional[Dict[str, Any]] = None,
        max_tokens: int = 3000
    ) -> str:
        """
        Synthesize findings from exploratory analysis (no specific question).

        Uses a different prompt that focuses on discovering patterns
        rather than answering a specific question.
        """
        # Discovery-oriented system prompt, local because it is only used here.
        exploratory_system = """You are a senior data scientist presenting exploratory analysis results.

The user asked for a general analysis. Present the most interesting discoveries.

STRUCTURE (use markdown):
1. **Dataset Overview** (size, structure, key characteristics)
2. **Most Interesting Discoveries** (ranked by insight value)
3. **Key Patterns & Relationships** (correlations, distributions, trends)
4. **Data Quality Notes** (missing data, outliers, issues found)
5. **Visualizations Generated** (list with descriptions)
6. **Recommended Next Steps** (what to investigate deeper)

RULES:
- Lead with the most surprising/important finding
- Use specific numbers and metrics
- Mention all generated visualizations with file paths
- Suggest actionable next analysis steps
- Keep it engaging but data-driven"""

        artifacts_summary = self._format_artifacts(artifacts or {}, findings)

        user_prompt = f"""**Analysis request**: {findings.question}

**Investigation summary**:
{findings.get_context_for_synthesis()}

**Generated artifacts**:
{artifacts_summary}

Write the exploratory analysis report."""

        response = self.llm_caller(
            system_prompt=exploratory_system,
            user_prompt=user_prompt,
            max_tokens=max_tokens
        )

        return response.strip()

    def _format_artifacts(self, artifacts: Dict[str, Any], findings: FindingsAccumulator) -> str:
        """Format artifacts for the synthesis prompt.

        Combines the explicit artifacts dict (plots/files) with hints pulled
        from finding result summaries; returns a placeholder string when
        nothing was produced.
        """
        parts = []

        # Extract plots from findings
        plots = artifacts.get("plots", [])
        if plots:
            parts.append("**Plots generated**:")
            for plot in plots:
                # Plots may be dicts (title/url/path) or plain strings.
                if isinstance(plot, dict):
                    parts.append(f" - {plot.get('title', 'Plot')}: {plot.get('url', plot.get('path', 'N/A'))}")
                else:
                    parts.append(f" - {plot}")

        # Extract files from findings
        files = artifacts.get("files", [])
        if files:
            parts.append("**Output files**:")
            for f in files:
                parts.append(f" - {f}")

        # Extract from findings history (cheap substring scan of summaries).
        for finding in findings.findings:
            result = finding.result_summary
            if "output_file" in result or "output_path" in result or ".html" in result or ".png" in result:
                parts.append(f" - Step {finding.iteration} ({finding.action}): output in result")

        # Tools used summary
        if findings.tools_used:
            parts.append(f"\n**Tools used**: {', '.join(findings.tools_used)}")

        if not parts:
            return "No artifacts generated yet."

        return "\n".join(parts)
src/routing/__init__.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Routing Module - Intent Classification and Request Routing.

Determines how the orchestrator should handle a user request:
- Direct: SBERT routing → tool execution (existing pipeline)
- Investigative: Reasoning loop with hypothesis testing
- Exploratory: Auto-hypothesis generation → reasoning loop
"""

from .intent_classifier import IntentClassifier, IntentResult

# Public re-exports of the routing package.
__all__ = ["IntentClassifier", "IntentResult"]
src/routing/intent_classifier.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Intent Classifier - Determines execution mode for the Reasoning Loop.
3
+
4
+ Three execution modes:
5
+ 1. DIRECT: "Make a scatter plot" → SBERT routing → tool → done
6
+ - Clear, specific command with obvious tool mapping
7
+ - No reasoning loop needed
8
+
9
+ 2. INVESTIGATIVE: "Why are customers churning?" → reasoning loop
10
+ - Analytical question requiring hypothesis testing
11
+ - Reasoning loop drives tool selection
12
+
13
+ 3. EXPLORATORY: "Analyze this data" → auto-hypothesis → reasoning loop
14
+ - Open-ended request with no specific question
15
+ - First profiles data, generates hypotheses, then investigates
16
+
17
+ The classifier uses keyword patterns + semantic features to decide.
18
+ This is a lightweight classification (no LLM call needed).
19
+ """
20
+
21
+ import re
22
+ from typing import Optional, Dict, Any, Tuple
23
+ from dataclasses import dataclass
24
+
25
+
26
@dataclass
class IntentResult:
    """Result of intent classification.

    Attributes:
        mode: Execution mode — "direct", "investigative", or "exploratory".
        confidence: Classifier confidence in the chosen mode, 0.0-1.0.
        reasoning: Human-readable explanation of why this mode was chosen.
        sub_intent: More specific intent label (e.g., "visualization",
            "cleaning"), or None when no finer-grained label applies.
    """
    mode: str  # "direct", "investigative", "exploratory"
    confidence: float  # 0.0-1.0
    reasoning: str  # Why this mode was chosen
    sub_intent: Optional[str]  # More specific intent (e.g., "visualization", "cleaning")


# Patterns that indicate DIRECT mode (specific tool commands).
# Each entry is (regex, sub_intent); regexes are applied with re.search +
# re.IGNORECASE against the lowercased query, first match wins.
DIRECT_PATTERNS = [
    # Visualization commands
    (r"\b(make|create|generate|build|show|draw|plot)\b.*(scatter|histogram|heatmap|box\s*plot|bar\s*chart|pie\s*chart|line\s*chart|dashboard|time\s*series)", "visualization"),
    (r"\b(scatter|histogram|heatmap|boxplot|bar\s*chart)\b.*\b(of|for|between|showing)\b", "visualization"),

    # Data cleaning commands
    (r"\b(clean|remove|drop|fill|impute|handle)\b.*(missing|null|nan|outlier|duplicate)", "cleaning"),
    (r"\b(fix|convert|change)\b.*(data\s*type|dtype|column\s*type)", "cleaning"),

    # Feature engineering commands
    (r"\b(create|add|extract|generate)\b.*(feature|time\s*feature|interaction|encoding)", "feature_engineering"),
    (r"\b(encode|one-hot|label\s*encode|ordinal)\b.*\b(categorical|column)", "feature_engineering"),

    # Model training commands
    (r"\b(train|build|fit|run)\b.*(model|classifier|regressor|baseline|xgboost|random\s*forest)", "training"),
    (r"\b(tune|optimize)\b.*\b(hyperparameter|model|parameter)", "training"),
    (r"\b(cross[\s-]?valid)", "training"),

    # Profiling commands
    (r"\b(profile|describe|summarize)\b.*\b(dataset|data|table|file)", "profiling"),
    (r"\b(data\s*quality|quality\s*check|check\s*quality)", "profiling"),

    # Report generation
    (r"\b(generate|create|build)\b.*\b(report|eda\s*report|profiling\s*report)", "reporting"),
]

# Patterns that indicate INVESTIGATIVE mode (analytical questions)
INVESTIGATIVE_PATTERNS = [
    # Causal / explanatory questions
    (r"\bwhy\b.*(are|is|do|does|did)\b", "causal"),
    (r"\bwhat\b.*(cause|driv|factor|reason|explain|lead)", "causal"),
    (r"\bwhat\b.*(affect|impact|influence|determine)", "causal"),

    # Relationship / correlation questions
    (r"\bhow\b.*(does|do|is|are)\b.*\b(relate|correlat|affect|impact|change|vary)", "relationship"),
    (r"\b(relationship|correlation|association)\b.*\bbetween\b", "relationship"),

    # Comparison questions
    (r"\b(differ|compar|contrast)\b.*\bbetween\b", "comparison"),
    (r"\bwhich\b.*(better|worse|higher|lower|more|less|best|worst)", "comparison"),

    # Pattern / trend questions
    (r"\b(pattern|trend|anomal|outlier|unusual|interesting)\b", "pattern"),
    (r"\bis\s+there\b.*(pattern|trend|relationship|correlation|difference)", "pattern"),

    # Prediction-oriented questions (but NOT direct "train a model" commands)
    (r"\bcan\s+(we|i|you)\b.*(predict|forecast|estimate|determine)", "predictive"),
    (r"\bwhat\b.*(predict|forecast|expect|happen)", "predictive"),

    # Segmentation / grouping questions
    (r"\b(segment|group|cluster|categori)\b", "segmentation"),
    (r"\bwhat\b.*(type|kind|group|segment)\b.*\b(customer|user|product)", "segmentation"),
]

# Patterns that indicate EXPLORATORY mode (open-ended requests)
EXPLORATORY_PATTERNS = [
    (r"^analyze\b.*\b(this|the|my)\b.*\b(data|dataset|file|csv)", "general_analysis"),
    (r"^(tell|show)\b.*\b(me|us)\b.*\b(about|everything|what)", "general_analysis"),
    (r"^(explore|investigate|examine|look\s*(at|into))\b.*\b(this|the|my)\b", "general_analysis"),
    (r"^what\b.*\b(can|do)\b.*\b(you|we)\b.*\b(find|learn|discover|see)", "general_analysis"),
    (r"^(give|provide)\b.*\b(overview|summary|insight|analysis)", "general_analysis"),
    (r"^(run|do|perform)\b.*\b(full|complete|comprehensive|end.to.end)\b.*\b(analysis|pipeline|workflow)", "full_pipeline"),
    (r"^(find|discover|uncover)\b.*\b(insight|pattern|trend|interesting)", "general_analysis"),
]


class IntentClassifier:
    """
    Classifies user intent into one of three execution modes.

    Uses pattern matching (no LLM call needed) for fast classification.
    Falls back to heuristics when patterns don't match.

    Usage:
        classifier = IntentClassifier()
        result = classifier.classify("Why are customers churning?")
        # IntentResult(mode="investigative", confidence=0.9, ...)

        result = classifier.classify("Make a scatter plot of age vs income")
        # IntentResult(mode="direct", confidence=0.95, ...)

        result = classifier.classify("Analyze this dataset")
        # IntentResult(mode="exploratory", confidence=0.85, ...)
    """

    # Opening words that signal an analytical question, anchored at the
    # start of the (lowercased, stripped) query. FIX: the original used
    # str.startswith(...) with no word boundary, so e.g. "download..."
    # (prefix "do"), "double..." (prefix "do"), "however..." (prefix "how")
    # and "whichever..." (prefix "which") were misclassified as questions.
    _QUESTION_OPENER = re.compile(r"(?:why|how|what|which|does|do)\b|(?:is|are)\s+there\b")

    def classify(
        self,
        query: str,
        dataset_info: Optional[Dict[str, Any]] = None,
        has_target_col: bool = False
    ) -> IntentResult:
        """
        Classify user intent into execution mode.

        Pattern groups are tried in priority order: DIRECT (strongest
        evidence of a specific tool command), then INVESTIGATIVE, then
        EXPLORATORY, then a heuristic fallback.

        Args:
            query: User's natural language query
            dataset_info: Optional dataset schema info (currently unused;
                reserved for future schema-aware classification)
            has_target_col: Whether user provided a target column

        Returns:
            IntentResult with mode, confidence, and reasoning
        """
        query_lower = query.lower().strip()

        # Phase 1: Check for DIRECT patterns (strongest evidence)
        direct_match = self._match_patterns(query_lower, DIRECT_PATTERNS)
        if direct_match:
            pattern, sub_intent = direct_match
            return IntentResult(
                mode="direct",
                confidence=0.90,
                reasoning=f"Direct command detected: {sub_intent} (pattern: {pattern[:50]})",
                sub_intent=sub_intent
            )

        # Phase 2: Check for INVESTIGATIVE patterns
        invest_match = self._match_patterns(query_lower, INVESTIGATIVE_PATTERNS)
        if invest_match:
            pattern, sub_intent = invest_match
            return IntentResult(
                mode="investigative",
                confidence=0.85,
                reasoning=f"Analytical question detected: {sub_intent}",
                sub_intent=sub_intent
            )

        # Phase 3: Check for EXPLORATORY patterns
        explore_match = self._match_patterns(query_lower, EXPLORATORY_PATTERNS)
        if explore_match:
            pattern, sub_intent = explore_match

            # Special case: "full pipeline" with target col → direct ML pipeline
            if sub_intent == "full_pipeline" and has_target_col:
                return IntentResult(
                    mode="direct",
                    confidence=0.85,
                    reasoning="Full ML pipeline requested with target column",
                    sub_intent="full_ml_pipeline"
                )

            return IntentResult(
                mode="exploratory",
                confidence=0.80,
                reasoning=f"Open-ended analysis request: {sub_intent}",
                sub_intent=sub_intent
            )

        # Phase 4: Heuristic fallback
        return self._heuristic_classify(query_lower, has_target_col)

    def _match_patterns(self, query: str, patterns: list) -> Optional[Tuple[str, str]]:
        """Try to match query against a list of (pattern, sub_intent) tuples.

        Returns the first matching (pattern, sub_intent) pair, or None.
        """
        for pattern, sub_intent in patterns:
            if re.search(pattern, query, re.IGNORECASE):
                return (pattern, sub_intent)
        return None

    def _heuristic_classify(self, query: str, has_target_col: bool) -> IntentResult:
        """Fallback classification using simple heuristics.

        Checked in order: question opener → investigative; very short query
        → direct; target column + ML verb → direct; otherwise exploratory.
        """

        # Question words → investigative. Word-boundary anchored (see
        # _QUESTION_OPENER) so prefixes like "download" don't match "do".
        if self._QUESTION_OPENER.match(query):
            return IntentResult(
                mode="investigative",
                confidence=0.60,
                reasoning="Query starts with question word, likely analytical",
                sub_intent="general_question"
            )

        # Very short queries → likely direct commands
        word_count = len(query.split())
        if word_count <= 5:
            return IntentResult(
                mode="direct",
                confidence=0.55,
                reasoning="Short query, likely a direct command",
                sub_intent="short_command"
            )

        # Has target column + action verbs → direct ML pipeline
        if has_target_col and any(w in query for w in ["predict", "train", "model", "classify", "regression"]):
            return IntentResult(
                mode="direct",
                confidence=0.75,
                reasoning="Target column provided with ML action verb",
                sub_intent="ml_pipeline"
            )

        # Default: exploratory (safest default for data science)
        return IntentResult(
            mode="exploratory",
            confidence=0.40,
            reasoning="No strong pattern match, defaulting to exploratory analysis",
            sub_intent="default"
        )

    @staticmethod
    def is_follow_up(query: str) -> bool:
        """
        Detect if this is a follow-up question (uses context from previous analysis).

        Follow-ups should generally be INVESTIGATIVE (they're asking about
        something specific in the context of previous results).
        """
        follow_up_patterns = [
            r"^(now|next|also|and|then)\b",
            r"\b(the same|that|this|those|these)\b.*\b(data|model|result|plot|chart)",
            r"\b(more|another|different)\b.*\b(plot|chart|analysis|model)",
            r"\b(what about|how about|can you also)\b",
            r"\b(using|with)\b.*\b(the same|that|this)\b",
        ]

        query_lower = query.lower().strip()
        return any(re.search(p, query_lower) for p in follow_up_patterns)