Spaces:
Running
Running
Pulastya B commited on
Commit ·
6b731f7
1
Parent(s): fea1598
Made Major Changes to the Overall User Experience by adding a pipeline visualizer
Browse files- FRRONTEEEND/components/ChatInterface.tsx +124 -18
- FRRONTEEEND/components/PipelineView.tsx +348 -0
- src/orchestrator.py +601 -0
- src/reasoning/__init__.py +15 -0
- src/reasoning/evaluator.py +267 -0
- src/reasoning/findings.py +302 -0
- src/reasoning/reasoner.py +344 -0
- src/reasoning/synthesizer.py +195 -0
- src/routing/__init__.py +12 -0
- src/routing/intent_classifier.py +249 -0
FRRONTEEEND/components/ChatInterface.tsx
CHANGED
|
@@ -9,6 +9,7 @@ import remarkGfm from 'remark-gfm';
|
|
| 9 |
import { useAuth } from '../lib/AuthContext';
|
| 10 |
import { trackQuery, incrementSessionQueries, getHuggingFaceStatus } from '../lib/supabase';
|
| 11 |
import { SettingsModal } from './SettingsModal';
|
|
|
|
| 12 |
|
| 13 |
// HuggingFace logo SVG component for the export button
|
| 14 |
const HuggingFaceLogo = ({ className = "w-4 h-4" }: { className?: string }) => (
|
|
@@ -214,6 +215,12 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
|
|
| 214 |
const processedAnalysisRef = useRef<Set<string>>(new Set()); // Track processed analysis_complete events
|
| 215 |
const [sseReconnectTrigger, setSseReconnectTrigger] = useState(0); // Force SSE reconnection for follow-up queries
|
| 216 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
// Auth context for user tracking
|
| 218 |
const { user, isAuthenticated, dbSessionId, signOut } = useAuth();
|
| 219 |
|
|
@@ -349,17 +356,115 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
|
|
| 349 |
console.log(`🤖 Agent assigned: ${data.agent}`);
|
| 350 |
} else if (data.type === 'tool_executing') {
|
| 351 |
setCurrentStep(data.message || `🔧 Executing: ${data.tool}`);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
} else if (data.type === 'tool_completed') {
|
| 353 |
setCurrentStep(data.message || `✓ Completed: ${data.tool}`);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
} else if (data.type === 'tool_failed') {
|
| 355 |
setCurrentStep(data.message || `❌ Failed: ${data.tool}`);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
} else if (data.type === 'token_update') {
|
| 357 |
// Optional: Display token budget updates
|
| 358 |
console.log('💰 Token update:', data.message);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
} else if (data.type === 'analysis_failed') {
|
| 360 |
console.log('❌ Analysis failed', data);
|
| 361 |
setIsTyping(false);
|
| 362 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
// Show error message to user - add to sessions
|
| 364 |
setSessions(prev => prev.map(s => {
|
| 365 |
if (s.id === activeSessionId) {
|
|
@@ -382,6 +487,11 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
|
|
| 382 |
console.log('✅ Analysis completed', data.result);
|
| 383 |
setIsTyping(false);
|
| 384 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
// Create a unique key based on actual workflow content to prevent duplicates
|
| 386 |
// Use the last tool executed + summary hash for uniqueness
|
| 387 |
const lastTool = data.result?.workflow_history?.[data.result.workflow_history.length - 1]?.tool || 'unknown';
|
|
@@ -539,6 +649,12 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
|
|
| 539 |
|
| 540 |
// Show loading indicator immediately (for UI feedback)
|
| 541 |
setIsTyping(true);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 542 |
|
| 543 |
try {
|
| 544 |
|
|
@@ -1197,23 +1313,13 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
|
|
| 1197 |
))
|
| 1198 |
)}
|
| 1199 |
{isTyping && (
|
| 1200 |
-
<
|
| 1201 |
-
|
| 1202 |
-
|
| 1203 |
-
|
| 1204 |
-
|
| 1205 |
-
|
| 1206 |
-
|
| 1207 |
-
<span className="w-1.5 h-1.5 bg-emerald-500 rounded-full animate-bounce [animation-delay:-0.3s]"></span>
|
| 1208 |
-
<span className="w-1.5 h-1.5 bg-emerald-500 rounded-full animate-bounce [animation-delay:-0.15s]"></span>
|
| 1209 |
-
<span className="w-1.5 h-1.5 bg-emerald-500 rounded-full animate-bounce"></span>
|
| 1210 |
-
</div>
|
| 1211 |
-
<span className="text-sm text-white/60">
|
| 1212 |
-
{currentStep || '🔧 Starting analysis...'}
|
| 1213 |
-
</span>
|
| 1214 |
-
</div>
|
| 1215 |
-
</div>
|
| 1216 |
-
</div>
|
| 1217 |
)}
|
| 1218 |
</div>
|
| 1219 |
|
|
@@ -1394,7 +1500,7 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
|
|
| 1394 |
const allPlots: Array<{title: string, url: string, type?: string}> = [];
|
| 1395 |
const allReports: Array<{name: string, path: string}> = [];
|
| 1396 |
const allDataFiles: string[] = [];
|
| 1397 |
-
const baselineModels = ['xgboost', 'random_forest', '
|
| 1398 |
const foundModels = new Set<string>();
|
| 1399 |
|
| 1400 |
activeSession.messages.forEach(msg => {
|
|
|
|
| 9 |
import { useAuth } from '../lib/AuthContext';
|
| 10 |
import { trackQuery, incrementSessionQueries, getHuggingFaceStatus } from '../lib/supabase';
|
| 11 |
import { SettingsModal } from './SettingsModal';
|
| 12 |
+
import { PipelineView, PipelineStep } from './PipelineView';
|
| 13 |
|
| 14 |
// HuggingFace logo SVG component for the export button
|
| 15 |
const HuggingFaceLogo = ({ className = "w-4 h-4" }: { className?: string }) => (
|
|
|
|
| 215 |
const processedAnalysisRef = useRef<Set<string>>(new Set()); // Track processed analysis_complete events
|
| 216 |
const [sseReconnectTrigger, setSseReconnectTrigger] = useState(0); // Force SSE reconnection for follow-up queries
|
| 217 |
|
| 218 |
+
// Pipeline visualization state (reasoning loop)
|
| 219 |
+
const [pipelineSteps, setPipelineSteps] = useState<PipelineStep[]>([]);
|
| 220 |
+
const [pipelineMode, setPipelineMode] = useState<string | null>(null);
|
| 221 |
+
const [pipelineHypotheses, setPipelineHypotheses] = useState<string[]>([]);
|
| 222 |
+
const pipelineStepCounterRef = useRef(0); // Unique step ID counter
|
| 223 |
+
|
| 224 |
// Auth context for user tracking
|
| 225 |
const { user, isAuthenticated, dbSessionId, signOut } = useAuth();
|
| 226 |
|
|
|
|
| 356 |
console.log(`🤖 Agent assigned: ${data.agent}`);
|
| 357 |
} else if (data.type === 'tool_executing') {
|
| 358 |
setCurrentStep(data.message || `🔧 Executing: ${data.tool}`);
|
| 359 |
+
// Add pipeline step if in reasoning mode
|
| 360 |
+
if (pipelineMode) {
|
| 361 |
+
const stepId = `act-${++pipelineStepCounterRef.current}`;
|
| 362 |
+
setPipelineSteps(prev => [...prev, {
|
| 363 |
+
id: stepId,
|
| 364 |
+
type: 'act',
|
| 365 |
+
status: 'active',
|
| 366 |
+
title: `Executing: ${data.tool}`,
|
| 367 |
+
subtitle: data.message || '',
|
| 368 |
+
tool: data.tool,
|
| 369 |
+
timestamp: new Date()
|
| 370 |
+
}]);
|
| 371 |
+
}
|
| 372 |
} else if (data.type === 'tool_completed') {
|
| 373 |
setCurrentStep(data.message || `✓ Completed: ${data.tool}`);
|
| 374 |
+
// Update pipeline step status
|
| 375 |
+
if (pipelineMode) {
|
| 376 |
+
setPipelineSteps(prev => prev.map(s =>
|
| 377 |
+
s.type === 'act' && s.status === 'active' ? { ...s, status: 'completed' as const } : s
|
| 378 |
+
));
|
| 379 |
+
}
|
| 380 |
} else if (data.type === 'tool_failed') {
|
| 381 |
setCurrentStep(data.message || `❌ Failed: ${data.tool}`);
|
| 382 |
+
// Update pipeline step status
|
| 383 |
+
if (pipelineMode) {
|
| 384 |
+
setPipelineSteps(prev => prev.map(s =>
|
| 385 |
+
s.type === 'act' && s.status === 'active' ? { ...s, status: 'failed' as const, subtitle: data.message || 'Tool failed' } : s
|
| 386 |
+
));
|
| 387 |
+
}
|
| 388 |
} else if (data.type === 'token_update') {
|
| 389 |
// Optional: Display token budget updates
|
| 390 |
console.log('💰 Token update:', data.message);
|
| 391 |
+
} else if (data.type === 'intent_classified') {
|
| 392 |
+
// 🎯 Reasoning Loop: Intent classification result
|
| 393 |
+
console.log(`🎯 Intent: ${data.mode} (${Math.round(data.confidence * 100)}%)`);
|
| 394 |
+
setPipelineMode(data.mode);
|
| 395 |
+
const stepId = `intent-${++pipelineStepCounterRef.current}`;
|
| 396 |
+
setPipelineSteps(prev => [...prev, {
|
| 397 |
+
id: stepId,
|
| 398 |
+
type: 'intent',
|
| 399 |
+
status: 'completed',
|
| 400 |
+
title: `Intent: ${data.mode.charAt(0).toUpperCase() + data.mode.slice(1)}`,
|
| 401 |
+
subtitle: data.sub_intent || data.reasoning,
|
| 402 |
+
detail: data.reasoning,
|
| 403 |
+
confidence: data.confidence,
|
| 404 |
+
timestamp: new Date()
|
| 405 |
+
}]);
|
| 406 |
+
} else if (data.type === 'reasoning_mode') {
|
| 407 |
+
// 🧠 Reasoning Loop activated
|
| 408 |
+
console.log(`🧠 Reasoning mode: ${data.mode}`);
|
| 409 |
+
setPipelineMode(data.mode);
|
| 410 |
+
setCurrentStep(data.message || `🧠 Reasoning Loop (${data.mode})`);
|
| 411 |
+
} else if (data.type === 'hypotheses_generated') {
|
| 412 |
+
// 💡 Exploratory mode: hypotheses generated
|
| 413 |
+
console.log(`💡 ${data.count} hypotheses generated`);
|
| 414 |
+
setPipelineHypotheses(data.hypotheses || []);
|
| 415 |
+
const stepId = `hyp-${++pipelineStepCounterRef.current}`;
|
| 416 |
+
setPipelineSteps(prev => [...prev, {
|
| 417 |
+
id: stepId,
|
| 418 |
+
type: 'hypothesis',
|
| 419 |
+
status: 'completed',
|
| 420 |
+
title: `${data.count} Hypotheses Generated`,
|
| 421 |
+
subtitle: data.hypotheses?.[0] || '',
|
| 422 |
+
detail: (data.hypotheses || []).map((h: string, i: number) => `${i + 1}. ${h}`).join('\n'),
|
| 423 |
+
timestamp: new Date()
|
| 424 |
+
}]);
|
| 425 |
+
} else if (data.type === 'reasoning_step') {
|
| 426 |
+
// 🤔 Reasoning step: LLM decided next action
|
| 427 |
+
console.log(`🤔 Iteration ${data.iteration}: ${data.tool}`);
|
| 428 |
+
// Mark previous "reason" steps as completed
|
| 429 |
+
setPipelineSteps(prev => prev.map(s =>
|
| 430 |
+
s.type === 'reason' && s.status === 'active' ? { ...s, status: 'completed' as const } : s
|
| 431 |
+
));
|
| 432 |
+
const stepId = `reason-${++pipelineStepCounterRef.current}`;
|
| 433 |
+
setPipelineSteps(prev => [...prev, {
|
| 434 |
+
id: stepId,
|
| 435 |
+
type: 'reason',
|
| 436 |
+
status: 'completed',
|
| 437 |
+
title: `Reason → ${data.tool}`,
|
| 438 |
+
subtitle: data.hypothesis || '',
|
| 439 |
+
detail: data.reasoning,
|
| 440 |
+
iteration: data.iteration,
|
| 441 |
+
tool: data.tool,
|
| 442 |
+
timestamp: new Date()
|
| 443 |
+
}]);
|
| 444 |
+
} else if (data.type === 'finding_discovered') {
|
| 445 |
+
// 🔬 Finding from evaluation step
|
| 446 |
+
console.log(`🔬 Finding (confidence: ${Math.round(data.confidence * 100)}%)`);
|
| 447 |
+
const stepId = `finding-${++pipelineStepCounterRef.current}`;
|
| 448 |
+
setPipelineSteps(prev => [...prev, {
|
| 449 |
+
id: stepId,
|
| 450 |
+
type: 'finding',
|
| 451 |
+
status: 'completed',
|
| 452 |
+
title: data.answered ? '✓ Question Answered' : 'Finding Discovered',
|
| 453 |
+
subtitle: data.interpretation?.substring(0, 100) || '',
|
| 454 |
+
detail: data.interpretation,
|
| 455 |
+
confidence: data.confidence,
|
| 456 |
+
iteration: data.iteration,
|
| 457 |
+
timestamp: new Date()
|
| 458 |
+
}]);
|
| 459 |
} else if (data.type === 'analysis_failed') {
|
| 460 |
console.log('❌ Analysis failed', data);
|
| 461 |
setIsTyping(false);
|
| 462 |
|
| 463 |
+
// Reset pipeline state
|
| 464 |
+
setPipelineSteps([]);
|
| 465 |
+
setPipelineMode(null);
|
| 466 |
+
setPipelineHypotheses([]);
|
| 467 |
+
|
| 468 |
// Show error message to user - add to sessions
|
| 469 |
setSessions(prev => prev.map(s => {
|
| 470 |
if (s.id === activeSessionId) {
|
|
|
|
| 487 |
console.log('✅ Analysis completed', data.result);
|
| 488 |
setIsTyping(false);
|
| 489 |
|
| 490 |
+
// Reset pipeline state
|
| 491 |
+
setPipelineSteps([]);
|
| 492 |
+
setPipelineMode(null);
|
| 493 |
+
setPipelineHypotheses([]);
|
| 494 |
+
|
| 495 |
// Create a unique key based on actual workflow content to prevent duplicates
|
| 496 |
// Use the last tool executed + summary hash for uniqueness
|
| 497 |
const lastTool = data.result?.workflow_history?.[data.result.workflow_history.length - 1]?.tool || 'unknown';
|
|
|
|
| 649 |
|
| 650 |
// Show loading indicator immediately (for UI feedback)
|
| 651 |
setIsTyping(true);
|
| 652 |
+
|
| 653 |
+
// Reset pipeline state for new analysis
|
| 654 |
+
setPipelineSteps([]);
|
| 655 |
+
setPipelineMode(null);
|
| 656 |
+
setPipelineHypotheses([]);
|
| 657 |
+
pipelineStepCounterRef.current = 0;
|
| 658 |
|
| 659 |
try {
|
| 660 |
|
|
|
|
| 1313 |
))
|
| 1314 |
)}
|
| 1315 |
{isTyping && (
|
| 1316 |
+
<PipelineView
|
| 1317 |
+
steps={pipelineSteps}
|
| 1318 |
+
mode={pipelineMode}
|
| 1319 |
+
currentStep={currentStep}
|
| 1320 |
+
isActive={isTyping}
|
| 1321 |
+
hypotheses={pipelineHypotheses}
|
| 1322 |
+
/>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1323 |
)}
|
| 1324 |
</div>
|
| 1325 |
|
|
|
|
| 1500 |
const allPlots: Array<{title: string, url: string, type?: string}> = [];
|
| 1501 |
const allReports: Array<{name: string, path: string}> = [];
|
| 1502 |
const allDataFiles: string[] = [];
|
| 1503 |
+
const baselineModels = ['xgboost', 'random_forest', 'lightgbm', 'ridge', 'lasso'];
|
| 1504 |
const foundModels = new Set<string>();
|
| 1505 |
|
| 1506 |
activeSession.messages.forEach(msg => {
|
FRRONTEEEND/components/PipelineView.tsx
ADDED
|
@@ -0,0 +1,348 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import React from 'react';
|
| 2 |
+
import { motion, AnimatePresence } from 'framer-motion';
|
| 3 |
+
import {
|
| 4 |
+
Brain, Zap, BarChart3, CheckCircle2, XCircle,
|
| 5 |
+
Loader2, ChevronDown, ChevronUp, Lightbulb,
|
| 6 |
+
Search, FlaskConical, FileText, Target, ArrowRight
|
| 7 |
+
} from 'lucide-react';
|
| 8 |
+
import { cn } from '../lib/utils';
|
| 9 |
+
|
| 10 |
+
// ─── Types ───────────────────────────────────────────────────
|
| 11 |
+
|
| 12 |
+
export interface PipelineStep {
|
| 13 |
+
id: string;
|
| 14 |
+
type: 'intent' | 'hypothesis' | 'reason' | 'act' | 'evaluate' | 'finding' | 'synthesize';
|
| 15 |
+
status: 'pending' | 'active' | 'completed' | 'failed';
|
| 16 |
+
title: string;
|
| 17 |
+
subtitle?: string;
|
| 18 |
+
detail?: string; // Extended info (shown on expand)
|
| 19 |
+
confidence?: number; // 0-1
|
| 20 |
+
timestamp?: Date;
|
| 21 |
+
tool?: string;
|
| 22 |
+
iteration?: number;
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
interface PipelineViewProps {
|
| 26 |
+
steps: PipelineStep[];
|
| 27 |
+
mode: string | null; // "direct" | "investigative" | "exploratory" | null
|
| 28 |
+
currentStep: string; // Existing currentStep string from ChatInterface
|
| 29 |
+
isActive: boolean; // Whether analysis is running
|
| 30 |
+
hypotheses?: string[];
|
| 31 |
+
className?: string;
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
// ─── Icons per step type ─────────────────────────────────────
|
| 35 |
+
|
| 36 |
+
const stepIcons: Record<PipelineStep['type'], React.ElementType> = {
|
| 37 |
+
intent: Target,
|
| 38 |
+
hypothesis: Lightbulb,
|
| 39 |
+
reason: Brain,
|
| 40 |
+
act: Zap,
|
| 41 |
+
evaluate: Search,
|
| 42 |
+
finding: FlaskConical,
|
| 43 |
+
synthesize: FileText,
|
| 44 |
+
};
|
| 45 |
+
|
| 46 |
+
const stepColors: Record<PipelineStep['type'], string> = {
|
| 47 |
+
intent: 'text-violet-400 bg-violet-500/10 border-violet-500/20',
|
| 48 |
+
hypothesis: 'text-amber-400 bg-amber-500/10 border-amber-500/20',
|
| 49 |
+
reason: 'text-cyan-400 bg-cyan-500/10 border-cyan-500/20',
|
| 50 |
+
act: 'text-emerald-400 bg-emerald-500/10 border-emerald-500/20',
|
| 51 |
+
evaluate: 'text-blue-400 bg-blue-500/10 border-blue-500/20',
|
| 52 |
+
finding: 'text-pink-400 bg-pink-500/10 border-pink-500/20',
|
| 53 |
+
synthesize: 'text-orange-400 bg-orange-500/10 border-orange-500/20',
|
| 54 |
+
};
|
| 55 |
+
|
| 56 |
+
const statusDotColors: Record<PipelineStep['status'], string> = {
|
| 57 |
+
pending: 'bg-white/20',
|
| 58 |
+
active: 'bg-emerald-500',
|
| 59 |
+
completed: 'bg-emerald-500',
|
| 60 |
+
failed: 'bg-red-500',
|
| 61 |
+
};
|
| 62 |
+
|
| 63 |
+
// ─── Confidence Bar ──────────────────────────────────────────
|
| 64 |
+
|
| 65 |
+
const ConfidenceBar: React.FC<{ value: number }> = ({ value }) => (
|
| 66 |
+
<div className="flex items-center gap-2 mt-1">
|
| 67 |
+
<div className="flex-1 h-1 bg-white/5 rounded-full overflow-hidden">
|
| 68 |
+
<motion.div
|
| 69 |
+
className={cn(
|
| 70 |
+
"h-full rounded-full",
|
| 71 |
+
value >= 0.7 ? "bg-emerald-500" : value >= 0.4 ? "bg-amber-500" : "bg-red-400"
|
| 72 |
+
)}
|
| 73 |
+
initial={{ width: 0 }}
|
| 74 |
+
animate={{ width: `${Math.round(value * 100)}%` }}
|
| 75 |
+
transition={{ duration: 0.6, ease: "easeOut" }}
|
| 76 |
+
/>
|
| 77 |
+
</div>
|
| 78 |
+
<span className="text-[10px] font-mono text-white/30 w-8 text-right">
|
| 79 |
+
{Math.round(value * 100)}%
|
| 80 |
+
</span>
|
| 81 |
+
</div>
|
| 82 |
+
);
|
| 83 |
+
|
| 84 |
+
// ─── Mode Badge ──────────────────────────────────────────────
|
| 85 |
+
|
| 86 |
+
const ModeBadge: React.FC<{ mode: string }> = ({ mode }) => {
|
| 87 |
+
const config: Record<string, { label: string; color: string; icon: React.ElementType }> = {
|
| 88 |
+
direct: { label: 'Direct', color: 'bg-emerald-500/10 text-emerald-400 border-emerald-500/20', icon: Zap },
|
| 89 |
+
investigative: { label: 'Investigative', color: 'bg-cyan-500/10 text-cyan-400 border-cyan-500/20', icon: Search },
|
| 90 |
+
exploratory: { label: 'Exploratory', color: 'bg-violet-500/10 text-violet-400 border-violet-500/20', icon: FlaskConical },
|
| 91 |
+
};
|
| 92 |
+
const { label, color, icon: Icon } = config[mode] || config.direct;
|
| 93 |
+
|
| 94 |
+
return (
|
| 95 |
+
<span className={cn("inline-flex items-center gap-1.5 px-2 py-0.5 text-[10px] font-medium rounded-full border", color)}>
|
| 96 |
+
<Icon className="w-3 h-3" />
|
| 97 |
+
{label} Mode
|
| 98 |
+
</span>
|
| 99 |
+
);
|
| 100 |
+
};
|
| 101 |
+
|
| 102 |
+
// ─── Single Step Row ─────────────────────────────────────────
|
| 103 |
+
|
| 104 |
+
const StepRow: React.FC<{ step: PipelineStep; isLast: boolean }> = ({ step, isLast }) => {
|
| 105 |
+
const [expanded, setExpanded] = React.useState(false);
|
| 106 |
+
const Icon = stepIcons[step.type] || Zap;
|
| 107 |
+
const colorClass = stepColors[step.type] || stepColors.act;
|
| 108 |
+
const isActive = step.status === 'active';
|
| 109 |
+
const isCompleted = step.status === 'completed';
|
| 110 |
+
const isFailed = step.status === 'failed';
|
| 111 |
+
|
| 112 |
+
return (
|
| 113 |
+
<div className="relative">
|
| 114 |
+
{/* Connector line */}
|
| 115 |
+
{!isLast && (
|
| 116 |
+
<div className={cn(
|
| 117 |
+
"absolute left-4 top-10 w-px h-[calc(100%-16px)]",
|
| 118 |
+
isCompleted ? "bg-emerald-500/30" : "bg-white/5"
|
| 119 |
+
)} />
|
| 120 |
+
)}
|
| 121 |
+
|
| 122 |
+
<motion.div
|
| 123 |
+
initial={{ opacity: 0, x: -12 }}
|
| 124 |
+
animate={{ opacity: 1, x: 0 }}
|
| 125 |
+
transition={{ duration: 0.3 }}
|
| 126 |
+
className={cn(
|
| 127 |
+
"relative flex items-start gap-3 p-2 rounded-lg cursor-pointer transition-colors",
|
| 128 |
+
isActive && "bg-white/[0.03]",
|
| 129 |
+
expanded && "bg-white/[0.02]"
|
| 130 |
+
)}
|
| 131 |
+
onClick={() => step.detail && setExpanded(!expanded)}
|
| 132 |
+
>
|
| 133 |
+
{/* Icon circle */}
|
| 134 |
+
<div className={cn(
|
| 135 |
+
"w-8 h-8 rounded-lg flex items-center justify-center shrink-0 border",
|
| 136 |
+
colorClass,
|
| 137 |
+
isActive && "animate-pulse"
|
| 138 |
+
)}>
|
| 139 |
+
{isActive ? (
|
| 140 |
+
<Loader2 className="w-4 h-4 animate-spin" />
|
| 141 |
+
) : isCompleted ? (
|
| 142 |
+
<CheckCircle2 className="w-4 h-4 text-emerald-400" />
|
| 143 |
+
) : isFailed ? (
|
| 144 |
+
<XCircle className="w-4 h-4 text-red-400" />
|
| 145 |
+
) : (
|
| 146 |
+
<Icon className="w-4 h-4" />
|
| 147 |
+
)}
|
| 148 |
+
</div>
|
| 149 |
+
|
| 150 |
+
{/* Content */}
|
| 151 |
+
<div className="flex-1 min-w-0">
|
| 152 |
+
<div className="flex items-center gap-2">
|
| 153 |
+
<span className={cn(
|
| 154 |
+
"text-xs font-medium truncate",
|
| 155 |
+
isActive ? "text-white" : isCompleted ? "text-white/70" : "text-white/40"
|
| 156 |
+
)}>
|
| 157 |
+
{step.title}
|
| 158 |
+
</span>
|
| 159 |
+
{step.iteration && (
|
| 160 |
+
<span className="text-[10px] font-mono text-white/20 shrink-0">
|
| 161 |
+
#{step.iteration}
|
| 162 |
+
</span>
|
| 163 |
+
)}
|
| 164 |
+
{step.detail && (
|
| 165 |
+
expanded
|
| 166 |
+
? <ChevronUp className="w-3 h-3 text-white/20 shrink-0" />
|
| 167 |
+
: <ChevronDown className="w-3 h-3 text-white/20 shrink-0" />
|
| 168 |
+
)}
|
| 169 |
+
</div>
|
| 170 |
+
|
| 171 |
+
{step.subtitle && (
|
| 172 |
+
<p className="text-[11px] text-white/30 mt-0.5 truncate">
|
| 173 |
+
{step.subtitle}
|
| 174 |
+
</p>
|
| 175 |
+
)}
|
| 176 |
+
|
| 177 |
+
{step.confidence !== undefined && step.confidence > 0 && (
|
| 178 |
+
<ConfidenceBar value={step.confidence} />
|
| 179 |
+
)}
|
| 180 |
+
</div>
|
| 181 |
+
|
| 182 |
+
{/* Status dot */}
|
| 183 |
+
<div className={cn(
|
| 184 |
+
"w-2 h-2 rounded-full shrink-0 mt-2",
|
| 185 |
+
statusDotColors[step.status]
|
| 186 |
+
)} />
|
| 187 |
+
</motion.div>
|
| 188 |
+
|
| 189 |
+
{/* Expanded detail */}
|
| 190 |
+
<AnimatePresence>
|
| 191 |
+
{expanded && step.detail && (
|
| 192 |
+
<motion.div
|
| 193 |
+
initial={{ height: 0, opacity: 0 }}
|
| 194 |
+
animate={{ height: 'auto', opacity: 1 }}
|
| 195 |
+
exit={{ height: 0, opacity: 0 }}
|
| 196 |
+
transition={{ duration: 0.2 }}
|
| 197 |
+
className="overflow-hidden"
|
| 198 |
+
>
|
| 199 |
+
<div className="ml-11 mr-2 mb-2 p-2 rounded-lg bg-white/[0.02] border border-white/5">
|
| 200 |
+
<p className="text-[11px] text-white/40 leading-relaxed whitespace-pre-wrap">
|
| 201 |
+
{step.detail}
|
| 202 |
+
</p>
|
| 203 |
+
</div>
|
| 204 |
+
</motion.div>
|
| 205 |
+
)}
|
| 206 |
+
</AnimatePresence>
|
| 207 |
+
</div>
|
| 208 |
+
);
|
| 209 |
+
};
|
| 210 |
+
|
| 211 |
+
// ─── Hypotheses Panel ────────────────────────────────────────
|
| 212 |
+
|
| 213 |
+
const HypothesesPanel: React.FC<{ hypotheses: string[] }> = ({ hypotheses }) => {
|
| 214 |
+
const [collapsed, setCollapsed] = React.useState(false);
|
| 215 |
+
|
| 216 |
+
if (!hypotheses.length) return null;
|
| 217 |
+
|
| 218 |
+
return (
|
| 219 |
+
<div className="mb-3">
|
| 220 |
+
<button
|
| 221 |
+
onClick={() => setCollapsed(!collapsed)}
|
| 222 |
+
className="flex items-center gap-1.5 text-[10px] font-medium text-amber-400/70 hover:text-amber-400 transition-colors mb-1.5"
|
| 223 |
+
>
|
| 224 |
+
<Lightbulb className="w-3 h-3" />
|
| 225 |
+
<span>{hypotheses.length} Hypotheses</span>
|
| 226 |
+
{collapsed ? <ChevronDown className="w-3 h-3" /> : <ChevronUp className="w-3 h-3" />}
|
| 227 |
+
</button>
|
| 228 |
+
<AnimatePresence>
|
| 229 |
+
{!collapsed && (
|
| 230 |
+
<motion.div
|
| 231 |
+
initial={{ height: 0, opacity: 0 }}
|
| 232 |
+
animate={{ height: 'auto', opacity: 1 }}
|
| 233 |
+
exit={{ height: 0, opacity: 0 }}
|
| 234 |
+
className="overflow-hidden"
|
| 235 |
+
>
|
| 236 |
+
<div className="space-y-1 ml-4">
|
| 237 |
+
{hypotheses.map((h, i) => (
|
| 238 |
+
<div key={i} className="flex items-start gap-1.5">
|
| 239 |
+
<ArrowRight className="w-3 h-3 text-amber-500/30 mt-0.5 shrink-0" />
|
| 240 |
+
<span className="text-[11px] text-white/30">{h}</span>
|
| 241 |
+
</div>
|
| 242 |
+
))}
|
| 243 |
+
</div>
|
| 244 |
+
</motion.div>
|
| 245 |
+
)}
|
| 246 |
+
</AnimatePresence>
|
| 247 |
+
</div>
|
| 248 |
+
);
|
| 249 |
+
};
|
| 250 |
+
|
| 251 |
+
// ─── Main Pipeline View ──────────────────────────────────────
|
| 252 |
+
|
| 253 |
+
export const PipelineView: React.FC<PipelineViewProps> = ({
|
| 254 |
+
steps,
|
| 255 |
+
mode,
|
| 256 |
+
currentStep,
|
| 257 |
+
isActive,
|
| 258 |
+
hypotheses = [],
|
| 259 |
+
className
|
| 260 |
+
}) => {
|
| 261 |
+
// If no steps yet and not in reasoning mode, show the simple fallback
|
| 262 |
+
if (!steps.length && !mode) {
|
| 263 |
+
return (
|
| 264 |
+
<div className={cn("flex gap-4", className)}>
|
| 265 |
+
<div className="w-8 h-8 rounded-lg flex items-center justify-center shrink-0 bg-white/5 border border-white/10">
|
| 266 |
+
<Loader2 className="w-4 h-4 text-indigo-400 animate-spin" />
|
| 267 |
+
</div>
|
| 268 |
+
<div className="bg-white/[0.03] p-4 rounded-2xl border border-white/5">
|
| 269 |
+
<div className="flex items-center gap-3">
|
| 270 |
+
<div className="flex gap-1">
|
| 271 |
+
<span className="w-1.5 h-1.5 bg-emerald-500 rounded-full animate-bounce [animation-delay:-0.3s]" />
|
| 272 |
+
<span className="w-1.5 h-1.5 bg-emerald-500 rounded-full animate-bounce [animation-delay:-0.15s]" />
|
| 273 |
+
<span className="w-1.5 h-1.5 bg-emerald-500 rounded-full animate-bounce" />
|
| 274 |
+
</div>
|
| 275 |
+
<span className="text-sm text-white/60">
|
| 276 |
+
{currentStep || '🔧 Starting analysis...'}
|
| 277 |
+
</span>
|
| 278 |
+
</div>
|
| 279 |
+
</div>
|
| 280 |
+
</div>
|
| 281 |
+
);
|
| 282 |
+
}
|
| 283 |
+
|
| 284 |
+
// Count completed steps
|
| 285 |
+
const completedCount = steps.filter(s => s.status === 'completed').length;
|
| 286 |
+
const totalCount = steps.length;
|
| 287 |
+
const progressPct = totalCount > 0 ? (completedCount / totalCount) * 100 : 0;
|
| 288 |
+
|
| 289 |
+
return (
|
| 290 |
+
<div className={cn("flex gap-4", className)}>
|
| 291 |
+
{/* Bot avatar */}
|
| 292 |
+
<div className="w-8 h-8 rounded-lg flex items-center justify-center shrink-0 bg-white/5 border border-white/10">
|
| 293 |
+
<Brain className="w-4 h-4 text-cyan-400" />
|
| 294 |
+
</div>
|
| 295 |
+
|
| 296 |
+
{/* Pipeline card */}
|
| 297 |
+
<div className="flex-1 bg-white/[0.03] p-4 rounded-2xl border border-white/5 max-w-lg">
|
| 298 |
+
{/* Header */}
|
| 299 |
+
<div className="flex items-center justify-between mb-3">
|
| 300 |
+
<div className="flex items-center gap-2">
|
| 301 |
+
<span className="text-xs font-semibold text-white/80">Reasoning Pipeline</span>
|
| 302 |
+
{mode && <ModeBadge mode={mode} />}
|
| 303 |
+
</div>
|
| 304 |
+
{isActive && (
|
| 305 |
+
<div className="flex items-center gap-1.5 text-[10px] text-emerald-400">
|
| 306 |
+
<Loader2 className="w-3 h-3 animate-spin" />
|
| 307 |
+
<span>Running</span>
|
| 308 |
+
</div>
|
| 309 |
+
)}
|
| 310 |
+
</div>
|
| 311 |
+
|
| 312 |
+
{/* Progress bar */}
|
| 313 |
+
<div className="h-1 bg-white/5 rounded-full overflow-hidden mb-3">
|
| 314 |
+
<motion.div
|
| 315 |
+
className="h-full bg-gradient-to-r from-cyan-500 to-emerald-500 rounded-full"
|
| 316 |
+
initial={{ width: 0 }}
|
| 317 |
+
animate={{ width: `${progressPct}%` }}
|
| 318 |
+
transition={{ duration: 0.4, ease: "easeOut" }}
|
| 319 |
+
/>
|
| 320 |
+
</div>
|
| 321 |
+
|
| 322 |
+
{/* Hypotheses (exploratory mode) */}
|
| 323 |
+
{hypotheses.length > 0 && <HypothesesPanel hypotheses={hypotheses} />}
|
| 324 |
+
|
| 325 |
+
{/* Steps timeline */}
|
| 326 |
+
<div className="space-y-0.5 max-h-[320px] overflow-y-auto pr-1 scrollbar-thin scrollbar-thumb-white/5">
|
| 327 |
+
{steps.map((step, i) => (
|
| 328 |
+
<StepRow key={step.id} step={step} isLast={i === steps.length - 1} />
|
| 329 |
+
))}
|
| 330 |
+
</div>
|
| 331 |
+
|
| 332 |
+
{/* Footer summary */}
|
| 333 |
+
{!isActive && completedCount > 0 && (
|
| 334 |
+
<div className="mt-3 pt-2 border-t border-white/5 flex items-center justify-between">
|
| 335 |
+
<span className="text-[10px] text-white/20">
|
| 336 |
+
{completedCount} step{completedCount !== 1 ? 's' : ''} completed
|
| 337 |
+
</span>
|
| 338 |
+
<span className="text-[10px] text-white/20 font-mono">
|
| 339 |
+
{steps.filter(s => s.type === 'finding').length} finding{steps.filter(s => s.type === 'finding').length !== 1 ? 's' : ''}
|
| 340 |
+
</span>
|
| 341 |
+
</div>
|
| 342 |
+
)}
|
| 343 |
+
</div>
|
| 344 |
+
</div>
|
| 345 |
+
);
|
| 346 |
+
};
|
| 347 |
+
|
| 348 |
+
export default PipelineView;
|
src/orchestrator.py
CHANGED
|
@@ -21,6 +21,11 @@ from .tools.tools_registry import TOOLS, get_all_tool_names, get_tools_by_catego
|
|
| 21 |
from .tools.agent_tool_mapping import (get_tools_for_agent, filter_tools_by_names,
|
| 22 |
get_agent_description, suggest_next_agent)
|
| 23 |
from .reasoning.reasoning_trace import get_reasoning_trace, reset_reasoning_trace
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
from .session_memory import SessionMemory
|
| 25 |
from .session_store import SessionStore
|
| 26 |
from .workflow_state import WorkflowState
|
|
@@ -2898,6 +2903,526 @@ You receive quality reports from EDA agent and deliver clean data to modeling ag
|
|
| 2898 |
"task_type": result_data.get("task_type")
|
| 2899 |
})
|
| 2900 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2901 |
def analyze(self, file_path: str, task_description: str,
|
| 2902 |
target_col: Optional[str] = None,
|
| 2903 |
use_cache: bool = True,
|
|
@@ -3032,6 +3557,82 @@ You receive quality reports from EDA agent and deliver clean data to modeling ag
|
|
| 3032 |
print("✓ Using cached results")
|
| 3033 |
return cached
|
| 3034 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3035 |
# Build initial messages
|
| 3036 |
# Use dynamic prompts for small context models
|
| 3037 |
if self.use_compact_prompts:
|
|
|
|
| 21 |
from .tools.agent_tool_mapping import (get_tools_for_agent, filter_tools_by_names,
|
| 22 |
get_agent_description, suggest_next_agent)
|
| 23 |
from .reasoning.reasoning_trace import get_reasoning_trace, reset_reasoning_trace
|
| 24 |
+
from .reasoning.findings import FindingsAccumulator, Finding
|
| 25 |
+
from .reasoning.reasoner import Reasoner, ReasoningOutput
|
| 26 |
+
from .reasoning.evaluator import Evaluator, EvaluationOutput
|
| 27 |
+
from .reasoning.synthesizer import Synthesizer
|
| 28 |
+
from .routing.intent_classifier import IntentClassifier, IntentResult
|
| 29 |
from .session_memory import SessionMemory
|
| 30 |
from .session_store import SessionStore
|
| 31 |
from .workflow_state import WorkflowState
|
|
|
|
| 2903 |
"task_type": result_data.get("task_type")
|
| 2904 |
})
|
| 2905 |
|
| 2906 |
+
# ═══════════════════════════════════════════════════════════════════════════
|
| 2907 |
+
# REASONING LOOP INFRASTRUCTURE
|
| 2908 |
+
# Three new methods that power the hypothesis-driven analysis mode:
|
| 2909 |
+
# _llm_text_call → Provider-agnostic text LLM call (no tool schemas)
|
| 2910 |
+
# _get_tools_description → Lightweight text description of available tools
|
| 2911 |
+
# _run_reasoning_loop → The core Reason → Act → Evaluate → Loop/Stop cycle
|
| 2912 |
+
# ═══════════════════════════════════════════════════════════════════════════
|
| 2913 |
+
|
| 2914 |
+
def _llm_text_call(self, system_prompt: str, user_prompt: str, max_tokens: int = 2048) -> str:
|
| 2915 |
+
"""
|
| 2916 |
+
Simple text-only LLM call (no tool schemas).
|
| 2917 |
+
|
| 2918 |
+
Used by Reasoner, Evaluator, and Synthesizer for lightweight
|
| 2919 |
+
reasoning calls. Much cheaper than full tool-calling API calls.
|
| 2920 |
+
|
| 2921 |
+
Args:
|
| 2922 |
+
system_prompt: System prompt for the LLM
|
| 2923 |
+
user_prompt: User prompt for the LLM
|
| 2924 |
+
max_tokens: Maximum response tokens
|
| 2925 |
+
|
| 2926 |
+
Returns:
|
| 2927 |
+
Plain text response from the LLM
|
| 2928 |
+
"""
|
| 2929 |
+
messages = [
|
| 2930 |
+
{"role": "system", "content": system_prompt},
|
| 2931 |
+
{"role": "user", "content": user_prompt}
|
| 2932 |
+
]
|
| 2933 |
+
|
| 2934 |
+
# Rate limiting
|
| 2935 |
+
if self.min_api_call_interval > 0:
|
| 2936 |
+
time_since_last_call = time.time() - self.last_api_call_time
|
| 2937 |
+
if time_since_last_call < self.min_api_call_interval:
|
| 2938 |
+
wait_time = self.min_api_call_interval - time_since_last_call
|
| 2939 |
+
time.sleep(wait_time)
|
| 2940 |
+
|
| 2941 |
+
try:
|
| 2942 |
+
if self.provider == "mistral":
|
| 2943 |
+
if hasattr(self.mistral_client, 'chat') and hasattr(self.mistral_client.chat, 'complete'):
|
| 2944 |
+
response = self.mistral_client.chat.complete(
|
| 2945 |
+
model=self.model,
|
| 2946 |
+
messages=messages,
|
| 2947 |
+
temperature=0.1,
|
| 2948 |
+
max_tokens=max_tokens
|
| 2949 |
+
)
|
| 2950 |
+
else:
|
| 2951 |
+
response = self.mistral_client.chat(
|
| 2952 |
+
model=self.model,
|
| 2953 |
+
messages=messages,
|
| 2954 |
+
temperature=0.1,
|
| 2955 |
+
max_tokens=max_tokens
|
| 2956 |
+
)
|
| 2957 |
+
self.api_calls_made += 1
|
| 2958 |
+
self.last_api_call_time = time.time()
|
| 2959 |
+
|
| 2960 |
+
if hasattr(response, 'usage') and response.usage:
|
| 2961 |
+
self.tokens_this_minute += response.usage.total_tokens
|
| 2962 |
+
|
| 2963 |
+
return self._extract_content_text(response.choices[0].message.content)
|
| 2964 |
+
|
| 2965 |
+
elif self.provider == "groq":
|
| 2966 |
+
response = self.groq_client.chat.completions.create(
|
| 2967 |
+
model=self.model,
|
| 2968 |
+
messages=messages,
|
| 2969 |
+
temperature=0.1,
|
| 2970 |
+
max_tokens=max_tokens
|
| 2971 |
+
)
|
| 2972 |
+
self.api_calls_made += 1
|
| 2973 |
+
self.last_api_call_time = time.time()
|
| 2974 |
+
|
| 2975 |
+
if hasattr(response, 'usage') and response.usage:
|
| 2976 |
+
self.tokens_this_minute += response.usage.total_tokens
|
| 2977 |
+
|
| 2978 |
+
return self._extract_content_text(response.choices[0].message.content)
|
| 2979 |
+
|
| 2980 |
+
elif self.provider == "gemini":
|
| 2981 |
+
full_prompt = f"{system_prompt}\n\n{user_prompt}"
|
| 2982 |
+
response = self.gemini_model.generate_content(
|
| 2983 |
+
full_prompt,
|
| 2984 |
+
generation_config={
|
| 2985 |
+
"temperature": 0.1,
|
| 2986 |
+
"max_output_tokens": max_tokens
|
| 2987 |
+
}
|
| 2988 |
+
)
|
| 2989 |
+
self.api_calls_made += 1
|
| 2990 |
+
self.last_api_call_time = time.time()
|
| 2991 |
+
return response.text
|
| 2992 |
+
|
| 2993 |
+
else:
|
| 2994 |
+
raise ValueError(f"Unsupported provider: {self.provider}")
|
| 2995 |
+
|
| 2996 |
+
except Exception as e:
|
| 2997 |
+
error_str = str(e)
|
| 2998 |
+
# Handle rate limits
|
| 2999 |
+
if "429" in error_str or "rate_limit" in error_str.lower():
|
| 3000 |
+
print(f"⏳ Rate limit in reasoning call, waiting 10s...")
|
| 3001 |
+
time.sleep(10)
|
| 3002 |
+
return self._llm_text_call(system_prompt, user_prompt, max_tokens)
|
| 3003 |
+
raise
|
| 3004 |
+
|
| 3005 |
+
def _get_tools_description(self, tool_names: Optional[List[str]] = None) -> str:
|
| 3006 |
+
"""
|
| 3007 |
+
Build a lightweight text description of available tools.
|
| 3008 |
+
|
| 3009 |
+
Used in Reasoner prompts instead of sending full JSON tool schemas.
|
| 3010 |
+
This is much more token-efficient than the OpenAI tools format.
|
| 3011 |
+
|
| 3012 |
+
Args:
|
| 3013 |
+
tool_names: Optional list of tool names to include (None = all tools)
|
| 3014 |
+
|
| 3015 |
+
Returns:
|
| 3016 |
+
Formatted text like:
|
| 3017 |
+
- profile_dataset(file_path): Profile a dataset to understand structure
|
| 3018 |
+
- analyze_correlations(file_path, target_col): Analyze column correlations
|
| 3019 |
+
...
|
| 3020 |
+
"""
|
| 3021 |
+
import inspect
|
| 3022 |
+
|
| 3023 |
+
lines = []
|
| 3024 |
+
tool_map = self.tool_functions
|
| 3025 |
+
|
| 3026 |
+
# Filter to specific tools if requested
|
| 3027 |
+
if tool_names:
|
| 3028 |
+
tool_map = {k: v for k, v in tool_map.items() if k in tool_names}
|
| 3029 |
+
|
| 3030 |
+
for name, func in sorted(tool_map.items()):
|
| 3031 |
+
# Get function signature
|
| 3032 |
+
try:
|
| 3033 |
+
sig = inspect.signature(func)
|
| 3034 |
+
params = []
|
| 3035 |
+
for param_name, param in sig.parameters.items():
|
| 3036 |
+
if param_name in ("kwargs", "args"):
|
| 3037 |
+
continue
|
| 3038 |
+
if param.default is inspect.Parameter.empty:
|
| 3039 |
+
params.append(param_name)
|
| 3040 |
+
else:
|
| 3041 |
+
params.append(f"{param_name}=...")
|
| 3042 |
+
params_str = ", ".join(params[:5]) # Max 5 params shown
|
| 3043 |
+
if len(sig.parameters) > 5:
|
| 3044 |
+
params_str += ", ..."
|
| 3045 |
+
except (ValueError, TypeError):
|
| 3046 |
+
params_str = "..."
|
| 3047 |
+
|
| 3048 |
+
# Get first line of docstring
|
| 3049 |
+
doc = (func.__doc__ or "").strip().split("\n")[0][:100]
|
| 3050 |
+
|
| 3051 |
+
lines.append(f"- {name}({params_str}): {doc}")
|
| 3052 |
+
|
| 3053 |
+
return "\n".join(lines)
|
| 3054 |
+
|
| 3055 |
+
def _run_reasoning_loop(
|
| 3056 |
+
self,
|
| 3057 |
+
question: str,
|
| 3058 |
+
file_path: str,
|
| 3059 |
+
dataset_info: Dict[str, Any],
|
| 3060 |
+
target_col: Optional[str] = None,
|
| 3061 |
+
mode: str = "investigative",
|
| 3062 |
+
max_iterations: int = 7,
|
| 3063 |
+
tool_names: Optional[List[str]] = None
|
| 3064 |
+
) -> Dict[str, Any]:
|
| 3065 |
+
"""
|
| 3066 |
+
Run the Reasoning Loop: Reason → Act → Evaluate → Loop/Stop → Synthesize.
|
| 3067 |
+
|
| 3068 |
+
This is the core of the hypothesis-driven analysis mode.
|
| 3069 |
+
Instead of a pipeline, the agent:
|
| 3070 |
+
1. REASONS about what to investigate next
|
| 3071 |
+
2. ACTS (executes one tool)
|
| 3072 |
+
3. EVALUATES the result
|
| 3073 |
+
4. Decides to LOOP (investigate more) or STOP
|
| 3074 |
+
5. SYNTHESIZES all findings into a coherent answer
|
| 3075 |
+
|
| 3076 |
+
Args:
|
| 3077 |
+
question: User's question or "Analyze this data"
|
| 3078 |
+
file_path: Path to the dataset
|
| 3079 |
+
dataset_info: Schema info from local extraction
|
| 3080 |
+
target_col: Optional target column
|
| 3081 |
+
mode: "investigative" or "exploratory"
|
| 3082 |
+
max_iterations: Max reasoning iterations (default 7)
|
| 3083 |
+
tool_names: Optional subset of tools to use
|
| 3084 |
+
|
| 3085 |
+
Returns:
|
| 3086 |
+
Dict with status, summary, findings, workflow_history, etc.
|
| 3087 |
+
"""
|
| 3088 |
+
start_time = time.time()
|
| 3089 |
+
|
| 3090 |
+
# Initialize reasoning components (pass our LLM caller)
|
| 3091 |
+
reasoner = Reasoner(llm_caller=self._llm_text_call)
|
| 3092 |
+
evaluator = Evaluator(llm_caller=self._llm_text_call)
|
| 3093 |
+
synthesizer = Synthesizer(llm_caller=self._llm_text_call)
|
| 3094 |
+
findings = FindingsAccumulator(question=question, mode=mode)
|
| 3095 |
+
|
| 3096 |
+
# Get tools description for the reasoner
|
| 3097 |
+
tools_desc = self._get_tools_description(tool_names)
|
| 3098 |
+
|
| 3099 |
+
# Track for API response
|
| 3100 |
+
workflow_history = []
|
| 3101 |
+
current_file = file_path # Tracks the latest output file
|
| 3102 |
+
|
| 3103 |
+
# Emit mode info for UI
|
| 3104 |
+
if hasattr(self, 'session') and self.session:
|
| 3105 |
+
progress_manager.emit(self.session.session_id, {
|
| 3106 |
+
'type': 'reasoning_mode',
|
| 3107 |
+
'mode': mode,
|
| 3108 |
+
'message': f"🧠 Reasoning Loop activated ({mode} mode)",
|
| 3109 |
+
'question': question
|
| 3110 |
+
})
|
| 3111 |
+
|
| 3112 |
+
print(f"\n{'='*60}")
|
| 3113 |
+
print(f"🧠 REASONING LOOP ({mode.upper()} mode)")
|
| 3114 |
+
print(f" Question: {question}")
|
| 3115 |
+
print(f" Max iterations: {max_iterations}")
|
| 3116 |
+
print(f"{'='*60}")
|
| 3117 |
+
|
| 3118 |
+
# ── EXPLORATORY MODE: Generate hypotheses first ──
|
| 3119 |
+
if mode == "exploratory":
|
| 3120 |
+
print(f"\n🔬 Generating hypotheses from data profile...")
|
| 3121 |
+
|
| 3122 |
+
# Profile the dataset first if not already done
|
| 3123 |
+
profile_result = self._execute_tool("profile_dataset", {"file_path": file_path})
|
| 3124 |
+
profile_summary = ""
|
| 3125 |
+
if profile_result.get("success", True):
|
| 3126 |
+
profile_summary = json.dumps(
|
| 3127 |
+
self._compress_tool_result("profile_dataset",
|
| 3128 |
+
self._make_json_serializable(profile_result)),
|
| 3129 |
+
default=str
|
| 3130 |
+
)[:2000]
|
| 3131 |
+
|
| 3132 |
+
workflow_history.append({
|
| 3133 |
+
"iteration": 0,
|
| 3134 |
+
"tool": "profile_dataset",
|
| 3135 |
+
"arguments": {"file_path": file_path},
|
| 3136 |
+
"result": profile_result
|
| 3137 |
+
})
|
| 3138 |
+
self._update_workflow_state("profile_dataset", profile_result)
|
| 3139 |
+
|
| 3140 |
+
# Generate hypotheses
|
| 3141 |
+
hypotheses = reasoner.generate_hypotheses(
|
| 3142 |
+
dataset_info=dataset_info,
|
| 3143 |
+
file_path=file_path,
|
| 3144 |
+
target_col=target_col,
|
| 3145 |
+
profile_summary=profile_summary
|
| 3146 |
+
)
|
| 3147 |
+
|
| 3148 |
+
print(f" Generated {len(hypotheses)} hypotheses:")
|
| 3149 |
+
for i, h in enumerate(hypotheses):
|
| 3150 |
+
text = h.get("text", str(h))
|
| 3151 |
+
priority = h.get("priority", 0.5)
|
| 3152 |
+
findings.add_hypothesis(text, priority=priority, source_iteration=0)
|
| 3153 |
+
print(f" {i+1}. [{priority:.1f}] {text}")
|
| 3154 |
+
|
| 3155 |
+
# Emit hypothesis info
|
| 3156 |
+
if hasattr(self, 'session') and self.session:
|
| 3157 |
+
progress_manager.emit(self.session.session_id, {
|
| 3158 |
+
'type': 'hypotheses_generated',
|
| 3159 |
+
'hypotheses': [h.get("text", str(h)) for h in hypotheses],
|
| 3160 |
+
'count': len(hypotheses)
|
| 3161 |
+
})
|
| 3162 |
+
|
| 3163 |
+
# ── MAIN REASONING LOOP ──
|
| 3164 |
+
for iteration in range(1, max_iterations + 1):
|
| 3165 |
+
print(f"\n── Iteration {iteration}/{max_iterations} ──")
|
| 3166 |
+
|
| 3167 |
+
# STEP 1: REASON - What should we investigate next?
|
| 3168 |
+
print(f"🤔 REASON: Deciding next action...")
|
| 3169 |
+
|
| 3170 |
+
reasoning_output = reasoner.reason(
|
| 3171 |
+
question=question,
|
| 3172 |
+
dataset_info=dataset_info,
|
| 3173 |
+
findings=findings,
|
| 3174 |
+
available_tools=tools_desc,
|
| 3175 |
+
file_path=current_file,
|
| 3176 |
+
target_col=target_col
|
| 3177 |
+
)
|
| 3178 |
+
|
| 3179 |
+
print(f" Status: {reasoning_output.status}")
|
| 3180 |
+
print(f" Reasoning: {reasoning_output.reasoning}")
|
| 3181 |
+
|
| 3182 |
+
# Check if done
|
| 3183 |
+
if reasoning_output.status == "done":
|
| 3184 |
+
print(f"✅ Reasoner says: DONE (confidence: {reasoning_output.confidence:.0%})")
|
| 3185 |
+
print(f" Reason: {reasoning_output.reasoning}")
|
| 3186 |
+
break
|
| 3187 |
+
|
| 3188 |
+
tool_name = reasoning_output.tool_name
|
| 3189 |
+
tool_args = reasoning_output.arguments
|
| 3190 |
+
hypothesis = reasoning_output.hypothesis
|
| 3191 |
+
|
| 3192 |
+
if not tool_name or tool_name not in self.tool_functions:
|
| 3193 |
+
print(f"⚠️ Invalid tool: {tool_name}, skipping iteration")
|
| 3194 |
+
continue
|
| 3195 |
+
|
| 3196 |
+
print(f" Tool: {tool_name}")
|
| 3197 |
+
print(f" Hypothesis: {hypothesis}")
|
| 3198 |
+
|
| 3199 |
+
# Emit reasoning step for UI
|
| 3200 |
+
if hasattr(self, 'session') and self.session:
|
| 3201 |
+
progress_manager.emit(self.session.session_id, {
|
| 3202 |
+
'type': 'reasoning_step',
|
| 3203 |
+
'iteration': iteration,
|
| 3204 |
+
'tool': tool_name,
|
| 3205 |
+
'hypothesis': hypothesis,
|
| 3206 |
+
'reasoning': reasoning_output.reasoning
|
| 3207 |
+
})
|
| 3208 |
+
|
| 3209 |
+
# STEP 2: ACT - Execute the tool
|
| 3210 |
+
print(f"⚡ ACT: Executing {tool_name}...")
|
| 3211 |
+
|
| 3212 |
+
# Emit tool execution event
|
| 3213 |
+
if hasattr(self, 'session') and self.session:
|
| 3214 |
+
progress_manager.emit(self.session.session_id, {
|
| 3215 |
+
'type': 'tool_executing',
|
| 3216 |
+
'tool': tool_name,
|
| 3217 |
+
'message': f"🔧 Executing: {tool_name}",
|
| 3218 |
+
'arguments': tool_args
|
| 3219 |
+
})
|
| 3220 |
+
|
| 3221 |
+
tool_result = self._execute_tool(tool_name, tool_args)
|
| 3222 |
+
|
| 3223 |
+
# Track output file for next iteration
|
| 3224 |
+
if tool_result.get("success", True):
|
| 3225 |
+
result_data = tool_result.get("result", {})
|
| 3226 |
+
if isinstance(result_data, dict):
|
| 3227 |
+
new_file = result_data.get("output_file") or result_data.get("output_path")
|
| 3228 |
+
if new_file:
|
| 3229 |
+
current_file = new_file
|
| 3230 |
+
|
| 3231 |
+
# Emit success
|
| 3232 |
+
if hasattr(self, 'session') and self.session:
|
| 3233 |
+
progress_manager.emit(self.session.session_id, {
|
| 3234 |
+
'type': 'tool_completed',
|
| 3235 |
+
'tool': tool_name,
|
| 3236 |
+
'message': f"✓ Completed: {tool_name}"
|
| 3237 |
+
})
|
| 3238 |
+
print(f" ✓ Tool completed successfully")
|
| 3239 |
+
else:
|
| 3240 |
+
error_msg = tool_result.get("error", "Unknown error")
|
| 3241 |
+
print(f" ❌ Tool failed: {error_msg}")
|
| 3242 |
+
if hasattr(self, 'session') and self.session:
|
| 3243 |
+
progress_manager.emit(self.session.session_id, {
|
| 3244 |
+
'type': 'tool_failed',
|
| 3245 |
+
'tool': tool_name,
|
| 3246 |
+
'message': f"❌ FAILED: {tool_name}",
|
| 3247 |
+
'error': error_msg
|
| 3248 |
+
})
|
| 3249 |
+
|
| 3250 |
+
# Track in workflow history
|
| 3251 |
+
workflow_history.append({
|
| 3252 |
+
"iteration": iteration,
|
| 3253 |
+
"tool": tool_name,
|
| 3254 |
+
"arguments": tool_args,
|
| 3255 |
+
"result": tool_result
|
| 3256 |
+
})
|
| 3257 |
+
|
| 3258 |
+
# Update workflow state
|
| 3259 |
+
self._update_workflow_state(tool_name, tool_result)
|
| 3260 |
+
|
| 3261 |
+
# Checkpoint
|
| 3262 |
+
if tool_result.get("success", True):
|
| 3263 |
+
session_id = self.http_session_key or "default"
|
| 3264 |
+
self.recovery_manager.checkpoint_manager.save_checkpoint(
|
| 3265 |
+
session_id=session_id,
|
| 3266 |
+
workflow_state={
|
| 3267 |
+
'iteration': iteration,
|
| 3268 |
+
'workflow_history': workflow_history,
|
| 3269 |
+
'current_file': file_path,
|
| 3270 |
+
'task_description': question,
|
| 3271 |
+
'target_col': target_col
|
| 3272 |
+
},
|
| 3273 |
+
last_tool=tool_name,
|
| 3274 |
+
iteration=iteration
|
| 3275 |
+
)
|
| 3276 |
+
|
| 3277 |
+
# STEP 3: EVALUATE - What did we learn?
|
| 3278 |
+
print(f"📊 EVALUATE: Interpreting results...")
|
| 3279 |
+
|
| 3280 |
+
evaluation = evaluator.evaluate(
|
| 3281 |
+
question=question,
|
| 3282 |
+
tool_name=tool_name,
|
| 3283 |
+
arguments=tool_args,
|
| 3284 |
+
result=tool_result,
|
| 3285 |
+
findings=findings,
|
| 3286 |
+
result_compressor=lambda tn, r: self._compress_tool_result(
|
| 3287 |
+
tn, self._make_json_serializable(r)
|
| 3288 |
+
)
|
| 3289 |
+
)
|
| 3290 |
+
|
| 3291 |
+
print(f" Interpretation: {evaluation.interpretation}")
|
| 3292 |
+
print(f" Answered: {evaluation.answered} (confidence: {evaluation.confidence:.0%})")
|
| 3293 |
+
print(f" Should stop: {evaluation.should_stop}")
|
| 3294 |
+
if evaluation.next_questions:
|
| 3295 |
+
print(f" Next questions: {evaluation.next_questions}")
|
| 3296 |
+
|
| 3297 |
+
# Build finding and add to accumulator
|
| 3298 |
+
compressed_result = json.dumps(
|
| 3299 |
+
self._compress_tool_result(tool_name, self._make_json_serializable(tool_result)),
|
| 3300 |
+
default=str
|
| 3301 |
+
)
|
| 3302 |
+
|
| 3303 |
+
finding = evaluator.build_finding(
|
| 3304 |
+
iteration=iteration,
|
| 3305 |
+
hypothesis=hypothesis,
|
| 3306 |
+
tool_name=tool_name,
|
| 3307 |
+
arguments=tool_args,
|
| 3308 |
+
result_summary=compressed_result,
|
| 3309 |
+
evaluation=evaluation
|
| 3310 |
+
)
|
| 3311 |
+
findings.add_finding(finding)
|
| 3312 |
+
|
| 3313 |
+
# Emit finding for UI
|
| 3314 |
+
if hasattr(self, 'session') and self.session:
|
| 3315 |
+
progress_manager.emit(self.session.session_id, {
|
| 3316 |
+
'type': 'finding_discovered',
|
| 3317 |
+
'iteration': iteration,
|
| 3318 |
+
'interpretation': evaluation.interpretation,
|
| 3319 |
+
'confidence': evaluation.confidence,
|
| 3320 |
+
'answered': evaluation.answered
|
| 3321 |
+
})
|
| 3322 |
+
|
| 3323 |
+
# Check if we should stop
|
| 3324 |
+
if evaluation.should_stop:
|
| 3325 |
+
print(f"\n✅ Evaluator says: STOP (confidence: {evaluation.confidence:.0%})")
|
| 3326 |
+
break
|
| 3327 |
+
|
| 3328 |
+
# ── STEP 4: SYNTHESIZE - Build the final answer ──
|
| 3329 |
+
print(f"\n{'='*60}")
|
| 3330 |
+
print(f"📝 SYNTHESIZE: Building final answer from {len(findings.findings)} findings...")
|
| 3331 |
+
print(f"{'='*60}")
|
| 3332 |
+
|
| 3333 |
+
# Collect artifacts from workflow history
|
| 3334 |
+
artifacts = self._collect_artifacts(workflow_history)
|
| 3335 |
+
|
| 3336 |
+
# Generate synthesis
|
| 3337 |
+
if mode == "exploratory":
|
| 3338 |
+
summary_text = synthesizer.synthesize_exploratory(
|
| 3339 |
+
findings=findings,
|
| 3340 |
+
artifacts=artifacts
|
| 3341 |
+
)
|
| 3342 |
+
else:
|
| 3343 |
+
summary_text = synthesizer.synthesize(
|
| 3344 |
+
findings=findings,
|
| 3345 |
+
artifacts=artifacts
|
| 3346 |
+
)
|
| 3347 |
+
|
| 3348 |
+
# Also generate enhanced summary for plots/metrics extraction
|
| 3349 |
+
try:
|
| 3350 |
+
enhanced = self._generate_enhanced_summary(
|
| 3351 |
+
workflow_history, summary_text, question
|
| 3352 |
+
)
|
| 3353 |
+
plots_data = enhanced.get("plots", [])
|
| 3354 |
+
metrics_data = enhanced.get("metrics", {})
|
| 3355 |
+
artifacts_data = enhanced.get("artifacts", {})
|
| 3356 |
+
except Exception as e:
|
| 3357 |
+
print(f"⚠️ Enhanced summary generation failed: {e}")
|
| 3358 |
+
plots_data = []
|
| 3359 |
+
metrics_data = {}
|
| 3360 |
+
artifacts_data = {}
|
| 3361 |
+
|
| 3362 |
+
# Save to session
|
| 3363 |
+
if self.session:
|
| 3364 |
+
self.session.add_conversation(question, summary_text)
|
| 3365 |
+
self.session_store.save(self.session)
|
| 3366 |
+
|
| 3367 |
+
result = {
|
| 3368 |
+
"status": "success",
|
| 3369 |
+
"summary": summary_text,
|
| 3370 |
+
"metrics": metrics_data,
|
| 3371 |
+
"artifacts": artifacts_data,
|
| 3372 |
+
"plots": plots_data,
|
| 3373 |
+
"workflow_history": workflow_history,
|
| 3374 |
+
"findings": findings.to_dict(),
|
| 3375 |
+
"reasoning_trace": self.reasoning_trace.get_trace(),
|
| 3376 |
+
"reasoning_summary": self.reasoning_trace.get_trace_summary(),
|
| 3377 |
+
"execution_mode": mode,
|
| 3378 |
+
"iterations": findings.iteration_count,
|
| 3379 |
+
"api_calls": self.api_calls_made,
|
| 3380 |
+
"execution_time": round(time.time() - start_time, 2)
|
| 3381 |
+
}
|
| 3382 |
+
|
| 3383 |
+
print(f"\n✅ Reasoning loop completed in {result['execution_time']}s")
|
| 3384 |
+
print(f" Iterations: {findings.iteration_count}")
|
| 3385 |
+
print(f" Tools used: {', '.join(findings.tools_used)}")
|
| 3386 |
+
print(f" API calls: {self.api_calls_made}")
|
| 3387 |
+
|
| 3388 |
+
return result
|
| 3389 |
+
|
| 3390 |
+
def _collect_artifacts(self, workflow_history: List[Dict]) -> Dict[str, Any]:
|
| 3391 |
+
"""Collect plots, files, and other artifacts from workflow history."""
|
| 3392 |
+
plots = []
|
| 3393 |
+
files = []
|
| 3394 |
+
|
| 3395 |
+
for step in workflow_history:
|
| 3396 |
+
result = step.get("result", {})
|
| 3397 |
+
if not isinstance(result, dict):
|
| 3398 |
+
continue
|
| 3399 |
+
|
| 3400 |
+
result_data = result.get("result", result)
|
| 3401 |
+
if isinstance(result_data, dict):
|
| 3402 |
+
# Collect output files
|
| 3403 |
+
for key in ["output_file", "output_path", "report_path"]:
|
| 3404 |
+
if key in result_data and result_data[key]:
|
| 3405 |
+
files.append(result_data[key])
|
| 3406 |
+
|
| 3407 |
+
# Collect plots
|
| 3408 |
+
if "plots" in result_data:
|
| 3409 |
+
for plot in result_data["plots"]:
|
| 3410 |
+
if isinstance(plot, dict):
|
| 3411 |
+
plots.append(plot)
|
| 3412 |
+
elif isinstance(plot, str):
|
| 3413 |
+
plots.append({"path": plot, "title": step.get("tool", "Plot")})
|
| 3414 |
+
|
| 3415 |
+
# Check for HTML files (interactive plots)
|
| 3416 |
+
for key in ["html_path", "dashboard_path"]:
|
| 3417 |
+
if key in result_data and result_data[key]:
|
| 3418 |
+
plots.append({
|
| 3419 |
+
"path": result_data[key],
|
| 3420 |
+
"title": step.get("tool", "Interactive Plot"),
|
| 3421 |
+
"type": "html"
|
| 3422 |
+
})
|
| 3423 |
+
|
| 3424 |
+
return {"plots": plots, "files": files}
|
| 3425 |
+
|
| 3426 |
def analyze(self, file_path: str, task_description: str,
|
| 3427 |
target_col: Optional[str] = None,
|
| 3428 |
use_cache: bool = True,
|
|
|
|
| 3557 |
print("✓ Using cached results")
|
| 3558 |
return cached
|
| 3559 |
|
| 3560 |
+
# ═══════════════════════════════════════════════════════════════════════
|
| 3561 |
+
# 🧠 INTENT CLASSIFICATION → MODE SELECTION
|
| 3562 |
+
# Classify the user's request into one of three execution modes:
|
| 3563 |
+
# DIRECT: "Make a scatter plot" → existing pipeline
|
| 3564 |
+
# INVESTIGATIVE: "Why are customers churning?" → reasoning loop
|
| 3565 |
+
# EXPLORATORY: "Analyze this data" → hypothesis-driven loop
|
| 3566 |
+
# ═══════════════════════════════════════════════════════════════════════
|
| 3567 |
+
intent_classifier = IntentClassifier()
|
| 3568 |
+
intent_result = intent_classifier.classify(
|
| 3569 |
+
query=task_description,
|
| 3570 |
+
dataset_info=schema_info if 'error' not in schema_info else None,
|
| 3571 |
+
has_target_col=bool(target_col)
|
| 3572 |
+
)
|
| 3573 |
+
|
| 3574 |
+
print(f"\n🎯 Intent Classification:")
|
| 3575 |
+
print(f" Mode: {intent_result.mode.upper()}")
|
| 3576 |
+
print(f" Confidence: {intent_result.confidence:.0%}")
|
| 3577 |
+
print(f" Reasoning: {intent_result.reasoning}")
|
| 3578 |
+
print(f" Sub-intent: {intent_result.sub_intent}")
|
| 3579 |
+
|
| 3580 |
+
# Emit intent info for UI
|
| 3581 |
+
if hasattr(self, 'session') and self.session:
|
| 3582 |
+
progress_manager.emit(self.session.session_id, {
|
| 3583 |
+
'type': 'intent_classified',
|
| 3584 |
+
'mode': intent_result.mode,
|
| 3585 |
+
'confidence': intent_result.confidence,
|
| 3586 |
+
'reasoning': intent_result.reasoning,
|
| 3587 |
+
'sub_intent': intent_result.sub_intent
|
| 3588 |
+
})
|
| 3589 |
+
|
| 3590 |
+
# 📝 Record intent classification in reasoning trace
|
| 3591 |
+
self.reasoning_trace.trace_history.append({
|
| 3592 |
+
"type": "intent_classification",
|
| 3593 |
+
"query": task_description,
|
| 3594 |
+
"mode": intent_result.mode,
|
| 3595 |
+
"confidence": intent_result.confidence,
|
| 3596 |
+
"reasoning": intent_result.reasoning,
|
| 3597 |
+
"sub_intent": intent_result.sub_intent
|
| 3598 |
+
})
|
| 3599 |
+
|
| 3600 |
+
# ═══════════════════════════════════════════════════════════════════════
|
| 3601 |
+
# 🧠 REASONING LOOP PATH (Investigative / Exploratory modes)
|
| 3602 |
+
# ═══════════════════════════════════════════════════════════════════════
|
| 3603 |
+
if intent_result.mode in ("investigative", "exploratory"):
|
| 3604 |
+
print(f"\n🧠 Routing to REASONING LOOP ({intent_result.mode} mode)")
|
| 3605 |
+
|
| 3606 |
+
# Determine iteration count based on mode and reasoning effort
|
| 3607 |
+
if intent_result.mode == "exploratory":
|
| 3608 |
+
loop_max = min(max_iterations, 8) # Exploratory gets more iterations
|
| 3609 |
+
else:
|
| 3610 |
+
loop_max = min(max_iterations, 6) # Investigative is more focused
|
| 3611 |
+
|
| 3612 |
+
reasoning_result = self._run_reasoning_loop(
|
| 3613 |
+
question=task_description,
|
| 3614 |
+
file_path=file_path,
|
| 3615 |
+
dataset_info=schema_info if 'error' not in schema_info else {},
|
| 3616 |
+
target_col=target_col,
|
| 3617 |
+
mode=intent_result.mode,
|
| 3618 |
+
max_iterations=loop_max
|
| 3619 |
+
)
|
| 3620 |
+
|
| 3621 |
+
# Cache the result
|
| 3622 |
+
if use_cache and reasoning_result.get("status") == "success":
|
| 3623 |
+
self.cache.set(cache_key, reasoning_result, metadata={
|
| 3624 |
+
"file_path": file_path,
|
| 3625 |
+
"task": task_description,
|
| 3626 |
+
"mode": intent_result.mode
|
| 3627 |
+
})
|
| 3628 |
+
|
| 3629 |
+
return reasoning_result
|
| 3630 |
+
|
| 3631 |
+
# ═══════════════════════════════════════════════════════════════════════
|
| 3632 |
+
# 📋 DIRECT MODE PATH (existing pipeline - below is unchanged)
|
| 3633 |
+
# ═══════════════════════════════════════════════════════════════════════
|
| 3634 |
+
print(f"\n📋 Routing to DIRECT pipeline mode")
|
| 3635 |
+
|
| 3636 |
# Build initial messages
|
| 3637 |
# Use dynamic prompts for small context models
|
| 3638 |
if self.use_compact_prompts:
|
src/reasoning/__init__.py
CHANGED
|
@@ -17,7 +17,16 @@ Architecture:
|
|
| 17 |
|
| 18 |
Tool: "Here's what I found: {stats}"
|
| 19 |
Reasoning: "Based on these stats, this means..."
|
|
|
|
|
|
|
|
|
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
Usage:
|
| 22 |
from reasoning import get_reasoner
|
| 23 |
|
|
@@ -25,6 +34,12 @@ Usage:
|
|
| 25 |
result = reasoner.explain_data(
|
| 26 |
summary={"rows": 1000, "columns": 20, "missing": 50}
|
| 27 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
"""
|
| 29 |
|
| 30 |
import os
|
|
|
|
| 17 |
|
| 18 |
Tool: "Here's what I found: {stats}"
|
| 19 |
Reasoning: "Based on these stats, this means..."
|
| 20 |
+
|
| 21 |
+
Reasoning Loop (NEW):
|
| 22 |
+
REASON → ACT → EVALUATE → LOOP/STOP → SYNTHESIZE
|
| 23 |
|
| 24 |
+
Modules:
|
| 25 |
+
- findings.py: Accumulated evidence state (step tracker + decision ledger)
|
| 26 |
+
- reasoner.py: REASON step - picks next investigation action
|
| 27 |
+
- evaluator.py: EVALUATE step - interprets results, decides continue/stop
|
| 28 |
+
- synthesizer.py: SYNTHESIZE step - builds final answer from evidence
|
| 29 |
+
|
| 30 |
Usage:
|
| 31 |
from reasoning import get_reasoner
|
| 32 |
|
|
|
|
| 34 |
result = reasoner.explain_data(
|
| 35 |
summary={"rows": 1000, "columns": 20, "missing": 50}
|
| 36 |
)
|
| 37 |
+
|
| 38 |
+
# Reasoning Loop components:
|
| 39 |
+
from reasoning.findings import FindingsAccumulator
|
| 40 |
+
from reasoning.reasoner import Reasoner
|
| 41 |
+
from reasoning.evaluator import Evaluator
|
| 42 |
+
from reasoning.synthesizer import Synthesizer
|
| 43 |
"""
|
| 44 |
|
| 45 |
import os
|
src/reasoning/evaluator.py
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Evaluator Module - The EVALUATE step of the Reasoning Loop.
|
| 3 |
+
|
| 4 |
+
Interprets tool results and decides:
|
| 5 |
+
- What did we learn from this action?
|
| 6 |
+
- Does this answer the user's question?
|
| 7 |
+
- Should we continue investigating or stop?
|
| 8 |
+
- What follow-up questions emerged?
|
| 9 |
+
|
| 10 |
+
The Evaluator transforms raw tool output into understanding.
|
| 11 |
+
|
| 12 |
+
Architecture:
|
| 13 |
+
Tool Result → Evaluator.evaluate() → EvaluationOutput
|
| 14 |
+
- interpretation: natural language explanation
|
| 15 |
+
- answered: did this answer the question?
|
| 16 |
+
- confidence: how confident are we?
|
| 17 |
+
- should_stop: should the loop stop?
|
| 18 |
+
- next_questions: what to investigate next
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
import json
|
| 22 |
+
import re
|
| 23 |
+
from dataclasses import dataclass, field
|
| 24 |
+
from typing import Dict, Any, List, Optional, Callable
|
| 25 |
+
|
| 26 |
+
from .findings import Finding, FindingsAccumulator
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@dataclass
class EvaluationOutput:
    """Structured result of one EVALUATE step of the reasoning loop.

    Produced by Evaluator._parse_response() from the LLM's JSON reply and
    consumed by the loop controller (stop decision) and by
    Evaluator.build_finding() (turned into a Finding record).
    """
    interpretation: str               # What we learned from the tool result
    answered: bool                    # Does this answer the user's question?
    confidence: float                 # 0.0-1.0 confidence
    should_stop: bool                 # Should the reasoning loop stop?
    next_questions: List[str]         # Follow-up questions to investigate
    key_metric: Optional[str] = None  # Most important metric extracted
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
# System prompt for the Evaluator LLM call: forces a senior-analyst persona
# and JSON-only output (parsed by Evaluator._parse_response).
EVALUATOR_SYSTEM_PROMPT = """You are a senior data scientist interpreting analysis results.

Your job:
1. Interpret what the tool result MEANS (not just what it shows)
2. Decide if this answers the user's original question
3. Identify follow-up questions worth investigating
4. Assign confidence level to your interpretation

Be concise but insightful. Focus on:
- Statistical significance (not just numbers)
- Business implications (not just patterns)
- Confounders and caveats
- What's surprising vs expected

CRITICAL: Output ONLY valid JSON, no other text."""

# User-turn template. Placeholders are filled by Evaluator.evaluate();
# doubled braces {{ }} are literal braces in the JSON schema example.
EVALUATOR_USER_TEMPLATE = """**User's original question**: {question}

**Action taken**: {tool_name}({arguments})

**Tool result** (compressed):
{result_summary}

**What we knew before this step**:
{prior_findings}

Evaluate this result. Respond with ONLY this JSON:
{{
  "interpretation": "1-3 sentences: What does this result MEAN for answering the question?",
  "answered": true/false,
  "confidence": 0.0-1.0,
  "should_stop": true/false,
  "next_questions": ["follow-up question 1", "follow-up question 2"],
  "key_metric": "most important number or finding (optional)"
}}

Guidelines for should_stop:
- true: Question is fully answered OR we've gathered enough evidence OR no more useful actions
- false: Important aspects remain uninvestigated

Guidelines for confidence:
- 0.0-0.3: Weak evidence, need more investigation
- 0.3-0.6: Moderate evidence, some aspects unclear
- 0.6-0.8: Strong evidence, minor questions remain
- 0.8-1.0: Very strong evidence, question well answered"""
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
class Evaluator:
    """
    The EVALUATE step of the Reasoning Loop.

    Takes a tool result and interprets it in the context of
    the user's question and prior findings.

    Usage:
        evaluator = Evaluator(llm_caller=orchestrator._llm_text_call)
        evaluation = evaluator.evaluate(
            question="Why are customers churning?",
            tool_name="analyze_correlations",
            arguments={"file_path": "data.csv", "target_col": "churn"},
            result=tool_result,
            findings=findings_accumulator
        )

        if evaluation.should_stop:
            # Move to synthesis
            ...
        else:
            # Continue reasoning loop
            ...
    """

    def __init__(self, llm_caller: Callable):
        """
        Args:
            llm_caller: Function (system_prompt, user_prompt, max_tokens) -> str
        """
        self.llm_caller = llm_caller

    def evaluate(
        self,
        question: str,
        tool_name: str,
        arguments: Dict[str, Any],
        result: Dict[str, Any],
        findings: FindingsAccumulator,
        result_compressor: Optional[Callable] = None
    ) -> EvaluationOutput:
        """
        Evaluate a tool result.

        Args:
            question: User's original question
            tool_name: Name of the tool that was executed
            arguments: Tool arguments used
            result: Raw tool result dict
            findings: Accumulated findings so far
            result_compressor: Optional function (tool_name, result) -> dict
                used to compress tool results before prompting

        Returns:
            EvaluationOutput with interpretation and next steps
        """
        # Compress the result for LLM consumption
        if result_compressor:
            result_summary = json.dumps(result_compressor(tool_name, result), default=str)
        else:
            result_summary = self._default_compress(result)

        # Truncate if too long (keeps the prompt within context budget)
        if len(result_summary) > 3000:
            result_summary = result_summary[:3000] + "... [truncated]"

        # Build argument string, also capped
        args_str = json.dumps(arguments, default=str)
        if len(args_str) > 500:
            args_str = args_str[:500] + "..."

        user_prompt = EVALUATOR_USER_TEMPLATE.format(
            question=question,
            tool_name=tool_name,
            arguments=args_str,
            result_summary=result_summary,
            prior_findings=findings.get_context_for_reasoning(max_findings=3)
        )

        response_text = self.llm_caller(
            system_prompt=EVALUATOR_SYSTEM_PROMPT,
            user_prompt=user_prompt,
            max_tokens=1024
        )

        return self._parse_response(response_text, result_summary)

    def build_finding(
        self,
        iteration: int,
        hypothesis: str,
        tool_name: str,
        arguments: Dict[str, Any],
        result_summary: str,
        evaluation: "EvaluationOutput"
    ) -> Finding:
        """
        Build a Finding from a completed iteration.

        Convenience method that combines the action and evaluation
        into a single Finding for the accumulator.
        """
        return Finding(
            iteration=iteration,
            hypothesis=hypothesis,
            action=tool_name,
            arguments=arguments,
            result_summary=result_summary[:1000],  # Cap size
            interpretation=evaluation.interpretation,
            confidence=evaluation.confidence,
            answered_question=evaluation.answered,
            next_questions=evaluation.next_questions
        )

    def _parse_response(self, response_text: str, result_summary: str) -> EvaluationOutput:
        """Parse the LLM reply into an EvaluationOutput, tolerating malformed JSON.

        LLMs sometimes emit surrounding prose, string booleans ("true"),
        non-numeric confidences, or scalar next_questions — all are coerced
        defensively instead of raising.
        """
        try:
            data = json.loads(response_text.strip())
        except json.JSONDecodeError:
            # Try to extract the first balanced-looking JSON object
            json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', response_text, re.DOTALL)
            if json_match:
                try:
                    data = json.loads(json_match.group(0))
                except json.JSONDecodeError:
                    return self._fallback_evaluation(response_text, result_summary)
            else:
                return self._fallback_evaluation(response_text, result_summary)

        # json.loads may legally return a scalar or list — only dicts are usable
        if not isinstance(data, dict):
            return self._fallback_evaluation(response_text, result_summary)

        # FIX: float() on a non-numeric value previously raised; coerce safely
        try:
            confidence = float(data.get("confidence", 0.3))
        except (TypeError, ValueError):
            confidence = 0.3
        confidence = min(1.0, max(0.0, confidence))

        # Normalize next_questions to a list of strings
        next_questions = data.get("next_questions", [])
        if not isinstance(next_questions, list):
            next_questions = [str(next_questions)]

        key_metric = data.get("key_metric")

        return EvaluationOutput(
            interpretation=str(data.get("interpretation", "Result processed.")),
            answered=self._coerce_bool(data.get("answered", False)),
            confidence=confidence,
            should_stop=self._coerce_bool(data.get("should_stop", False)),
            next_questions=[str(q) for q in next_questions],
            key_metric=str(key_metric) if key_metric is not None else None
        )

    @staticmethod
    def _coerce_bool(value: Any) -> bool:
        """Interpret JSON booleans plus common string spellings ("true"/"yes"/"1")."""
        if isinstance(value, str):
            return value.strip().lower() in ("true", "yes", "1")
        return bool(value)

    def _fallback_evaluation(self, response_text: str, result_summary: str) -> EvaluationOutput:
        """Fallback when JSON parsing fails: keep the loop alive with low confidence."""
        # Use the raw response as interpretation
        interpretation = response_text.strip()[:500] if response_text else "Analysis step completed."

        return EvaluationOutput(
            interpretation=interpretation,
            answered=False,
            confidence=0.3,
            should_stop=False,
            next_questions=[],
            key_metric=None
        )

    def _default_compress(self, result: Dict[str, Any]) -> str:
        """Default compression for tool results: keep status plus known key metrics."""
        if not isinstance(result, dict):
            return str(result)[:2000]

        compressed = {}

        # Always include status
        if "success" in result:
            compressed["success"] = result["success"]
        if "error" in result:
            compressed["error"] = str(result["error"])[:300]

        # Include key result fields (allowlist of metrics the Evaluator cares about)
        result_data = result.get("result", result)
        if isinstance(result_data, dict):
            for key in ["num_rows", "num_columns", "missing_percentage", "task_type",
                        "best_model", "best_score", "models", "correlations",
                        "output_file", "output_path", "plots", "summary",
                        "total_issues", "columns_affected", "features_created",
                        "accuracy", "r2_score", "rmse", "f1_score"]:
                if key in result_data:
                    value = result_data[key]
                    # Truncate long values
                    if isinstance(value, (list, dict)):
                        compressed[key] = str(value)[:500]
                    else:
                        compressed[key] = value

        return json.dumps(compressed, default=str)
|
src/reasoning/findings.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Findings Accumulator - Core state for the Reasoning Loop.
|
| 3 |
+
|
| 4 |
+
Tracks everything discovered during investigation:
|
| 5 |
+
- Individual findings (action + result + interpretation)
|
| 6 |
+
- Hypotheses being tested
|
| 7 |
+
- Decision ledger (why each action was taken)
|
| 8 |
+
- Confidence tracking
|
| 9 |
+
|
| 10 |
+
This replaces the need for separate "step tracker" and "decision ledger" -
|
| 11 |
+
they're natural byproducts of the accumulated findings.
|
| 12 |
+
|
| 13 |
+
Architecture:
|
| 14 |
+
ReasoningLoop iteration 1: Reason → Act → Evaluate → Finding #1
|
| 15 |
+
ReasoningLoop iteration 2: Reason → Act → Evaluate → Finding #2
|
| 16 |
+
...
|
| 17 |
+
Synthesizer reads all findings → produces final answer
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
from dataclasses import dataclass, field
|
| 21 |
+
from typing import List, Dict, Any, Optional
|
| 22 |
+
from datetime import datetime
|
| 23 |
+
import json
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@dataclass
class Finding:
    """A single finding from one reasoning loop iteration.

    Records what was done (action + arguments), why (hypothesis), and what
    was concluded (interpretation + confidence), so the accumulated list of
    Findings doubles as a step tracker and a decision ledger.
    """
    iteration: int                  # Loop iteration that produced this finding
    hypothesis: str                 # What we were testing
    action: str                     # Tool name executed
    arguments: Dict[str, Any]       # Tool arguments used
    result_summary: str             # Compressed result (what tool returned)
    interpretation: str             # What we learned from this result
    confidence: float               # 0.0-1.0 confidence in this finding
    answered_question: bool         # Did this iteration answer the user's question?
    next_questions: List[str]       # Follow-up questions generated
    # Capture time; default_factory so each instance stamps its own creation time
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())

    def to_dict(self) -> Dict[str, Any]:
        # NOTE: serialized key is "answered", not "answered_question"
        return {
            "iteration": self.iteration,
            "hypothesis": self.hypothesis,
            "action": self.action,
            "arguments": self.arguments,
            "result_summary": self.result_summary,
            "interpretation": self.interpretation,
            "confidence": self.confidence,
            "answered": self.answered_question,
            "next_questions": self.next_questions,
            "timestamp": self.timestamp
        }
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
@dataclass
class Hypothesis:
    """A hypothesis being tested during exploration.

    Created either explicitly (add_hypothesis) or automatically from a
    Finding's next_questions; its status is advanced via
    FindingsAccumulator.update_hypothesis().
    """
    text: str
    status: str = "untested"  # untested, testing, supported, refuted, inconclusive
    evidence_for: List[str] = field(default_factory=list)      # supporting evidence strings
    evidence_against: List[str] = field(default_factory=list)  # contradicting evidence strings
    priority: float = 0.5     # 0.0-1.0, higher = investigate first
    source_iteration: int = 0  # Which iteration generated this hypothesis

    def to_dict(self) -> Dict[str, Any]:
        return {
            "text": self.text,
            "status": self.status,
            "evidence_for": self.evidence_for,
            "evidence_against": self.evidence_against,
            "priority": self.priority,
            "source_iteration": self.source_iteration
        }
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
class FindingsAccumulator:
    """
    Accumulates findings across the reasoning loop.

    This is the central state object that the Reasoner reads from and
    the Evaluator writes to. It serves as:
    - Step tracker (each finding records what was done)
    - Decision ledger (each finding records WHY it was done)
    - Evidence accumulator (interpretations build the answer)
    - Hypothesis manager (for exploratory analysis)

    Usage:
        findings = FindingsAccumulator(question="Why are customers churning?")

        # After each iteration:
        findings.add_finding(Finding(...))

        # For the Reasoner prompt:
        context = findings.get_context_for_reasoning()

        # For the Synthesizer:
        all_findings = findings.get_all_findings()
    """

    def __init__(self, question: str, mode: str = "investigative"):
        """
        Initialize findings accumulator.

        Args:
            question: The user's original question
            mode: "investigative" or "exploratory"
        """
        self.question = question
        self.mode = mode
        self.findings: List[Finding] = []
        self.hypotheses: List[Hypothesis] = []
        self.tools_used: List[str] = []
        self.files_produced: List[str] = []
        self.is_answered = False
        self.answer_confidence = 0.0
        self.started_at = datetime.now().isoformat()

    @property
    def iteration_count(self) -> int:
        """Number of completed iterations."""
        return len(self.findings)

    def add_finding(self, finding: Finding):
        """Add a finding from a completed iteration.

        Side effects: records the tool name, updates answer progress, and
        promotes the finding's next_questions into untested hypotheses.
        """
        self.findings.append(finding)

        if finding.action not in self.tools_used:
            self.tools_used.append(finding.action)

        # Track answer progress (confidence only ever ratchets upward)
        if finding.answered_question:
            self.is_answered = True
            self.answer_confidence = max(self.answer_confidence, finding.confidence)

        # Add new hypotheses from next_questions (deduplicated by text)
        for q in finding.next_questions:
            if not any(h.text == q for h in self.hypotheses):
                self.hypotheses.append(Hypothesis(
                    text=q,
                    status="untested",
                    priority=0.5,
                    source_iteration=finding.iteration
                ))

    def add_hypothesis(self, text: str, priority: float = 0.5, source_iteration: int = 0):
        """Add a hypothesis to test (no-op if the same text already exists)."""
        if not any(h.text == text for h in self.hypotheses):
            self.hypotheses.append(Hypothesis(
                text=text,
                status="untested",
                priority=priority,
                source_iteration=source_iteration
            ))

    def update_hypothesis(self, text: str, status: str, evidence: str, is_supporting: bool = True):
        """Update a hypothesis with new evidence. Silently ignores unknown text."""
        for h in self.hypotheses:
            if h.text == text:
                h.status = status
                if is_supporting:
                    h.evidence_for.append(evidence)
                else:
                    h.evidence_against.append(evidence)
                return

    def get_untested_hypotheses(self) -> List[Hypothesis]:
        """Get hypotheses that haven't been tested yet, sorted by priority (desc)."""
        untested = [h for h in self.hypotheses if h.status == "untested"]
        return sorted(untested, key=lambda h: h.priority, reverse=True)

    def get_last_output_file(self) -> Optional[str]:
        """Get the most recent output file from tool results.

        Scans findings newest-first; falls back to file-path-like arguments
        when the result summary doesn't expose an output file.
        """
        for finding in reversed(self.findings):
            # Check if result mentions an output file
            result = finding.result_summary
            if "output_file" in result or "output_path" in result:
                try:
                    # Try to parse as JSON
                    result_dict = json.loads(result) if isinstance(result, str) else result
                    # FIX: json.loads can return a scalar/list; .get on those
                    # raised an uncaught AttributeError before this guard
                    if isinstance(result_dict, dict):
                        return result_dict.get("output_file") or result_dict.get("output_path")
                except (json.JSONDecodeError, TypeError):
                    pass
            # Check arguments for file paths
            for key in ["file_path", "input_path"]:
                if key in finding.arguments:
                    return finding.arguments[key]
        return None

    def get_context_for_reasoning(self, max_findings: int = 5) -> str:
        """
        Build context string for the Reasoner's prompt.

        Returns a concise summary of what's been discovered so far,
        formatted for LLM consumption.

        Args:
            max_findings: Maximum number of recent findings to include
        """
        if not self.findings:
            return "No investigations completed yet. This is the first step."

        parts = []

        # Summary of what's been done
        parts.append(f"**Investigations completed**: {len(self.findings)}")
        parts.append(f"**Tools used**: {', '.join(self.tools_used)}")

        # Recent findings (most relevant for next decision)
        recent = self.findings[-max_findings:]
        parts.append("\n**Recent findings**:")
        for f in recent:
            parts.append(
                f"  Step {f.iteration}: Ran `{f.action}` to test: \"{f.hypothesis}\"\n"
                f"  → Result: {f.interpretation}\n"
                f"  → Confidence: {f.confidence:.0%}"
            )

        # Unanswered questions
        untested = self.get_untested_hypotheses()
        if untested:
            parts.append(f"\n**Open questions** ({len(untested)} remaining):")
            for h in untested[:3]:
                parts.append(f"  - {h.text} (priority: {h.priority:.1f})")

        # Overall progress
        if self.is_answered:
            parts.append(f"\n**Status**: Question partially answered (confidence: {self.answer_confidence:.0%})")
        else:
            parts.append(f"\n**Status**: Still investigating")

        return "\n".join(parts)

    def get_context_for_synthesis(self) -> str:
        """
        Build context string for the Synthesizer.

        Returns the complete investigative history with all findings
        and hypothesis statuses.
        """
        parts = []

        parts.append(f"**Original question**: {self.question}")
        parts.append(f"**Mode**: {self.mode}")
        parts.append(f"**Total iterations**: {len(self.findings)}")
        parts.append(f"**Tools used**: {', '.join(self.tools_used)}")

        # All findings in order
        parts.append("\n## Investigation Steps\n")
        for f in self.findings:
            parts.append(
                f"### Step {f.iteration}: {f.action}\n"
                f"**Hypothesis**: {f.hypothesis}\n"
                f"**Arguments**: {json.dumps(f.arguments, default=str)}\n"
                f"**Result**: {f.result_summary}\n"
                f"**Interpretation**: {f.interpretation}\n"
                f"**Confidence**: {f.confidence:.0%}\n"
            )

        # Hypothesis outcomes (for exploratory mode)
        if self.hypotheses:
            parts.append("\n## Hypothesis Outcomes\n")
            for h in self.hypotheses:
                status_emoji = {
                    "supported": "✅",
                    "refuted": "❌",
                    "inconclusive": "❓",
                    "testing": "🔄",
                    "untested": "⬜"
                }.get(h.status, "⬜")

                parts.append(f"{status_emoji} **{h.text}** → {h.status}")
                if h.evidence_for:
                    parts.append(f"  Evidence for: {'; '.join(h.evidence_for)}")
                if h.evidence_against:
                    parts.append(f"  Evidence against: {'; '.join(h.evidence_against)}")

        return "\n".join(parts)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize for API response / session storage."""
        return {
            "question": self.question,
            "mode": self.mode,
            "iteration_count": self.iteration_count,
            "is_answered": self.is_answered,
            "answer_confidence": self.answer_confidence,
            "tools_used": self.tools_used,
            "files_produced": self.files_produced,
            "findings": [f.to_dict() for f in self.findings],
            "hypotheses": [h.to_dict() for h in self.hypotheses],
            "started_at": self.started_at
        }
|
src/reasoning/reasoner.py
ADDED
|
@@ -0,0 +1,344 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Reasoner Module - The REASON step of the Reasoning Loop.
|
| 3 |
+
|
| 4 |
+
Decides what to investigate next based on:
|
| 5 |
+
- The user's original question
|
| 6 |
+
- What we've discovered so far (findings)
|
| 7 |
+
- Available tools
|
| 8 |
+
- Dataset schema
|
| 9 |
+
|
| 10 |
+
The Reasoner does NOT execute anything. It only produces a structured
|
| 11 |
+
decision about what action to take next.
|
| 12 |
+
|
| 13 |
+
Architecture:
|
| 14 |
+
Reasoner.reason() → ReasoningOutput
|
| 15 |
+
- status: "investigating" | "done"
|
| 16 |
+
- reasoning: why this action (decision ledger entry)
|
| 17 |
+
- tool_name: which tool to run
|
| 18 |
+
- arguments: tool arguments
|
| 19 |
+
- hypothesis: what we're testing
|
| 20 |
+
|
| 21 |
+
This replaces the old approach where a massive system prompt told the LLM
|
| 22 |
+
"follow steps 1-15." Instead, the Reasoner makes a strategic decision
|
| 23 |
+
each iteration based on what it's learned so far.
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
import json
|
| 27 |
+
import re
|
| 28 |
+
from dataclasses import dataclass, field
|
| 29 |
+
from typing import Dict, Any, List, Optional, Callable
|
| 30 |
+
|
| 31 |
+
from .findings import FindingsAccumulator
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
@dataclass
class ReasoningOutput:
    """Structured decision emitted by one REASON step of the loop.

    Carries the chosen action (or a terminal "done" signal), plus the
    rationale that becomes the decision-ledger entry for this iteration.
    """
    status: str                # "investigating" or "done"
    reasoning: str             # Why this action was chosen
    tool_name: Optional[str]   # Tool to execute (None if done)
    arguments: Dict[str, Any]  # Tool arguments
    hypothesis: str            # What we're testing with this action
    confidence: float = 0.0    # How confident the reasoner is (0-1)

    @classmethod
    def done(cls, reasoning: str, confidence: float = 0.8) -> "ReasoningOutput":
        """Build a terminal output: no further investigation is needed."""
        terminal_fields = {
            "status": "done",
            "reasoning": reasoning,
            "tool_name": None,
            "arguments": {},
            "hypothesis": "",
            "confidence": confidence,
        }
        return cls(**terminal_fields)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
# System prompt for the Reasoner LLM call.
# NOTE(review): original indentation inside these triple-quoted strings was
# lost in extraction; reconstructed here — whitespace is not semantically
# significant to the LLM, but confirm against the committed file.
REASONER_SYSTEM_PROMPT = """You are a senior data scientist. Your job is to decide the SINGLE MOST IMPORTANT next investigation step.

You are given:
1. The user's question
2. What has been discovered so far
3. The dataset schema
4. Available tools

Your task: Decide ONE action to take next. Be strategic:
- Start with understanding (profiling, correlations) before acting
- Test the most impactful hypothesis first
- Don't repeat actions that have already been done
- Stop when you have enough evidence to answer the question confidently

CRITICAL RULES:
- Output ONLY valid JSON, no other text
- Use EXACT tool names from the available tools list
- Use EXACT column names from the dataset schema
- The file_path argument should use the most recent output file when available
- For visualization, pick the chart type that best answers the question
- NEVER hallucinate column names - use only columns from the schema"""

# Per-iteration user prompt; filled in by Reasoner.reason(). The doubled
# braces ({{ }}) are literal braces in the rendered JSON example.
REASONER_USER_TEMPLATE = """**User's question**: {question}

**Dataset info**:
- File: {file_path}
- Rows: {num_rows:,} | Columns: {num_columns}
- Numeric columns: {numeric_columns}
- Categorical columns: {categorical_columns}
{target_info}

**Investigation so far**:
{findings_context}

**Available tools**:
{tools_description}

Decide the next action. Respond with ONLY this JSON:
{{
  "status": "investigating" or "done",
  "reasoning": "1-2 sentence explanation of why this action is needed",
  "tool_name": "exact_tool_name",
  "arguments": {{"arg1": "value1", "arg2": "value2"}},
  "hypothesis": "what we expect to learn from this action"
}}

If you have enough evidence to answer the user's question, respond:
{{
  "status": "done",
  "reasoning": "We have sufficient evidence because...",
  "tool_name": null,
  "arguments": {{}},
  "hypothesis": ""
}}"""


# System prompt for generating hypotheses (Exploratory mode)
HYPOTHESIS_SYSTEM_PROMPT = """You are a senior data scientist examining a dataset for the first time.
Given the dataset profile, generate 3-5 hypotheses worth investigating.

Focus on:
- Surprising patterns (unexpected correlations, outliers)
- Business-relevant relationships (what drives the target variable?)
- Data quality issues that could affect analysis
- Distribution anomalies

Output ONLY valid JSON array of hypotheses, ranked by priority (most interesting first)."""

# User prompt for hypothesis generation; filled in by
# Reasoner.generate_hypotheses().
HYPOTHESIS_USER_TEMPLATE = """**Dataset**: {file_path}
- Rows: {num_rows:,} | Columns: {num_columns}
- Numeric: {numeric_columns}
- Categorical: {categorical_columns}
{target_info}
{profile_summary}

Generate hypotheses as JSON:
[
  {{"text": "hypothesis description", "priority": 0.9, "suggested_tool": "tool_name"}},
  ...
]"""
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
class Reasoner:
    """
    The REASON step of the Reasoning Loop.

    Makes a strategic decision about what to investigate next,
    based on the user's question and accumulated findings.

    Usage:
        reasoner = Reasoner(llm_caller=orchestrator._llm_text_call)
        output = reasoner.reason(
            question="Why are customers churning?",
            dataset_info=schema_info,
            findings=findings_accumulator,
            available_tools=tools_description,
            file_path="data.csv"
        )

        if output.status == "investigating":
            result = execute_tool(output.tool_name, output.arguments)
        else:
            # Done investigating, synthesize answer
            ...
    """

    def __init__(self, llm_caller: Callable):
        """
        Args:
            llm_caller: Function (system_prompt, user_prompt, max_tokens) -> str
                Wraps the orchestrator's provider-specific LLM call.
        """
        self.llm_caller = llm_caller

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def reason(
        self,
        question: str,
        dataset_info: Dict[str, Any],
        findings: FindingsAccumulator,
        available_tools: str,
        file_path: str,
        target_col: Optional[str] = None
    ) -> ReasoningOutput:
        """
        Decide the next investigation step.

        Args:
            question: User's original question
            dataset_info: Dataset schema (columns, types, stats)
            findings: Accumulated findings from previous iterations
            available_tools: Text description of available tools
            file_path: Current file path (latest output or original)
            target_col: Optional target column

        Returns:
            ReasoningOutput with the next action to take
        """
        user_prompt = REASONER_USER_TEMPLATE.format(
            question=question,
            file_path=file_path,
            num_rows=dataset_info.get("num_rows", 0),
            num_columns=dataset_info.get("num_columns", 0),
            numeric_columns=self._format_columns(dataset_info.get("numeric_columns", [])),
            categorical_columns=self._format_columns(dataset_info.get("categorical_columns", [])),
            target_info=self._target_info(target_col),
            findings_context=findings.get_context_for_reasoning(),
            tools_description=available_tools
        )

        response_text = self.llm_caller(
            system_prompt=REASONER_SYSTEM_PROMPT,
            user_prompt=user_prompt,
            max_tokens=1024
        )

        return self._parse_response(response_text, file_path)

    def generate_hypotheses(
        self,
        dataset_info: Dict[str, Any],
        file_path: str,
        target_col: Optional[str] = None,
        profile_summary: str = ""
    ) -> List[Dict[str, Any]]:
        """
        Generate hypotheses for exploratory analysis.

        Called at the start of Exploratory mode to seed the
        reasoning loop with interesting questions to investigate.

        Args:
            dataset_info: Dataset schema
            file_path: Path to dataset
            target_col: Optional target column
            profile_summary: Optional profiling results summary

        Returns:
            List of hypothesis dicts with text, priority, suggested_tool
        """
        user_prompt = HYPOTHESIS_USER_TEMPLATE.format(
            file_path=file_path,
            num_rows=dataset_info.get("num_rows", 0),
            num_columns=dataset_info.get("num_columns", 0),
            numeric_columns=self._format_columns(dataset_info.get("numeric_columns", [])),
            categorical_columns=self._format_columns(dataset_info.get("categorical_columns", [])),
            target_info=self._target_info(target_col),
            profile_summary=profile_summary or "No profile available yet."
        )

        response_text = self.llm_caller(
            system_prompt=HYPOTHESIS_SYSTEM_PROMPT,
            user_prompt=user_prompt,
            max_tokens=1024
        )

        return self._parse_hypotheses(response_text)

    # ------------------------------------------------------------------
    # Prompt-building helpers (shared by reason() and generate_hypotheses())
    # ------------------------------------------------------------------

    @staticmethod
    def _format_columns(columns: List[str], limit: int = 15) -> str:
        """Render up to `limit` column names as a quoted, comma-separated list."""
        return ", ".join(f"'{c}'" for c in columns[:limit])

    @staticmethod
    def _target_info(target_col: Optional[str]) -> str:
        """Render the optional target-column line for the prompt templates."""
        return f"- Target column: '{target_col}'" if target_col else ""

    # ------------------------------------------------------------------
    # Response parsing
    # ------------------------------------------------------------------

    def _parse_response(self, response_text: str, file_path: str) -> ReasoningOutput:
        """Parse LLM response into ReasoningOutput.

        Degrades gracefully: any unparseable or non-object response falls
        back to a safe default action (profiling) instead of raising, so a
        malformed LLM reply never crashes the reasoning loop.
        """
        data = self._extract_json_object(response_text)
        if data is None:
            # Fallback: return a profiling action
            return ReasoningOutput(
                status="investigating",
                reasoning="Could not parse LLM response, defaulting to profiling",
                tool_name="profile_dataset",
                arguments={"file_path": file_path},
                hypothesis="Understanding the data structure first"
            )

        status = data.get("status", "investigating")
        tool_name = data.get("tool_name")

        # Guard against "arguments": null or a non-dict value from the LLM.
        arguments = data.get("arguments") or {}
        if not isinstance(arguments, dict):
            arguments = {}

        # Ensure file_path is in arguments if tool needs it
        if tool_name and "file_path" not in arguments and tool_name not in [
            "execute_python_code", "get_smart_summary"
        ]:
            arguments["file_path"] = file_path

        return ReasoningOutput(
            status=status,
            reasoning=data.get("reasoning", ""),
            tool_name=tool_name if status == "investigating" else None,
            arguments=arguments,
            hypothesis=data.get("hypothesis", ""),
            confidence=data.get("confidence", 0.5)
        )

    @staticmethod
    def _extract_json_object(response_text: str) -> Optional[Dict[str, Any]]:
        """Best-effort extraction of a single JSON object from LLM output.

        Tries a direct parse first, then scans for the first brace-balanced
        object embedded in markdown/prose. Returns None when nothing usable
        is found (callers handle the fallback in one place, instead of the
        previous duplicated fallback branches).
        """
        try:
            parsed = json.loads(response_text.strip())
            # A bare JSON array/string is not a valid decision object.
            return parsed if isinstance(parsed, dict) else None
        except json.JSONDecodeError:
            pass

        # Matches one object with at most one level of nested braces
        # (enough for the {"arguments": {...}} shape we request).
        json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', response_text, re.DOTALL)
        if json_match:
            try:
                parsed = json.loads(json_match.group(0))
                return parsed if isinstance(parsed, dict) else None
            except json.JSONDecodeError:
                pass
        return None

    def _parse_hypotheses(self, response_text: str) -> List[Dict[str, Any]]:
        """Parse hypothesis generation response.

        Falls back to a generic hypothesis set when the LLM output is not a
        valid JSON array, so exploratory mode always has something to do.
        """
        try:
            data = json.loads(response_text.strip())
            if isinstance(data, list):
                return data
        except json.JSONDecodeError:
            pass

        # Try to extract JSON array
        array_match = re.search(r'\[.*\]', response_text, re.DOTALL)
        if array_match:
            try:
                data = json.loads(array_match.group(0))
                if isinstance(data, list):
                    return data
            except json.JSONDecodeError:
                pass

        # Fallback: generate basic hypotheses
        return [
            {"text": "What are the key statistical properties of this dataset?", "priority": 0.9, "suggested_tool": "profile_dataset"},
            {"text": "Are there any significant correlations between variables?", "priority": 0.8, "suggested_tool": "analyze_correlations"},
            {"text": "What does the distribution of key variables look like?", "priority": 0.7, "suggested_tool": "generate_eda_plots"}
        ]
|
src/reasoning/synthesizer.py
ADDED
|
@@ -0,0 +1,195 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Synthesizer Module - The SYNTHESIZE step of the Reasoning Loop.
|
| 3 |
+
|
| 4 |
+
Takes all accumulated findings and produces a coherent, narrative answer.
|
| 5 |
+
|
| 6 |
+
Unlike the old approach (where the LLM's last response WAS the summary),
|
| 7 |
+
the Synthesizer deliberately constructs the answer from evidence:
|
| 8 |
+
- Connects findings into a coherent story
|
| 9 |
+
- Cites evidence for each claim
|
| 10 |
+
- Highlights confidence levels
|
| 11 |
+
- Notes what wasn't investigated (limitations)
|
| 12 |
+
- Produces actionable insights, not just numbers
|
| 13 |
+
|
| 14 |
+
Architecture:
|
| 15 |
+
FindingsAccumulator → Synthesizer.synthesize() → Markdown narrative
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
import json
|
| 19 |
+
from typing import Dict, Any, List, Optional, Callable
|
| 20 |
+
|
| 21 |
+
from .findings import FindingsAccumulator
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# System prompt for the final SYNTHESIZE step (question-driven mode).
SYNTHESIS_SYSTEM_PROMPT = """You are a senior data scientist writing a concise analysis report.

Given the investigation findings, synthesize a clear, evidence-based answer to the user's question.

STRUCTURE (use markdown):
1. **Executive Summary** (2-3 sentences answering the question directly)
2. **Key Findings** (bullet points with evidence references)
3. **Supporting Evidence** (specific metrics, correlations, patterns)
4. **Visualizations** (mention any plots/charts generated, with file paths)
5. **Limitations & Caveats** (what we didn't investigate, caveats)
6. **Recommendations** (actionable next steps)

RULES:
- Lead with the answer, then show evidence
- Use specific numbers (not "high correlation" but "r=0.72")
- Mention generated files/plots so user can find them
- Be honest about confidence levels
- Keep it under 500 words unless complex analysis warrants more
- Use markdown formatting (headers, bullets, bold for emphasis)"""

# User prompt template; filled in by Synthesizer.synthesize().
SYNTHESIS_USER_TEMPLATE = """**Original question**: {question}

**Investigation summary**:
{findings_context}

**Generated artifacts**:
{artifacts_summary}

Write the analysis report now. Focus on answering the question with evidence from the investigation."""
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
class Synthesizer:
    """
    The SYNTHESIZE step of the Reasoning Loop.

    Produces the final answer from accumulated evidence.

    Usage:
        synthesizer = Synthesizer(llm_caller=orchestrator._llm_text_call)
        report = synthesizer.synthesize(
            findings=findings_accumulator,
            artifacts={"plots": [...], "files": [...]}
        )
    """

    def __init__(self, llm_caller: Callable):
        """
        Args:
            llm_caller: Function (system_prompt, user_prompt, max_tokens) -> str
        """
        self.llm_caller = llm_caller

    def synthesize(
        self,
        findings: FindingsAccumulator,
        artifacts: Optional[Dict[str, Any]] = None,
        max_tokens: int = 3000
    ) -> str:
        """
        Synthesize all findings into a coherent answer.

        Args:
            findings: Accumulated findings from the reasoning loop
            artifacts: Optional dict of generated artifacts (plots, files, models)
            max_tokens: Max tokens for synthesis response

        Returns:
            Markdown-formatted analysis report
        """
        # Build artifacts summary
        artifacts_summary = self._format_artifacts(artifacts or {}, findings)

        user_prompt = SYNTHESIS_USER_TEMPLATE.format(
            question=findings.question,
            findings_context=findings.get_context_for_synthesis(),
            artifacts_summary=artifacts_summary
        )

        response = self.llm_caller(
            system_prompt=SYNTHESIS_SYSTEM_PROMPT,
            user_prompt=user_prompt,
            max_tokens=max_tokens
        )

        return response.strip()

    def synthesize_exploratory(
        self,
        findings: FindingsAccumulator,
        artifacts: Optional[Dict[str, Any]] = None,
        max_tokens: int = 3000
    ) -> str:
        """
        Synthesize findings from exploratory analysis (no specific question).

        Uses a different prompt that focuses on discovering patterns
        rather than answering a specific question.
        """
        exploratory_system = """You are a senior data scientist presenting exploratory analysis results.

The user asked for a general analysis. Present the most interesting discoveries.

STRUCTURE (use markdown):
1. **Dataset Overview** (size, structure, key characteristics)
2. **Most Interesting Discoveries** (ranked by insight value)
3. **Key Patterns & Relationships** (correlations, distributions, trends)
4. **Data Quality Notes** (missing data, outliers, issues found)
5. **Visualizations Generated** (list with descriptions)
6. **Recommended Next Steps** (what to investigate deeper)

RULES:
- Lead with the most surprising/important finding
- Use specific numbers and metrics
- Mention all generated visualizations with file paths
- Suggest actionable next analysis steps
- Keep it engaging but data-driven"""

        artifacts_summary = self._format_artifacts(artifacts or {}, findings)

        user_prompt = f"""**Analysis request**: {findings.question}

**Investigation summary**:
{findings.get_context_for_synthesis()}

**Generated artifacts**:
{artifacts_summary}

Write the exploratory analysis report."""

        response = self.llm_caller(
            system_prompt=exploratory_system,
            user_prompt=user_prompt,
            max_tokens=max_tokens
        )

        return response.strip()

    def _format_artifacts(self, artifacts: Dict[str, Any], findings: FindingsAccumulator) -> str:
        """Format artifacts for the synthesis prompt.

        Combines explicitly passed artifacts (plots/files) with any outputs
        detected in the findings history, plus the list of tools used.
        """
        parts = []

        # Plots passed in explicitly by the orchestrator.
        plots = artifacts.get("plots", [])
        if plots:
            parts.append("**Plots generated**:")
            for plot in plots:
                if isinstance(plot, dict):
                    parts.append(f" - {plot.get('title', 'Plot')}: {plot.get('url', plot.get('path', 'N/A'))}")
                else:
                    parts.append(f" - {plot}")

        # Output files passed in explicitly.
        files = artifacts.get("files", [])
        if files:
            parts.append("**Output files**:")
            for f in files:
                parts.append(f" - {f}")

        # Extract from findings history.
        # BUGFIX: result_summary may be a dict; the old checks
        # `".html" in result` / `".png" in result` only tested dict *keys*.
        # Stringify first so substring checks also see values (and still
        # work if result_summary is already a string).
        for finding in findings.findings:
            result_text = str(finding.result_summary)
            if any(marker in result_text for marker in ("output_file", "output_path", ".html", ".png")):
                parts.append(f" - Step {finding.iteration} ({finding.action}): output in result")

        # Tools used summary
        if findings.tools_used:
            parts.append(f"\n**Tools used**: {', '.join(findings.tools_used)}")

        if not parts:
            return "No artifacts generated yet."

        return "\n".join(parts)
|
src/routing/__init__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Routing Module - Intent Classification and Request Routing.
|
| 3 |
+
|
| 4 |
+
Determines how the orchestrator should handle a user request:
|
| 5 |
+
- Direct: SBERT routing → tool execution (existing pipeline)
|
| 6 |
+
- Investigative: Reasoning loop with hypothesis testing
|
| 7 |
+
- Exploratory: Auto-hypothesis generation → reasoning loop
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from .intent_classifier import IntentClassifier, IntentResult
|
| 11 |
+
|
| 12 |
+
__all__ = ["IntentClassifier", "IntentResult"]
|
src/routing/intent_classifier.py
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Intent Classifier - Determines execution mode for the Reasoning Loop.
|
| 3 |
+
|
| 4 |
+
Three execution modes:
|
| 5 |
+
1. DIRECT: "Make a scatter plot" → SBERT routing → tool → done
|
| 6 |
+
- Clear, specific command with obvious tool mapping
|
| 7 |
+
- No reasoning loop needed
|
| 8 |
+
|
| 9 |
+
2. INVESTIGATIVE: "Why are customers churning?" → reasoning loop
|
| 10 |
+
- Analytical question requiring hypothesis testing
|
| 11 |
+
- Reasoning loop drives tool selection
|
| 12 |
+
|
| 13 |
+
3. EXPLORATORY: "Analyze this data" → auto-hypothesis → reasoning loop
|
| 14 |
+
- Open-ended request with no specific question
|
| 15 |
+
- First profiles data, generates hypotheses, then investigates
|
| 16 |
+
|
| 17 |
+
The classifier uses keyword patterns + semantic features to decide.
|
| 18 |
+
This is a lightweight classification (no LLM call needed).
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
import re
|
| 22 |
+
from typing import Optional, Dict, Any, Tuple
|
| 23 |
+
from dataclasses import dataclass
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@dataclass
class IntentResult:
    """Result of intent classification.

    Returned by IntentClassifier.classify(); the orchestrator routes the
    request based on ``mode``.
    """
    mode: str  # "direct", "investigative", "exploratory"
    confidence: float  # 0.0-1.0
    reasoning: str  # Why this mode was chosen
    sub_intent: Optional[str]  # More specific intent (e.g., "visualization", "cleaning")
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# Patterns that indicate DIRECT mode (specific tool commands).
# Each entry is (regex, sub_intent); matched case-insensitively by
# IntentClassifier._match_patterns, first match wins.
DIRECT_PATTERNS = [
    # Visualization commands
    (r"\b(make|create|generate|build|show|draw|plot)\b.*(scatter|histogram|heatmap|box\s*plot|bar\s*chart|pie\s*chart|line\s*chart|dashboard|time\s*series)", "visualization"),
    (r"\b(scatter|histogram|heatmap|boxplot|bar\s*chart)\b.*\b(of|for|between|showing)\b", "visualization"),

    # Data cleaning commands
    (r"\b(clean|remove|drop|fill|impute|handle)\b.*(missing|null|nan|outlier|duplicate)", "cleaning"),
    (r"\b(fix|convert|change)\b.*(data\s*type|dtype|column\s*type)", "cleaning"),

    # Feature engineering commands
    (r"\b(create|add|extract|generate)\b.*(feature|time\s*feature|interaction|encoding)", "feature_engineering"),
    (r"\b(encode|one-hot|label\s*encode|ordinal)\b.*\b(categorical|column)", "feature_engineering"),

    # Model training commands
    (r"\b(train|build|fit|run)\b.*(model|classifier|regressor|baseline|xgboost|random\s*forest)", "training"),
    (r"\b(tune|optimize)\b.*\b(hyperparameter|model|parameter)", "training"),
    (r"\b(cross[\s-]?valid)", "training"),

    # Profiling commands
    (r"\b(profile|describe|summarize)\b.*\b(dataset|data|table|file)", "profiling"),
    (r"\b(data\s*quality|quality\s*check|check\s*quality)", "profiling"),

    # Report generation
    (r"\b(generate|create|build)\b.*\b(report|eda\s*report|profiling\s*report)", "reporting"),
]

# Patterns that indicate INVESTIGATIVE mode (analytical questions).
# Checked only after DIRECT_PATTERNS fails to match.
INVESTIGATIVE_PATTERNS = [
    # Causal / explanatory questions
    (r"\bwhy\b.*(are|is|do|does|did)\b", "causal"),
    (r"\bwhat\b.*(cause|driv|factor|reason|explain|lead)", "causal"),
    (r"\bwhat\b.*(affect|impact|influence|determine)", "causal"),

    # Relationship / correlation questions
    (r"\bhow\b.*(does|do|is|are)\b.*\b(relate|correlat|affect|impact|change|vary)", "relationship"),
    (r"\b(relationship|correlation|association)\b.*\bbetween\b", "relationship"),

    # Comparison questions
    (r"\b(differ|compar|contrast)\b.*\bbetween\b", "comparison"),
    (r"\bwhich\b.*(better|worse|higher|lower|more|less|best|worst)", "comparison"),

    # Pattern / trend questions
    (r"\b(pattern|trend|anomal|outlier|unusual|interesting)\b", "pattern"),
    (r"\bis\s+there\b.*(pattern|trend|relationship|correlation|difference)", "pattern"),

    # Prediction-oriented questions (but NOT direct "train a model" commands)
    (r"\bcan\s+(we|i|you)\b.*(predict|forecast|estimate|determine)", "predictive"),
    (r"\bwhat\b.*(predict|forecast|expect|happen)", "predictive"),

    # Segmentation / grouping questions
    (r"\b(segment|group|cluster|categori)\b", "segmentation"),
    (r"\bwhat\b.*(type|kind|group|segment)\b.*\b(customer|user|product)", "segmentation"),
]

# Patterns that indicate EXPLORATORY mode (open-ended requests).
# Anchored with ^ so they only match when the request *starts* this way.
EXPLORATORY_PATTERNS = [
    (r"^analyze\b.*\b(this|the|my)\b.*\b(data|dataset|file|csv)", "general_analysis"),
    (r"^(tell|show)\b.*\b(me|us)\b.*\b(about|everything|what)", "general_analysis"),
    (r"^(explore|investigate|examine|look\s*(at|into))\b.*\b(this|the|my)\b", "general_analysis"),
    (r"^what\b.*\b(can|do)\b.*\b(you|we)\b.*\b(find|learn|discover|see)", "general_analysis"),
    (r"^(give|provide)\b.*\b(overview|summary|insight|analysis)", "general_analysis"),
    (r"^(run|do|perform)\b.*\b(full|complete|comprehensive|end.to.end)\b.*\b(analysis|pipeline|workflow)", "full_pipeline"),
    (r"^(find|discover|uncover)\b.*\b(insight|pattern|trend|interesting)", "general_analysis"),
]
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
class IntentClassifier:
|
| 103 |
+
"""
|
| 104 |
+
Classifies user intent into one of three execution modes.
|
| 105 |
+
|
| 106 |
+
Uses pattern matching (no LLM call needed) for fast classification.
|
| 107 |
+
Falls back to heuristics when patterns don't match.
|
| 108 |
+
|
| 109 |
+
Usage:
|
| 110 |
+
classifier = IntentClassifier()
|
| 111 |
+
result = classifier.classify("Why are customers churning?")
|
| 112 |
+
# IntentResult(mode="investigative", confidence=0.9, ...)
|
| 113 |
+
|
| 114 |
+
result = classifier.classify("Make a scatter plot of age vs income")
|
| 115 |
+
# IntentResult(mode="direct", confidence=0.95, ...)
|
| 116 |
+
|
| 117 |
+
result = classifier.classify("Analyze this dataset")
|
| 118 |
+
# IntentResult(mode="exploratory", confidence=0.85, ...)
|
| 119 |
+
"""
|
| 120 |
+
|
| 121 |
+
def classify(
|
| 122 |
+
self,
|
| 123 |
+
query: str,
|
| 124 |
+
dataset_info: Optional[Dict[str, Any]] = None,
|
| 125 |
+
has_target_col: bool = False
|
| 126 |
+
) -> IntentResult:
|
| 127 |
+
"""
|
| 128 |
+
Classify user intent into execution mode.
|
| 129 |
+
|
| 130 |
+
Args:
|
| 131 |
+
query: User's natural language query
|
| 132 |
+
dataset_info: Optional dataset schema info
|
| 133 |
+
has_target_col: Whether user provided a target column
|
| 134 |
+
|
| 135 |
+
Returns:
|
| 136 |
+
IntentResult with mode, confidence, and reasoning
|
| 137 |
+
"""
|
| 138 |
+
query_lower = query.lower().strip()
|
| 139 |
+
|
| 140 |
+
# Phase 1: Check for DIRECT patterns (strongest evidence)
|
| 141 |
+
direct_match = self._match_patterns(query_lower, DIRECT_PATTERNS)
|
| 142 |
+
if direct_match:
|
| 143 |
+
pattern, sub_intent = direct_match
|
| 144 |
+
return IntentResult(
|
| 145 |
+
mode="direct",
|
| 146 |
+
confidence=0.90,
|
| 147 |
+
reasoning=f"Direct command detected: {sub_intent} (pattern: {pattern[:50]})",
|
| 148 |
+
sub_intent=sub_intent
|
| 149 |
+
)
|
| 150 |
+
|
| 151 |
+
# Phase 2: Check for INVESTIGATIVE patterns
|
| 152 |
+
invest_match = self._match_patterns(query_lower, INVESTIGATIVE_PATTERNS)
|
| 153 |
+
if invest_match:
|
| 154 |
+
pattern, sub_intent = invest_match
|
| 155 |
+
return IntentResult(
|
| 156 |
+
mode="investigative",
|
| 157 |
+
confidence=0.85,
|
| 158 |
+
reasoning=f"Analytical question detected: {sub_intent}",
|
| 159 |
+
sub_intent=sub_intent
|
| 160 |
+
)
|
| 161 |
+
|
| 162 |
+
# Phase 3: Check for EXPLORATORY patterns
|
| 163 |
+
explore_match = self._match_patterns(query_lower, EXPLORATORY_PATTERNS)
|
| 164 |
+
if explore_match:
|
| 165 |
+
pattern, sub_intent = explore_match
|
| 166 |
+
|
| 167 |
+
# Special case: "full pipeline" with target col → direct ML pipeline
|
| 168 |
+
if sub_intent == "full_pipeline" and has_target_col:
|
| 169 |
+
return IntentResult(
|
| 170 |
+
mode="direct",
|
| 171 |
+
confidence=0.85,
|
| 172 |
+
reasoning="Full ML pipeline requested with target column",
|
| 173 |
+
sub_intent="full_ml_pipeline"
|
| 174 |
+
)
|
| 175 |
+
|
| 176 |
+
return IntentResult(
|
| 177 |
+
mode="exploratory",
|
| 178 |
+
confidence=0.80,
|
| 179 |
+
reasoning=f"Open-ended analysis request: {sub_intent}",
|
| 180 |
+
sub_intent=sub_intent
|
| 181 |
+
)
|
| 182 |
+
|
| 183 |
+
# Phase 4: Heuristic fallback
|
| 184 |
+
return self._heuristic_classify(query_lower, has_target_col)
|
| 185 |
+
|
| 186 |
+
def _match_patterns(self, query: str, patterns: list) -> Optional[Tuple[str, str]]:
|
| 187 |
+
"""Try to match query against a list of (pattern, sub_intent) tuples."""
|
| 188 |
+
for pattern, sub_intent in patterns:
|
| 189 |
+
if re.search(pattern, query, re.IGNORECASE):
|
| 190 |
+
return (pattern, sub_intent)
|
| 191 |
+
return None
|
| 192 |
+
|
| 193 |
+
def _heuristic_classify(self, query: str, has_target_col: bool) -> IntentResult:
    """Fallback classification using simple heuristics.

    Applied only when no explicit pattern matched. Checks run from the
    strongest to the weakest signal and end in a low-confidence default.
    """
    question_prefixes = ("why", "how", "what", "which", "is there", "are there", "does", "do")
    ml_verbs = ("predict", "train", "model", "classify", "regression")

    # Question words → investigative
    if query.startswith(question_prefixes):
        verdict = (
            "investigative",
            0.60,
            "Query starts with question word, likely analytical",
            "general_question",
        )
    # Very short queries → likely direct commands
    elif len(query.split()) <= 5:
        verdict = (
            "direct",
            0.55,
            "Short query, likely a direct command",
            "short_command",
        )
    # Has target column + action verbs → direct ML pipeline
    elif has_target_col and any(verb in query for verb in ml_verbs):
        verdict = (
            "direct",
            0.75,
            "Target column provided with ML action verb",
            "ml_pipeline",
        )
    # Default: exploratory (safest default for data science)
    else:
        verdict = (
            "exploratory",
            0.40,
            "No strong pattern match, defaulting to exploratory analysis",
            "default",
        )

    mode, confidence, reasoning, sub_intent = verdict
    return IntentResult(
        mode=mode,
        confidence=confidence,
        reasoning=reasoning,
        sub_intent=sub_intent,
    )
| 231 |
+
|
| 232 |
+
@staticmethod
|
| 233 |
+
def is_follow_up(query: str) -> bool:
|
| 234 |
+
"""
|
| 235 |
+
Detect if this is a follow-up question (uses context from previous analysis).
|
| 236 |
+
|
| 237 |
+
Follow-ups should generally be INVESTIGATIVE (they're asking about
|
| 238 |
+
something specific in the context of previous results).
|
| 239 |
+
"""
|
| 240 |
+
follow_up_patterns = [
|
| 241 |
+
r"^(now|next|also|and|then)\b",
|
| 242 |
+
r"\b(the same|that|this|those|these)\b.*\b(data|model|result|plot|chart)",
|
| 243 |
+
r"\b(more|another|different)\b.*\b(plot|chart|analysis|model)",
|
| 244 |
+
r"\b(what about|how about|can you also)\b",
|
| 245 |
+
r"\b(using|with)\b.*\b(the same|that|this)\b",
|
| 246 |
+
]
|
| 247 |
+
|
| 248 |
+
query_lower = query.lower().strip()
|
| 249 |
+
return any(re.search(p, query_lower) for p in follow_up_patterns)
|