// NOTE(review): this file appears damaged by extraction — generic type arguments were
// stripped (e.g. `useState(null)` was presumably `useState<File | null>(null)`, and
// `useRef>(new Set())` was presumably `useRef<Set<string>>(new Set())`), and the JSX
// render below `return (` lost its markup. Tokens are preserved as-is here; only
// comments and line formatting were added. TODO: restore the stripped type arguments.
import React, { useState, useRef, useEffect } from 'react';
import { motion, AnimatePresence } from 'framer-motion';
import { Send, Plus, Search, Settings, MoreHorizontal, User, Bot, ArrowLeft, Paperclip, Sparkles, Trash2, X, Upload, Package, FileText, BarChart3, ChevronRight } from 'lucide-react';
import { cn } from '../lib/utils';
import { Logo } from './Logo';
import ReactMarkdown from 'react-markdown';

// A single chat message from either the user or the assistant.
interface Message {
  id: string;
  role: 'user' | 'assistant';
  content: string;
  timestamp: Date;
  // Attachment metadata shown with user messages that uploaded a file.
  file?: { name: string; size: number; };
  // Generated report links rendered under assistant messages.
  reports?: Array<{ name: string; path: string; }>;
  // Generated visualizations; `html` plots are rendered in an iframe/modal, images inline.
  plots?: Array<{ title: string; url: string; type?: 'image' | 'html'; }>;
}

// One conversation thread shown in the sidebar.
interface ChatSession {
  id: string;
  title: string;
  messages: Message[];
  updatedAt: Date;
}

// Chat UI for the data-analysis agent: manages sessions, uploads CSV/Parquet files
// to the backend, and streams analysis progress/results back over SSE.
export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
  const [sessions, setSessions] = useState([ { id: '1', title: 'ML Model Analysis', messages: [], updatedAt: new Date(), } ]);
  const [activeSessionId, setActiveSessionId] = useState('1'); // Start with default session, update to UUID after first API call
  const [input, setInput] = useState('');
  const [isTyping, setIsTyping] = useState(false);
  // Human-readable progress line driven by SSE tool_* events while an analysis runs.
  const [currentStep, setCurrentStep] = useState('');
  const [uploadedFile, setUploadedFile] = useState(null);
  const [reportModalUrl, setReportModalUrl] = useState(null);
  const [reportModalTitle, setReportModalTitle] = useState('Visualization');
  const [showAssets, setShowAssets] = useState(false);
  const fileInputRef = useRef(null);
  const scrollRef = useRef(null);
  // Holds the live EventSource so effects can reuse/close it across renders.
  const eventSourceRef = useRef(null);
  const processedAnalysisRef = useRef>(new Set()); // Track processed analysis_complete events
  // Fall back to the first session if the active id is not found (e.g. after deletion).
  const activeSession = sessions.find(s => s.id === activeSessionId) || sessions[0];

  // Keep the message list scrolled to the bottom as messages arrive or typing starts.
  useEffect(() => {
    if (scrollRef.current) { scrollRef.current.scrollTop = scrollRef.current.scrollHeight; }
  }, [activeSession.messages, isTyping]);

  // Clear uploaded file when switching sessions
  useEffect(() => {
    setUploadedFile(null);
    if (fileInputRef.current) { fileInputRef.current.value = ''; }
  }, [activeSessionId]);

  // Connect to SSE when we receive a valid backend UUID
  useEffect(() => {
    // Only connect if we have a backend UUID (contains hyphens)
    if (!activeSessionId || !activeSessionId.includes('-')) { return; }
    // Check if we need a new connection for this session
    // Close old connection if it exists and belongs to a different session or is closed
    if (eventSourceRef.current) {
      const currentSource = eventSourceRef.current;
      // If readyState is CLOSED (2), we need a new connection
      // If it's CONNECTING (0) or OPEN (1) for the same session, reuse it
      if (currentSource.readyState === 2) {
        console.log('๐Ÿ”„ Existing connection closed, creating new one');
        currentSource.close();
        eventSourceRef.current = null;
      } else {
        console.log('โ™ป๏ธ Reusing existing SSE connection');
        return;
      }
    }
    // Connect to SSE stream - will receive history + any new events
    const API_URL = window.location.origin;
    console.log(`๐Ÿ”Œ Connecting SSE to session: ${activeSessionId}`);
    const eventSource = new EventSource(`${API_URL}/api/progress/stream/${activeSessionId}`);
    eventSource.onopen = () => { console.log('โœ… SSE connection established'); };
    // Handle all incoming messages
    eventSource.onmessage = (e) => {
      console.log('๐Ÿ“จ SSE received:', e.data);
      try {
        const data = JSON.parse(e.data);
        // Handle different event types
        if (data.type === 'connected') {
          console.log('๐Ÿ”— Connected to progress stream');
        } else if (data.type === 'tool_executing') {
          setCurrentStep(data.message || `๐Ÿ”ง Executing: ${data.tool}`);
        } else if (data.type === 'tool_completed') {
          setCurrentStep(data.message || `โœ“ Completed: ${data.tool}`);
        } else if (data.type === 'tool_failed') {
          setCurrentStep(data.message || `โŒ Failed: ${data.tool}`);
        } else if (data.type === 'token_update') {
          // Optional: Display token budget updates
          console.log('๐Ÿ’ฐ Token update:', data.message);
        } else if (data.type === 'analysis_complete') {
          console.log('โœ… Analysis completed', data.result);
          setIsTyping(false);
          // Create a unique key based on actual workflow content to prevent duplicates
          // Use the last tool executed + summary hash for uniqueness
          const lastTool = data.result?.workflow_history?.[data.result.workflow_history.length - 1]?.tool || 'unknown';
          const summarySnippet = (data.result?.summary || '').substring(0, 50);
          const resultKey = `${activeSessionId}-${lastTool}-${summarySnippet}`;
          // Only process if we haven't seen this exact result before
          // (the stream replays history on reconnect, so duplicates are expected).
          if (!processedAnalysisRef.current.has(resultKey)) {
            console.log('๐Ÿ†• New analysis result, processing...', resultKey);
            processedAnalysisRef.current.add(resultKey);
            // Process the final result with the current session ID
            if (data.result) { processAnalysisResult(data.result, activeSessionId); }
          } else {
            console.log('โญ๏ธ Skipping duplicate analysis result', resultKey);
          }
        }
      } catch (err) {
        console.error('โŒ Error parsing SSE event:', err, e.data);
      }
    };
    // Handle errors - DON'T immediately close, just log
    eventSource.onerror = (err) => {
      console.error('โŒ SSE connection error/closed:', err);
      // Don't close here - let it reconnect naturally on next request
      // The readyState check above will handle creating a new connection if needed
    };
    eventSourceRef.current = eventSource;
    // Cleanup on unmount or session change
    return () => {
      if (eventSourceRef.current) {
        console.log('๐Ÿงน Cleaning up SSE connection');
        eventSourceRef.current.close();
        eventSourceRef.current = null;
      }
    };
  }, [activeSessionId]);

  // Build an assistant Message from a finished analysis result (delivered via SSE)
  // and append it to the session identified by `sessionId`.
  const processAnalysisResult = (result: any, sessionId: string) => {
    // Extract and display the analysis result from SSE
    let assistantContent = 'โœ… Analysis Complete!\n\n';
    let reports: Array<{name: string, path: string}> = [];
    let plots: Array<{title: string, url: string, type?: 'image' | 'html'}> = [];
    // PRIORITY 1: Extract plots from main result.plots array (backend enhanced summary)
    if (result.plots && Array.isArray(result.plots)) {
      result.plots.forEach((plot: any) => {
        plots.push({
          title: plot.title || 'Visualization',
          url: plot.url || plot.path,
          type: plot.type || (plot.url?.endsWith('.html') ? 'html' : 'image')
        });
      });
    }
    // PRIORITY 2: Extract plots and reports from workflow_history (for backward compatibility)
    if (result.workflow_history) {
      const reportTools = ['generate_ydata_profiling_report', 'generate_plotly_dashboard', 'generate_all_plots'];
      const plotTools = [ 'generate_interactive_correlation_heatmap', 'generate_interactive_scatter', 'generate_interactive_histogram', 'generate_interactive_box_plots', 'generate_interactive_time_series', 'generate_eda_plots', 'generate_data_quality_plots', 'analyze_correlations' ];
      result.workflow_history.forEach((step: any) => {
        if (reportTools.includes(step.tool)) {
          // Backends have reported the path under several keys; try each in turn.
          const reportPath = step.result?.output_path || step.result?.report_path || step.arguments?.output_path;
          if (reportPath && (step.result?.success !== false)) {
            // Derive a display name from the tool name, e.g. "generate_all_plots" -> "all plots".
            reports.push({ name: step.tool.replace('generate_', '').replace(/_/g, ' ').trim(), path: reportPath });
          }
        }
        // Only extract from workflow if not already in result.plots
        if (plotTools.includes(step.tool) && step.result?.result?.output_path && plots.length === 0) {
          const outputPath = step.result.result.output_path;
          plots.push({
            title: step.tool.replace('generate_', '').replace('interactive_', '').replace(/_/g, ' ').trim(),
            // Normalize relative backend paths to the /outputs/ URL prefix.
            url: outputPath.startsWith('/') ? outputPath : `/outputs/${outputPath.replace('./outputs/', '')}`,
            type: outputPath.endsWith('.html') ? 'html' : 'image'
          });
        }
      });
    }
    if (reports.length > 0) {
      assistantContent += '๐Ÿ“Š **Generated Reports:**\n';
      reports.forEach(r => assistantContent += `- ${r.name}\n`);
      assistantContent += '\n';
    }
    if (plots.length > 0) {
      assistantContent += `๐Ÿ“ˆ **Generated ${plots.length} Visualizations**\n\n`;
    }
    // Extract summary from backend (field changed from final_answer to summary)
    const summaryText = result.summary || result.final_answer || 'Analysis complete. Check the generated artifacts.';
    assistantContent += summaryText;
    // Add assistant message with result
    const assistantMessage: Message = { id: Date.now().toString(), role: 'assistant', content: assistantContent, timestamp: new Date(), reports, plots };
    // Get current session and add message
    setSessions(prev => prev.map(s => {
      if (s.id === sessionId) {
        return { ...s, messages: [...s.messages, assistantMessage], updatedAt: new Date() };
      }
      return s;
    }));
  };

  // Send the current input (and optional uploaded file) to the backend.
  // File uploads / follow-ups go to the async /run-async endpoint (result arrives
  // via SSE); plain conversation goes to the synchronous /chat endpoint.
  const handleSend = async () => {
    if ((!input.trim() && !uploadedFile) || isTyping) return;
    const userMessage: Message = { id: Date.now().toString(), role: 'user', content: input || (uploadedFile ? `Uploaded: ${uploadedFile.name}` : ''), timestamp: new Date(), file: uploadedFile ? { name: uploadedFile.name, size: uploadedFile.size } : undefined, };
    const newMessages = [...activeSession.messages, userMessage];
    updateSession(activeSessionId, newMessages);
    setInput('');
    // Show loading indicator immediately (for UI feedback)
    setIsTyping(true);
    try {
      // Use the current origin if running on same server, otherwise use env variable
      const API_URL = window.location.origin;
      console.log('API URL:', API_URL);
      let response;
      const sessionKey = activeSessionId || 'default';
      // Check if there's a recent file analysis in the conversation
      const recentFileMessage = newMessages.slice(-5).find(m => m.file || m.content.includes('Uploaded:'));
      const hasRecentFile = recentFileMessage && !uploadedFile;
      if (uploadedFile || hasRecentFile) {
        // Use /run endpoint for file analysis or follow-up questions about uploaded data
        const formData = new FormData();
        if (uploadedFile) {
          formData.append('file', uploadedFile);
          formData.append('task_description', input || 'Analyze this dataset and provide insights');
          formData.append('session_id', sessionKey); // Add session_id for progress tracking
        } else if (hasRecentFile) {
          // For follow-up questions, extract the filename from recent context
          const fileNameMatch = recentFileMessage?.content.match(/Uploaded: (.+)/);
          // NOTE(review): fileName is computed but never used below — presumably
          // intended to be sent to the backend; confirm before removing.
          const fileName = fileNameMatch ? fileNameMatch[1] : 'dataset.csv';
          // Send follow-up request as a new task description
          formData.append('task_description', input);
          formData.append('session_id', sessionKey); // Use same session key
          // Note: Backend needs to support session-based file context
          // For now, just send the task which should work with session memory
        }
        formData.append('use_cache', 'true');
        formData.append('max_iterations', '20');
        response = await fetch(`${API_URL}/run-async`, { method: 'POST', body: formData });
        setUploadedFile(null);
      } else {
        response = await fetch(`${API_URL}/chat`, { method: 'POST', headers: { 'Content-Type': 'application/json', }, body: JSON.stringify({ messages: newMessages.map(m => ({ role: m.role, content: m.content })), stream: false }) });
      }
      if (!response.ok) { throw new Error(`API error: ${response.status}`); }
      const data = await response.json();
      // Store UUID from backend to trigger SSE connection
      if (data.session_id) {
        console.log(`๐Ÿ”‘ Session UUID from backend: ${data.session_id}`);
        const newSessionId = data.session_id;
        // CRITICAL: Update sessions first, then activeSessionId
        // React 18 batches these updates automatically, preventing flicker
        setSessions(prev => prev.map(s => s.id === activeSessionId ? { ...s, id: newSessionId } : s ));
        setActiveSessionId(newSessionId);
      }
      // For async endpoint, result comes via SSE analysis_complete event
      // For now, just wait for SSE to deliver the result
      if (data.status === 'started') {
        console.log('๐Ÿš€ Analysis started, waiting for SSE events...');
        return; // Don't process result here, will come via SSE
      }
      // Legacy sync endpoint handling (if data.result exists)
      let assistantContent = '';
      let reports: Array<{name: string, path: string}> = [];
      let plots: Array<{title: string, url: string, type?: 'image' | 'html'}> = [];
      // Check for reports in any /run endpoint response (not just when file is uploaded)
      if (data.result) {
        const result = data.result;
        assistantContent = `โœ… Analysis Complete!\n\n`;
        // Extract plots from workflow_history (PRIMARY SOURCE)
        if (result.workflow_history) {
          const reportTools = ['generate_ydata_profiling_report', 'generate_plotly_dashboard', 'generate_all_plots'];
          const plotTools = [ 'generate_interactive_correlation_heatmap', 'generate_interactive_scatter', 'generate_interactive_histogram', 'generate_interactive_box_plots', 'generate_interactive_time_series', 'generate_eda_plots', 'generate_data_quality_plots', 'analyze_correlations' ];
          result.workflow_history.forEach((step: any) => {
            // Extract reports
            if (reportTools.includes(step.tool)) {
              const reportPath = step.result?.output_path || step.result?.report_path || step.arguments?.output_path;
              if (reportPath && (step.result?.success !== false)) {
                reports.push({ name: step.tool.replace('generate_', '').replace(/_/g, ' ').replace('report', '').trim(), path: reportPath });
              }
            }
            // Extract plots
            if (plotTools.includes(step.tool)) {
              const plotPath = step.result?.output_path || step.arguments?.output_path;
              if (plotPath && (step.result?.success !== false)) {
                // Build a human-readable title from the tool name, capitalized.
                const plotTitle = step.tool .replace('generate_', '') .replace('interactive_', '') .replace(/_/g, ' ') .replace('plots', 'plot') .trim();
                plots.push({ title: plotTitle.charAt(0).toUpperCase() + plotTitle.slice(1), url: plotPath.replace('./outputs/', '/outputs/'), type: plotPath.endsWith('.html') ? 'html' : 'image' });
              }
            }
          });
        }
        // Also check for report paths mentioned in the summary text
        if (result.summary && !reports.length) {
          const reportPathMatch = result.summary.match(/\.(\/outputs\/reports\/[^\s]+\.html)/);
          if (reportPathMatch) {
            reports.push({ name: 'ydata profiling', path: reportPathMatch[1] });
          }
        }
        if (result.summary) { assistantContent += `**Summary:**\n${result.summary}\n\n`; }
        if (result.workflow_history && result.workflow_history.length > 0) {
          assistantContent += `**Tools Used:** ${result.workflow_history.length} steps\n\n`;
          assistantContent += `**Final Result:**\n${result.final_result || 'Analysis completed successfully'}`;
        }
      } else if (data.success && data.message) {
        assistantContent = data.message;
      } else {
        throw new Error('Invalid response from API');
      }
      // Aggressive text cleaning to remove malformed content
      assistantContent = assistantContent
        // Remove broken markdown tables (lines with just | symbols)
        .replace(/^\s*\|\s*\|\s*$/gm, '')
        // Remove confusing phrases
        .replace(/Printed in logs \(see above\)/gi, '')
        .replace(/\(see above\)/gi, '')
        .replace(/see above/gi, '')
        // Remove broken table rows (just dashes and pipes)
        .replace(/^\s*[-|]+\s*$/gm, '')
        // Remove code block markers without content
        .replace(/```\s*```/g, '')
        // Remove empty markdown sections
        .replace(/\n{3,}/g, '\n\n')
        // Clean up broken table syntax
        .replace(/\|\s*\n\s*\|/g, '')
        .trim();
      updateSession(activeSessionId, [...newMessages, { id: (Date.now() + 1).toString(), role: 'assistant', content: assistantContent, timestamp: new Date(), reports: reports.length > 0 ? reports : undefined, plots: plots.length > 0 ? plots : undefined }]);
    } catch (error: any) {
      console.error("Chat Error:", error);
      let errorMessage = "I'm sorry, I encountered an error processing your request.";
      if (error.message) { errorMessage += `\n\n**Error:** ${error.message}`; }
      // Try to parse response error
      // NOTE(review): `error` here is usually an Error, which has no .text();
      // the optional call makes this a best-effort probe for Response-like errors.
      try {
        const errorText = await error.text?.();
        if (errorText) {
          const errorData = JSON.parse(errorText);
          if (errorData.detail) {
            errorMessage = `**Error:** ${typeof errorData.detail === 'string' ? errorData.detail : JSON.stringify(errorData.detail)}`;
          }
        }
      } catch (e) {
        // Ignore parsing errors
      }
      updateSession(activeSessionId, [...newMessages, { id: 'err-' + Date.now(), role: 'assistant', content: errorMessage, timestamp: new Date() }]);
      // On error, stop loading indicator
      setIsTyping(false);
    }
    // NOTE: No finally block - isTyping is set to false by SSE analysis_complete event
  };

  // Replace the message list of session `id` and bump its updatedAt timestamp.
  const updateSession = (id: string, messages: Message[]) => {
    setSessions(prev => prev.map(s => {
      if (s.id === id) { return { ...s, messages, updatedAt: new Date() }; }
      return s;
    }));
  };

  // Create and activate an empty session; the id is temporary until the backend
  // assigns a UUID on the first API call (see handleSend).
  const createNewChat = () => {
    const newId = Date.now().toString();
    const newSession: ChatSession = { id: newId, title: 'New Chat', messages: [], updatedAt: new Date() };
    setSessions([newSession, ...sessions]);
    setActiveSessionId(newId);
    // Clear file upload state for new chat
    setUploadedFile(null);
    if (fileInputRef.current) { fileInputRef.current.value = ''; }
  };

  // Delete a session from the sidebar; the last remaining session cannot be deleted.
  const deleteSession = (e: React.MouseEvent, id: string) => {
    e.stopPropagation();
    if (sessions.length === 1) return;
    setSessions(prev => prev.filter(s => s.id !== id));
    // If the active session was deleted, activate any other surviving session.
    if (activeSessionId === id) {
      setActiveSessionId(sessions.find(s => s.id !== id)?.id || '');
    }
  };

  // Accept only .csv/.parquet files from the hidden file input; reject others with an alert.
  const handleFileSelect = (e: React.ChangeEvent) => {
    const file = e.target.files?.[0];
    if (file) {
      const validTypes = ['.csv', '.parquet'];
      const fileExt = file.name.substring(file.name.lastIndexOf('.')).toLowerCase();
      if (validTypes.includes(fileExt)) {
        setUploadedFile(file);
      } else {
        alert('Please upload a CSV or Parquet file');
      }
    }
  };

  // Clear the pending upload and reset the file input so the same file can be re-selected.
  const removeFile = () => {
    setUploadedFile(null);
    if (fileInputRef.current) { fileInputRef.current.value = ''; }
  };

  return (
{/* Sidebar */} {/* Main Chat Area */}
{/* Top Header */}

{activeSession.title}

{activeSession.messages.length} messages in session

{/* Message List */}
{activeSession.messages.length === 0 ? (

Welcome, Data Scientist

I'm your autonomous agent ready to profile data, train models, or build dashboards. Try uploading a dataset or describing your ML objective.

{[ "Profile my sales.csv", "Train a XGBoost classifier", "Generate a correlation heatmap", "Explain feature importance" ].map(prompt => ( ))}
) : ( activeSession.messages.map((msg) => (
{msg.role === 'user' ? : }
{msg.file && (
{msg.file.name} ({(msg.file.size / 1024).toFixed(1)} KB)
)} {msg.role === 'assistant' ? (

, ul: ({node, ...props}) =>

    , ol: ({node, ...props}) =>
      , li: ({node, ...props}) =>
    1. , strong: ({node, ...props}) => , code: ({node, inline, ...props}: any) => inline ? : }} > {msg.content || ''} ) : ( msg.content )} {msg.reports && msg.reports.length > 0 && (
      {msg.reports.map((report, idx) => { // Normalize the report path: remove leading ./ and ensure it starts with / const normalizedPath = report.path.replace(/^\.\//, '/'); return ( ); })}
      )} {msg.plots && msg.plots.length > 0 && ( <>
      ๐Ÿ“Š Generated Visualizations ({msg.plots.length})
      {msg.plots.map((plot, idx) => ( ))}
      )}
      {msg.timestamp.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' })}
)) )} {isTyping && (
{currentStep || '๐Ÿ”ง Starting analysis...'}
)}
{/* Input Bar */}
{uploadedFile && (
{uploadedFile.name}
)}