import React, { useMemo, useState, useRef, useCallback } from 'react';
import { useQuery } from '@tanstack/react-query';
import {
  Activity,
  Zap,
  Target,
  TrendingUp,
  Database,
  Cpu,
  Globe,
  Play,
  Pause,
  ChevronDown,
  ChevronRight,
  Terminal,
  Plug,
  Eye,
  Bot,
  X,
  Check,
  Layers,
  FileText,
  Plus,
  Info,
  Link,
  MessageSquare,
  Image as ImageIcon,
  FolderOpen,
  Trash2,
  AlertCircle,
  Download,
  Copy,
  Navigation,
  Search,
  Code,
  CheckCircle,
  XCircle,
  Clock,
  FileJson,
  Sparkles,
  Brain,
  Compass,
  Shield,
  type LucideIcon,
} from 'lucide-react';
import { Badge } from '@/components/ui/Badge';
import { classNames } from '@/utils/helpers';
import { apiClient, type ScrapeStep, type ScrapeResponse, type ScrapeRequest } from '@/api/client';

/**
 * Icon for each known step action.
 *
 * Built once at module load instead of on every call. A `Map` is used so that
 * only the keys listed here ever match (a plain object would also answer for
 * inherited names such as `constructor`).
 */
const STEP_ICON_BY_ACTION: ReadonlyMap<string, LucideIcon> = new Map<string, LucideIcon>([
  ['initialize', Sparkles],
  ['navigate', Navigation],
  ['extract', Search],
  ['plugins', Plug],
  ['planner', Brain],
  ['planner_python', Code],
  ['navigator', Compass],
  ['navigator_python', Code],
  ['extractor_python', Code],
  ['verify', Shield],
  ['verifier', Shield],
  ['complete', CheckCircle],
  ['mcp_search', Search],
  ['python_sandbox', Terminal],
  ['site_template', FileText],
  ['tool_call', Zap],
  ['error', XCircle],
]);

/**
 * Tailwind text/background/border classes for each known step action.
 *
 * Note: there is no entry for `'error'`, so that action uses the neutral
 * fallback unless its status is `'failed'` or `'running'`.
 */
const STEP_COLOR_BY_ACTION: ReadonlyMap<string, string> = new Map<string, string>([
  ['initialize', 'text-purple-400 bg-purple-500/20 border-purple-500/30'],
  ['navigate', 'text-blue-400 bg-blue-500/20 border-blue-500/30'],
  ['extract', 'text-emerald-400 bg-emerald-500/20 border-emerald-500/30'],
  ['plugins', 'text-amber-400 bg-amber-500/20 border-amber-500/30'],
  ['planner', 'text-pink-400 bg-pink-500/20 border-pink-500/30'],
  ['planner_python', 'text-orange-400 bg-orange-500/20 border-orange-500/30'],
  ['navigator', 'text-indigo-400 bg-indigo-500/20 border-indigo-500/30'],
  ['navigator_python', 'text-orange-400 bg-orange-500/20 border-orange-500/30'],
  ['extractor_python', 'text-orange-400 bg-orange-500/20 border-orange-500/30'],
  ['verify', 'text-teal-400 bg-teal-500/20 border-teal-500/30'],
  ['verifier', 'text-teal-400 bg-teal-500/20 border-teal-500/30'],
  ['complete', 'text-green-400 bg-green-500/20 border-green-500/30'],
  ['mcp_search', 'text-cyan-400 bg-cyan-500/20 border-cyan-500/30'],
  ['python_sandbox', 'text-yellow-400 bg-yellow-500/20 border-yellow-500/30'],
  ['site_template', 'text-violet-400 bg-violet-500/20 border-violet-500/30'],
  ['tool_call', 'text-yellow-300 bg-yellow-500/20 border-yellow-500/30'],
]);

/**
 * Step action to icon mapping.
 *
 * @param action - The step's `action` string.
 * @returns The icon registered for `action`, or `Activity` when the action is not listed.
 */
const getStepIcon = (action: string): LucideIcon => STEP_ICON_BY_ACTION.get(action) ?? Activity;

/**
 * Step action color mapping.
 *
 * Status takes precedence over action: `'failed'` always yields the red
 * classes and `'running'` the pulsing cyan classes; only other statuses fall
 * through to the per-action table.
 *
 * @param action - The step's `action` string.
 * @param status - The step's `status` string.
 * @returns A space-separated Tailwind class list (text, background, border).
 */
const getStepColor = (action: string, status: string): string => {
  if (status === 'failed') return 'text-red-400 bg-red-500/20 border-red-500/30';
  if (status === 'running') return 'text-cyan-400 bg-cyan-500/20 border-cyan-500/30 animate-pulse';
  return STEP_COLOR_BY_ACTION.get(action) ?? 'text-slate-400 bg-slate-500/20 border-slate-500/30';
};

/**
 * Reports whether a plugin id is treated as an agent rather than a regular plugin.
 *
 * @param pluginId - Plugin identifier; compared case-insensitively.
 * @returns `true` when the lowercased id starts with `skill-` or equals `web_scraper`.
 */
const isAgentPluginId = (pluginId: string): boolean => {
  const lowered = pluginId.toLowerCase();
  return lowered.startsWith('skill-') || lowered === 'web_scraper';
};

// Step Accordion Component
/** Props accepted by `StepAccordionItem`. */
interface StepAccordionItemProps {
  /** The scrape step to display. */
  step: ScrapeStep;
  /** Whether the detail panel for this step is rendered. */
  isExpanded: boolean;
  /** Callback taking no arguments; presumably toggles expansion — confirm against the header markup. */
  onToggle: () => void;
  /** NOTE(review): usage not visible in this copy of the file — confirm what "latest" changes in the rendering. */
  isLatest: boolean;
}

const StepAccordionItem: React.FC<StepAccordionItemProps> = ({ step, isExpanded, onToggle, isLatest }) => {
  const Icon = getStepIcon(step.action);
  const colorClasses = getStepColor(step.action, step.status);

  // Check if this is a tool call
  const isToolCall = step.action === 'tool_call';
  // Tool metadata is read from extracted_data; each field falls back to an
  // empty value when it is absent or falsy.
  const toolName = (step.extracted_data?.tool_name as string) || '';
  const toolDescription = (step.extracted_data?.tool_description as string) || '';
  const toolParameters = (step.extracted_data?.parameters as Record<string, unknown>) || {};
  const toolResult = (step.extracted_data?.result as Record<string, unknown>) || {};

  // NOTE(review): the JSX element tags of this component appear to have been
  // stripped from this copy of the file; restore them from version control
  // before building.
  return (
{isExpanded && (
{/* Tool Call Specific Details */} {isToolCall && ( <>
Tool Call Details
{toolDescription && (
Description: {toolDescription}
)} {Object.keys(toolParameters).length > 0 && (
Parameters:
                      {JSON.stringify(toolParameters, null, 2)}
                    
)} {Object.keys(toolResult).length > 0 && (
Result:
                      {JSON.stringify(toolResult, null, 2)}
                    
)}
)} {/* Step Details */}
Action: {step.action}
Status: {step.status}
{step.url && (
URL: {step.url}
)} {step.duration_ms && (
Duration: {step.duration_ms.toFixed(0)}ms
)}
Reward: {step.reward.toFixed(2)}
{/* Extracted Data (non-tool calls or if additional data exists) */} {step.extracted_data && Object.keys(step.extracted_data).length > 0 && !isToolCall && (

Extracted Data:

                {JSON.stringify(step.extracted_data, null, 2)}
              
)} {/* Timestamp */}
{new Date(step.timestamp).toLocaleTimeString()}
)}
); }; // Types interface TaskInput { urls: string[]; instruction: string; outputInstruction: string; taskType: 'low' | 'medium' | 'high'; selectedModel: string; selectedVisionModel: string; selectedAgents: string[]; enabledPlugins: string[]; } interface LogEntry { id: string; timestamp: string; level: 'info' | 'warn' | 'error' | 'debug'; message: string; source?: string; } interface Asset { id: string; type: 'url' | 'image' | 'file' | 'data'; name: string; source: 'user' | 'ai'; content: string; timestamp: string; } interface MemoryEntry { id: string; type: 'short_term' | 'working' | 'long_term' | 'shared'; content: string; timestamp: string; } interface PluginInfo { id: string; name: string; description: string; category: string; installed: boolean; } interface AgentInfo { type: string; name: string; description: string; } interface ModelInfo { provider: string; model: string; name: string; description?: string; } // View type type ViewType = 'input' | 'dashboard'; // Info Popup Component const InfoPopup: React.FC<{ isOpen: boolean; onClose: () => void; title: string; description: string; details?: Record; }> = ({ isOpen, onClose, title, description, details }) => { if (!isOpen) return null; return (

{title}

{description}

{details && (
{Object.entries(details).map(([key, value]) => (
{key} {value}
))}
)}
); }; // Popup Components interface PopupProps { title: string; isOpen: boolean; onClose: () => void; children: React.ReactNode; size?: 'sm' | 'md' | 'lg'; } const Popup: React.FC = ({ title, isOpen, onClose, children, size = 'md' }) => { if (!isOpen) return null; const sizeClasses = { sm: 'max-w-sm', md: 'max-w-lg', lg: 'max-w-2xl', }; return (

{title}

{children}
); }; // Accordion Component for sidebar interface AccordionProps { title: string; icon: React.ElementType; badge?: string | number; color: string; children: React.ReactNode; defaultOpen?: boolean; } const Accordion: React.FC = ({ title, icon: Icon, badge, color, children, defaultOpen = false }) => { const [isOpen, setIsOpen] = useState(defaultOpen); return (
{isOpen &&
{children}
}
); }; // Main Dashboard Component export const Dashboard: React.FC = () => { // View state - 'input' or 'dashboard' const [currentView, setCurrentView] = useState('input'); // Task input state const [taskInput, setTaskInput] = useState({ urls: [], instruction: '', outputInstruction: '', taskType: 'medium', selectedModel: 'groq/gpt-oss-120b', selectedVisionModel: '', selectedAgents: [], enabledPlugins: [], }); // URL input for adding const [newUrl, setNewUrl] = useState(''); // Logs const [logs, setLogs] = useState([]); // Running state const [isRunning, setIsRunning] = useState(false); // Streaming state const [sessionId, setSessionId] = useState(null); const [currentStep, setCurrentStep] = useState(null); const [allSteps, setAllSteps] = useState([]); const [expandedStepIndex, setExpandedStepIndex] = useState(null); const [scrapeResult, setScrapeResult] = useState(null); const [progress, setProgress] = useState({ urlIndex: 0, totalUrls: 0, currentUrl: '' }); const [extractedData, setExtractedData] = useState>({}); const abortControllerRef = useRef<{ abort: () => void } | null>(null); const startLockRef = useRef(false); const seenStepKeysRef = useRef>(new Set()); const lastSessionInitRef = useRef(null); // Assets const [assets, setAssets] = useState([]); // Memories const [memories, setMemories] = useState([]); const [newMemory, setNewMemory] = useState(''); // Popup states const [showModelPopup, setShowModelPopup] = useState(false); const [showVisionPopup, setShowVisionPopup] = useState(false); const [showAgentPopup, setShowAgentPopup] = useState(false); const [showPluginPopup, setShowPluginPopup] = useState(false); const [showTaskTypePopup, setShowTaskTypePopup] = useState(false); const [showMemoriesPopup, setShowMemoriesPopup] = useState(false); const [showAssetsPopup, setShowAssetsPopup] = useState(false); // Info popup const [infoPopup, setInfoPopup] = useState<{ isOpen: boolean; title: string; description: string; details?: Record }>({ isOpen: false, title: 
'', description: '', }); // Episode stats - session-based, start at 0 const [stats, setStats] = useState({ episodes: 0, steps: 0, totalReward: 0, avgReward: 0 }); // API Queries const { data: health, isError: healthError } = useQuery({ queryKey: ['health'], queryFn: () => apiClient.healthCheck(), refetchInterval: 5000, }); const { data: agentsData } = useQuery({ queryKey: ['agents'], queryFn: async () => { const res = await fetch('/api/agents/list'); if (!res.ok) return { agent_types: [] }; return res.json(); }, }); const { data: pluginsData } = useQuery({ queryKey: ['plugins'], queryFn: async () => { const res = await fetch('/api/plugins'); if (!res.ok) return { plugins: {} }; return res.json(); }, }); const { data: memoryData } = useQuery({ queryKey: ['memory-stats'], queryFn: async () => { const res = await fetch('/api/memory/stats/overview'); if (!res.ok) return { total_count: 0 }; return res.json(); }, refetchInterval: 3000, }); const { data: settingsData } = useQuery({ queryKey: ['client-settings'], queryFn: async () => { const res = await fetch('/api/settings'); if (!res.ok) return { available_models: [], api_keys_configured: {} }; return res.json(); }, }); // Get installed plugins only const getInstalledPlugins = () => { if (!pluginsData?.plugins) return { mcps: [], apis: [], processors: [] }; const result: Record = {}; for (const [category, plugins] of Object.entries(pluginsData.plugins)) { if (category === 'skills') continue; result[category] = (plugins as PluginInfo[]).filter(p => p.installed); } return result; }; const installedPlugins = getInstalledPlugins(); const enabledNonAgentPlugins = useMemo( () => taskInput.enabledPlugins.filter((pluginId) => !isAgentPluginId(pluginId)), [taskInput.enabledPlugins] ); // Get agents const agents: AgentInfo[] = agentsData?.agent_types || []; // Get models grouped by provider const modelsByProvider = (): Record => { const models = settingsData?.available_models || []; const grouped: Record = {}; models.forEach((m: 
ModelInfo) => { if (!grouped[m.provider]) grouped[m.provider] = []; grouped[m.provider].push(m); }); return grouped; }; // Vision models const visionModels: ModelInfo[] = [ { provider: 'openai', model: 'gpt-4-vision-preview', name: 'GPT-4 Vision', description: 'OpenAI vision model' }, { provider: 'google', model: 'gemini-pro-vision', name: 'Gemini Pro Vision', description: 'Google vision model' }, { provider: 'anthropic', model: 'claude-3-opus-vision', name: 'Claude 3 Vision', description: 'Anthropic vision model' }, ]; // Task types const taskTypes = [ { id: 'low', name: 'Low', description: 'Simple single-page extraction', color: 'emerald', icon: '🟢' }, { id: 'medium', name: 'Medium', description: 'Multi-page navigation', color: 'amber', icon: '🟡' }, { id: 'high', name: 'High', description: 'Complex interactive tasks', color: 'red', icon: '🔴' }, ]; const detectOutputFormat = (outputInstruction: string): ScrapeRequest['output_format'] => { const normalized = outputInstruction.toLowerCase(); if (normalized.includes('csv')) return 'csv'; if (normalized.includes('markdown') || normalized.includes('md')) return 'markdown'; if (normalized.includes('text') || normalized.includes('plain')) return 'text'; return 'json'; }; // Add URL to list const handleAddUrl = () => { if (newUrl.trim() && !taskInput.urls.includes(newUrl.trim())) { const url = newUrl.trim(); setTaskInput(p => ({ ...p, urls: [...p.urls, url] })); // Also add to assets setAssets(prev => [...prev, { id: Date.now().toString(), type: 'url', name: url, source: 'user', content: url, timestamp: new Date().toISOString(), }]); setNewUrl(''); } }; // Remove URL const handleRemoveUrl = (url: string) => { setTaskInput(p => ({ ...p, urls: p.urls.filter(u => u !== url) })); setAssets(prev => prev.filter(a => a.content !== url)); }; // Add memory const handleAddMemory = () => { if (newMemory.trim()) { setMemories(prev => [...prev, { id: Date.now().toString(), type: 'working', content: newMemory.trim(), timestamp: new 
Date().toISOString(), }]); setNewMemory(''); } }; // Start task with streaming const handleStart = useCallback(() => { if (taskInput.urls.length === 0 && !taskInput.instruction) return; if (startLockRef.current || abortControllerRef.current) return; startLockRef.current = true; seenStepKeysRef.current.clear(); lastSessionInitRef.current = null; setStats(prev => ({ ...prev, episodes: prev.episodes + 1, steps: 0, totalReward: 0, avgReward: 0 })); setIsRunning(true); setCurrentView('dashboard'); setSessionId(null); setProgress({ urlIndex: 0, totalUrls: taskInput.urls.length, currentUrl: '' }); setScrapeResult(null); setExtractedData({}); setCurrentStep(null); setAllSteps([]); setExpandedStepIndex(null); // Build scrape request const scrapeRequest: ScrapeRequest = { assets: taskInput.urls, instructions: taskInput.instruction, output_instructions: taskInput.outputInstruction || 'Return as JSON', output_format: detectOutputFormat(taskInput.outputInstruction), complexity: taskInput.taskType, model: taskInput.selectedModel.split('/')[1] || 'llama-3.3-70b', provider: taskInput.selectedModel.split('/')[0] || 'nvidia', enable_memory: true, enable_plugins: enabledNonAgentPlugins, selected_agents: taskInput.selectedAgents, max_steps: 50, }; // Add initial log setLogs(prev => [...prev, { id: Date.now().toString(), timestamp: new Date().toISOString(), level: 'info', message: `Starting scrape with ${taskInput.urls.length} URLs`, source: 'system', }]); // Start streaming scrape abortControllerRef.current = apiClient.streamScrape( scrapeRequest, // onInit (sid) => { if (lastSessionInitRef.current === sid) return; lastSessionInitRef.current = sid; setSessionId(sid); setLogs(prev => [...prev, { id: Date.now().toString(), timestamp: new Date().toISOString(), level: 'info', message: `Session started: ${sid.slice(0, 8)}...`, source: 'scraper', }]); }, // onUrlStart (url, index, total) => { setProgress({ urlIndex: index, totalUrls: total, currentUrl: url }); setLogs(prev => [...prev, { 
id: Date.now().toString(), timestamp: new Date().toISOString(), level: 'info', message: `Processing URL ${index + 1}/${total}: ${url}`, source: 'scraper', }]); }, // onStep (step) => { const stepKey = `${step.step_number}|${step.action}|${step.url ?? ''}|${step.status}|${step.message}|${step.timestamp}`; if (seenStepKeysRef.current.has(stepKey)) return; seenStepKeysRef.current.add(stepKey); setCurrentStep(step); setAllSteps(prev => [...prev, step]); setStats(prev => { const steps = prev.steps + 1; const totalReward = prev.totalReward + step.reward; return { ...prev, steps, totalReward, avgReward: totalReward / steps, }; }); // Update extracted data if (step.extracted_data) { setExtractedData(prev => ({ ...prev, ...step.extracted_data })); } setLogs(prev => [...prev, { id: Date.now().toString(), timestamp: new Date().toISOString(), level: step.status === 'failed' ? 'error' : 'info', message: `[${step.action}] ${step.message} (reward: ${step.reward.toFixed(2)})`, source: step.url?.slice(0, 30) || 'step', }]); }, // onUrlComplete (url, _index) => { setLogs(prev => [...prev, { id: Date.now().toString(), timestamp: new Date().toISOString(), level: 'info', message: `Completed: ${url}`, source: 'scraper', }]); }, // onComplete (response) => { startLockRef.current = false; abortControllerRef.current = null; setScrapeResult(response); setIsRunning(false); setStats(prev => ({ ...prev, totalReward: response.total_reward, avgReward: response.total_reward / Math.max(prev.steps, 1), })); const extractedAssets = Object.entries(response.extracted_data).map(([url, data]) => ({ id: `${Date.now()}-${url}`, type: 'data' as const, name: `Data from ${url}`, source: 'ai' as const, content: JSON.stringify(data), timestamp: new Date().toISOString(), })); setAssets(prev => [...prev, ...extractedAssets]); setLogs(prev => [...prev, { id: Date.now().toString(), timestamp: new Date().toISOString(), level: response.errors.length > 0 ? 'warn' : 'info', message: `Scrape complete! 
Processed ${response.urls_processed} URLs, total reward: ${response.total_reward.toFixed(2)}`, source: 'system', }]); }, // onError (error, url) => { if (!url) { startLockRef.current = false; abortControllerRef.current = null; setIsRunning(false); } setLogs(prev => [...prev, { id: Date.now().toString(), timestamp: new Date().toISOString(), level: 'error', message: `Error${url ? ` (${url})` : ''}: ${error}`, source: 'scraper', }]); } ); }, [taskInput, enabledNonAgentPlugins]); // Stop task const handleStop = useCallback(() => { if (abortControllerRef.current) { abortControllerRef.current.abort(); abortControllerRef.current = null; } startLockRef.current = false; setIsRunning(false); setLogs(prev => [...prev, { id: Date.now().toString(), timestamp: new Date().toISOString(), level: 'warn', message: 'Scraping stopped by user', source: 'system', }]); }, []); // Copy result to clipboard const handleCopyResult = useCallback(() => { if (scrapeResult?.output) { navigator.clipboard.writeText(scrapeResult.output); setLogs(prev => [...prev, { id: Date.now().toString(), timestamp: new Date().toISOString(), level: 'info', message: 'Result copied to clipboard', source: 'system', }]); } }, [scrapeResult]); // Download result const handleDownloadResult = useCallback(() => { if (scrapeResult?.output) { const fileType = scrapeResult.output_format === 'csv' ? 'text/csv' : scrapeResult.output_format === 'markdown' ? 'text/markdown' : 'application/json'; const extension = scrapeResult.output_format === 'csv' ? 'csv' : scrapeResult.output_format === 'markdown' ? 'md' : scrapeResult.output_format === 'text' ? 
'txt' : 'json'; const blob = new Blob([scrapeResult.output], { type: fileType }); const url = URL.createObjectURL(blob); const a = document.createElement('a'); a.href = url; a.download = `scrape-result-${sessionId?.slice(0, 8) || 'unknown'}.${extension}`; document.body.appendChild(a); a.click(); document.body.removeChild(a); URL.revokeObjectURL(url); } }, [scrapeResult, sessionId]); // Format time const formatTime = (isoString: string) => { return new Date(isoString).toLocaleTimeString('en-US', { hour12: false }); }; const safeHostname = (url: string) => { try { return new URL(url).hostname; } catch { return url; } }; // Log level colors const getLogLevelColor = (level: LogEntry['level']) => { const colors = { info: 'text-cyan-400', warn: 'text-amber-400', error: 'text-red-400', debug: 'text-gray-400' }; return colors[level]; }; // Check system status const normalizedHealthStatus = typeof health?.status === 'string' ? health.status.toLowerCase() : null; const isSystemOnline = !healthError && ( normalizedHealthStatus === null || normalizedHealthStatus === 'healthy' || normalizedHealthStatus === 'ok' || normalizedHealthStatus === 'ready' ); // Show info popup const showInfo = (title: string, description: string, details?: Record) => { setInfoPopup({ isOpen: true, title, description, details }); }; // ========== INPUT VIEW ========== if (currentView === 'input') { return (
{/* System Status Banner */} {!isSystemOnline && (
System is offline. Please check your connection.
)} {/* Main Content - Full Screen Navy Blue Theme */}
{/* Header */}

ScrapeRL

AI-Powered Intelligent Web Scraping

{/* Assets Section */}
Assets {taskInput.urls.length} URLs
{/* URL Input */}
setNewUrl(e.target.value)} onKeyDown={(e) => e.key === 'Enter' && handleAddUrl()} className="flex-1 px-4 py-3 bg-slate-900/70 border border-cyan-500/30 rounded-xl text-white placeholder-slate-500 focus:outline-none focus:ring-2 focus:ring-cyan-500/50 focus:border-cyan-500/50 transition-all" />
{/* URL List */} {taskInput.urls.length > 0 && (
{taskInput.urls.map((url, index) => (
{url}
))}
)}
{/* Instructions Section */}
Instructions