Spaces:
Sleeping
Sleeping
Commit
·
9281fab
0
Parent(s):
Added init files
Browse files- .gitignore +6 -0
- LICENSE +21 -0
- README.md +190 -0
- app.py +220 -0
- coda/__init__.py +31 -0
- coda/agents/__init__.py +29 -0
- coda/agents/code_generator.py +162 -0
- coda/agents/data_processor.py +280 -0
- coda/agents/debug_agent.py +252 -0
- coda/agents/design_explorer.py +207 -0
- coda/agents/query_analyzer.py +112 -0
- coda/agents/search_agent.py +295 -0
- coda/agents/visual_evaluator.py +228 -0
- coda/agents/viz_mapping.py +164 -0
- coda/config.py +91 -0
- coda/core/__init__.py +17 -0
- coda/core/agent_factory.py +135 -0
- coda/core/base_agent.py +220 -0
- coda/core/llm.py +192 -0
- coda/core/memory.py +148 -0
- coda/orchestrator.py +293 -0
- main.py +162 -0
- requirements.txt +22 -0
- sample_data.csv +16 -0
- tests/__init__.py +1 -0
- tests/test_agents.py +177 -0
- tests/test_llm.py +126 -0
- tests/test_memory.py +165 -0
.gitignore
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.venv/
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.pyc
|
| 4 |
+
.env
|
| 5 |
+
outputs/
|
| 6 |
+
*.log
|
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2026 M Saqlain
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
README.md
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: CoDA
|
| 3 |
+
emoji: 🎨
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 4.0.0
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
# CoDA: Collaborative Data Visualization Agents
|
| 13 |
+
|
| 14 |
+
A production-grade multi-agent system for automated data visualization from natural language queries.
|
| 15 |
+
|
| 16 |
+
[](https://huggingface.co/spaces)
|
| 17 |
+
[](https://www.python.org/downloads/)
|
| 18 |
+
[](https://opensource.org/licenses/MIT)
|
| 19 |
+
|
| 20 |
+
## Overview
|
| 21 |
+
|
| 22 |
+
CoDA reframes data visualization as a collaborative multi-agent problem. Instead of treating it as a monolithic task, CoDA employs specialized LLM agents that work together:
|
| 23 |
+
|
| 24 |
+
- **Query Analyzer** - Interprets natural language and extracts visualization intent
|
| 25 |
+
- **Data Processor** - Extracts metadata without token-heavy data loading
|
| 26 |
+
- **VizMapping Agent** - Maps semantics to visualization primitives
|
| 27 |
+
- **Search Agent** - Retrieves relevant code patterns
|
| 28 |
+
- **Design Explorer** - Generates aesthetic specifications
|
| 29 |
+
- **Code Generator** - Synthesizes executable Python code
|
| 30 |
+
- **Debug Agent** - Executes code and fixes errors
|
| 31 |
+
- **Visual Evaluator** - Assesses quality and triggers refinement
|
| 32 |
+
|
| 33 |
+
## Quick Start
|
| 34 |
+
|
| 35 |
+
### Installation
|
| 36 |
+
|
| 37 |
+
```bash
|
| 38 |
+
# Clone the repository
|
| 39 |
+
git clone https://github.com/yourusername/CoDA.git
|
| 40 |
+
cd CoDA
|
| 41 |
+
|
| 42 |
+
# Install dependencies
|
| 43 |
+
pip install -r requirements.txt
|
| 44 |
+
|
| 45 |
+
# Configure API key
|
| 46 |
+
cp .env.example .env
|
| 47 |
+
# Edit .env and add your GROQ_API_KEY
|
| 48 |
+
```
|
| 49 |
+
|
| 50 |
+
### Usage
|
| 51 |
+
|
| 52 |
+
#### Web Interface (Gradio)
|
| 53 |
+
|
| 54 |
+
```bash
|
| 55 |
+
python app.py
|
| 56 |
+
```
|
| 57 |
+
|
| 58 |
+
Open http://localhost:7860 in your browser.
|
| 59 |
+
|
| 60 |
+
#### Command Line
|
| 61 |
+
|
| 62 |
+
```bash
|
| 63 |
+
python main.py --query "Create a bar chart of sales by category" --data sales.csv
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
Options:
|
| 67 |
+
- `-q, --query`: Visualization query (required)
|
| 68 |
+
- `-d, --data`: Data file path(s) (required)
|
| 69 |
+
- `-o, --output`: Output directory (default: outputs)
|
| 70 |
+
- `--max-iterations`: Refinement iterations (default: 3)
|
| 71 |
+
- `--min-score`: Quality threshold (default: 7.0)
|
| 72 |
+
|
| 73 |
+
### Python API
|
| 74 |
+
|
| 75 |
+
```python
|
| 76 |
+
from coda.orchestrator import CodaOrchestrator
|
| 77 |
+
|
| 78 |
+
orchestrator = CodaOrchestrator()
|
| 79 |
+
result = orchestrator.run(
|
| 80 |
+
query="Show sales trends over time",
|
| 81 |
+
data_paths=["sales_data.csv"]
|
| 82 |
+
)
|
| 83 |
+
|
| 84 |
+
if result.success:
|
| 85 |
+
print(f"Visualization saved to: {result.output_file}")
|
| 86 |
+
print(f"Quality Score: {result.scores['overall']}/10")
|
| 87 |
+
```
|
| 88 |
+
|
| 89 |
+
## Hugging Face Spaces Deployment
|
| 90 |
+
|
| 91 |
+
1. Create a new Space on [Hugging Face](https://huggingface.co/new-space)
|
| 92 |
+
2. Select "Gradio" as the SDK
|
| 93 |
+
3. Upload all files from this repository
|
| 94 |
+
4. Add `GROQ_API_KEY` as a Secret in Space Settings
|
| 95 |
+
5. The Space will automatically build and deploy
|
| 96 |
+
|
| 97 |
+
## Architecture
|
| 98 |
+
|
| 99 |
+
```
|
| 100 |
+
Natural Language Query + Data Files
|
| 101 |
+
│
|
| 102 |
+
▼
|
| 103 |
+
┌───────────────┐
|
| 104 |
+
│ Query Analyzer │ ─── Extracts intent, TODO list
|
| 105 |
+
└───────────────┘
|
| 106 |
+
│
|
| 107 |
+
▼
|
| 108 |
+
┌───────────────┐
|
| 109 |
+
│ Data Processor │ ─── Metadata extraction (no full load)
|
| 110 |
+
└───────────────┘
|
| 111 |
+
│
|
| 112 |
+
▼
|
| 113 |
+
┌───────────────┐
|
| 114 |
+
│ VizMapping │ ─── Chart type, encodings
|
| 115 |
+
└───────────────┘
|
| 116 |
+
│
|
| 117 |
+
▼
|
| 118 |
+
┌───────────────┐
|
| 119 |
+
│ Search Agent │ ─── Code examples
|
| 120 |
+
└───────────────┘
|
| 121 |
+
│
|
| 122 |
+
▼
|
| 123 |
+
┌───────────────┐
|
| 124 |
+
│Design Explorer│ ─── Colors, layout, styling
|
| 125 |
+
└───────────────┘
|
| 126 |
+
│
|
| 127 |
+
▼
|
| 128 |
+
┌───────────────┐
|
| 129 |
+
│Code Generator │ ─── Python visualization code
|
| 130 |
+
└───────────────┘
|
| 131 |
+
│
|
| 132 |
+
▼
|
| 133 |
+
┌───────────────┐
|
| 134 |
+
│ Debug Agent │ ─── Execute & fix errors
|
| 135 |
+
└───────────────┘
|
| 136 |
+
│
|
| 137 |
+
▼
|
| 138 |
+
┌───────────────┐
|
| 139 |
+
│Visual Evaluator│ ─── Quality assessment
|
| 140 |
+
└───────────────┘
|
| 141 |
+
│
|
| 142 |
+
───────┴───────
|
| 143 |
+
↓ Feedback Loop ↓
|
| 144 |
+
(if quality < threshold)
|
| 145 |
+
```
|
| 146 |
+
|
| 147 |
+
## Configuration
|
| 148 |
+
|
| 149 |
+
Environment variables (in `.env`):
|
| 150 |
+
|
| 151 |
+
| Variable | Default | Description |
|
| 152 |
+
|----------|---------|-------------|
|
| 153 |
+
| `GROQ_API_KEY` | Required | Your Groq API key |
|
| 154 |
+
| `CODA_DEFAULT_MODEL` | llama-3.3-70b-versatile | Text model |
|
| 155 |
+
| `CODA_VISION_MODEL` | llama-3.2-90b-vision-preview | Vision model |
|
| 156 |
+
| `CODA_MIN_OVERALL_SCORE` | 7.0 | Quality threshold |
|
| 157 |
+
| `CODA_MAX_ITERATIONS` | 3 | Max refinement loops |
|
| 158 |
+
|
| 159 |
+
## Supported Data Formats
|
| 160 |
+
|
| 161 |
+
- CSV (`.csv`)
|
| 162 |
+
- JSON (`.json`)
|
| 163 |
+
- Excel (`.xlsx`, `.xls`)
|
| 164 |
+
- Parquet (`.parquet`)
|
| 165 |
+
|
| 166 |
+
## Requirements
|
| 167 |
+
|
| 168 |
+
- Python 3.10+
|
| 169 |
+
- Groq API key ([Get one free](https://console.groq.com))
|
| 170 |
+
|
| 171 |
+
## License
|
| 172 |
+
|
| 173 |
+
MIT License - See LICENSE for details.
|
| 174 |
+
|
| 175 |
+
## Citation
|
| 176 |
+
|
| 177 |
+
If you use CoDA in your research, please cite:
|
| 178 |
+
|
| 179 |
+
```bibtex
|
| 180 |
+
@article{chen2025coda,
|
| 181 |
+
title={CoDA: Agentic Systems for Collaborative Data Visualization},
|
| 182 |
+
author={Chen, Zichen and Chen, Jiefeng and Arik, Sercan {\"O}. and Sra, Misha and Pfister, Tomas and Yoon, Jinsung},
|
| 183 |
+
journal={arXiv preprint arXiv:2510.03194},
|
| 184 |
+
year={2025},
|
| 185 |
+
url={https://arxiv.org/abs/2510.03194},
|
| 186 |
+
doi={10.48550/arXiv.2510.03194}
|
| 187 |
+
}
|
| 188 |
+
```
|
| 189 |
+
|
| 190 |
+
**Paper**: [arXiv:2510.03194](https://arxiv.org/abs/2510.03194)
|
app.py
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Gradio Web Interface for CoDA.
|
| 3 |
+
|
| 4 |
+
Provides a user-friendly web UI for the CoDA visualization system,
|
| 5 |
+
designed for deployment on Hugging Face Spaces.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import logging
|
| 9 |
+
import os
|
| 10 |
+
import tempfile
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
from typing import Optional
|
| 13 |
+
|
| 14 |
+
import gradio as gr
|
| 15 |
+
|
| 16 |
+
logging.basicConfig(
|
| 17 |
+
level=logging.INFO,
|
| 18 |
+
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
| 19 |
+
)
|
| 20 |
+
logger = logging.getLogger(__name__)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def create_coda_interface():
|
| 24 |
+
"""Create the Gradio interface for CoDA."""
|
| 25 |
+
|
| 26 |
+
def process_visualization(
|
| 27 |
+
query: str,
|
| 28 |
+
data_file,
|
| 29 |
+
progress=gr.Progress()
|
| 30 |
+
) -> tuple[Optional[str], str, str]:
|
| 31 |
+
"""
|
| 32 |
+
Process a visualization request.
|
| 33 |
+
|
| 34 |
+
Args:
|
| 35 |
+
query: Natural language visualization query
|
| 36 |
+
data_file: Uploaded data file
|
| 37 |
+
progress: Gradio progress tracker
|
| 38 |
+
|
| 39 |
+
Returns:
|
| 40 |
+
Tuple of (image_path, status_message, details)
|
| 41 |
+
"""
|
| 42 |
+
if not query.strip():
|
| 43 |
+
return None, "❌ Error", "Please enter a visualization query."
|
| 44 |
+
|
| 45 |
+
if data_file is None:
|
| 46 |
+
return None, "❌ Error", "Please upload a data file."
|
| 47 |
+
|
| 48 |
+
try:
|
| 49 |
+
from coda.config import Config
|
| 50 |
+
from coda.orchestrator import CodaOrchestrator
|
| 51 |
+
except ImportError as e:
|
| 52 |
+
return None, "❌ Import Error", f"Failed to import CoDA: {e}"
|
| 53 |
+
|
| 54 |
+
groq_api_key = os.getenv("GROQ_API_KEY", "")
|
| 55 |
+
if not groq_api_key:
|
| 56 |
+
return (
|
| 57 |
+
None,
|
| 58 |
+
"❌ Configuration Error",
|
| 59 |
+
"GROQ_API_KEY environment variable is not set. "
|
| 60 |
+
"Please add your API key in the Spaces settings."
|
| 61 |
+
)
|
| 62 |
+
|
| 63 |
+
with tempfile.TemporaryDirectory() as temp_dir:
|
| 64 |
+
data_path = Path(temp_dir) / Path(data_file.name).name
|
| 65 |
+
|
| 66 |
+
with open(data_file.name, 'rb') as src:
|
| 67 |
+
with open(data_path, 'wb') as dst:
|
| 68 |
+
dst.write(src.read())
|
| 69 |
+
|
| 70 |
+
def update_progress(status: str, pct: float):
|
| 71 |
+
progress(pct, desc=status)
|
| 72 |
+
|
| 73 |
+
try:
|
| 74 |
+
config = Config(
|
| 75 |
+
groq_api_key=groq_api_key,
|
| 76 |
+
)
|
| 77 |
+
|
| 78 |
+
orchestrator = CodaOrchestrator(
|
| 79 |
+
config=config,
|
| 80 |
+
progress_callback=update_progress,
|
| 81 |
+
)
|
| 82 |
+
|
| 83 |
+
result = orchestrator.run(
|
| 84 |
+
query=query,
|
| 85 |
+
data_paths=[str(data_path)],
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
+
if result.success and result.output_file:
|
| 89 |
+
scores = result.scores or {}
|
| 90 |
+
details = format_results(result, scores)
|
| 91 |
+
return result.output_file, "✅ Success", details
|
| 92 |
+
else:
|
| 93 |
+
error_msg = result.error or "Unknown error occurred"
|
| 94 |
+
return None, "❌ Failed", f"Visualization failed: {error_msg}"
|
| 95 |
+
|
| 96 |
+
except Exception as e:
|
| 97 |
+
logger.exception("Pipeline error")
|
| 98 |
+
return None, "❌ Error", f"An error occurred: {str(e)}"
|
| 99 |
+
|
| 100 |
+
def format_results(result, scores: dict) -> str:
|
| 101 |
+
"""Format the results for display."""
|
| 102 |
+
lines = [
|
| 103 |
+
f"**Iterations:** {result.iterations}",
|
| 104 |
+
"",
|
| 105 |
+
"### Quality Scores",
|
| 106 |
+
]
|
| 107 |
+
|
| 108 |
+
if scores:
|
| 109 |
+
for key, value in scores.items():
|
| 110 |
+
emoji = "🟢" if value >= 7 else "🟡" if value >= 5 else "🔴"
|
| 111 |
+
lines.append(f"- {key.title()}: {emoji} {value:.1f}/10")
|
| 112 |
+
|
| 113 |
+
if result.evaluation:
|
| 114 |
+
if result.evaluation.strengths:
|
| 115 |
+
lines.extend(["", "### Strengths"])
|
| 116 |
+
for s in result.evaluation.strengths[:3]:
|
| 117 |
+
lines.append(f"- {s}")
|
| 118 |
+
|
| 119 |
+
if result.evaluation.recommendations:
|
| 120 |
+
lines.extend(["", "### Recommendations"])
|
| 121 |
+
for r in result.evaluation.recommendations[:3]:
|
| 122 |
+
lines.append(f"- {r}")
|
| 123 |
+
|
| 124 |
+
return "\n".join(lines)
|
| 125 |
+
|
| 126 |
+
with gr.Blocks(
|
| 127 |
+
title="CoDA - Collaborative Data Visualization",
|
| 128 |
+
theme=gr.themes.Soft(),
|
| 129 |
+
css="""
|
| 130 |
+
.main-title {
|
| 131 |
+
text-align: center;
|
| 132 |
+
margin-bottom: 1rem;
|
| 133 |
+
}
|
| 134 |
+
.status-box {
|
| 135 |
+
padding: 1rem;
|
| 136 |
+
border-radius: 8px;
|
| 137 |
+
margin-top: 1rem;
|
| 138 |
+
}
|
| 139 |
+
"""
|
| 140 |
+
) as interface:
|
| 141 |
+
gr.Markdown(
|
| 142 |
+
"""
|
| 143 |
+
# 🎨 CoDA: Collaborative Data Visualization Agents
|
| 144 |
+
|
| 145 |
+
Transform your data into beautiful visualizations using natural language.
|
| 146 |
+
Simply upload your data and describe what you want to see!
|
| 147 |
+
""",
|
| 148 |
+
elem_classes=["main-title"]
|
| 149 |
+
)
|
| 150 |
+
|
| 151 |
+
with gr.Row():
|
| 152 |
+
with gr.Column(scale=1):
|
| 153 |
+
query_input = gr.Textbox(
|
| 154 |
+
label="Visualization Query",
|
| 155 |
+
placeholder="e.g., 'Create a line chart showing sales trends over time'",
|
| 156 |
+
lines=3,
|
| 157 |
+
)
|
| 158 |
+
|
| 159 |
+
file_input = gr.File(
|
| 160 |
+
label="Upload Data File",
|
| 161 |
+
file_types=[".csv", ".json", ".xlsx", ".xls", ".parquet"],
|
| 162 |
+
)
|
| 163 |
+
|
| 164 |
+
submit_btn = gr.Button(
|
| 165 |
+
"🚀 Generate Visualization",
|
| 166 |
+
variant="primary",
|
| 167 |
+
size="lg",
|
| 168 |
+
)
|
| 169 |
+
|
| 170 |
+
gr.Markdown(
|
| 171 |
+
"""
|
| 172 |
+
### Supported Formats
|
| 173 |
+
- CSV, JSON, Excel (.xlsx, .xls), Parquet
|
| 174 |
+
|
| 175 |
+
### Example Queries
|
| 176 |
+
- "Show me a bar chart of sales by category"
|
| 177 |
+
- "Create a scatter plot of price vs quantity"
|
| 178 |
+
- "Plot the distribution of ages as a histogram"
|
| 179 |
+
"""
|
| 180 |
+
)
|
| 181 |
+
|
| 182 |
+
with gr.Column(scale=2):
|
| 183 |
+
output_image = gr.Image(
|
| 184 |
+
label="Generated Visualization",
|
| 185 |
+
type="filepath",
|
| 186 |
+
)
|
| 187 |
+
|
| 188 |
+
with gr.Row():
|
| 189 |
+
status_output = gr.Textbox(
|
| 190 |
+
label="Status",
|
| 191 |
+
interactive=False,
|
| 192 |
+
)
|
| 193 |
+
|
| 194 |
+
details_output = gr.Markdown(
|
| 195 |
+
label="Details",
|
| 196 |
+
)
|
| 197 |
+
|
| 198 |
+
gr.Examples(
|
| 199 |
+
examples=[
|
| 200 |
+
["Create a bar chart showing the top 10 values", None],
|
| 201 |
+
["Plot a line chart of trends over time", None],
|
| 202 |
+
["Show a scatter plot with correlation", None],
|
| 203 |
+
["Create a pie chart of category distribution", None],
|
| 204 |
+
],
|
| 205 |
+
inputs=[query_input, file_input],
|
| 206 |
+
)
|
| 207 |
+
|
| 208 |
+
submit_btn.click(
|
| 209 |
+
fn=process_visualization,
|
| 210 |
+
inputs=[query_input, file_input],
|
| 211 |
+
outputs=[output_image, status_output, details_output],
|
| 212 |
+
)
|
| 213 |
+
|
| 214 |
+
return interface
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
app = create_coda_interface()
|
| 218 |
+
|
| 219 |
+
if __name__ == "__main__":
|
| 220 |
+
app.launch()
|
coda/__init__.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
CoDA - Collaborative Data Visualization Agents
|
| 3 |
+
|
| 4 |
+
A multi-agent system for automated data visualization from natural language queries.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from coda.config import Config, get_config
|
| 8 |
+
from coda.orchestrator import CodaOrchestrator, PipelineResult
|
| 9 |
+
from coda.core import (
|
| 10 |
+
LLMProvider,
|
| 11 |
+
GroqLLM,
|
| 12 |
+
SharedMemory,
|
| 13 |
+
BaseAgent,
|
| 14 |
+
AgentContext,
|
| 15 |
+
AgentFactory,
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
__version__ = "1.0.0"
|
| 19 |
+
|
| 20 |
+
__all__ = [
|
| 21 |
+
"Config",
|
| 22 |
+
"get_config",
|
| 23 |
+
"CodaOrchestrator",
|
| 24 |
+
"PipelineResult",
|
| 25 |
+
"LLMProvider",
|
| 26 |
+
"GroqLLM",
|
| 27 |
+
"SharedMemory",
|
| 28 |
+
"BaseAgent",
|
| 29 |
+
"AgentContext",
|
| 30 |
+
"AgentFactory",
|
| 31 |
+
]
|
coda/agents/__init__.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Agent implementations for CoDA visualization pipeline."""
|
| 2 |
+
|
| 3 |
+
from coda.agents.query_analyzer import QueryAnalyzerAgent, QueryAnalysis
|
| 4 |
+
from coda.agents.data_processor import DataProcessorAgent, DataAnalysis
|
| 5 |
+
from coda.agents.viz_mapping import VizMappingAgent, VisualMapping
|
| 6 |
+
from coda.agents.search_agent import SearchAgent, SearchResult
|
| 7 |
+
from coda.agents.design_explorer import DesignExplorerAgent, DesignSpec
|
| 8 |
+
from coda.agents.code_generator import CodeGeneratorAgent, GeneratedCode
|
| 9 |
+
from coda.agents.debug_agent import DebugAgent, ExecutionResult
|
| 10 |
+
from coda.agents.visual_evaluator import VisualEvaluatorAgent, VisualEvaluation
|
| 11 |
+
|
| 12 |
+
__all__ = [
|
| 13 |
+
"QueryAnalyzerAgent",
|
| 14 |
+
"QueryAnalysis",
|
| 15 |
+
"DataProcessorAgent",
|
| 16 |
+
"DataAnalysis",
|
| 17 |
+
"VizMappingAgent",
|
| 18 |
+
"VisualMapping",
|
| 19 |
+
"SearchAgent",
|
| 20 |
+
"SearchResult",
|
| 21 |
+
"DesignExplorerAgent",
|
| 22 |
+
"DesignSpec",
|
| 23 |
+
"CodeGeneratorAgent",
|
| 24 |
+
"GeneratedCode",
|
| 25 |
+
"DebugAgent",
|
| 26 |
+
"ExecutionResult",
|
| 27 |
+
"VisualEvaluatorAgent",
|
| 28 |
+
"VisualEvaluation",
|
| 29 |
+
]
|
coda/agents/code_generator.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Code Generator Agent for CoDA.
|
| 3 |
+
|
| 4 |
+
Synthesizes executable Python visualization code by integrating
|
| 5 |
+
specifications from upstream agents.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from typing import Optional
|
| 9 |
+
|
| 10 |
+
from pydantic import BaseModel, Field
|
| 11 |
+
|
| 12 |
+
from coda.core.base_agent import AgentContext, BaseAgent
|
| 13 |
+
from coda.core.llm import LLMProvider
|
| 14 |
+
from coda.core.memory import SharedMemory
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class GeneratedCode(BaseModel):
|
| 18 |
+
"""Structured output from the Code Generator."""
|
| 19 |
+
|
| 20 |
+
code: str = Field(default="", description="The generated Python code")
|
| 21 |
+
dependencies: list[str] = Field(
|
| 22 |
+
default_factory=lambda: ["matplotlib", "pandas"],
|
| 23 |
+
description="Required Python packages"
|
| 24 |
+
)
|
| 25 |
+
output_filename: str = Field(
|
| 26 |
+
default="output.png",
|
| 27 |
+
description="Name of the output visualization file"
|
| 28 |
+
)
|
| 29 |
+
documentation: str = Field(
|
| 30 |
+
default="Generated visualization code",
|
| 31 |
+
description="Brief documentation of the code"
|
| 32 |
+
)
|
| 33 |
+
quality_score: float = Field(
|
| 34 |
+
default=5.0,
|
| 35 |
+
description="Self-assessed code quality (0-10)"
|
| 36 |
+
)
|
| 37 |
+
potential_issues: list[str] = Field(
|
| 38 |
+
default_factory=list,
|
| 39 |
+
description="Potential issues or edge cases"
|
| 40 |
+
)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class CodeGeneratorAgent(BaseAgent[GeneratedCode]):
|
| 44 |
+
"""
|
| 45 |
+
Generates executable Python visualization code.
|
| 46 |
+
|
| 47 |
+
Integrates all upstream specifications (data processing, visual mapping,
|
| 48 |
+
design specs) into working code.
|
| 49 |
+
"""
|
| 50 |
+
|
| 51 |
+
MEMORY_KEY = "generated_code"
|
| 52 |
+
|
| 53 |
+
def __init__(
|
| 54 |
+
self,
|
| 55 |
+
llm: LLMProvider,
|
| 56 |
+
memory: SharedMemory,
|
| 57 |
+
name: Optional[str] = None,
|
| 58 |
+
) -> None:
|
| 59 |
+
super().__init__(llm, memory, name or "CodeGenerator")
|
| 60 |
+
|
| 61 |
+
def _get_system_prompt(self) -> str:
|
| 62 |
+
return """You are an expert Python Developer specializing in data visualization.
|
| 63 |
+
|
| 64 |
+
Your expertise is in writing clean, efficient, and well-documented Python code for data visualization using matplotlib, seaborn, and pandas.
|
| 65 |
+
|
| 66 |
+
Your responsibilities:
|
| 67 |
+
1. Generate complete, executable Python code
|
| 68 |
+
2. Integrate all specifications from the design and mapping agents
|
| 69 |
+
3. Handle data loading and transformation correctly
|
| 70 |
+
4. Apply proper styling and formatting
|
| 71 |
+
5. Include error handling for robustness
|
| 72 |
+
6. Write clear documentation
|
| 73 |
+
|
| 74 |
+
Code requirements:
|
| 75 |
+
- Use matplotlib and seaborn as primary libraries
|
| 76 |
+
- Include all necessary imports at the top
|
| 77 |
+
- Load data from the specified file paths
|
| 78 |
+
- Apply all transformations before plotting
|
| 79 |
+
- Set figure size, colors, and labels as specified
|
| 80 |
+
- Save the output to a file (PNG format)
|
| 81 |
+
- Use descriptive variable names
|
| 82 |
+
- Add comments for complex operations
|
| 83 |
+
|
| 84 |
+
IMPORTANT styling rules:
|
| 85 |
+
- For seaborn barplots, ALWAYS use hue parameter: sns.barplot(..., hue='category_column', legend=False)
|
| 86 |
+
- Use ONLY these reliable palettes: 'viridis', 'plasma', 'inferno', 'magma', 'cividis', 'Deep', 'Muted', 'Pastel'
|
| 87 |
+
- DO NOT use complex or custom named palettes like 'tableau10' or 'husl' unless you are sure.
|
| 88 |
+
- When in doubt, omit the palette argument or use 'viridis'.
|
| 89 |
+
- Always use plt.tight_layout() before saving.
|
| 90 |
+
|
| 91 |
+
Always respond with a valid JSON object containing the code and metadata."""
|
| 92 |
+
|
| 93 |
+
def _build_prompt(self, context: AgentContext) -> str:
|
| 94 |
+
data_analysis = self._get_from_memory("data_analysis") or {}
|
| 95 |
+
visual_mapping = self._get_from_memory("visual_mapping") or {}
|
| 96 |
+
design_spec = self._get_from_memory("design_spec") or {}
|
| 97 |
+
search_results = self._get_from_memory("search_results") or {}
|
| 98 |
+
|
| 99 |
+
file_info = data_analysis.get("files", [])
|
| 100 |
+
data_paths = [f.get("file_path", "") for f in file_info] if file_info else context.data_paths
|
| 101 |
+
|
| 102 |
+
code_examples = search_results.get("examples", [])
|
| 103 |
+
examples_section = ""
|
| 104 |
+
if code_examples:
|
| 105 |
+
examples_section = "\nReference Code Examples:\n"
|
| 106 |
+
for ex in code_examples[:2]:
|
| 107 |
+
if isinstance(ex, dict):
|
| 108 |
+
examples_section += f"```python\n# {ex.get('title', 'Example')}\n{ex.get('code', '')}\n```\n"
|
| 109 |
+
|
| 110 |
+
feedback_section = ""
|
| 111 |
+
if context.feedback:
|
| 112 |
+
feedback_section = f"""
|
| 113 |
+
Code Feedback (iteration {context.iteration}):
|
| 114 |
+
{context.feedback}
|
| 115 |
+
|
| 116 |
+
Fix the issues mentioned in the feedback.
|
| 117 |
+
"""
|
| 118 |
+
|
| 119 |
+
return f"""Generate Python visualization code based on the following specifications.
|
| 120 |
+
|
| 121 |
+
User Query: {context.query}
|
| 122 |
+
|
| 123 |
+
Data Files: {data_paths}
|
| 124 |
+
|
| 125 |
+
Visual Mapping:
|
| 126 |
+
- Chart Type: {visual_mapping.get('chart_type', 'line')}
|
| 127 |
+
- X-Axis: {visual_mapping.get('x_axis') or 'Not specified (infer from data or chart type)'}
|
| 128 |
+
- Y-Axis: {visual_mapping.get('y_axis') or 'Not specified (infer from data or chart type)'}
|
| 129 |
+
- Color Encoding: {visual_mapping.get('color_encoding')}
|
| 130 |
+
- Transformations: {visual_mapping.get('transformations', [])}
|
| 131 |
+
|
| 132 |
+
Design Specification:
|
| 133 |
+
- Colors: {design_spec.get('color_scheme', {})}
|
| 134 |
+
- Layout: {design_spec.get('layout', {})}
|
| 135 |
+
- Typography: {design_spec.get('typography', {})}
|
| 136 |
+
- Annotations: {design_spec.get('annotations', [])}
|
| 137 |
+
- Guidelines: {design_spec.get('implementation_guidelines', [])}
|
| 138 |
+
{examples_section}{feedback_section}
|
| 139 |
+
|
| 140 |
+
Generate a complete Python script that:
|
| 141 |
+
1. Imports all necessary libraries
|
| 142 |
+
2. Loads the data file(s)
|
| 143 |
+
3. Applies required transformations
|
| 144 |
+
4. Creates the visualization with specified styling
|
| 145 |
+
5. Saves to 'output.png'
|
| 146 |
+
|
| 147 |
+
Respond with a JSON object:
|
| 148 |
+
- code: Complete Python code as a string
|
| 149 |
+
- dependencies: List of required packages
|
| 150 |
+
- output_filename: Output file name
|
| 151 |
+
- documentation: Brief description
|
| 152 |
+
- quality_score: Self-assessment 0-10
|
| 153 |
+
- potential_issues: List of potential issues
|
| 154 |
+
|
| 155 |
+
JSON Response:"""
|
| 156 |
+
|
| 157 |
+
def _parse_response(self, response: str) -> GeneratedCode:
|
| 158 |
+
data = self._extract_json(response)
|
| 159 |
+
return GeneratedCode(**data)
|
| 160 |
+
|
| 161 |
+
def _get_output_key(self) -> str:
|
| 162 |
+
return self.MEMORY_KEY
|
coda/agents/data_processor.py
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Data Processor Agent for CoDA.
|
| 3 |
+
|
| 4 |
+
Extracts metadata and insights from data files without loading full datasets,
|
| 5 |
+
enabling the system to work within token limits while providing rich context
|
| 6 |
+
for visualization decisions.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import logging
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from typing import Any, Optional
|
| 12 |
+
|
| 13 |
+
import pandas as pd
|
| 14 |
+
from pydantic import BaseModel, Field
|
| 15 |
+
|
| 16 |
+
from coda.core.base_agent import AgentContext, BaseAgent
|
| 17 |
+
from coda.core.llm import LLMProvider
|
| 18 |
+
from coda.core.memory import SharedMemory
|
| 19 |
+
|
| 20 |
+
logger = logging.getLogger(__name__)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class ColumnInfo(BaseModel):
    """Summary statistics and example values for one dataframe column.

    Built by ``DataProcessorAgent._analyze_columns`` and serialized into
    shared memory so downstream agents can reason about the schema without
    seeing the full dataset.
    """

    name: str  # column name, stringified
    dtype: str  # pandas dtype rendered as a string, e.g. "int64", "object"
    non_null_count: int  # number of non-null values (pandas Series.count())
    unique_count: int  # number of distinct non-null values (Series.nunique())
    sample_values: list[Any]  # up to five non-null example values from the head
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class DataFileInfo(BaseModel):
    """Metadata about a single data file.

    Produced by ``DataProcessorAgent._extract_file_metadata`` after loading
    the file with pandas; one instance per successfully processed file.
    """

    file_path: str  # path as given (stringified Path)
    file_type: str  # lowercased file extension, e.g. ".csv"
    shape: tuple[int, int]  # (row count, column count)
    columns: list[ColumnInfo]  # per-column metadata
    memory_usage_mb: float  # deep pandas memory usage of the loaded frame, in MiB
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class DataAnalysis(BaseModel):
    """Structured output from the Data Processor.

    ``files`` is filled locally from pandas metadata extraction; the other
    list fields come from the LLM response and are normalized to plain
    strings in ``DataProcessorAgent._parse_response``.
    """

    files: list[DataFileInfo] = Field(
        description="Metadata for each processed data file"
    )
    insights: list[str] = Field(
        description="Key insights about the data (patterns, outliers, etc.)"
    )
    processing_steps: list[str] = Field(
        description="Recommended data processing steps"
    )
    aggregations_needed: list[str] = Field(
        default_factory=list,
        description="Suggested aggregations for visualization"
    )
    visualization_hints: list[str] = Field(
        default_factory=list,
        description="Hints for visualization based on data characteristics"
    )
    potential_issues: list[str] = Field(
        default_factory=list,
        description="Potential data quality issues"
    )
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
class DataProcessorAgent(BaseAgent[DataAnalysis]):
    """
    Processes data files to extract metadata and insights.

    Uses lightweight analysis to avoid token limits while providing
    comprehensive data understanding for downstream agents.

    Flow: ``execute`` loads each file with pandas, stores per-column
    metadata in shared memory under ``"raw_file_info"``, then delegates to
    the base class, whose prompt (built here) sends only the metadata and a
    few sample values — not the raw rows — to the LLM.
    """

    # Shared-memory key under which the final DataAnalysis is stored.
    MEMORY_KEY = "data_analysis"
    # File extensions this agent knows how to load via pandas.
    SUPPORTED_EXTENSIONS = {".csv", ".json", ".xlsx", ".xls", ".parquet"}

    def __init__(
        self,
        llm: LLMProvider,
        memory: SharedMemory,
        name: Optional[str] = None,
    ) -> None:
        """Create the agent; the display name defaults to "DataProcessor"."""
        super().__init__(llm, memory, name or "DataProcessor")

    def execute(self, context: AgentContext) -> DataAnalysis:
        """Override to include data extraction before LLM analysis."""
        logger.info(f"[{self._name}] Processing {len(context.data_paths)} data files")

        # Extract metadata locally first; unreadable or unsupported files
        # return None and are skipped rather than aborting the run.
        file_infos = []
        for path in context.data_paths:
            info = self._extract_file_metadata(path)
            if info:
                file_infos.append(info)

        # Persist raw metadata so _build_prompt/_parse_response (and any
        # other agent) can read it back from shared memory.
        self._memory.store(
            key="raw_file_info",
            value=[f.model_dump() for f in file_infos],
            agent_name=self._name,
        )

        # Base class performs the prompt -> LLM -> parse -> store cycle.
        return super().execute(context)

    def _extract_file_metadata(self, file_path: str) -> Optional[DataFileInfo]:
        """Extract metadata from a data file using pandas.

        Returns None (with a log message) for missing files, unsupported
        extensions, or any load/analysis failure.

        NOTE(review): the entire file is read into memory here; "lightweight"
        refers to what is sent to the LLM, not to file loading.
        """
        path = Path(file_path)

        if not path.exists():
            logger.warning(f"File not found: {path}")
            return None

        if path.suffix.lower() not in self.SUPPORTED_EXTENSIONS:
            logger.warning(f"Unsupported file type: {path.suffix}")
            return None

        try:
            df = self._load_dataframe(path)
            columns = self._analyze_columns(df)

            return DataFileInfo(
                file_path=str(path),
                file_type=path.suffix.lower(),
                shape=(len(df), len(df.columns)),
                columns=columns,
                # deep=True accounts for object (string) columns accurately.
                memory_usage_mb=df.memory_usage(deep=True).sum() / (1024 * 1024),
            )
        except Exception as e:
            # Broad catch is deliberate: one bad file must not kill the run.
            logger.error(f"Failed to process {path}: {e}")
            return None

    def _load_dataframe(self, path: Path) -> pd.DataFrame:
        """Load a dataframe from various file formats.

        Dispatches on the lowercased extension; raises ValueError for
        anything outside SUPPORTED_EXTENSIONS (caught by the caller).
        """
        suffix = path.suffix.lower()

        if suffix == ".csv":
            return pd.read_csv(path)
        elif suffix == ".json":
            return pd.read_json(path)
        elif suffix in {".xlsx", ".xls"}:
            return pd.read_excel(path)
        elif suffix == ".parquet":
            return pd.read_parquet(path)
        else:
            raise ValueError(f"Unsupported format: {suffix}")

    def _analyze_columns(self, df: pd.DataFrame) -> list[ColumnInfo]:
        """Analyze each column in the dataframe.

        Collects dtype, null/unique counts, and up to five non-null sample
        values per column.
        """
        columns = []

        for col in df.columns:
            series = df[col]
            sample_values = series.dropna().head(5).tolist()

            columns.append(ColumnInfo(
                name=str(col),
                dtype=str(series.dtype),
                non_null_count=int(series.count()),
                unique_count=int(series.nunique()),
                sample_values=sample_values,
            ))

        return columns

    def _get_system_prompt(self) -> str:
        """Return the role/system prompt for the data-analysis LLM call."""
        return """You are a Data Analyst specialist in a data visualization team.

Your expertise is in understanding data structures, identifying patterns, and recommending processing steps for effective visualization.

Your responsibilities:
1. Analyze metadata to understand data characteristics
2. Identify insights and patterns relevant to visualization
3. Recommend data processing and aggregation steps
4. Suggest visualization approaches based on data types
5. Flag potential data quality issues

Always respond with a valid JSON object matching the required schema."""

    def _build_prompt(self, context: AgentContext) -> str:
        """Build the user prompt from stored file metadata and any prior
        query analysis; requests flat string lists to ease parsing."""
        file_info = self._get_from_memory("raw_file_info") or []
        query_analysis = self._get_from_memory("query_analysis") or {}

        file_summary = self._format_file_info(file_info)

        # Include the QueryAnalyzer's output when available so the data
        # analysis is steered toward the requested visualization.
        query_context = ""
        if query_analysis:
            query_context = f"""
Query Analysis:
- Visualization Types: {query_analysis.get('visualization_types', [])}
- Key Points: {query_analysis.get('key_points', [])}
- Data Requirements: {query_analysis.get('data_requirements', [])}
"""

        return f"""Analyze the following data files for visualization purposes.

User Query: {context.query}
{query_context}

Data Files:
{file_summary}

Based on this metadata, provide a JSON object with these fields.
IMPORTANT: All list fields must contain SIMPLE STRINGS, not objects.

{{
    "insights": ["string1", "string2", ...], // Simple string descriptions of patterns
    "processing_steps": ["step1", "step2", ...], // Simple string descriptions of steps
    "aggregations_needed": ["agg1", "agg2", ...], // Simple string descriptions
    "visualization_hints": ["hint1", "hint2", ...], // Simple string hints
    "potential_issues": ["issue1", "issue2", ...] // Simple string issues
}}

JSON Response:"""

    def _format_file_info(self, file_info: list[dict]) -> str:
        """Format file information for the prompt.

        Renders each file's shape and a compact per-column line with at most
        three sample values to keep the prompt small.
        """
        if not file_info:
            return "No data files available."

        lines = []
        for f in file_info:
            lines.append(f"\nFile: {f['file_path']}")
            lines.append(f"  Type: {f['file_type']}")
            lines.append(f"  Shape: {f['shape'][0]} rows × {f['shape'][1]} columns")
            lines.append("  Columns:")

            for col in f.get("columns", []):
                samples = ", ".join(str(v) for v in col.get("sample_values", [])[:3])
                lines.append(
                    f"    - {col['name']} ({col['dtype']}): "
                    f"{col['unique_count']} unique, samples: [{samples}]"
                )

        return "\n".join(lines)

    def _normalize_list_field(self, value: Any) -> list[str]:
        """Normalize a field that should be a list of strings.

        Defensive against LLMs returning dicts or lists of objects instead
        of plain strings. None -> [], dict -> "key: value" lines, list items
        are flattened to strings, anything else is stringified and wrapped.
        """
        if value is None:
            return []

        if isinstance(value, dict):
            return [f"{k}: {v}" for k, v in value.items()]

        if isinstance(value, list):
            result = []
            for item in value:
                if isinstance(item, str):
                    result.append(item)
                elif isinstance(item, dict):
                    # Prefer a known descriptive key if the model emitted an
                    # object; fall back to str(item) via for/else otherwise.
                    desc_keys = ["description", "desc", "text", "value", "step", "hint", "issue"]
                    for key in desc_keys:
                        if key in item:
                            result.append(str(item[key]))
                            break
                    else:
                        result.append(str(item))
                else:
                    result.append(str(item))
            return result

        return [str(value)]

    def _parse_response(self, response: str) -> DataAnalysis:
        """Parse the LLM JSON into a DataAnalysis, sanitizing list fields.

        ``files`` is injected from locally extracted metadata, never from
        the LLM response.
        """
        data = self._extract_json(response)

        data["insights"] = self._normalize_list_field(data.get("insights"))
        data["processing_steps"] = self._normalize_list_field(data.get("processing_steps"))
        data["aggregations_needed"] = self._normalize_list_field(data.get("aggregations_needed"))
        data["visualization_hints"] = self._normalize_list_field(data.get("visualization_hints"))
        data["potential_issues"] = self._normalize_list_field(data.get("potential_issues"))

        file_info = self._get_from_memory("raw_file_info") or []
        data["files"] = file_info

        return DataAnalysis(**data)

    def _get_output_key(self) -> str:
        """Shared-memory key for this agent's result."""
        return self.MEMORY_KEY
|
| 280 |
+
|
coda/agents/debug_agent.py
ADDED
|
@@ -0,0 +1,252 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Debug Agent for CoDA.
|
| 3 |
+
|
| 4 |
+
Executes generated code, diagnoses errors, and applies fixes
|
| 5 |
+
to produce working visualizations.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import logging
|
| 9 |
+
import os
|
| 10 |
+
import subprocess
|
| 11 |
+
import sys
|
| 12 |
+
import tempfile
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
from typing import Optional
|
| 15 |
+
|
| 16 |
+
from pydantic import BaseModel, Field
|
| 17 |
+
|
| 18 |
+
from coda.core.base_agent import AgentContext, BaseAgent
|
| 19 |
+
from coda.core.llm import LLMProvider
|
| 20 |
+
from coda.core.memory import SharedMemory
|
| 21 |
+
|
| 22 |
+
logger = logging.getLogger(__name__)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class ExecutionResult(BaseModel):
    """Structured output from the Debug Agent.

    Captures the outcome of running generated visualization code in a
    subprocess, plus any diagnosis/fix produced when the first run failed.
    """

    success: bool = Field(
        description="Whether execution succeeded"
    )
    output_file: Optional[str] = Field(
        default=None,
        description="Path to the generated visualization"
    )
    stdout: str = Field(
        default="",
        description="Standard output from execution"
    )
    stderr: str = Field(
        default="",
        description="Error output from execution"
    )
    error_diagnosis: Optional[str] = Field(
        default=None,
        description="Diagnosis of any errors"
    )
    corrected_code: Optional[str] = Field(
        default=None,
        description="Fixed code if errors occurred"
    )
    fix_applied: bool = Field(
        default=False,
        description="Whether a fix was applied"
    )
    execution_time_seconds: float = Field(
        default=0.0,
        description="Time taken to execute"
    )
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
class DebugAgent(BaseAgent[ExecutionResult]):
    """
    Executes generated code and handles errors.

    Runs the visualization code in a subprocess with a timeout, diagnoses
    errors via the LLM, and attempts one automatic fix. Overrides
    ``execute`` entirely, so the base-class prompt/parse hooks are stubs.
    """

    # Shared-memory key under which the ExecutionResult is stored.
    MEMORY_KEY = "execution_result"

    def __init__(
        self,
        llm: LLMProvider,
        memory: SharedMemory,
        timeout_seconds: int = 60,
        output_directory: str = "outputs",
        name: Optional[str] = None,
    ) -> None:
        """Create the agent and ensure the output directory exists.

        Args:
            llm: Provider used for the error-fixing LLM call.
            memory: Shared memory for reading generated code / storing results.
            timeout_seconds: Per-run subprocess timeout.
            output_directory: Where visualization images are written.
            name: Optional display name (defaults to "DebugAgent").
        """
        super().__init__(llm, memory, name or "DebugAgent")
        self._timeout = timeout_seconds
        self._output_dir = Path(output_directory)
        self._output_dir.mkdir(parents=True, exist_ok=True)

    def execute(self, context: AgentContext) -> ExecutionResult:
        """Execute the generated code and handle errors.

        Reads "generated_code" from shared memory, runs it, and on failure
        makes a single LLM-driven fix attempt. The final result (fixed or
        not) is stored in shared memory before returning.
        """
        logger.info(f"[{self._name}] Starting code execution")

        generated_code = self._get_from_memory("generated_code")
        if not generated_code:
            # Nothing to run: upstream CodeGenerator did not produce output.
            return ExecutionResult(
                success=False,
                stderr="No generated code found in memory",
            )

        code = generated_code.get("code", "")
        output_filename = generated_code.get("output_filename", "output.png")

        code = self._prepare_code(code, output_filename)

        result = self._execute_code(code)

        if not result.success and result.stderr:
            logger.warning(f"[{self._name}] Code execution failed: {result.stderr[:500]}")
            logger.info(f"[{self._name}] Attempting to fix errors")
            fixed_result = self._attempt_fix(code, result.stderr, context)
            if fixed_result.success:
                self._store_result(fixed_result)
                logger.info(f"[{self._name}] Fix successful!")
                return fixed_result
            # Fix failed: keep the original failure but carry over the
            # diagnosis/corrected code for reporting.
            logger.warning(f"[{self._name}] Fix attempt failed")
            result.error_diagnosis = fixed_result.error_diagnosis
            result.corrected_code = fixed_result.corrected_code

        self._store_result(result)
        logger.info(f"[{self._name}] Execution complete: success={result.success}")
        return result

    def _prepare_code(self, code: str, output_filename: str) -> str:
        """Prepare code for execution by setting up paths.

        Rewrites bare occurrences of the output filename (in either quote
        style) to the full path inside the output directory, and appends a
        savefig call if the code never saves its figure.
        """
        output_path = self._output_dir / output_filename

        code = code.replace(
            f"'{output_filename}'",
            f"r'{output_path}'"
        )
        code = code.replace(
            f'"{output_filename}"',
            f"r'{output_path}'"
        )

        if "plt.savefig" not in code and "fig.savefig" not in code:
            code += f"\nplt.savefig(r'{output_path}', dpi=150, bbox_inches='tight')\n"

        return code

    def _execute_code(self, code: str) -> ExecutionResult:
        """Execute Python code in a subprocess.

        Writes ``code`` to a temporary script, runs it with the current
        interpreter under a timeout, and reports the newest PNG written to
        the output directory *during this run* (if any) as the output file.
        """
        import time
        start_time = time.time()

        with tempfile.NamedTemporaryFile(
            mode="w",
            suffix=".py",
            delete=False,
            encoding="utf-8"
        ) as f:
            f.write(code)
            temp_file = f.name

        try:
            result = subprocess.run(
                [sys.executable, temp_file],
                capture_output=True,
                text=True,
                timeout=self._timeout,
                cwd=str(self._output_dir.parent),
            )

            execution_time = time.time() - start_time

            # Only consider PNGs modified during this run (1s slack for
            # coarse filesystem mtime resolution), and pick the newest —
            # glob() order is arbitrary and stale files from earlier runs
            # must not be reported as this run's output.
            fresh_pngs = [
                p for p in self._output_dir.glob("*.png")
                if p.stat().st_mtime >= start_time - 1.0
            ]
            output_file = (
                str(max(fresh_pngs, key=lambda p: p.stat().st_mtime))
                if fresh_pngs else None
            )

            return ExecutionResult(
                success=result.returncode == 0,
                output_file=output_file,
                stdout=result.stdout,
                stderr=result.stderr,
                execution_time_seconds=execution_time,
            )

        except subprocess.TimeoutExpired:
            return ExecutionResult(
                success=False,
                stderr=f"Execution timed out after {self._timeout} seconds",
            )
        except Exception as e:
            return ExecutionResult(
                success=False,
                stderr=str(e),
            )
        finally:
            # Best-effort cleanup of the temporary script.
            try:
                os.unlink(temp_file)
            except OSError:
                pass

    def _attempt_fix(
        self,
        original_code: str,
        error_message: str,
        context: AgentContext,
    ) -> ExecutionResult:
        """Attempt to fix code errors using the LLM.

        Asks the LLM for a diagnosis plus corrected code, re-executes the
        corrected code, and returns that result. If no corrected code is
        produced (or parsing fails), returns a failure that preserves any
        diagnosis the LLM did provide.
        """
        fix_prompt = f"""The following Python visualization code produced an error. Please fix it.

Original Code:
```python
{original_code}
```

Error Message:
{error_message}

Provide a JSON response with:
- diagnosis: What caused the error
- corrected_code: The fixed Python code

IMPORTANT: Return ONLY valid JSON. Do not include markdown formatting or explanations outside the JSON.
Safe to assume standard libraries (matplotlib, seaborn, pandas, numpy) are available.

JSON Response:"""

        response = self._llm.complete(
            prompt=fix_prompt,
            system_prompt="You are an expert Python debugger. Fix the code error and provide corrected code.",
        )

        diagnosis: Optional[str] = None
        try:
            data = self._extract_json(response.content)
            diagnosis = data.get("diagnosis", "Unknown error")
            corrected_code = data.get("corrected_code", "")

            if corrected_code:
                output_filename = "output.png"
                corrected_code = self._prepare_code(corrected_code, output_filename)
                result = self._execute_code(corrected_code)
                result.error_diagnosis = diagnosis
                result.corrected_code = corrected_code
                result.fix_applied = result.success
                return result

        except Exception as e:
            logger.error(f"Failed to parse fix response: {e}")

        # Reached when the LLM gave no corrected code or its response could
        # not be parsed; keep the parsed diagnosis when we have one instead
        # of discarding it.
        return ExecutionResult(
            success=False,
            stderr=error_message,
            error_diagnosis=diagnosis or "Failed to automatically fix the error",
        )

    def _build_prompt(self, context: AgentContext) -> str:
        """Unused: execute() is fully overridden, so no prompt is built."""
        return ""

    def _get_system_prompt(self) -> str:
        """Unused: execute() is fully overridden."""
        return ""

    def _parse_response(self, response: str) -> ExecutionResult:
        """Unused: execute() is fully overridden; returns a failure stub."""
        return ExecutionResult(success=False)

    def _get_output_key(self) -> str:
        """Shared-memory key for this agent's result."""
        return self.MEMORY_KEY
|
coda/agents/design_explorer.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Design Explorer Agent for CoDA.
|
| 3 |
+
|
| 4 |
+
Generates aesthetic and content specifications for visualizations,
|
| 5 |
+
optimizing for user experience and effective communication.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import logging
|
| 9 |
+
from typing import Any, Optional
|
| 10 |
+
|
| 11 |
+
from pydantic import BaseModel, Field
|
| 12 |
+
|
| 13 |
+
from coda.core.base_agent import AgentContext, BaseAgent
|
| 14 |
+
from coda.core.llm import LLMProvider
|
| 15 |
+
from coda.core.memory import SharedMemory
|
| 16 |
+
|
| 17 |
+
logger = logging.getLogger(__name__)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class ColorScheme(BaseModel):
    """Color scheme specification.

    All values are hex color strings; defaults form a usable light theme so
    an empty dict from the LLM still yields a valid scheme.
    """

    primary: str = "#4A90D9"  # main series color
    secondary: list[str] = Field(default_factory=lambda: ["#67B7DC", "#A5D6A7"])  # additional series colors
    background: str = "#FFFFFF"  # figure background
    text: str = "#333333"  # title/label/tick text color
    accent: Optional[str] = "#FF6B6B"  # optional highlight color
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class LayoutSpec(BaseModel):
    """Layout specification.

    figure_size is in inches (matplotlib convention); values that look like
    pixels are clamped in DesignExplorerAgent._parse_response.
    """

    figure_size: tuple[int, int] = (10, 6)  # (width, height) in inches
    margins: dict[str, float] = Field(default_factory=lambda: {"top": 0.1, "bottom": 0.15, "left": 0.1, "right": 0.1})  # fractional figure margins
    title_position: str = "top"  # where the title is placed
    legend_position: str = "right"  # "right", "bottom", or "none"
    grid_visible: bool = True  # whether to draw gridlines
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class DesignSpec(BaseModel):
    """Structured output from the Design Explorer.

    Every field has a safe default, so the spec degrades gracefully when the
    LLM response is missing or malformed — _parse_response blanks invalid
    nested sections to {} so these defaults apply.
    """

    color_scheme: ColorScheme = Field(default_factory=ColorScheme)
    layout: LayoutSpec = Field(default_factory=LayoutSpec)
    typography: dict = Field(
        default_factory=lambda: {"title_size": 14, "label_size": 12, "tick_size": 10, "font_family": "sans-serif"}
    )
    annotations: list[dict] = Field(
        default_factory=list,
        description="Text annotations to add"
    )
    implementation_guidelines: list[str] = Field(
        default_factory=list,
        description="Specific implementation instructions"
    )
    quality_metrics: dict = Field(
        default_factory=lambda: {"readability": "high", "aesthetics": "clean", "clarity": "clear"}
    )
    alternatives: list[dict] = Field(
        default_factory=list,
        description="Alternative design approaches"
    )
    success_indicators: list[str] = Field(
        default_factory=list,
        description="How to know if the design is successful"
    )
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
class DesignExplorerAgent(BaseAgent[DesignSpec]):
    """
    Generates aesthetic design specifications for visualizations.

    Focuses on creating visually appealing and effective designs
    that communicate data insights clearly. Reads prior agents' outputs
    from shared memory and supports feedback-driven revision iterations
    via ``context.feedback``.
    """

    # Shared-memory key under which the DesignSpec is stored.
    MEMORY_KEY = "design_spec"

    def __init__(
        self,
        llm: LLMProvider,
        memory: SharedMemory,
        name: Optional[str] = None,
    ) -> None:
        """Create the agent; the display name defaults to "DesignExplorer"."""
        super().__init__(llm, memory, name or "DesignExplorer")

    def _get_system_prompt(self) -> str:
        """Return the role/system prompt for the design LLM call."""
        return """You are a Visualization Design specialist in a data visualization team.

Your expertise is in creating aesthetically pleasing and effective data visualizations that communicate insights clearly.

Your responsibilities:
1. Design harmonious color schemes suitable for the data
2. Specify optimal layouts for readability
3. Choose appropriate typography
4. Plan meaningful annotations
5. Define quality metrics for evaluation
6. Consider accessibility and best practices

Design principles to follow:
- Use color purposefully, not decoratively
- Ensure sufficient contrast for readability
- Maintain consistent visual hierarchy
- Minimize chart junk and maximize data-ink ratio
- Consider colorblind-friendly palettes when appropriate

Always respond with a valid JSON object matching the required schema."""

    def _build_prompt(self, context: AgentContext) -> str:
        """Build the design prompt from the visual mapping and any feedback.

        NOTE(review): ``data_analysis`` is fetched here but never used in
        the prompt — either wire it in or drop the lookup.
        """
        query_analysis = self._get_from_memory("query_analysis") or {}
        data_analysis = self._get_from_memory("data_analysis") or {}
        visual_mapping = self._get_from_memory("visual_mapping") or {}

        # On revision iterations, surface the evaluator's feedback so the
        # LLM can address it explicitly.
        feedback_section = ""
        if context.feedback:
            feedback_section = f"""
Design Feedback (iteration {context.iteration}):
{context.feedback}

Please address the feedback in your revised design.
"""

        return f"""Create a design specification for the following visualization.

User Query: {context.query}

Visualization Type: {visual_mapping.get('chart_type', 'Unknown')}
Visualization Goals: {visual_mapping.get('visualization_goals', [])}
Styling Hints: {visual_mapping.get('styling_hints', {})}
{feedback_section}

Provide a JSON object containing:
- color_scheme: {{
    "primary": "#hex",
    "secondary": ["#hex", ...],
    "background": "#hex",
    "text": "#hex",
    "accent": "#hex" (optional)
}}
- layout: {{
    "figure_size": [width, height],
    "margins": {{"top": 0.1, "bottom": 0.1, "left": 0.1, "right": 0.1}},
    "title_position": "top|center",
    "legend_position": "right|bottom|none",
    "grid_visible": true|false
}}
- typography: {{
    "title_size": 16,
    "label_size": 12,
    "tick_size": 10,
    "font_family": "sans-serif"
}}
- annotations: List of {{"text": "...", "position": "...", "style": "..."}}
- implementation_guidelines: Specific instructions for implementation
- quality_metrics: {{"readability": "...", "aesthetics": "...", "clarity": "..."}}
- alternatives: Alternative design approaches
- success_indicators: How to evaluate design success

JSON Response:"""

    def _normalize_to_list(self, value: Any, as_dicts: bool = False) -> list:
        """Normalize a value to a list.

        Strings become single-element lists (or single-entry dicts when
        ``as_dicts``); list items are coerced to str or wrapped in
        {"description": ...} dicts; anything else yields [].
        """
        if value is None:
            return []
        if isinstance(value, str):
            if as_dicts:
                return [{"description": value}]
            return [value] if value.strip() else []
        if isinstance(value, list):
            if as_dicts:
                return [item if isinstance(item, dict) else {"description": str(item)} for item in value]
            return [str(item) if not isinstance(item, str) else item for item in value]
        return []

    def _parse_response(self, response: str) -> DesignSpec:
        """Parse the LLM JSON into a DesignSpec, sanitizing shapes.

        Invalid nested sections are blanked to {} so the model defaults
        apply; implausibly large figure sizes are clamped.
        """
        data = self._extract_json(response)

        # Normalize list fields
        data["implementation_guidelines"] = self._normalize_to_list(data.get("implementation_guidelines"))
        data["success_indicators"] = self._normalize_to_list(data.get("success_indicators"))
        data["alternatives"] = self._normalize_to_list(data.get("alternatives"), as_dicts=True)
        data["annotations"] = self._normalize_to_list(data.get("annotations"), as_dicts=True)

        # Ensure nested models have valid data
        if "color_scheme" not in data or not isinstance(data["color_scheme"], dict):
            data["color_scheme"] = {}
        if "layout" not in data or not isinstance(data["layout"], dict):
            data["layout"] = {}
        else:
            # Sanitize figure_size to prevent crashes (e.g. LLM giving pixels instead of inches)
            figsize = data["layout"].get("figure_size")
            if isinstance(figsize, (list, tuple)) and len(figsize) == 2:
                w, h = figsize
                # If width > 50, assume pixels and scale down, or just clamp
                if w > 50 or h > 50:
                    logger.warning(f"Extremely large figure size detected: {figsize}. Clamping to (12, 8).")
                    data["layout"]["figure_size"] = (12, 8)

        if "typography" not in data or not isinstance(data["typography"], dict):
            data["typography"] = {}
        if "quality_metrics" not in data or not isinstance(data["quality_metrics"], dict):
            data["quality_metrics"] = {}

        return DesignSpec(**data)

    def _get_output_key(self) -> str:
        """Shared-memory key for this agent's result."""
        return self.MEMORY_KEY
|
coda/agents/query_analyzer.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Query Analyzer Agent for CoDA.
|
| 3 |
+
|
| 4 |
+
Interprets natural language queries to extract visualization intent,
|
| 5 |
+
decompose requirements into actionable items, and provide guidance
|
| 6 |
+
for downstream agents.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from typing import Optional
|
| 10 |
+
|
| 11 |
+
from pydantic import BaseModel, Field
|
| 12 |
+
|
| 13 |
+
from coda.core.base_agent import AgentContext, BaseAgent
|
| 14 |
+
from coda.core.llm import LLMProvider
|
| 15 |
+
from coda.core.memory import SharedMemory
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class QueryAnalysis(BaseModel):
    """Structured output from the Query Analyzer.

    Built from the LLM's JSON reply; the first four fields are required,
    the last two default to empty lists.
    """

    # Candidate chart types for the request (e.g. "line chart", "bar chart").
    visualization_types: list[str] = Field(
        description="Suggested visualization types (e.g., line chart, bar chart)"
    )
    # Aspects of the data the visualization should emphasize.
    key_points: list[str] = Field(
        description="Key data points or aspects to visualize"
    )
    # Ordered task breakdown consumed by downstream pipeline agents.
    todo_list: list[str] = Field(
        description="Decomposed list of tasks for visualization creation"
    )
    # Columns/features the data must provide for the visualization.
    data_requirements: list[str] = Field(
        description="Required data columns or features"
    )
    # Optional: explicit constraints or preferences stated in the query.
    constraints: list[str] = Field(
        default_factory=list,
        description="Any constraints mentioned in the query"
    )
    # Optional: unclear aspects that may warrant user clarification.
    ambiguities: list[str] = Field(
        default_factory=list,
        description="Ambiguous aspects that may need clarification"
    )
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class QueryAnalyzerAgent(BaseAgent[QueryAnalysis]):
    """
    Analyzes natural language queries to extract visualization intent.

    This agent is the first in the pipeline, responsible for understanding
    what the user wants to visualize and breaking it down into actionable steps.
    """

    # Shared-memory key for the agent's structured output.
    MEMORY_KEY = "query_analysis"

    def __init__(
        self,
        llm: LLMProvider,
        memory: SharedMemory,
        name: Optional[str] = None,
    ) -> None:
        """
        Args:
            llm: Provider used for text completion.
            memory: Shared memory for reading pipeline state and storing results.
            name: Optional display name; defaults to "QueryAnalyzer".
        """
        super().__init__(llm, memory, name or "QueryAnalyzer")

    def _get_system_prompt(self) -> str:
        """Return the role/system prompt for the analyzer persona."""
        return """You are a Query Analyzer specialist in a data visualization team.

Your expertise lies in interpreting natural language requests and extracting clear, actionable requirements for creating visualizations.

Your responsibilities:
1. Identify the type(s) of visualizations that best suit the request
2. Extract key data points and features to be visualized
3. Decompose the request into a clear TODO list for the visualization pipeline
4. Identify required data columns or features
5. Note any constraints or preferences mentioned
6. Flag ambiguities that might affect the visualization

Always respond with a valid JSON object matching the required schema."""

    def _build_prompt(self, context: AgentContext) -> str:
        """Compose the user prompt, folding in dataset metadata and prior feedback when present."""
        metadata = self._get_from_memory("metadata_summary")
        metadata_section = ""
        if metadata:
            metadata_section = f"""
Available Metadata:
{metadata}
"""

        feedback_section = ""
        if context.feedback:
            feedback_section = f"""
Previous Feedback (iteration {context.iteration}):
{context.feedback}
"""

        return f"""Analyze the following visualization query and extract structured requirements.

Query: {context.query}
{metadata_section}{feedback_section}

Respond with a JSON object containing:
- visualization_types: List of suggested chart types
- key_points: Key aspects or data points to highlight
- todo_list: Step-by-step tasks for creating the visualization
- data_requirements: Required data columns or features
- constraints: Any mentioned constraints or preferences
- ambiguities: Unclear aspects that may need clarification

JSON Response:"""

    def _parse_response(self, response: str) -> QueryAnalysis:
        """Parse the LLM's JSON reply into a QueryAnalysis.

        The four required list fields of QueryAnalysis have no defaults, so a
        partially-formed LLM response would raise a pydantic ValidationError.
        Normalize them first (mirroring the defensive parsing done by the
        other agents in this package): a bare string becomes a one-element
        list, anything else malformed becomes an empty list.
        """
        data = self._extract_json(response)

        for field in ("visualization_types", "key_points", "todo_list", "data_requirements"):
            value = data.get(field)
            if isinstance(value, str):
                data[field] = [value]
            elif not isinstance(value, list):
                data[field] = []

        return QueryAnalysis(**data)

    def _get_output_key(self) -> str:
        """Return the shared-memory key under which this agent stores its result."""
        return self.MEMORY_KEY
|
coda/agents/search_agent.py
ADDED
|
@@ -0,0 +1,295 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Search Agent for CoDA.
|
| 3 |
+
|
| 4 |
+
Retrieves relevant code examples and patterns from a knowledge base
|
| 5 |
+
to assist code generation with proven implementations.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from typing import Optional
|
| 9 |
+
|
| 10 |
+
from pydantic import BaseModel, Field
|
| 11 |
+
|
| 12 |
+
from coda.core.base_agent import AgentContext, BaseAgent
|
| 13 |
+
from coda.core.llm import LLMProvider
|
| 14 |
+
from coda.core.memory import SharedMemory
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class CodeExample(BaseModel):
    """A retrieved code example.

    Every field carries a safe default so partially-specified examples from
    the LLM still validate after SearchAgent's normalization pass.
    """

    # Short human-readable name of the example.
    title: str = ""
    # One-line summary of what the example demonstrates.
    description: str = ""
    # The example source code itself.
    code: str = ""
    # Visualization library the example targets.
    library: str = "matplotlib"
    # Relevance to the current request, 0.0 (unrelated) to 1.0 (exact match).
    relevance_score: float = 0.5
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class SearchResult(BaseModel):
    """Structured output from the Search Agent.

    All fields default to empty lists, so even an uninformative LLM reply
    yields a valid (if empty) result.
    """

    # Queries the agent would issue to locate relevant patterns.
    search_queries: list[str] = Field(
        default_factory=list,
        description="Queries used to find examples"
    )
    # Selected code examples; may be backfilled from CODE_EXAMPLES_DB.
    examples: list[CodeExample] = Field(
        default_factory=list,
        description="Retrieved code examples"
    )
    # Libraries judged best suited to the requested visualization.
    recommended_libraries: list[str] = Field(
        default_factory=list,
        description="Recommended visualization libraries"
    )
    # Free-form implementation guidance for the code generator.
    implementation_notes: list[str] = Field(
        default_factory=list,
        description="Notes on implementing the visualization"
    )
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
# Built-in code examples for common visualization patterns.
# Keys are lowercase chart types; each value is a list of example dicts whose
# shape matches the CodeExample model. The "code" strings are templates: names
# like x_data, values, categories are placeholders the code generator replaces.
CODE_EXAMPLES_DB: dict[str, list[dict]] = {
    "line": [
        {
            "title": "Basic Line Chart",
            "description": "Simple line chart with matplotlib",
            "code": """import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(x_data, y_data, marker='o', linewidth=2)
ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_title('Line Chart Title')
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('output.png', dpi=150, bbox_inches='tight')""",
            "library": "matplotlib",
            "relevance_score": 0.9,
        },
        {
            "title": "Multi-line Chart with Legend",
            "description": "Multiple lines with different colors and legend",
            "code": """import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 6))
for label, data in grouped_data.items():
    ax.plot(data['x'], data['y'], label=label, marker='o')
ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_title('Multi-line Chart')
ax.legend(loc='best')
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('output.png', dpi=150, bbox_inches='tight')""",
            "library": "matplotlib",
            "relevance_score": 0.85,
        },
    ],
    "bar": [
        {
            "title": "Basic Bar Chart",
            "description": "Vertical bar chart with matplotlib",
            "code": """import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(categories, values, color='steelblue', edgecolor='black')
ax.set_xlabel('Category')
ax.set_ylabel('Value')
ax.set_title('Bar Chart Title')
ax.bar_label(bars, fmt='%.1f')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.savefig('output.png', dpi=150, bbox_inches='tight')""",
            "library": "matplotlib",
            "relevance_score": 0.9,
        },
        {
            "title": "Grouped Bar Chart",
            "description": "Side-by-side bars for comparison",
            "code": """import matplotlib.pyplot as plt
import numpy as np

x = np.arange(len(categories))
width = 0.35

fig, ax = plt.subplots(figsize=(10, 6))
bars1 = ax.bar(x - width/2, values1, width, label='Group 1')
bars2 = ax.bar(x + width/2, values2, width, label='Group 2')
ax.set_xlabel('Category')
ax.set_ylabel('Value')
ax.set_title('Grouped Bar Chart')
ax.set_xticks(x)
ax.set_xticklabels(categories)
ax.legend()
plt.tight_layout()
plt.savefig('output.png', dpi=150, bbox_inches='tight')""",
            "library": "matplotlib",
            "relevance_score": 0.85,
        },
    ],
    "scatter": [
        {
            "title": "Basic Scatter Plot",
            "description": "Scatter plot with optional color encoding",
            "code": """import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(x_data, y_data, c=color_data, s=50, alpha=0.7, cmap='viridis')
ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_title('Scatter Plot Title')
plt.colorbar(scatter, label='Color Label')
plt.tight_layout()
plt.savefig('output.png', dpi=150, bbox_inches='tight')""",
            "library": "matplotlib",
            "relevance_score": 0.9,
        },
    ],
    "pie": [
        {
            "title": "Pie Chart",
            "description": "Pie chart with percentages",
            "code": """import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 8))
wedges, texts, autotexts = ax.pie(
    values, labels=labels, autopct='%1.1f%%',
    startangle=90, colors=plt.cm.Pastel1.colors
)
ax.set_title('Pie Chart Title')
plt.tight_layout()
plt.savefig('output.png', dpi=150, bbox_inches='tight')""",
            "library": "matplotlib",
            "relevance_score": 0.9,
        },
    ],
    "heatmap": [
        {
            "title": "Heatmap with Seaborn",
            "description": "Correlation or matrix heatmap",
            "code": """import matplotlib.pyplot as plt
import seaborn as sns

fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(data_matrix, annot=True, fmt='.2f', cmap='coolwarm', ax=ax)
ax.set_title('Heatmap Title')
plt.tight_layout()
plt.savefig('output.png', dpi=150, bbox_inches='tight')""",
            "library": "seaborn",
            "relevance_score": 0.9,
        },
    ],
    "histogram": [
        {
            "title": "Histogram",
            "description": "Distribution histogram with optional KDE",
            "code": """import matplotlib.pyplot as plt
import seaborn as sns

fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data, kde=True, ax=ax, color='steelblue')
ax.set_xlabel('Value')
ax.set_ylabel('Frequency')
ax.set_title('Histogram Title')
plt.tight_layout()
plt.savefig('output.png', dpi=150, bbox_inches='tight')""",
            "library": "seaborn",
            "relevance_score": 0.9,
        },
    ],
}
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
class SearchAgent(BaseAgent[SearchResult]):
    """
    Searches for relevant code examples to guide code generation.

    Uses a built-in knowledge base of visualization patterns
    and can be extended to search external resources.
    """

    # Shared-memory key for the agent's structured output.
    MEMORY_KEY = "search_results"

    def __init__(
        self,
        llm: LLMProvider,
        memory: SharedMemory,
        name: Optional[str] = None,
    ) -> None:
        """
        Args:
            llm: Provider used for text completion.
            memory: Shared memory for reading pipeline state and storing results.
            name: Optional display name; defaults to "SearchAgent".
        """
        super().__init__(llm, memory, name or "SearchAgent")

    def _get_system_prompt(self) -> str:
        """Return the role/system prompt for the search persona."""
        return """You are a Code Search specialist in a data visualization team.

Your expertise is in finding and recommending relevant code examples and patterns for visualization implementation.

Your responsibilities:
1. Formulate effective search queries based on requirements
2. Select the most relevant examples from available patterns
3. Recommend appropriate libraries for the task
4. Provide implementation guidance

Consider the specific chart type, data characteristics, and styling requirements when selecting examples.

Always respond with a valid JSON object matching the required schema."""

    def _build_prompt(self, context: AgentContext) -> str:
        """Compose the user prompt from the visual mapping stored in shared memory."""
        # NOTE: only the visual mapping is needed here; the previous revision
        # also fetched "query_analysis" but never used it.
        visual_mapping = self._get_from_memory("visual_mapping") or {}

        chart_type = visual_mapping.get("chart_type", "")
        chart_subtype = visual_mapping.get("chart_subtype", "")
        styling_hints = visual_mapping.get("styling_hints", {})

        available_examples = list(CODE_EXAMPLES_DB.keys())

        return f"""Find relevant code examples for the following visualization requirements.

User Query: {context.query}

Visualization Mapping:
- Chart Type: {chart_type}
- Chart Subtype: {chart_subtype}
- Styling: {styling_hints}
- Goals: {visual_mapping.get('visualization_goals', [])}

Available Example Categories: {available_examples}

Provide a JSON object containing:
- search_queries: List of search queries you would use
- examples: List of relevant examples (select from available categories)
- recommended_libraries: Libraries best suited for this visualization
- implementation_notes: Tips for implementing this specific visualization

For examples, include:
- title, description, code (from your knowledge of matplotlib/seaborn)
- library used
- relevance_score (0.0 to 1.0)

JSON Response:"""

    def _parse_response(self, response: str) -> SearchResult:
        """Parse the LLM JSON reply, normalizing examples and backfilling from built-ins."""
        data = self._extract_json(response)

        # Normalize examples so every dict has the fields CodeExample expects;
        # non-dict entries are dropped.
        if isinstance(data.get("examples"), list):
            normalized_examples = []
            for ex in data["examples"]:
                if isinstance(ex, dict):
                    ex.setdefault("library", "matplotlib")
                    ex.setdefault("title", "")
                    ex.setdefault("description", "")
                    ex.setdefault("code", "")
                    ex.setdefault("relevance_score", 0.5)
                    normalized_examples.append(ex)
            data["examples"] = normalized_examples

        visual_mapping = self._get_from_memory("visual_mapping") or {}
        # "or" (not a .get default) guards against an explicit None stored
        # under "chart_type", which would otherwise crash on .lower().
        chart_type = (visual_mapping.get("chart_type") or "line").lower()

        # Fall back to the built-in knowledge base if the LLM provided nothing.
        if chart_type in CODE_EXAMPLES_DB and not data.get("examples"):
            data["examples"] = CODE_EXAMPLES_DB[chart_type]

        return SearchResult(**data)

    def _get_output_key(self) -> str:
        """Return the shared-memory key under which this agent stores its result."""
        return self.MEMORY_KEY
|
coda/agents/visual_evaluator.py
ADDED
|
@@ -0,0 +1,228 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Visual Evaluator Agent for CoDA.
|
| 3 |
+
|
| 4 |
+
Assesses generated visualizations across multiple quality dimensions
|
| 5 |
+
using multimodal LLM capabilities to analyze the output image.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import logging
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from typing import Optional
|
| 11 |
+
|
| 12 |
+
from pydantic import BaseModel, Field
|
| 13 |
+
|
| 14 |
+
from coda.core.base_agent import AgentContext, BaseAgent
|
| 15 |
+
from coda.core.llm import LLMProvider
|
| 16 |
+
from coda.core.memory import SharedMemory
|
| 17 |
+
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class QualityScores(BaseModel):
    """Quality scores for different dimensions.

    All scores are constrained to the inclusive range 0-10; values outside it
    fail pydantic validation.
    """

    overall: float = Field(ge=0, le=10, description="Overall quality score")
    readability: float = Field(ge=0, le=10, description="How easy to read and understand")
    accuracy: float = Field(ge=0, le=10, description="How accurately it represents the data")
    aesthetics: float = Field(ge=0, le=10, description="Visual appeal and design quality")
    layout: float = Field(ge=0, le=10, description="Layout and spacing quality")
    correctness: float = Field(ge=0, le=10, description="Technical correctness")
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class VisualEvaluation(BaseModel):
    """Structured output from the Visual Evaluator.

    Defaults are chosen so an empty/partial LLM reply still validates:
    neutral (5.0) scores, empty feedback lists, and a failing threshold.
    """

    # Per-dimension quality scores; defaults to all-neutral 5.0.
    scores: QualityScores = Field(default_factory=lambda: QualityScores(
        overall=5.0, readability=5.0, accuracy=5.0, aesthetics=5.0, layout=5.0, correctness=5.0
    ))
    # Positive aspects observed in the image.
    strengths: list[str] = Field(default_factory=list)
    # Problems found in the image.
    issues: list[str] = Field(default_factory=list)
    # Most important fixes to apply next iteration.
    priority_fixes: list[str] = Field(default_factory=list)
    # Map of TODO item -> whether the visualization satisfies it.
    todo_completion: dict[str, bool] = Field(default_factory=dict)
    # General improvement suggestions.
    recommendations: list[str] = Field(default_factory=list)
    # True when the overall score meets the evaluator's minimum threshold.
    passes_threshold: bool = Field(default=False)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
class VisualEvaluatorAgent(BaseAgent[VisualEvaluation]):
    """
    Evaluates visualization quality using multimodal analysis.

    Analyzes the output image against the original requirements
    and provides detailed feedback for iterative refinement.
    """

    # Shared-memory key for the agent's structured output.
    MEMORY_KEY = "visual_evaluation"

    def __init__(
        self,
        llm: LLMProvider,
        memory: SharedMemory,
        min_overall_score: float = 7.0,
        name: Optional[str] = None,
    ) -> None:
        """
        Args:
            llm: Provider with multimodal (image + text) completion support.
            memory: Shared memory for reading pipeline state and storing results.
            min_overall_score: Overall score (0-10) required to pass the threshold.
            name: Optional display name; defaults to "VisualEvaluator".
        """
        super().__init__(llm, memory, name or "VisualEvaluator")
        self._min_score = min_overall_score

    def _hard_failure(self, issue: str, fix: str, recommendation: str) -> VisualEvaluation:
        """Build and persist an all-zero evaluation for cases where no image can be assessed."""
        evaluation = VisualEvaluation(
            scores=QualityScores(
                overall=0, readability=0, accuracy=0,
                aesthetics=0, layout=0, correctness=0
            ),
            strengths=[],
            issues=[issue],
            priority_fixes=[fix],
            todo_completion={},
            recommendations=[recommendation],
            passes_threshold=False,
        )
        # Store the failure too, so consumers of MEMORY_KEY never read a stale
        # evaluation from a previous iteration (the success and fallback paths
        # already store their results).
        self._store_result(evaluation)
        return evaluation

    def execute(self, context: AgentContext) -> VisualEvaluation:
        """Execute visual evaluation using the vision model."""
        logger.info(f"[{self._name}] Evaluating visualization quality")

        execution_result = self._get_from_memory("execution_result")
        if not execution_result or not execution_result.get("success"):
            return self._hard_failure(
                issue="Visualization generation failed",
                fix="Fix code execution errors",
                recommendation="Debug and fix code errors first",
            )

        output_file = execution_result.get("output_file")
        if not output_file or not Path(output_file).exists():
            return self._hard_failure(
                issue="Output file not found",
                fix="Ensure code saves output correctly",
                recommendation="Check savefig call in code",
            )

        prompt = self._build_evaluation_prompt(context)
        system_prompt = self._get_system_prompt()

        try:
            response = self._llm.complete_with_image(
                prompt=prompt,
                image_path=output_file,
                system_prompt=system_prompt,
            )

            result = self._parse_response(response.content)
            self._store_result(result)

            logger.info(
                f"[{self._name}] Evaluation complete: "
                f"overall={result.scores.overall}, passes={result.passes_threshold}"
            )
            return result
        except Exception as e:
            logger.error(f"[{self._name}] Evaluation failed: {e}")
            # Return a neutral fallback evaluation instead of crashing the pipeline.
            fallback = VisualEvaluation(
                scores=QualityScores(
                    overall=5.0, readability=5.0, accuracy=5.0,
                    aesthetics=5.0, layout=5.0, correctness=5.0
                ),
                strengths=["Backup evaluation (parsing failed)"],
                issues=[f"Evaluation parsing error: {str(e)}"],
                priority_fixes=[],
                todo_completion={},
                recommendations=[],
                passes_threshold=False
            )
            self._store_result(fallback)
            return fallback

    def _get_system_prompt(self) -> str:
        """Return the role/system prompt for the evaluator persona."""
        return """You are a Visualization Quality Evaluator specialist.

Your expertise is in assessing data visualizations for quality, effectiveness, and adherence to best practices.

Evaluate visualizations on these dimensions:
1. Readability: Clear labels, appropriate font sizes, uncluttered design
2. Accuracy: Correct representation of data, appropriate scales
3. Aesthetics: Visual appeal, harmonious colors, professional appearance
4. Layout: Good use of space, proper alignment, balanced composition
5. Correctness: Technically correct chart type, proper axis handling

Be rigorous but fair in your assessment. Provide specific, actionable feedback.

Always respond with a valid JSON object matching the required schema."""

    def _build_evaluation_prompt(self, context: AgentContext) -> str:
        """Compose the evaluation prompt from the upstream agents' stored outputs."""
        query_analysis = self._get_from_memory("query_analysis") or {}
        visual_mapping = self._get_from_memory("visual_mapping") or {}
        design_spec = self._get_from_memory("design_spec") or {}

        todo_list = query_analysis.get("todo_list", [])

        return f"""Evaluate this visualization against the original requirements.

Original Query: {context.query}

Requirements:
- Visualization Type: {visual_mapping.get('chart_type', 'Unknown')}
- Goals: {visual_mapping.get('visualization_goals', [])}
- TODO Items: {todo_list}

Design Specifications:
- Color Scheme: {design_spec.get('color_scheme', {})}
- Success Indicators: {design_spec.get('success_indicators', [])}

Evaluate the visualization image and provide a JSON response with:
- scores: {{
    "overall": 0-10,
    "readability": 0-10,
    "accuracy": 0-10,
    "aesthetics": 0-10,
    "layout": 0-10,
    "correctness": 0-10
  }}
- strengths: List of positive aspects
- issues: List of problems found
- priority_fixes: Most important fixes (max 3)
- todo_completion: {{"todo_item": true/false}} for each TODO
- recommendations: Improvement suggestions
- passes_threshold: true if overall >= {self._min_score}

JSON Response:"""

    def _build_prompt(self, context: AgentContext) -> str:
        """Delegate to the evaluation prompt builder (satisfies the BaseAgent interface)."""
        return self._build_evaluation_prompt(context)

    def _parse_response(self, response: str) -> VisualEvaluation:
        """Coerce the LLM's JSON into a VisualEvaluation, defaulting malformed fields."""
        data = self._extract_json(response)

        # Ensure scores exists and is properly formatted. A non-dict value
        # (e.g. a bare number returned by the LLM) is discarded so that
        # validation does not crash; missing dimensions default to neutral 5.0.
        scores_data = data.get("scores", {})
        if not isinstance(scores_data, dict):
            scores_data = {}
        scores_data.setdefault("overall", 5.0)
        scores_data.setdefault("readability", 5.0)
        scores_data.setdefault("accuracy", 5.0)
        scores_data.setdefault("aesthetics", 5.0)
        scores_data.setdefault("layout", 5.0)
        scores_data.setdefault("correctness", 5.0)
        data["scores"] = QualityScores(**scores_data)

        # Ensure list fields are lists (a bare string becomes a one-element list).
        for field in ["strengths", "issues", "priority_fixes", "recommendations"]:
            if field not in data or not isinstance(data[field], list):
                data[field] = [data[field]] if isinstance(data.get(field), str) else []

        # Ensure todo_completion is a dict.
        if not isinstance(data.get("todo_completion"), dict):
            data["todo_completion"] = {}

        # Calculate passes_threshold if the LLM did not provide it.
        if "passes_threshold" not in data:
            overall = data.get("scores")
            if isinstance(overall, QualityScores):
                data["passes_threshold"] = overall.overall >= self._min_score
            else:
                data["passes_threshold"] = False

        return VisualEvaluation(**data)

    def _get_output_key(self) -> str:
        """Return the shared-memory key under which this agent stores its result."""
        return self.MEMORY_KEY
|
coda/agents/viz_mapping.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
VizMapping Agent for CoDA.
|
| 3 |
+
|
| 4 |
+
Maps query semantics and data characteristics to visualization primitives,
|
| 5 |
+
selecting appropriate chart types and defining data-to-visual bindings.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from typing import Optional
|
| 9 |
+
|
| 10 |
+
from pydantic import BaseModel, Field
|
| 11 |
+
|
| 12 |
+
from coda.core.base_agent import AgentContext, BaseAgent
|
| 13 |
+
from coda.core.llm import LLMProvider
|
| 14 |
+
from coda.core.memory import SharedMemory
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class DataTransformation(BaseModel):
    """A data transformation step.

    NOTE(review): field semantics inferred from names — presumably `operation`
    names the transform (e.g. an aggregation), `columns` lists its inputs, and
    `parameters` holds operation-specific options; confirm against consumers.
    """

    # Name of the transformation to apply.
    operation: str
    # Columns the transformation operates on.
    columns: list[str]
    # Operation-specific keyword parameters.
    parameters: dict
|
| 24 |
+
|
| 25 |
+
class VisualMapping(BaseModel):
    """Structured output from the VizMapping Agent.

    `chart_type`, `visualization_goals`, and `rationale` are required; all
    encoding/axis fields are optional and default to None or empty containers.
    """

    # Required: primary chart family.
    chart_type: str = Field(
        description="Primary chart type (e.g., line, bar, scatter)"
    )
    chart_subtype: Optional[str] = Field(
        default=None,
        description="Chart subtype if applicable (e.g., stacked, grouped)"
    )
    x_axis: Optional[dict] = Field(
        default=None,
        description="X-axis configuration (column, label, type)"
    )
    y_axis: Optional[dict] = Field(
        default=None,
        description="Y-axis configuration (column, label, type)"
    )
    color_encoding: Optional[dict] = Field(
        default=None,
        description="Color encoding configuration"
    )
    size_encoding: Optional[dict] = Field(
        default=None,
        description="Size encoding for scatter plots"
    )
    # Loose dicts rather than DataTransformation models, so the LLM's free-form
    # transformation specs validate without a strict schema.
    transformations: list[dict] = Field(
        default_factory=list,
        description="Data transformations to apply"
    )
    styling_hints: dict = Field(
        default_factory=dict,
        description="Visual styling recommendations"
    )
    # Required: high-level intent guiding downstream design/evaluation agents.
    visualization_goals: list[str] = Field(
        description="High-level goals for the visualization"
    )
    # Required: human-readable justification of the chosen approach.
    rationale: str = Field(
        description="Explanation for the chosen visualization approach"
    )
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
class VizMappingAgent(BaseAgent[VisualMapping]):
    """
    Maps query semantics to visualization specifications.

    Bridges the gap between data analysis and code generation by
    defining exactly how data should be visualized.
    """

    # Shared-memory key under which this agent's VisualMapping is stored.
    MEMORY_KEY = "visual_mapping"

    def __init__(
        self,
        llm: LLMProvider,
        memory: SharedMemory,
        name: Optional[str] = None,
    ) -> None:
        """Initialize the agent; the display name defaults to "VizMapping"."""
        super().__init__(llm, memory, name or "VizMapping")

    def _get_system_prompt(self) -> str:
        """Return the persona/system prompt sent with every LLM call."""
        return """You are a Visualization Mapping specialist in a data visualization team.

Your expertise is in translating data analysis requirements into concrete visualization specifications that can be implemented in code.

Your responsibilities:
1. Select the optimal chart type based on data and query requirements
2. Define data-to-visual mappings (axes, colors, sizes)
3. Specify required data transformations
4. Provide styling hints for aesthetics
5. Document the rationale for visualization choices

Consider:
- Data types when choosing encodings (categorical vs numerical)
- Query intent when selecting chart types
- Readability and best practices in visualization design

Always respond with a valid JSON object matching the required schema."""

    def _build_prompt(self, context: AgentContext) -> str:
        """Assemble the user prompt from upstream agents' memory entries.

        Pulls optional "query_analysis" and "data_analysis" results from
        shared memory; missing entries simply produce empty prompt sections,
        so this agent also works when run without the upstream agents.
        """
        query_analysis = self._get_from_memory("query_analysis") or {}
        data_analysis = self._get_from_memory("data_analysis") or {}

        query_section = ""
        if query_analysis:
            query_section = f"""
Query Analysis:
- Suggested Types: {query_analysis.get('visualization_types', [])}
- Key Points: {query_analysis.get('key_points', [])}
- Data Requirements: {query_analysis.get('data_requirements', [])}
"""

        data_section = ""
        if data_analysis:
            files = data_analysis.get('files', [])
            if files:
                columns_info = []
                for f in files:
                    for col in f.get('columns', []):
                        # Assumes each column dict carries "name" and "dtype"
                        # keys (as produced by the data analysis agent) —
                        # a KeyError here means that contract changed.
                        columns_info.append(f" - {col['name']} ({col['dtype']})")
                # chr(10) is "\n": backslashes are not allowed inside
                # f-string expressions on the Python versions targeted here.
                data_section = f"""
Available Data:
- Columns:
{chr(10).join(columns_info)}
- Insights: {data_analysis.get('insights', [])}
- Suggested Aggregations: {data_analysis.get('aggregations_needed', [])}
"""

        feedback_section = ""
        if context.feedback:
            feedback_section = f"""
Refinement Feedback (iteration {context.iteration}):
{context.feedback}
"""

        return f"""Create a visualization mapping for the following query.

User Query: {context.query}
{query_section}{data_section}{feedback_section}

Provide a JSON object containing:
- chart_type: Primary chart type (line, bar, scatter, pie, heatmap, etc.)
- chart_subtype: Optional subtype (stacked, grouped, etc.)
- x_axis: {{"column": "...", "label": "...", "type": "categorical|numerical|temporal"}}
- y_axis: {{"column": "...", "label": "...", "type": "numerical"}}
- color_encoding: Optional color mapping {{"column": "...", "palette": "..."}}
- size_encoding: Optional size mapping for scatter {{"column": "..."}}
- transformations: List of {{"operation": "...", "columns": [...], "parameters": {{}}}}
- styling_hints: {{"theme": "...", "annotations": [...], "legend_position": "..."}}
- visualization_goals: List of high-level goals
- rationale: Brief explanation of choices

JSON Response:"""

    def _parse_response(self, response: str) -> VisualMapping:
        """Parse the LLM's JSON response into a validated VisualMapping."""
        data = self._extract_json(response)
        return VisualMapping(**data)

    def _get_output_key(self) -> str:
        """Return the shared-memory key for this agent's output."""
        return self.MEMORY_KEY
|
coda/config.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configuration management for CoDA.
|
| 3 |
+
|
| 4 |
+
Centralizes all configuration values including API keys, model settings,
|
| 5 |
+
quality thresholds, and execution parameters.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
from dataclasses import dataclass, field
|
| 10 |
+
from typing import Optional
|
| 11 |
+
|
| 12 |
+
from dotenv import load_dotenv
|
| 13 |
+
|
| 14 |
+
load_dotenv()
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
@dataclass(frozen=True)
class ModelConfig:
    """Configuration for LLM models."""

    # Text model used for standard agent reasoning.
    default_model: str = "llama-3.3-70b-versatile"
    # Multimodal model used for image-based calls.
    vision_model: str = "meta-llama/llama-4-maverick-17b-128e-instruct"
    # Sampling temperature applied when a call does not override it.
    temperature: float = 0.7
    # Upper bound on completion tokens per call.
    max_tokens: int = 4096
    # Number of API attempts before giving up.
    max_retries: int = 3
    # Base delay in seconds; the client backs off as retry_delay * 2**attempt.
    retry_delay: float = 1.0
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@dataclass(frozen=True)
class QualityThresholds:
    """Quality score thresholds for the feedback loop.

    Scores below these minimums trigger another refinement iteration.
    NOTE(review): presumably a 0-10 scale — confirm against the visual
    evaluator's scoring rubric.
    """

    minimum_overall_score: float = 7.0
    minimum_readability_score: float = 6.0
    minimum_accuracy_score: float = 7.0
    minimum_aesthetics_score: float = 6.0
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
@dataclass(frozen=True)
class ExecutionConfig:
    """Configuration for code execution."""

    # Hard time limit in seconds for running generated visualization code.
    code_timeout_seconds: int = 60
    # Maximum passes through the quality-feedback refinement loop.
    max_refinement_iterations: int = 3
    # Directory where generated artifacts are written.
    output_directory: str = "outputs"
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
@dataclass
class Config:
    """Main configuration container for CoDA.

    Aggregates the Groq API key with model, quality, and execution settings.
    Validates on construction: a missing API key raises immediately instead
    of failing later at the first API call.
    """

    # Read at instantiation time (default_factory) so the environment can be
    # patched in tests before a Config is built.
    groq_api_key: str = field(default_factory=lambda: os.getenv("GROQ_API_KEY", ""))
    model: ModelConfig = field(default_factory=ModelConfig)
    quality: QualityThresholds = field(default_factory=QualityThresholds)
    execution: ExecutionConfig = field(default_factory=ExecutionConfig)

    def __post_init__(self) -> None:
        # Fail fast with an actionable message rather than a confusing
        # authentication error deep inside the first LLM call.
        if not self.groq_api_key:
            raise ValueError(
                "GROQ_API_KEY environment variable is required. "
                "Get your API key at https://console.groq.com"
            )

    @classmethod
    def from_env(cls) -> "Config":
        """Create configuration from environment variables.

        Every tunable has a CODA_* environment override; unset variables
        fall back to the library defaults.

        Returns:
            A fully-populated, validated Config.

        Raises:
            ValueError: If GROQ_API_KEY is not set (via __post_init__).
        """
        return cls(
            groq_api_key=os.getenv("GROQ_API_KEY", ""),
            model=ModelConfig(
                default_model=os.getenv("CODA_DEFAULT_MODEL", "llama-3.3-70b-versatile"),
                vision_model=os.getenv(
                    "CODA_VISION_MODEL",
                    "meta-llama/llama-4-maverick-17b-128e-instruct",
                ),
                temperature=float(os.getenv("CODA_TEMPERATURE", "0.7")),
                max_tokens=int(os.getenv("CODA_MAX_TOKENS", "4096")),
                # FIX: retry settings were the only ModelConfig fields with no
                # environment override; defaults are unchanged, so existing
                # deployments behave identically.
                max_retries=int(os.getenv("CODA_MAX_RETRIES", "3")),
                retry_delay=float(os.getenv("CODA_RETRY_DELAY", "1.0")),
            ),
            quality=QualityThresholds(
                minimum_overall_score=float(os.getenv("CODA_MIN_OVERALL_SCORE", "7.0")),
                minimum_readability_score=float(os.getenv("CODA_MIN_READABILITY_SCORE", "6.0")),
                minimum_accuracy_score=float(os.getenv("CODA_MIN_ACCURACY_SCORE", "7.0")),
                minimum_aesthetics_score=float(os.getenv("CODA_MIN_AESTHETICS_SCORE", "6.0")),
            ),
            execution=ExecutionConfig(
                code_timeout_seconds=int(os.getenv("CODA_CODE_TIMEOUT", "60")),
                max_refinement_iterations=int(os.getenv("CODA_MAX_ITERATIONS", "3")),
                output_directory=os.getenv("CODA_OUTPUT_DIR", "outputs"),
            ),
        )
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def get_config() -> Config:
    """Get the application configuration.

    Thin convenience wrapper around :meth:`Config.from_env`. A fresh Config
    is built on every call — there is no caching, so environment changes
    take effect on the next call.
    """
    return Config.from_env()
|
coda/core/__init__.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Core module for CoDA - contains LLM, memory, and base agent abstractions."""
|
| 2 |
+
|
| 3 |
+
from coda.core.llm import LLMProvider, GroqLLM, LLMResponse
|
| 4 |
+
from coda.core.memory import SharedMemory, MemoryEntry
|
| 5 |
+
from coda.core.base_agent import BaseAgent, AgentContext
|
| 6 |
+
from coda.core.agent_factory import AgentFactory
|
| 7 |
+
|
| 8 |
+
__all__ = [
|
| 9 |
+
"LLMProvider",
|
| 10 |
+
"GroqLLM",
|
| 11 |
+
"LLMResponse",
|
| 12 |
+
"SharedMemory",
|
| 13 |
+
"MemoryEntry",
|
| 14 |
+
"BaseAgent",
|
| 15 |
+
"AgentContext",
|
| 16 |
+
"AgentFactory",
|
| 17 |
+
]
|
coda/core/agent_factory.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Agent Factory for CoDA.
|
| 3 |
+
|
| 4 |
+
Provides factory methods for creating and configuring agents,
|
| 5 |
+
enabling flexible agent composition and testing.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from typing import Optional
|
| 9 |
+
|
| 10 |
+
from coda.core.llm import LLMProvider
|
| 11 |
+
from coda.core.memory import SharedMemory
|
| 12 |
+
from coda.core.base_agent import BaseAgent
|
| 13 |
+
|
| 14 |
+
from coda.agents.query_analyzer import QueryAnalyzerAgent
|
| 15 |
+
from coda.agents.data_processor import DataProcessorAgent
|
| 16 |
+
from coda.agents.viz_mapping import VizMappingAgent
|
| 17 |
+
from coda.agents.search_agent import SearchAgent
|
| 18 |
+
from coda.agents.design_explorer import DesignExplorerAgent
|
| 19 |
+
from coda.agents.code_generator import CodeGeneratorAgent
|
| 20 |
+
from coda.agents.debug_agent import DebugAgent
|
| 21 |
+
from coda.agents.visual_evaluator import VisualEvaluatorAgent
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class AgentFactory:
    """
    Factory for creating CoDA agents with shared dependencies.

    Every agent produced by one factory instance shares the same LLM
    provider and SharedMemory, which is how agents exchange results.
    """

    # Registry of constructable agent types, keyed by their pipeline name.
    AGENT_TYPES = {
        "query_analyzer": QueryAnalyzerAgent,
        "data_processor": DataProcessorAgent,
        "viz_mapping": VizMappingAgent,
        "search_agent": SearchAgent,
        "design_explorer": DesignExplorerAgent,
        "code_generator": CodeGeneratorAgent,
        "debug_agent": DebugAgent,
        "visual_evaluator": VisualEvaluatorAgent,
    }

    def __init__(
        self,
        llm: LLMProvider,
        memory: Optional[SharedMemory] = None,
    ) -> None:
        self._llm = llm
        # Fall back to a fresh memory buffer when none is supplied.
        self._memory = memory or SharedMemory()

    @property
    def memory(self) -> SharedMemory:
        """The SharedMemory instance handed to every created agent."""
        return self._memory

    def create(
        self,
        agent_type: str,
        **kwargs,
    ) -> BaseAgent:
        """
        Create an agent by type name.

        Args:
            agent_type: Name of the agent type (a key of AGENT_TYPES)
            **kwargs: Extra arguments forwarded to the agent constructor

        Returns:
            Configured agent instance wired to this factory's LLM and memory

        Raises:
            ValueError: If agent_type is not recognized
        """
        agent_cls = self.AGENT_TYPES.get(agent_type)
        if agent_cls is None:
            raise ValueError(
                f"Unknown agent type: {agent_type}. "
                f"Available types: {list(self.AGENT_TYPES.keys())}"
            )
        return agent_cls(llm=self._llm, memory=self._memory, **kwargs)

    def create_all(self, **agent_kwargs) -> dict[str, BaseAgent]:
        """
        Create all available agent types.

        Args:
            **agent_kwargs: Per-type constructor arguments, keyed by type name

        Returns:
            Dictionary mapping agent type names to instances
        """
        agents: dict[str, BaseAgent] = {}
        for type_name in self.AGENT_TYPES:
            extra = agent_kwargs.get(type_name, {})
            agents[type_name] = self.create(type_name, **extra)
        return agents

    def create_pipeline_agents(
        self,
        code_timeout: int = 60,
        output_directory: str = "outputs",
        min_quality_score: float = 7.0,
    ) -> dict[str, BaseAgent]:
        """
        Create agents configured for the standard visualization pipeline.

        Args:
            code_timeout: Timeout for code execution in seconds
            output_directory: Directory for output files
            min_quality_score: Minimum quality score threshold

        Returns:
            Dictionary of configured agents for the pipeline
        """
        # The analysis/generation agents take no extra configuration.
        agents: dict[str, BaseAgent] = {
            name: self.create(name)
            for name in (
                "query_analyzer",
                "data_processor",
                "viz_mapping",
                "search_agent",
                "design_explorer",
                "code_generator",
            )
        }
        # These two carry execution- and quality-specific settings.
        agents["debug_agent"] = self.create(
            "debug_agent",
            timeout_seconds=code_timeout,
            output_directory=output_directory,
        )
        agents["visual_evaluator"] = self.create(
            "visual_evaluator",
            min_overall_score=min_quality_score,
        )
        return agents
|
coda/core/base_agent.py
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Base agent interface for CoDA.
|
| 3 |
+
|
| 4 |
+
Defines the contract that all specialized agents must implement,
|
| 5 |
+
providing common functionality for LLM interaction and memory access.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import json
|
| 9 |
+
import logging
|
| 10 |
+
import re
|
| 11 |
+
from abc import ABC, abstractmethod
|
| 12 |
+
from typing import Any, Optional, TypeVar, Generic
|
| 13 |
+
|
| 14 |
+
from pydantic import BaseModel
|
| 15 |
+
|
| 16 |
+
from coda.core.llm import LLMProvider
|
| 17 |
+
from coda.core.memory import SharedMemory
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
T = TypeVar("T", bound=BaseModel)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class AgentContext(BaseModel):
    """Context passed to an agent during execution."""

    # The user's natural-language visualization request.
    query: str
    # Paths to the data files available to the pipeline. (Pydantic copies
    # field defaults per instance, so the shared-mutable-default pitfall of
    # plain classes does not apply here.)
    data_paths: list[str] = []
    # Current refinement iteration; 0 on the first pass.
    iteration: int = 0
    # Evaluator feedback carried into a refinement iteration, if any.
    feedback: Optional[str] = None
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class BaseAgent(ABC, Generic[T]):
    """
    Abstract base class for all CoDA agents.

    Each agent specializes in a specific aspect of the visualization pipeline.
    Agents communicate through shared memory and use an LLM for reasoning.
    ``execute`` is a template method; subclasses supply the prompt, system
    persona, response parser, and memory key via the abstract hooks.
    """

    def __init__(
        self,
        llm: LLMProvider,
        memory: SharedMemory,
        name: Optional[str] = None,
    ) -> None:
        self._llm = llm
        self._memory = memory
        # Default the display name to the concrete class name.
        self._name = name or self.__class__.__name__

    @property
    def name(self) -> str:
        """Get the agent's name."""
        return self._name

    def execute(self, context: AgentContext) -> T:
        """
        Execute the agent's task.

        Builds the prompt, calls the LLM, parses the response into the
        agent's output model, and stores it in shared memory.

        Args:
            context: The execution context containing query and data info

        Returns:
            The agent's structured output
        """
        # Lazy %-style args avoid formatting when the level is disabled.
        logger.info("[%s] Starting execution", self._name)

        prompt = self._build_prompt(context)
        system_prompt = self._get_system_prompt()

        response = self._llm.complete(
            prompt=prompt,
            system_prompt=system_prompt,
        )

        result = self._parse_response(response.content)
        self._store_result(result)

        logger.info("[%s] Execution complete", self._name)
        return result

    @abstractmethod
    def _build_prompt(self, context: AgentContext) -> str:
        """
        Build the prompt for the LLM.

        Args:
            context: The execution context

        Returns:
            The formatted prompt string
        """
        pass

    @abstractmethod
    def _get_system_prompt(self) -> str:
        """
        Get the system prompt defining the agent's persona.

        Returns:
            The system prompt string
        """
        pass

    @abstractmethod
    def _parse_response(self, response: str) -> T:
        """
        Parse the LLM response into a structured output.

        Args:
            response: The raw LLM response

        Returns:
            The parsed and validated output
        """
        pass

    @abstractmethod
    def _get_output_key(self) -> str:
        """
        Get the key used to store this agent's output in memory.

        Returns:
            The memory key string
        """
        pass

    def _store_result(self, result: T) -> None:
        """Store the agent's result in shared memory."""
        self._memory.store(
            key=self._get_output_key(),
            value=result.model_dump(),
            agent_name=self._name,
        )

    def _get_from_memory(self, key: str) -> Optional[Any]:
        """Retrieve a value from shared memory."""
        return self._memory.retrieve(key)

    def _extract_json(self, text: str) -> dict[str, Any]:
        """
        Extract JSON from LLM response text.

        Handles responses where JSON is wrapped in markdown code blocks
        and sanitizes control characters that can break JSON parsing.

        Args:
            text: The raw LLM response text

        Returns:
            The parsed JSON object

        Raises:
            ValueError: If no parseable JSON object can be recovered
        """
        # Prefer the contents of a fenced ``` block if one is present.
        json_match = re.search(r"```(?:json)?\s*([\s\S]*?)```", text)
        if json_match:
            text = json_match.group(1)

        text = text.strip()

        try:
            return json.loads(text)
        except json.JSONDecodeError:
            pass

        # Fall back: isolate the outermost {...} span and sanitize it.
        # ([\s\S] already matches newlines, so re.DOTALL is unnecessary.)
        obj_match = re.search(r"(\{[\s\S]*\})", text)
        if obj_match:
            json_text = obj_match.group(1)

            # Strip truly invalid control characters (NUL, etc.) but keep
            # \n, \r, \t, which are legal between JSON tokens.
            json_text = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]", "", json_text)

            try:
                return json.loads(json_text)
            except json.JSONDecodeError:
                pass

            # Last resort: escape raw newlines/tabs that appear *inside*
            # string literals — a common LLM formatting mistake.
            try:
                return json.loads(self._fix_json_strings(json_text))
            except json.JSONDecodeError:
                pass

        # FIX: dropped pointless f-string prefixes (no placeholders) and
        # made the debug log lazy.
        logger.error("Failed to parse JSON after sanitization attempts")
        logger.debug("Raw text: %s...", text[:500])
        raise ValueError("Invalid JSON in response: Could not parse after sanitization")

    def _fix_json_strings(self, text: str) -> str:
        """Fix unescaped newlines and control characters inside JSON strings.

        Single-pass scanner that tracks whether the cursor is inside a
        double-quoted string (honoring backslash escapes) and escapes raw
        \\n, \\r, \\t only there; text outside strings is left untouched.
        """
        result = []
        in_string = False
        escape_next = False

        for char in text:
            if escape_next:
                result.append(char)
                escape_next = False
                continue

            if char == '\\':
                result.append(char)
                escape_next = True
                continue

            if char == '"':
                in_string = not in_string
                result.append(char)
                continue

            if in_string:
                # Escape problematic characters inside strings.
                if char == '\n':
                    result.append('\\n')
                elif char == '\r':
                    result.append('\\r')
                elif char == '\t':
                    result.append('\\t')
                else:
                    result.append(char)
            else:
                result.append(char)

        return ''.join(result)
|
coda/core/llm.py
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
LLM abstraction layer for CoDA.
|
| 3 |
+
|
| 4 |
+
Provides a clean interface for interacting with language models,
|
| 5 |
+
with ChatGroq as the default implementation.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import base64
|
| 9 |
+
import logging
|
| 10 |
+
import time
|
| 11 |
+
from abc import ABC, abstractmethod
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
from typing import Any, Optional, Union
|
| 14 |
+
|
| 15 |
+
from groq import Groq
|
| 16 |
+
from pydantic import BaseModel
|
| 17 |
+
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class LLMResponse(BaseModel):
    """Structured response from an LLM call."""

    # Generated text; empty string if the API returned no content.
    content: str
    # Model identifier reported by the provider for this call.
    model: str
    # Token accounting with keys prompt_tokens/completion_tokens/total_tokens
    # (zeros when the provider omits usage data).
    usage: dict[str, int]
    # Provider stop reason; "unknown" when the provider reports none.
    finish_reason: str
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class LLMProvider(ABC):
    """Abstract interface for language model providers.

    Agents depend only on this interface; concrete implementations
    (e.g. GroqLLM below) supply text and multimodal completion.
    """

    @abstractmethod
    def complete(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
    ) -> LLMResponse:
        """Generate a text completion.

        Args:
            prompt: User prompt text.
            system_prompt: Optional system message prepended to the chat.
            temperature: Per-call sampling override; None means provider default.
            max_tokens: Per-call completion-length override; None means default.
        """
        pass

    @abstractmethod
    def complete_with_image(
        self,
        prompt: str,
        image_path: Union[str, Path],
        system_prompt: Optional[str] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
    ) -> LLMResponse:
        """Generate a completion with image input (multimodal).

        Args:
            prompt: User prompt text accompanying the image.
            image_path: Path to the image file to include with the prompt.
        """
        pass
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
class GroqLLM(LLMProvider):
    """ChatGroq implementation of the LLM provider.

    Wraps the Groq SDK with exponential-backoff retries and both text
    and vision (image + text) completion entry points.
    """

    def __init__(
        self,
        api_key: str,
        default_model: str = "llama-3.3-70b-versatile",
        vision_model: str = "meta-llama/llama-4-maverick-17b-128e-instruct",
        temperature: float = 0.7,
        max_tokens: int = 4096,
        max_retries: int = 3,
        retry_delay: float = 1.0,
    ) -> None:
        self._client = Groq(api_key=api_key)
        self._default_model = default_model
        self._vision_model = vision_model
        self._temperature = temperature
        self._max_tokens = max_tokens
        self._max_retries = max_retries
        self._retry_delay = retry_delay

    def complete(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
    ) -> LLMResponse:
        """Generate a text completion using ChatGroq.

        Args:
            prompt: The user prompt.
            system_prompt: Optional system message.
            temperature: Per-call override; instance default when None.
            max_tokens: Per-call override; instance default when None.
        """
        messages = self._build_messages(prompt, system_prompt)
        return self._call_with_retry(
            messages=messages,
            model=self._default_model,
            # BUGFIX: `temperature or self._temperature` silently discarded a
            # caller-supplied 0.0 (and max_tokens=0) because 0 is falsy.
            # Explicit None checks make 0.0 a valid, respected override.
            temperature=self._temperature if temperature is None else temperature,
            max_tokens=self._max_tokens if max_tokens is None else max_tokens,
        )

    def complete_with_image(
        self,
        prompt: str,
        image_path: Union[str, Path],
        system_prompt: Optional[str] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
    ) -> LLMResponse:
        """Generate a completion with image input using the vision model.

        Args:
            prompt: The user prompt accompanying the image.
            image_path: Path to the image file (sent base64-inlined).
        """
        image_data = self._encode_image(image_path)

        # NOTE(review): the MIME type is hard-coded to image/png regardless
        # of the file's actual format — confirm callers only pass PNGs.
        user_content = [
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/png;base64,{image_data}"
                }
            },
            {
                "type": "text",
                "text": prompt
            }
        ]

        messages: list[dict[str, Any]] = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": user_content})

        return self._call_with_retry(
            messages=messages,
            model=self._vision_model,
            # BUGFIX: same falsy-zero handling as complete().
            temperature=self._temperature if temperature is None else temperature,
            max_tokens=self._max_tokens if max_tokens is None else max_tokens,
        )

    def _build_messages(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
    ) -> list[dict[str, str]]:
        """Build the message list for the API call."""
        messages: list[dict[str, str]] = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})
        return messages

    def _call_with_retry(
        self,
        messages: list[dict[str, Any]],
        model: str,
        temperature: float,
        max_tokens: int,
    ) -> LLMResponse:
        """Execute API call with exponential backoff retry.

        Raises:
            RuntimeError: After ``max_retries`` consecutive failures; the
                last underlying exception is chained as the cause.
        """
        last_exception: Optional[Exception] = None

        for attempt in range(self._max_retries):
            try:
                response = self._client.chat.completions.create(
                    model=model,
                    messages=messages,
                    temperature=temperature,
                    max_tokens=max_tokens,
                )

                return LLMResponse(
                    content=response.choices[0].message.content or "",
                    model=response.model,
                    usage={
                        "prompt_tokens": response.usage.prompt_tokens if response.usage else 0,
                        "completion_tokens": response.usage.completion_tokens if response.usage else 0,
                        "total_tokens": response.usage.total_tokens if response.usage else 0,
                    },
                    finish_reason=response.choices[0].finish_reason or "unknown",
                )

            except Exception as e:
                # Broad on purpose: the SDK raises a variety of transient
                # network/rate-limit errors; all are retried with backoff.
                last_exception = e
                logger.warning(
                    "API call failed (attempt %d/%d): %s",
                    attempt + 1, self._max_retries, e,
                )
                if attempt < self._max_retries - 1:
                    sleep_time = self._retry_delay * (2 ** attempt)
                    time.sleep(sleep_time)

        raise RuntimeError(
            f"API call failed after {self._max_retries} attempts"
        ) from last_exception

    def _encode_image(self, image_path: Union[str, Path]) -> str:
        """Encode an image file to base64.

        Raises:
            FileNotFoundError: If the image path does not exist.
        """
        path = Path(image_path)
        if not path.exists():
            raise FileNotFoundError(f"Image not found: {path}")

        with open(path, "rb") as f:
            return base64.b64encode(f.read()).decode("utf-8")
|
coda/core/memory.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Shared memory buffer for inter-agent communication in CoDA.
|
| 3 |
+
|
| 4 |
+
Provides thread-safe storage for agents to exchange context,
|
| 5 |
+
results, and feedback during the visualization pipeline.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import threading
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
from typing import Any, Optional
|
| 11 |
+
|
| 12 |
+
from pydantic import BaseModel, Field
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class MemoryEntry(BaseModel):
    """A single entry in the shared memory."""

    # Unique identifier used to look the entry up in SharedMemory.
    key: str
    # The stored payload; expected to be JSON-serializable.
    value: Any
    # Name of the agent that wrote this entry (for tracing/debugging).
    agent_name: str
    # Creation time; defaults to local wall-clock time at construction.
    timestamp: datetime = Field(default_factory=datetime.now)
    # Optional free-form context attached by the writing agent.
    metadata: dict[str, Any] = Field(default_factory=dict)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class SharedMemory:
    """
    Thread-safe key/value buffer that agents use to exchange data.

    Values are addressed by string keys. Every write records its source
    agent and timestamp and is appended to a chronological history, so
    the full sequence of operations can be inspected for debugging.
    """

    def __init__(self) -> None:
        # Latest value per key; the history keeps every write ever made.
        self._storage: dict[str, MemoryEntry] = {}
        self._lock = threading.RLock()
        self._history: list[MemoryEntry] = []

    def store(
        self,
        key: str,
        value: Any,
        agent_name: str,
        metadata: Optional[dict[str, Any]] = None,
    ) -> None:
        """
        Store a value in shared memory.

        Args:
            key: Unique identifier for the data
            value: The data to store (should be JSON-serializable)
            agent_name: Name of the agent storing the data
            metadata: Optional additional context
        """
        with self._lock:
            record = MemoryEntry(
                key=key,
                value=value,
                agent_name=agent_name,
                metadata=metadata or {},
            )
            self._storage[key] = record
            self._history.append(record)

    def retrieve(self, key: str) -> Optional[Any]:
        """
        Retrieve a stored value by key.

        Args:
            key: The key to look up

        Returns:
            The stored value, or None if the key is absent
        """
        with self._lock:
            record = self._storage.get(key)
        return None if record is None else record.value

    def retrieve_entry(self, key: str) -> Optional[MemoryEntry]:
        """
        Retrieve the full entry, including provenance metadata.

        Args:
            key: The key to look up

        Returns:
            The full MemoryEntry, or None if the key is absent
        """
        with self._lock:
            return self._storage.get(key)

    def get_context(self, keys: list[str]) -> dict[str, Any]:
        """
        Fetch several values at once as a context dictionary.

        Args:
            keys: List of keys to retrieve

        Returns:
            Dictionary mapping keys to their values; keys that are not
            present are silently omitted
        """
        with self._lock:
            selected: dict[str, Any] = {}
            for name in keys:
                record = self._storage.get(name)
                if record is not None:
                    selected[name] = record.value
            return selected

    def get_all(self) -> dict[str, Any]:
        """
        Fetch every stored value.

        Returns:
            Dictionary mapping all keys to their current values
        """
        with self._lock:
            return {name: record.value for name, record in self._storage.items()}

    def get_history(self, agent_name: Optional[str] = None) -> list[MemoryEntry]:
        """
        Return the chronological log of all writes.

        Args:
            agent_name: Optional filter by agent name

        Returns:
            List of memory entries in the order they were written
        """
        with self._lock:
            entries = list(self._history)
        if not agent_name:
            return entries
        return [entry for entry in entries if entry.agent_name == agent_name]

    def has_key(self, key: str) -> bool:
        """Return True if *key* currently exists in memory."""
        with self._lock:
            return key in self._storage

    def clear(self) -> None:
        """Drop all stored values and the write history."""
        with self._lock:
            self._storage.clear()
            self._history.clear()

    def keys(self) -> list[str]:
        """Return a list of all currently stored keys."""
        with self._lock:
            return list(self._storage.keys())
|
coda/orchestrator.py
ADDED
|
@@ -0,0 +1,293 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Workflow Orchestrator for CoDA.
|
| 3 |
+
|
| 4 |
+
Manages the multi-agent pipeline, coordinating agent execution,
|
| 5 |
+
handling feedback loops, and implementing quality-driven halting.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import logging
|
| 9 |
+
from dataclasses import dataclass
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from typing import Optional, Callable
|
| 12 |
+
|
| 13 |
+
from coda.config import Config, get_config
|
| 14 |
+
from coda.core.llm import GroqLLM, LLMProvider
|
| 15 |
+
from coda.core.memory import SharedMemory
|
| 16 |
+
from coda.core.base_agent import AgentContext
|
| 17 |
+
|
| 18 |
+
from coda.agents.query_analyzer import QueryAnalyzerAgent, QueryAnalysis
|
| 19 |
+
from coda.agents.data_processor import DataProcessorAgent, DataAnalysis
|
| 20 |
+
from coda.agents.viz_mapping import VizMappingAgent, VisualMapping
|
| 21 |
+
from coda.agents.search_agent import SearchAgent, SearchResult
|
| 22 |
+
from coda.agents.design_explorer import DesignExplorerAgent, DesignSpec
|
| 23 |
+
from coda.agents.code_generator import CodeGeneratorAgent, GeneratedCode
|
| 24 |
+
from coda.agents.debug_agent import DebugAgent, ExecutionResult
|
| 25 |
+
from coda.agents.visual_evaluator import VisualEvaluatorAgent, VisualEvaluation
|
| 26 |
+
|
| 27 |
+
logger = logging.getLogger(__name__)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
@dataclass
class PipelineResult:
    """Final result from the CoDA pipeline."""

    # True when an output file was produced and exists on disk.
    success: bool
    # Path to the rendered visualization, if one was produced.
    output_file: Optional[str]
    # Quality assessment from the visual evaluator, if it ran.
    evaluation: Optional[VisualEvaluation]
    # Number of pipeline passes performed (initial run + refinements).
    iterations: int
    # Human-readable failure description; None on success.
    error: Optional[str] = None

    @property
    def scores(self) -> Optional[dict]:
        """Get quality scores if evaluation exists."""
        # model_dump() serializes the pydantic scores model to a plain dict.
        if self.evaluation:
            return self.evaluation.scores.model_dump()
        return None
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class CodaOrchestrator:
    """
    Orchestrates the CoDA multi-agent visualization pipeline.

    Coordinates agent execution in sequence, manages the shared memory,
    and implements iterative refinement through feedback loops.
    """

    def __init__(
        self,
        config: Optional[Config] = None,
        llm: Optional[LLMProvider] = None,
        progress_callback: Optional[Callable[[str, float], None]] = None,
    ) -> None:
        """Set up configuration, LLM, shared memory, and all agents.

        Args:
            config: Pipeline configuration; falls back to get_config().
            llm: LLM provider; a GroqLLM is built from config when omitted.
            progress_callback: Optional callable(status, progress) invoked
                before each pipeline stage.
        """
        self._config = config or get_config()
        self._llm = llm or self._create_llm()
        self._memory = SharedMemory()
        self._progress_callback = progress_callback

        self._agents = self._create_agents()

    def _create_llm(self) -> GroqLLM:
        """Create the LLM instance."""
        return GroqLLM(
            api_key=self._config.groq_api_key,
            default_model=self._config.model.default_model,
            vision_model=self._config.model.vision_model,
            temperature=self._config.model.temperature,
            max_tokens=self._config.model.max_tokens,
            max_retries=self._config.model.max_retries,
        )

    def _create_agents(self) -> dict:
        """Initialize all agents with shared resources."""
        # All agents share one LLM and one memory buffer so results stored
        # by earlier stages are visible to later ones.
        return {
            "query_analyzer": QueryAnalyzerAgent(self._llm, self._memory),
            "data_processor": DataProcessorAgent(self._llm, self._memory),
            "viz_mapping": VizMappingAgent(self._llm, self._memory),
            "search_agent": SearchAgent(self._llm, self._memory),
            "design_explorer": DesignExplorerAgent(self._llm, self._memory),
            "code_generator": CodeGeneratorAgent(self._llm, self._memory),
            "debug_agent": DebugAgent(
                self._llm,
                self._memory,
                timeout_seconds=self._config.execution.code_timeout_seconds,
                output_directory=self._config.execution.output_directory,
            ),
            "visual_evaluator": VisualEvaluatorAgent(
                self._llm,
                self._memory,
                min_overall_score=self._config.quality.minimum_overall_score,
            ),
        }

    def run(
        self,
        query: str,
        data_paths: list[str],
    ) -> PipelineResult:
        """
        Execute the full visualization pipeline.

        Args:
            query: Natural language visualization request
            data_paths: Paths to data files

        Returns:
            PipelineResult with output file and evaluation
        """
        logger.info(f"Starting CoDA pipeline for query: {query[:50]}...")

        # Start from a clean slate so state from earlier runs cannot leak.
        self._memory.clear()

        validated_paths = self._validate_data_paths(data_paths)
        if not validated_paths:
            return PipelineResult(
                success=False,
                output_file=None,
                evaluation=None,
                iterations=0,
                error="No valid data files provided",
            )

        context = AgentContext(
            query=query,
            data_paths=validated_paths,
            iteration=0,
        )

        try:
            self._run_initial_pipeline(context)
        except Exception as e:
            logger.error(f"Initial pipeline failed: {e}")
            return PipelineResult(
                success=False,
                output_file=None,
                evaluation=None,
                iterations=0,
                error=str(e),
            )

        # Re-run the later stages until the quality threshold is met or
        # the iteration budget is exhausted.
        max_iterations = self._config.execution.max_refinement_iterations
        final_result = self._run_refinement_loop(context, max_iterations)

        return final_result

    def _validate_data_paths(self, data_paths: list[str]) -> list[str]:
        """Validate that data files exist.

        Missing files are logged and dropped rather than treated as fatal.
        """
        valid_paths = []
        for path in data_paths:
            if Path(path).exists():
                valid_paths.append(path)
            else:
                logger.warning(f"Data file not found: {path}")
        return valid_paths

    def _run_initial_pipeline(self, context: AgentContext) -> None:
        """Run the initial agent pipeline.

        Executes every agent once, in order, reporting an approximate
        progress fraction before each stage.
        """
        steps = [
            ("query_analyzer", "Analyzing query...", 0.1),
            ("data_processor", "Processing data...", 0.2),
            ("viz_mapping", "Mapping visualization...", 0.3),
            ("search_agent", "Searching examples...", 0.4),
            ("design_explorer", "Designing visualization...", 0.5),
            ("code_generator", "Generating code...", 0.7),
            ("debug_agent", "Executing code...", 0.85),
            ("visual_evaluator", "Evaluating output...", 0.95),
        ]

        for agent_name, status, progress in steps:
            self._report_progress(status, progress)
            agent = self._agents[agent_name]
            agent.execute(context)

    def _run_refinement_loop(
        self,
        context: AgentContext,
        max_iterations: int,
    ) -> PipelineResult:
        """Run the iterative refinement loop."""
        for iteration in range(max_iterations):
            evaluation = self._memory.retrieve("visual_evaluation")

            # No evaluation stored means the evaluator produced nothing
            # usable — there is no feedback to refine against.
            if not evaluation:
                break

            # Memory may hold either a plain dict or a VisualEvaluation
            # instance; normalize to a model object either way.
            if isinstance(evaluation, dict):
                passes = evaluation.get("passes_threshold", False)
                eval_obj = VisualEvaluation(**evaluation)
            else:
                passes = evaluation.passes_threshold
                eval_obj = evaluation

            if passes:
                logger.info(f"Quality threshold met at iteration {iteration}")
                return self._create_success_result(eval_obj, iteration + 1)

            # Don't start a refinement pass we have no budget to evaluate.
            if iteration >= max_iterations - 1:
                logger.info("Max iterations reached")
                break

            logger.info(f"Refinement iteration {iteration + 1}")
            context = self._create_refinement_context(context, eval_obj, iteration + 1)

            self._report_progress(f"Refining (iteration {iteration + 2})...", 0.5)

            try:
                self._run_refinement_agents(context)
            except Exception as e:
                logger.error(f"Refinement failed: {e}")
                break

        # Fell out of the loop: package whatever the latest evaluation was.
        final_eval = self._memory.retrieve("visual_evaluation")
        if isinstance(final_eval, dict):
            final_eval = VisualEvaluation(**final_eval)

        return self._create_success_result(final_eval, max_iterations)

    def _run_refinement_agents(self, context: AgentContext) -> None:
        """Run agents that participate in refinement."""
        # Only the design/codegen/execution/evaluation stages re-run; the
        # query and data analyses from the initial pass are reused.
        refinement_agents = [
            "design_explorer",
            "code_generator",
            "debug_agent",
            "visual_evaluator",
        ]

        for agent_name in refinement_agents:
            agent = self._agents[agent_name]
            agent.execute(context)

    def _create_refinement_context(
        self,
        original_context: AgentContext,
        evaluation: VisualEvaluation,
        iteration: int,
    ) -> AgentContext:
        """Create context for refinement iteration."""
        feedback_parts = []

        # Keep feedback compact: top 3 issues and top 2 priority fixes.
        if evaluation.issues:
            feedback_parts.append(f"Issues: {', '.join(evaluation.issues[:3])}")

        if evaluation.priority_fixes:
            feedback_parts.append(f"Fix: {', '.join(evaluation.priority_fixes[:2])}")

        feedback = " | ".join(feedback_parts)

        return AgentContext(
            query=original_context.query,
            data_paths=original_context.data_paths,
            iteration=iteration,
            feedback=feedback,
        )

    def _create_success_result(
        self,
        evaluation: Optional[VisualEvaluation],
        iterations: int,
    ) -> PipelineResult:
        """Create a successful pipeline result."""
        execution_result = self._memory.retrieve("execution_result")
        output_file = None

        # The stored execution result may be a dict or an object with an
        # output_file attribute; handle both shapes.
        if execution_result:
            if isinstance(execution_result, dict):
                output_file = execution_result.get("output_file")
            else:
                output_file = execution_result.output_file

        return PipelineResult(
            # Only count the run as successful if the file really exists.
            success=output_file is not None and Path(output_file).exists(),
            output_file=output_file,
            evaluation=evaluation,
            iterations=iterations,
        )

    def _report_progress(self, status: str, progress: float) -> None:
        """Report progress to callback if set."""
        if self._progress_callback:
            self._progress_callback(status, progress)
        logger.info(f"[{progress:.0%}] {status}")

    def get_memory_state(self) -> dict:
        """Get the current state of shared memory for debugging."""
        return self._memory.get_all()
|
main.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Command-Line Interface for CoDA.
|
| 3 |
+
|
| 4 |
+
Provides a CLI for running the CoDA visualization pipeline locally.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import argparse
|
| 8 |
+
import logging
|
| 9 |
+
import sys
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
# Configure root logging once at import time so the CLI and all CoDA
# modules share one timestamped, named format.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def _build_parser() -> argparse.ArgumentParser:
    """Construct the argument parser for the CoDA command-line interface."""
    parser = argparse.ArgumentParser(
        description="CoDA - Collaborative Data Visualization Agents",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python main.py --query "Show sales trends" --data sales.csv
  python main.py -q "Bar chart of categories" -d data.xlsx
  python main.py --query "Scatter plot" --data file1.csv file2.csv
"""
    )

    parser.add_argument(
        "-q", "--query",
        type=str,
        required=True,
        help="Natural language visualization query"
    )
    parser.add_argument(
        "-d", "--data",
        type=str,
        nargs="+",
        required=True,
        help="Path(s) to data file(s)"
    )
    parser.add_argument(
        "-o", "--output",
        type=str,
        default="outputs",
        help="Output directory for visualizations (default: outputs)"
    )
    parser.add_argument(
        "--max-iterations",
        type=int,
        default=3,
        help="Maximum refinement iterations (default: 3)"
    )
    parser.add_argument(
        "--min-score",
        type=float,
        default=7.0,
        help="Minimum quality score threshold (default: 7.0)"
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Enable verbose logging"
    )
    return parser


def _progress_callback(status: str, progress: float) -> None:
    """Render a single-line text progress bar on stdout."""
    bar_length = 30
    filled = int(bar_length * progress)
    bar = "█" * filled + "░" * (bar_length - filled)
    print(f"\r[{bar}] {progress:.0%} - {status}", end="", flush=True)
    if progress >= 1.0:
        print()


def _print_banner(args) -> None:
    """Print the run header showing query, data files, and output dir."""
    print(f"\n{'=' * 60}")
    print("CoDA - Collaborative Data Visualization Agents")
    print(f"{'=' * 60}\n")
    print(f"Query: {args.query}")
    print(f"Data: {', '.join(args.data)}")
    print(f"Output: {args.output}/")
    print()


def _print_results(result) -> None:
    """Print the pipeline outcome, quality scores, and strengths."""
    print()
    print("=" * 60)
    print("Results")
    print(f"{'=' * 60}\n")

    if result.success:
        # Plain strings here — the originals were f-strings with no
        # placeholders.
        print("✅ Visualization generated successfully!")
        print(f"📁 Output: {result.output_file}")
        print(f"🔄 Iterations: {result.iterations}")

        if result.scores:
            print("\n📊 Quality Scores:")
            for key, value in result.scores.items():
                emoji = "🟢" if value >= 7 else "🟡" if value >= 5 else "🔴"
                print(f"  {key.title()}: {emoji} {value:.1f}/10")

        if result.evaluation and result.evaluation.strengths:
            print("\n💪 Strengths:")
            for s in result.evaluation.strengths[:3]:
                print(f"  • {s}")
    else:
        print("❌ Visualization failed!")
        if result.error:
            print(f"  Error: {result.error}")


def main():
    """Main entry point for the CLI.

    Parses arguments, validates inputs, builds the pipeline configuration,
    runs the orchestrator, and prints a human-readable summary. Exits with
    status 1 on any validation, import, configuration, or pipeline failure.
    """
    args = _build_parser().parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Fail fast on missing inputs before importing the heavier pipeline.
    for path in args.data:
        if not Path(path).exists():
            logger.error(f"Data file not found: {path}")
            sys.exit(1)

    # Import lazily so a broken environment gives a friendly message
    # instead of a traceback at module load.
    try:
        from coda.config import Config, ExecutionConfig, QualityThresholds
        from coda.orchestrator import CodaOrchestrator
    except ImportError as e:
        logger.error(f"Failed to import CoDA modules: {e}")
        logger.error("Make sure you have installed all dependencies: pip install -r requirements.txt")
        sys.exit(1)

    try:
        config = Config(
            execution=ExecutionConfig(
                max_refinement_iterations=args.max_iterations,
                output_directory=args.output,
            ),
            quality=QualityThresholds(
                minimum_overall_score=args.min_score,
            ),
        )
    except ValueError as e:
        logger.error(f"Configuration error: {e}")
        sys.exit(1)

    _print_banner(args)

    orchestrator = CodaOrchestrator(
        config=config,
        progress_callback=_progress_callback,
    )

    result = orchestrator.run(
        query=args.query,
        data_paths=args.data,
    )

    _print_results(result)
    if not result.success:
        sys.exit(1)

    print()


if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CoDA Dependencies
|
| 2 |
+
|
| 3 |
+
# Core
|
| 4 |
+
groq>=0.4.0
|
| 5 |
+
pydantic>=2.0.0
|
| 6 |
+
python-dotenv>=1.0.0
|
| 7 |
+
|
| 8 |
+
# Data Processing
|
| 9 |
+
pandas>=2.0.0
|
| 10 |
+
openpyxl>=3.1.0
|
| 11 |
+
pyarrow>=14.0.0
|
| 12 |
+
|
| 13 |
+
# Visualization
|
| 14 |
+
matplotlib>=3.7.0
|
| 15 |
+
seaborn>=0.13.0
|
| 16 |
+
|
| 17 |
+
# Web Interface
|
| 18 |
+
gradio>=4.0.0
|
| 19 |
+
|
| 20 |
+
# Development (optional)
|
| 21 |
+
pytest>=7.0.0
|
| 22 |
+
pytest-asyncio>=0.21.0
|
sample_data.csv
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
date,sales,category,region
|
| 2 |
+
2024-01-01,1500,Electronics,North
|
| 3 |
+
2024-01-02,2300,Electronics,South
|
| 4 |
+
2024-01-03,1800,Clothing,North
|
| 5 |
+
2024-01-04,3200,Electronics,East
|
| 6 |
+
2024-01-05,2100,Clothing,West
|
| 7 |
+
2024-01-06,1900,Food,North
|
| 8 |
+
2024-01-07,2800,Electronics,South
|
| 9 |
+
2024-01-08,1600,Clothing,East
|
| 10 |
+
2024-01-09,3500,Food,West
|
| 11 |
+
2024-01-10,2400,Electronics,North
|
| 12 |
+
2024-01-11,1700,Clothing,South
|
| 13 |
+
2024-01-12,2900,Food,East
|
| 14 |
+
2024-01-13,2200,Electronics,West
|
| 15 |
+
2024-01-14,1400,Clothing,North
|
| 16 |
+
2024-01-15,3100,Food,South
|
tests/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Tests package
|
tests/test_agents.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Unit tests for agent implementations.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import pytest
|
| 6 |
+
from unittest.mock import Mock, MagicMock
|
| 7 |
+
import json
|
| 8 |
+
|
| 9 |
+
from coda.core.memory import SharedMemory
|
| 10 |
+
from coda.core.llm import LLMResponse
|
| 11 |
+
from coda.core.base_agent import AgentContext
|
| 12 |
+
from coda.agents.query_analyzer import QueryAnalyzerAgent, QueryAnalysis
|
| 13 |
+
from coda.agents.data_processor import DataProcessorAgent, DataAnalysis
|
| 14 |
+
from coda.agents.viz_mapping import VizMappingAgent, VisualMapping
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class MockLLM:
    """Stand-in LLM that always answers with one canned response string."""

    def __init__(self, response_content: str):
        self._response = response_content

    def complete(self, prompt, system_prompt=None, **kwargs):
        # Usage counters are zeroed — token accounting is irrelevant in tests.
        zero_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
        return LLMResponse(
            content=self._response,
            model="mock",
            usage=zero_usage,
            finish_reason="stop",
        )

    def complete_with_image(self, prompt, image_path, **kwargs):
        # Vision calls are indistinguishable from text calls for the mock.
        return self.complete(prompt, **kwargs)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class TestQueryAnalyzerAgent:
    """Tests for the Query Analyzer agent."""

    @pytest.fixture
    def mock_response(self):
        # Canned LLM output shaped like the QueryAnalysis JSON schema.
        return json.dumps({
            "visualization_types": ["line chart", "bar chart"],
            "key_points": ["sales trends", "monthly data"],
            "todo_list": ["Load data", "Create chart", "Add labels"],
            "data_requirements": ["date", "sales"],
            "constraints": ["use blue colors"],
            "ambiguities": []
        })

    def test_execute(self, mock_response):
        """Test query analysis execution."""
        llm = MockLLM(mock_response)
        memory = SharedMemory()
        agent = QueryAnalyzerAgent(llm, memory)

        context = AgentContext(query="Show sales trends over time")
        result = agent.execute(context)

        # The agent should parse the mocked JSON into a QueryAnalysis model.
        assert isinstance(result, QueryAnalysis)
        assert "line chart" in result.visualization_types
        assert "sales trends" in result.key_points
        assert len(result.todo_list) == 3

    def test_stores_in_memory(self, mock_response):
        """Test that results are stored in memory."""
        llm = MockLLM(mock_response)
        memory = SharedMemory()
        agent = QueryAnalyzerAgent(llm, memory)

        context = AgentContext(query="Test query")
        agent.execute(context)

        # execute() is expected to persist its analysis under the
        # "query_analysis" key for downstream agents.
        stored = memory.retrieve("query_analysis")
        assert stored is not None
        assert "visualization_types" in stored
|
| 76 |
+
|
| 77 |
+
class TestVizMappingAgent:
    """Tests for the VizMapping agent."""

    @pytest.fixture
    def mock_response(self):
        # Canned LLM output shaped like the VisualMapping JSON schema.
        return json.dumps({
            "chart_type": "line",
            "chart_subtype": None,
            "x_axis": {"column": "date", "label": "Date", "type": "temporal"},
            "y_axis": {"column": "sales", "label": "Sales", "type": "numerical"},
            "color_encoding": None,
            "size_encoding": None,
            "transformations": [],
            "styling_hints": {"theme": "modern"},
            "visualization_goals": ["Show trends"],
            "rationale": "Line chart best for trends"
        })

    def test_execute(self, mock_response):
        """Test visualization mapping execution."""
        llm = MockLLM(mock_response)
        memory = SharedMemory()

        # Seed the upstream analysis the mapping agent reads from memory.
        memory.store("query_analysis", {
            "visualization_types": ["line chart"],
            "key_points": ["trends"],
            "data_requirements": ["date", "sales"]
        }, "test")

        agent = VizMappingAgent(llm, memory)
        context = AgentContext(query="Show sales trends")
        result = agent.execute(context)

        assert isinstance(result, VisualMapping)
        assert result.chart_type == "line"
        assert result.x_axis["column"] == "date"
|
| 114 |
+
|
| 115 |
+
class TestAgentContext:
    """Tests for the AgentContext model."""

    def test_basic_context(self):
        """Provided fields round-trip; iteration and feedback use defaults."""
        ctx = AgentContext(
            query="Test query",
            data_paths=["file1.csv", "file2.csv"],
        )

        assert ctx.query == "Test query"
        assert len(ctx.data_paths) == 2
        assert ctx.iteration == 0
        assert ctx.feedback is None

    def test_context_with_feedback(self):
        """Iteration count and refinement feedback survive construction."""
        ctx = AgentContext(
            query="Test",
            iteration=2,
            feedback="Improve colors",
        )

        assert ctx.iteration == 2
        assert ctx.feedback == "Improve colors"
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
class TestBaseAgentJsonExtraction:
    """Tests for JSON extraction from LLM responses."""

    @staticmethod
    def _make_agent():
        """Build a throwaway agent purely to reach _extract_json."""
        return QueryAnalyzerAgent(MockLLM("{}"), SharedMemory())

    def test_extract_json_plain(self):
        """A bare JSON object parses as-is."""
        agent = self._make_agent()

        assert agent._extract_json('{"key": "value"}') == {"key": "value"}

    def test_extract_json_markdown(self):
        """JSON inside a fenced ```json code block is still recovered."""
        agent = self._make_agent()
        fenced = 'Here is the response:\n```json\n{"key": "value"}\n```\n'

        assert agent._extract_json(fenced) == {"key": "value"}

    def test_extract_json_invalid(self):
        """Input that is not JSON raises ValueError."""
        agent = self._make_agent()

        with pytest.raises(ValueError):
            agent._extract_json("not valid json")
|
tests/test_llm.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Unit tests for the LLM abstraction layer.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import pytest
|
| 6 |
+
from unittest.mock import Mock, patch, MagicMock
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
|
| 9 |
+
from coda.core.llm import LLMProvider, GroqLLM, LLMResponse
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class TestLLMResponse:
    """Tests for the LLMResponse model."""

    def test_response_creation(self):
        """All constructor fields are preserved on the model."""
        usage = {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30}
        resp = LLMResponse(
            content="Test content",
            model="test-model",
            usage=usage,
            finish_reason="stop",
        )

        assert resp.content == "Test content"
        assert resp.model == "test-model"
        assert resp.usage["total_tokens"] == 30
        assert resp.finish_reason == "stop"
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class TestGroqLLM:
    """Tests for the GroqLLM implementation."""

    @pytest.fixture
    def mock_groq_client(self):
        """Patch the Groq SDK and yield the mocked client instance."""
        with patch("coda.core.llm.Groq") as groq_cls:
            instance = Mock()
            groq_cls.return_value = instance
            yield instance

    def test_initialization(self, mock_groq_client):
        """Custom constructor parameters land on the private attributes."""
        llm = GroqLLM(
            api_key="test-key",
            default_model="custom-model",
            temperature=0.5,
            max_tokens=2048,
        )

        assert llm._default_model == "custom-model"
        assert llm._temperature == 0.5
        assert llm._max_tokens == 2048

    def test_complete_success(self, mock_groq_client):
        """A successful API reply is unwrapped into an LLMResponse."""
        choice = MagicMock()
        choice.message.content = "Generated text"
        choice.finish_reason = "stop"

        api_reply = MagicMock()
        api_reply.choices = [choice]
        api_reply.model = "llama-3.3-70b-versatile"
        api_reply.usage.prompt_tokens = 10
        api_reply.usage.completion_tokens = 20
        api_reply.usage.total_tokens = 30

        mock_groq_client.chat.completions.create.return_value = api_reply

        llm = GroqLLM(api_key="test-key")
        response = llm.complete(prompt="Test prompt", system_prompt="System prompt")

        assert response.content == "Generated text"
        assert response.finish_reason == "stop"
        mock_groq_client.chat.completions.create.assert_called_once()

    def test_complete_with_retry(self, mock_groq_client):
        """The first failure is retried and the second attempt succeeds."""
        success = MagicMock(
            choices=[MagicMock(message=MagicMock(content="Success"), finish_reason="stop")],
            model="test",
            usage=MagicMock(prompt_tokens=0, completion_tokens=0, total_tokens=0),
        )
        mock_groq_client.chat.completions.create.side_effect = [
            Exception("Rate limited"),
            success,
        ]

        llm = GroqLLM(api_key="test-key", retry_delay=0.01)
        response = llm.complete(prompt="Test")

        assert response.content == "Success"
        assert mock_groq_client.chat.completions.create.call_count == 2

    def test_build_messages(self, mock_groq_client):
        """System prompt becomes the first message, user prompt the second."""
        llm = GroqLLM(api_key="test-key")

        messages = llm._build_messages(
            prompt="User message",
            system_prompt="System message",
        )

        assert len(messages) == 2
        system_msg, user_msg = messages
        assert system_msg["role"] == "system"
        assert system_msg["content"] == "System message"
        assert user_msg["role"] == "user"
        assert user_msg["content"] == "User message"

    def test_build_messages_no_system(self, mock_groq_client):
        """Without a system prompt only the user message is produced."""
        llm = GroqLLM(api_key="test-key")

        messages = llm._build_messages(prompt="User message")

        assert len(messages) == 1
        assert messages[0]["role"] == "user"
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
class TestLLMProviderInterface:
    """Tests for the abstract interface."""

    def test_interface_methods(self):
        """LLMProvider must expose both completion entry points."""
        for method_name in ("complete", "complete_with_image"):
            assert hasattr(LLMProvider, method_name)
|
tests/test_memory.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Unit tests for the SharedMemory class.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import pytest
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
import threading
|
| 8 |
+
import time
|
| 9 |
+
|
| 10 |
+
from coda.core.memory import SharedMemory, MemoryEntry
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class TestSharedMemory:
    """Tests for the SharedMemory class."""

    @pytest.fixture
    def memory(self):
        """Create a fresh SharedMemory instance."""
        return SharedMemory()

    def test_store_and_retrieve(self, memory):
        """A stored value comes back unchanged."""
        memory.store(
            key="test_key",
            value={"data": "value"},
            agent_name="TestAgent"
        )

        assert memory.retrieve("test_key") == {"data": "value"}

    def test_retrieve_nonexistent(self, memory):
        """Missing keys yield None rather than raising."""
        assert memory.retrieve("nonexistent") is None

    def test_retrieve_entry(self, memory):
        """retrieve_entry exposes the value plus provenance metadata."""
        memory.store(
            key="test_key",
            value="test_value",
            agent_name="TestAgent",
            metadata={"extra": "info"}
        )

        entry = memory.retrieve_entry("test_key")

        assert entry is not None
        assert entry.value == "test_value"
        assert entry.agent_name == "TestAgent"
        assert entry.metadata == {"extra": "info"}
        assert isinstance(entry.timestamp, datetime)

    def test_get_context(self, memory):
        """get_context returns only the requested keys that exist."""
        memory.store("key1", "value1", "Agent1")
        memory.store("key2", "value2", "Agent2")
        memory.store("key3", "value3", "Agent3")

        context = memory.get_context(["key1", "key3", "nonexistent"])

        assert context == {"key1": "value1", "key3": "value3"}

    def test_get_all(self, memory):
        """get_all exposes every stored key/value pair."""
        memory.store("key1", "value1", "Agent")
        memory.store("key2", "value2", "Agent")

        assert memory.get_all() == {"key1": "value1", "key2": "value2"}

    def test_overwrite_value(self, memory):
        """Storing twice under one key keeps the latest value."""
        memory.store("key", "original", "Agent")
        memory.store("key", "updated", "Agent")

        assert memory.retrieve("key") == "updated"

    def test_history_tracking(self, memory):
        """Every store, including overwrites, is appended to history in order."""
        memory.store("key1", "v1", "Agent1")
        memory.store("key2", "v2", "Agent2")
        memory.store("key1", "v1_updated", "Agent1")

        history = memory.get_history()

        assert len(history) == 3
        assert history[0].key == "key1"
        assert history[1].key == "key2"
        assert history[2].value == "v1_updated"

    def test_history_filter_by_agent(self, memory):
        """History can be narrowed to a single agent's writes."""
        memory.store("k1", "v1", "Agent1")
        memory.store("k2", "v2", "Agent2")
        memory.store("k3", "v3", "Agent1")

        agent1_history = memory.get_history(agent_name="Agent1")

        assert len(agent1_history) == 2
        assert all(e.agent_name == "Agent1" for e in agent1_history)

    def test_has_key(self, memory):
        """has_key reports strict booleans for presence and absence."""
        memory.store("exists", "value", "Agent")

        assert memory.has_key("exists") is True
        assert memory.has_key("not_exists") is False

    def test_clear(self, memory):
        """clear wipes both the stored values and the history."""
        memory.store("k1", "v1", "Agent")
        memory.store("k2", "v2", "Agent")

        memory.clear()

        assert memory.retrieve("k1") is None
        assert memory.retrieve("k2") is None
        assert len(memory.get_history()) == 0

    def test_keys(self, memory):
        """keys() lists every stored key."""
        memory.store("a", 1, "Agent")
        memory.store("b", 2, "Agent")
        memory.store("c", 3, "Agent")

        assert set(memory.keys()) == {"a", "b", "c"}

    def test_thread_safety(self, memory):
        """Concurrent writers and a reader must complete without raising."""
        # NOTE: the original version also collected `results`, but never
        # used it; only the error list matters for this check.
        errors = []

        def writer(n):
            try:
                for i in range(100):
                    memory.store(f"key_{n}_{i}", i, f"Agent{n}")
            except Exception as e:
                errors.append(e)

        def reader():
            try:
                for _ in range(100):
                    memory.get_all()
                    memory.keys()
            except Exception as e:
                errors.append(e)

        threads = [
            threading.Thread(target=writer, args=(i,))
            for i in range(3)
        ]
        threads.append(threading.Thread(target=reader))

        for t in threads:
            t.start()
        for t in threads:
            t.join()

        assert len(errors) == 0
|