# Spaces: vanishingradient / SCoDA (Sleeping)
# File size: 5,802 Bytes
# 9281fab


"""
Code Generator Agent for CoDA.

Synthesizes executable Python visualization code by integrating
specifications from upstream agents.
"""

from typing import Optional

from pydantic import BaseModel, Field

from coda.core.base_agent import AgentContext, BaseAgent
from coda.core.llm import LLMProvider
from coda.core.memory import SharedMemory


class GeneratedCode(BaseModel):
    """Structured output from the Code Generator."""

    # NOTE: the class docstring and every ``description`` string are kept
    # verbatim on purpose -- pydantic may surface them in the generated
    # JSON schema, so they are runtime text rather than plain comments.

    # Source of the generated script; defaults to an empty string.
    code: str = Field("", description="The generated Python code")

    # Packages the script needs. A factory is used so that every instance
    # gets its own list instead of sharing one mutable default.
    dependencies: list[str] = Field(
        default_factory=lambda: ["matplotlib", "pandas"],
        description="Required Python packages",
    )

    # File the generated script is expected to write its figure to.
    output_filename: str = Field(
        "output.png", description="Name of the output visualization file"
    )

    # Short human-readable summary of what the script does.
    documentation: str = Field(
        "Generated visualization code",
        description="Brief documentation of the code",
    )

    # Described as a 0-10 scale; no bounds are enforced by this field.
    quality_score: float = Field(
        5.0, description="Self-assessed code quality (0-10)"
    )

    # Free-form caveats; starts out as a fresh empty list per instance.
    potential_issues: list[str] = Field(
        default_factory=list, description="Potential issues or edge cases"
    )


class CodeGeneratorAgent(BaseAgent[GeneratedCode]):
    """
    Generates executable Python visualization code.

    Builds a single LLM prompt from the specifications that upstream agents
    left in shared memory (data analysis, visual mapping, design spec and
    search results) and parses the reply into a ``GeneratedCode`` model.
    """

    MEMORY_KEY = "generated_code"

    # At most this many reference examples are inlined into the prompt.
    _MAX_EXAMPLES = 2

    def __init__(
        self,
        llm: LLMProvider,
        memory: SharedMemory,
        name: Optional[str] = None,
    ) -> None:
        """
        Create the agent.

        Args:
            llm: Provider forwarded to the base agent.
            memory: Shared memory forwarded to the base agent.
            name: Optional agent name; ``"CodeGenerator"`` is used when it
                is ``None`` or empty.
        """
        super().__init__(llm, memory, name or "CodeGenerator")

    def _get_system_prompt(self) -> str:
        """Return the fixed system prompt describing the code-writing role."""
        # The seaborn palette names below must be lowercase: seaborn matches
        # its built-in palette names case-sensitively, so the capitalised
        # forms would make the generated script fail with
        # "... is not a valid palette name".
        return """You are an expert Python Developer specializing in data visualization.

Your expertise is in writing clean, efficient, and well-documented Python code for data visualization using matplotlib, seaborn, and pandas.

Your responsibilities:
1. Generate complete, executable Python code
2. Integrate all specifications from the design and mapping agents
3. Handle data loading and transformation correctly
4. Apply proper styling and formatting
5. Include error handling for robustness
6. Write clear documentation

Code requirements:
- Use matplotlib and seaborn as primary libraries
- Include all necessary imports at the top
- Load data from the specified file paths
- Apply all transformations before plotting
- Set figure size, colors, and labels as specified
- Save the output to a file (PNG format)
- Use descriptive variable names
- Add comments for complex operations

IMPORTANT styling rules:
- For seaborn barplots, ALWAYS use hue parameter: sns.barplot(..., hue='category_column', legend=False)
- Use ONLY these reliable palettes: 'viridis', 'plasma', 'inferno', 'magma', 'cividis', 'deep', 'muted', 'pastel'
- DO NOT use complex or custom named palettes like 'tableau10' or 'husl' unless you are sure.
- When in doubt, omit the palette argument or use 'viridis'.
- Always use plt.tight_layout() before saving.

Always respond with a valid JSON object containing the code and metadata."""

    @staticmethod
    def _resolve_data_paths(file_info, fallback):
        """
        Pick the data file paths to show in the prompt.

        Collects the non-empty ``file_path`` value of every dict entry in
        ``file_info``. Entries that are not dicts or that carry no path are
        skipped so the prompt never lists blank file names. When nothing
        usable remains, ``fallback`` is returned unchanged.
        """
        try:
            entries = list(file_info or [])
        except TypeError:
            # Not iterable at all: nothing to extract, use the fallback.
            entries = []
        paths = [
            entry["file_path"]
            for entry in entries
            if isinstance(entry, dict) and entry.get("file_path")
        ]
        return paths or fallback

    @classmethod
    def _format_examples(cls, code_examples) -> str:
        """
        Render reference examples as fenced code blocks for the prompt.

        Only dict entries are usable; they are filtered *before* the limit
        is applied so that malformed leading entries cannot crowd out valid
        ones. Returns an empty string (no header) when there is nothing to
        render.
        """
        if not isinstance(code_examples, (list, tuple)):
            return ""
        usable = [ex for ex in code_examples if isinstance(ex, dict)]
        usable = usable[: cls._MAX_EXAMPLES]
        if not usable:
            return ""
        rendered = "".join(
            f"```python\n# {ex.get('title', 'Example')}\n{ex.get('code', '')}\n```\n"
            for ex in usable
        )
        return "\nReference Code Examples:\n" + rendered

    def _build_prompt(self, context: AgentContext) -> str:
        """
        Assemble the user prompt for one generation round.

        Reads ``data_analysis``, ``visual_mapping``, ``design_spec`` and
        ``search_results`` through ``self._get_from_memory`` (each treated
        as an empty dict when missing or falsy) and combines them with the
        query, data paths, feedback and iteration taken from ``context``.

        Args:
            context: Supplies ``query``, ``data_paths``, ``feedback`` and
                ``iteration``.

        Returns:
            The complete prompt text, ending with ``JSON Response:``.
        """
        # NOTE(review): the stored values are used as dicts below; confirm
        # the upstream agents store plain dicts rather than model objects.
        data_analysis = self._get_from_memory("data_analysis") or {}
        visual_mapping = self._get_from_memory("visual_mapping") or {}
        design_spec = self._get_from_memory("design_spec") or {}
        search_results = self._get_from_memory("search_results") or {}

        # Paths found by the data analysis win over the caller-supplied ones.
        data_paths = self._resolve_data_paths(
            data_analysis.get("files", []), context.data_paths
        )
        examples_section = self._format_examples(
            search_results.get("examples", [])
        )

        # Feedback is only present on refinement rounds.
        feedback_section = ""
        if context.feedback:
            feedback_section = f"""
Code Feedback (iteration {context.iteration}):
{context.feedback}

Fix the issues mentioned in the feedback.
"""

        return f"""Generate Python visualization code based on the following specifications.

User Query: {context.query}

Data Files: {data_paths}

Visual Mapping:
- Chart Type: {visual_mapping.get('chart_type', 'line')}
- X-Axis: {visual_mapping.get('x_axis') or 'Not specified (infer from data or chart type)'}
- Y-Axis: {visual_mapping.get('y_axis') or 'Not specified (infer from data or chart type)'}
- Color Encoding: {visual_mapping.get('color_encoding')}
- Transformations: {visual_mapping.get('transformations', [])}

Design Specification:
- Colors: {design_spec.get('color_scheme', {})}
- Layout: {design_spec.get('layout', {})}
- Typography: {design_spec.get('typography', {})}
- Annotations: {design_spec.get('annotations', [])}
- Guidelines: {design_spec.get('implementation_guidelines', [])}
{examples_section}{feedback_section}

Generate a complete Python script that:
1. Imports all necessary libraries
2. Loads the data file(s)
3. Applies required transformations
4. Creates the visualization with specified styling
5. Saves to 'output.png'

Respond with a JSON object:
- code: Complete Python code as a string
- dependencies: List of required packages
- output_filename: Output file name
- documentation: Brief description
- quality_score: Self-assessment 0-10
- potential_issues: List of potential issues

JSON Response:"""

    def _parse_response(self, response: str) -> GeneratedCode:
        """
        Turn the raw LLM reply into a ``GeneratedCode`` instance.

        The reply is passed through ``self._extract_json`` and the result is
        expanded as keyword arguments; keys that are absent fall back to the
        model's field defaults.
        """
        # NOTE(review): assumes _extract_json returns a dict -- confirm
        # against BaseAgent, since any other type would fail on ``**``.
        data = self._extract_json(response)
        return GeneratedCode(**data)

    def _get_output_key(self) -> str:
        """Return ``MEMORY_KEY`` (presumably where the result is stored)."""
        return self.MEMORY_KEY