File size: 2,524 Bytes

553fbf7

"""
MINDI 1.5 Vision-Coder — UI Critic Agent

Uses the vision encoder to evaluate screenshots of generated UI
and provide structured feedback for iterative improvement.
"""

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
from typing import Optional

import torch


@dataclass
class CritiqueResult:
    """Structured critique of a UI screenshot."""
    score: float               # 0.0 to 1.0 overall quality
    layout_score: float        # Layout and spacing quality
    typography_score: float    # Text hierarchy and readability
    color_score: float         # Color contrast and consistency
    responsiveness_score: float  # Mobile-readiness estimation
    feedback: str              # Natural language critique
    suggestions: list[str]     # Actionable improvement items


class UICritic:
    """Vision-powered UI/UX critic for evaluating generated web pages."""

    def __init__(
        self,
        vision_encoder: Optional[object] = None,
        device: Optional[str] = None,
    ) -> None:
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.vision_encoder = vision_encoder  # VisionEncoder instance

    async def critique_screenshot(
        self,
        screenshot_path: Path,
        generated_code: str,
    ) -> CritiqueResult:
        """
        Analyze a screenshot of the generated UI and produce a critique.

        The critique is used by the orchestrator to decide whether to
        iterate on the code or accept it as final output.
        """
        if not screenshot_path.exists():
            return CritiqueResult(
                score=0.0,
                layout_score=0.0,
                typography_score=0.0,
                color_score=0.0,
                responsiveness_score=0.0,
                feedback="Screenshot not found — cannot critique.",
                suggestions=["Ensure sandbox produces a screenshot."],
            )

        # Encode the screenshot using vision encoder
        # (Full implementation will use the VisionEncoder + LLM to generate critique)
        # For now, return a placeholder that signals "needs implementation"
        return CritiqueResult(
            score=0.0,
            layout_score=0.0,
            typography_score=0.0,
            color_score=0.0,
            responsiveness_score=0.0,
            feedback="Vision critique pipeline not yet connected.",
            suggestions=["Wire VisionEncoder to critique pipeline."],
        )