File size: 2,524 Bytes
553fbf7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
"""
MINDI 1.5 Vision-Coder — UI Critic Agent

Uses the vision encoder to evaluate screenshots of generated UI
and provide structured feedback for iterative improvement.
"""

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
from typing import Optional

import torch


@dataclass
class CritiqueResult:
    """Outcome of critiquing a single UI screenshot.

    Attributes:
        score: Overall quality, from 0.0 to 1.0.
        layout_score: Layout and spacing quality.
        typography_score: Text hierarchy and readability.
        color_score: Color contrast and consistency.
        responsiveness_score: Estimated mobile-readiness.
        feedback: Natural-language critique.
        suggestions: Actionable improvement items.
    """

    score: float
    layout_score: float
    typography_score: float
    color_score: float
    responsiveness_score: float
    feedback: str
    suggestions: list[str]


class UICritic:
    """Vision-powered UI/UX critic for evaluating generated web pages.

    Attributes:
        device: Device string. The caller's value when one is given;
            otherwise "cuda" if torch reports CUDA as available, else "cpu".
        vision_encoder: Encoder object supplied by the caller (may be None).
            It is only stored here; the critique pipeline does not use it yet.
    """

    def __init__(
        self,
        vision_encoder: Optional[object] = None,
        device: Optional[str] = None,
    ) -> None:
        """Store the encoder and resolve the target device.

        Args:
            vision_encoder: Optional encoder instance to keep for later use.
            device: Optional device string. A falsy value (None or "")
                triggers auto-detection via ``torch.cuda.is_available()``.
        """
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.vision_encoder = vision_encoder  # VisionEncoder instance

    @staticmethod
    def _zero_score_result(feedback: str, suggestion: str) -> CritiqueResult:
        """Build an all-zero critique carrying one message and one suggestion."""
        return CritiqueResult(
            score=0.0,
            layout_score=0.0,
            typography_score=0.0,
            color_score=0.0,
            responsiveness_score=0.0,
            feedback=feedback,
            suggestions=[suggestion],
        )

    async def critique_screenshot(
        self,
        screenshot_path: Path,
        generated_code: str,
    ) -> CritiqueResult:
        """
        Analyze a screenshot of the generated UI and produce a critique.

        The critique is used by the orchestrator to decide whether to
        iterate on the code or accept it as final output.

        Args:
            screenshot_path: Location of the screenshot image. A plain
                string path is tolerated and converted to ``Path``.
            generated_code: Source code that produced the UI. Currently
                unused; accepted so the signature is ready for the full
                pipeline.

        Returns:
            A ``CritiqueResult``. Until the vision pipeline is wired in,
            every score is 0.0 and ``feedback`` says whether the screenshot
            was missing or the pipeline is simply not connected yet.
        """
        # Coerce so callers passing a str path do not crash on attribute access.
        shot = Path(screenshot_path)

        # is_file() rather than exists(): a directory at this path is not a
        # usable screenshot and must be reported as missing.
        if not shot.is_file():
            return self._zero_score_result(
                "Screenshot not found — cannot critique.",
                "Ensure sandbox produces a screenshot.",
            )

        # TODO: encode the screenshot with the vision encoder and use the
        # VisionEncoder + LLM to generate the critique. Until then, return a
        # placeholder that signals "needs implementation".
        return self._zero_score_result(
            "Vision critique pipeline not yet connected.",
            "Wire VisionEncoder to critique pipeline.",
        )