riazmo commited on
Commit
f7fb352
·
verified ·
1 Parent(s): 8d4cb01

Upload 8 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tests/__pycache__/test_stage1_extraction.cpython-314-pytest-9.0.2.pyc filter=lfs diff=lfs merge=lfs -text
tests/__pycache__/__init__.cpython-314.pyc ADDED
Binary file (163 Bytes). View file
 
tests/__pycache__/test_agent_evals.cpython-314-pytest-9.0.2.pyc ADDED
Binary file (64.9 kB). View file
 
tests/__pycache__/test_stage1_extraction.cpython-314-pytest-9.0.2.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c920bbdc9cb32c20edb958653c1fd8e065b1bcf34f52613f70c25d538c5bc8c
3
+ size 114191
tests/__pycache__/test_stage2_pipeline.cpython-314-pytest-9.0.2.pyc ADDED
Binary file (57.7 kB). View file
 
tests/test_agent_evals.py ADDED
@@ -0,0 +1,650 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ LLM Agent Evaluation Tests
4
+ ============================
5
+
6
+ Evaluates the 4 named AI agents using mock HF client responses.
7
+ Tests schema compliance, output correctness, and consistency.
8
+
9
+ Uses DeepEval when available, falls back to manual assertions.
10
+
11
+ Run: pytest tests/test_agent_evals.py -v
12
+ """
13
+
14
+ import asyncio
15
+ import json
16
+ import os
17
+ import sys
18
+ from dataclasses import asdict
19
+ from typing import Optional
20
+
21
+ import pytest
22
+
23
+ # Add parent directory to path
24
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
25
+
26
+ from agents.llm_agents import (
27
+ BrandIdentifierAgent,
28
+ BenchmarkAdvisorAgent,
29
+ BestPracticesValidatorAgent,
30
+ HeadSynthesizerAgent,
31
+ BrandIdentification,
32
+ BenchmarkAdvice,
33
+ BestPracticesResult,
34
+ HeadSynthesis,
35
+ )
36
+
37
+ # Try importing DeepEval
38
+ try:
39
+ from deepeval import assert_test
40
+ from deepeval.test_case import LLMTestCase
41
+ from deepeval.metrics import JsonSchemaMetric
42
+
43
+ HAS_DEEPEVAL = True
44
+ except ImportError:
45
+ HAS_DEEPEVAL = False
46
+
47
+
48
+ # =============================================================================
49
+ # MOCK HF CLIENT
50
+ # =============================================================================
51
+
52
# Canned JSON responses that each agent would return
# AURORA (brand identifier): canned payload shaped like the BrandIdentification schema.
AURORA_RESPONSE = json.dumps({
    "brand_primary": {
        "color": "#06b2c4",
        "confidence": "high",
        "reasoning": "Used in 33 buttons and 12 CTAs — dominant interactive color",
        "usage_count": 45,
    },
    "brand_secondary": {
        "color": "#c1df1f",
        "confidence": "medium",
        "reasoning": "Used in highlights and badges",
        "usage_count": 23,
    },
    # Deliberately absent accent so tests cover the "missing accent" path.
    "brand_accent": None,
    "palette_strategy": "complementary",
    "cohesion_score": 6,
    "cohesion_notes": "Primary and secondary are near-complementary on the color wheel. Reasonable coherence but accent is missing.",
    # Hex -> dotted semantic token name mapping.
    "semantic_names": {
        "#06b2c4": "brand.primary",
        "#c1df1f": "brand.secondary",
        "#1a1a1a": "text.primary",
        "#666666": "text.secondary",
    },
    # Every agent payload carries a self_evaluation with confidence/data_quality.
    "self_evaluation": {
        "confidence": 8,
        "reasoning": "Clear dominant primary from button usage. Secondary less certain.",
        "data_quality": "good",
        "flags": [],
    },
})
83
+
84
# ATLAS (benchmark advisor): canned payload shaped like the BenchmarkAdvice schema.
ATLAS_RESPONSE = json.dumps({
    "recommended_benchmark": "shopify_polaris",
    "recommended_benchmark_name": "Shopify Polaris",
    "reasoning": "87% structural match. Polaris uses similar type scale and spacing grid approach.",
    # Concrete migration steps with before/after values and effort estimates.
    "alignment_changes": [
        {"change": "Adopt 1.25 Major Third type scale", "from": "1.18 random", "to": "1.25", "effort": "low"},
        {"change": "Standardize to 4px spacing grid", "from": "mixed", "to": "4px", "effort": "medium"},
    ],
    "pros_of_alignment": [
        "Industry-standard component patterns",
        "Strong accessibility built-in",
    ],
    "cons_of_alignment": [
        "May feel generic without customization",
    ],
    # Runner-up systems, mirroring MOCK_BENCHMARK_COMPARISONS below.
    "alternative_benchmarks": [
        {"name": "Material Design 3", "reason": "77% match, stronger theming support"},
        {"name": "Atlassian Design System", "reason": "76% match, similar enterprise focus"},
    ],
    "self_evaluation": {
        "confidence": 7,
        "reasoning": "Good structural match but benchmark comparison limited to 8 systems",
        "data_quality": "good",
        "flags": [],
    },
})
110
+
111
# SENTINEL (best-practices validator): canned payload shaped like the
# BestPracticesResult schema.
SENTINEL_RESPONSE = json.dumps({
    "overall_score": 62,
    # Per-check verdicts use a pass/warn/fail status plus a short note.
    "checks": {
        "color_contrast": {"status": "fail", "note": "67 AA failures including brand primary"},
        "type_scale": {"status": "warn", "note": "Near-consistent but not standard ratio"},
        "spacing_grid": {"status": "pass", "note": "4px grid detected with 85% alignment"},
        "color_count": {"status": "warn", "note": "143 unique colors — recommend consolidation to ~20"},
    },
    # Ranked fixes; rank 1 is asserted to be first by the test suite.
    "priority_fixes": [
        {"rank": 1, "issue": "Brand primary fails AA contrast", "impact": "high", "effort": "low", "action": "Darken #06b2c4 to #048391"},
        {"rank": 2, "issue": "143 colors too many", "impact": "medium", "effort": "medium", "action": "Consolidate to semantic palette"},
        {"rank": 3, "issue": "Type scale inconsistent", "impact": "medium", "effort": "low", "action": "Adopt 1.25 Major Third"},
    ],
    "passing_practices": ["spacing_grid", "font_family_consistency"],
    "failing_practices": ["color_contrast", "color_count"],
    "self_evaluation": {
        "confidence": 8,
        "reasoning": "Rule engine data is clear. Priority ordering based on impact analysis.",
        "data_quality": "good",
        "flags": [],
    },
})
133
+
134
# NEXUS (head synthesizer): canned payload shaped like the HeadSynthesis schema.
# Values intentionally agree with the other canned payloads (62 overall score,
# Polaris at 87%, brand primary #06b2c4) so cross-agent consistency can be tested.
NEXUS_RESPONSE = json.dumps({
    "executive_summary": "Design system shows strong structural foundation (4px grid, consistent typography) but needs critical accessibility fixes. Brand primary #06b2c4 fails AA — recommend darkened variant. 87% aligned to Polaris.",
    "scores": {
        "overall": 62,
        "accessibility": 45,
        "consistency": 72,
        "organization": 68,
    },
    "benchmark_fit": {
        "closest": "Shopify Polaris",
        "similarity": 87,
        "recommendation": "Align type scale and consolidate colors for 95%+ match",
    },
    "brand_analysis": {
        "primary": "#06b2c4",
        "secondary": "#c1df1f",
        "cohesion": 6,
    },
    # Exactly three ranked actions; tests assert the list is non-empty.
    "top_3_actions": [
        {"action": "Fix brand primary contrast", "impact": "high", "effort": "low", "details": "Darken to #048391 for AA 4.5:1"},
        {"action": "Consolidate color palette", "impact": "medium", "effort": "medium", "details": "Reduce 143 → ~20 semantic colors"},
        {"action": "Standardize type scale", "impact": "medium", "effort": "low", "details": "Adopt 1.25 Major Third ratio"},
    ],
    "color_recommendations": [
        {"role": "brand-primary", "current": "#06b2c4", "suggested": "#048391", "reason": "AA compliance", "accept": True},
    ],
    "type_scale_recommendation": {
        "current_ratio": 1.18,
        "recommended_ratio": 1.25,
        "name": "Major Third",
    },
    "spacing_recommendation": {
        "current_base": 4,
        "recommended_base": 8,
        "reason": "Simpler system with fewer decisions",
    },
    "self_evaluation": {
        "confidence": 8,
        "reasoning": "Strong data from rule engine and all 3 agents. Minor disagreement on spacing resolved by averaging.",
        "data_quality": "good",
        "flags": [],
    },
})
177
+
178
+
179
class MockHFClient:
    """Mock HF Inference client that returns canned responses per agent."""

    # Maps the agent name passed to complete_async() to its canned JSON payload.
    # "best_practices" and "best_practices_validator" are aliases for the same
    # SENTINEL payload.
    AGENT_RESPONSES = {
        "brand_identifier": AURORA_RESPONSE,
        "benchmark_advisor": ATLAS_RESPONSE,
        "best_practices": SENTINEL_RESPONSE,
        "best_practices_validator": SENTINEL_RESPONSE,
        "head_synthesizer": NEXUS_RESPONSE,
    }

    async def complete_async(
        self,
        agent_name: str,
        system_prompt: str,   # accepted for signature compatibility; unused by the mock
        user_message: str,    # unused by the mock
        max_tokens: int = 2000,   # unused by the mock
        json_mode: bool = True,   # unused by the mock
    ) -> str:
        """Return canned response for the agent.

        Unknown agent names fall back to an empty JSON object string ("{}").
        """
        return self.AGENT_RESPONSES.get(agent_name, "{}")
200
+
201
+
202
+ # =============================================================================
203
+ # TEST DATA
204
+ # =============================================================================
205
+
206
# Color-token fixture: token name -> {value, frequency, context}, with
# "brand-primary" the highest-frequency interactive color (asserted by tests).
MOCK_COLOR_TOKENS = {
    "brand-primary": {"value": "#06b2c4", "frequency": 45, "context": "buttons, links"},
    "brand-secondary": {"value": "#c1df1f", "frequency": 23, "context": "highlights"},
    "text-primary": {"value": "#1a1a1a", "frequency": 120, "context": "headings, body"},
    "text-secondary": {"value": "#666666", "frequency": 80, "context": "captions"},
    "background": {"value": "#ffffff", "frequency": 200, "context": "page background"},
}
213
+
214
# Semantic grouping fixture: category -> list of {hex, name} entries.
MOCK_SEMANTIC_ANALYSIS = {
    "brand": [{"hex": "#06b2c4", "name": "brand-primary"}],
    "text": [{"hex": "#1a1a1a", "name": "text-primary"}],
}
218
+
219
class MockBenchmarkSystem:
    """Minimal stand-in for a benchmark design-system record (what c.benchmark returns)."""

    def __init__(self, name, icon, scale_ratio, base_size, spacing_base, best_for):
        # Identity fields plus the free-form "best for" use-case list.
        self.name, self.icon, self.best_for = name, icon, best_for
        # Typography and spacing are exposed as plain dicts, like the real object.
        self.typography = dict(scale_ratio=scale_ratio, base_size=base_size)
        self.spacing = dict(base=spacing_base)
227
+
228
+
229
class MockBenchmarkComparison:
    """Stand-in for a comparison row (what ATLAS._format_comparisons expects)."""

    def __init__(self, benchmark, similarity_score, overall_match_pct, type_ratio_diff, base_size_diff, spacing_grid_diff):
        # Plain attribute bag; assign uniformly via setattr to keep it tabular.
        for attr, value in (
            ("benchmark", benchmark),
            ("similarity_score", similarity_score),
            ("overall_match_pct", overall_match_pct),
            ("type_ratio_diff", type_ratio_diff),
            ("base_size_diff", base_size_diff),
            ("spacing_grid_diff", spacing_grid_diff),
        ):
            setattr(self, attr, value)
238
+
239
+
240
# Comparison fixture, best match first: lower similarity_score pairs with a
# higher overall_match_pct (Polaris 0.13/87% down to Atlassian 0.24/76%).
MOCK_BENCHMARK_COMPARISONS = [
    MockBenchmarkComparison(
        benchmark=MockBenchmarkSystem("Shopify Polaris", "🟢", 1.25, 16, 4, ["e-commerce", "admin"]),
        similarity_score=0.13, overall_match_pct=87, type_ratio_diff=0.07, base_size_diff=0, spacing_grid_diff=0,
    ),
    MockBenchmarkComparison(
        benchmark=MockBenchmarkSystem("Material Design 3", "🔵", 1.25, 16, 8, ["mobile", "web"]),
        similarity_score=0.23, overall_match_pct=77, type_ratio_diff=0.07, base_size_diff=0, spacing_grid_diff=4,
    ),
    MockBenchmarkComparison(
        benchmark=MockBenchmarkSystem("Atlassian", "🔷", 1.2, 14, 8, ["enterprise", "tools"]),
        similarity_score=0.24, overall_match_pct=76, type_ratio_diff=0.02, base_size_diff=2, spacing_grid_diff=4,
    ),
]
254
+
255
+
256
+ # Mock RuleEngineResults for SENTINEL and NEXUS
257
class MockTypography:
    """Canned typography analysis results fed to SENTINEL/NEXUS via MockRuleEngineResults."""

    def __init__(self):
        # Detected 1.18 ratio is deliberately non-standard; closest standard is 1.2
        # ("Minor Third") and the recommended target is 1.25 ("Major Third").
        self.detected_ratio = 1.18
        self.base_size = 16.0
        self.sizes_px = [12, 14, 16, 18, 22, 28, 36, 48]
        self.is_consistent = False
        self.variance = 0.22
        self.scale_name = "Minor Third"
        self.closest_standard_ratio = 1.2
        self.recommendation = 1.25
        self.recommendation_name = "Major Third"

    def to_dict(self):
        """Compact dict view: just the detected ratio and base size."""
        return {"detected_ratio": self.detected_ratio, "base_size": self.base_size}
270
+
271
+
272
class MockSpacing:
    """Canned spacing-grid analysis: a 4px base with two off-grid values."""

    def __init__(self):
        self.detected_base = 4
        self.is_aligned = True
        self.alignment_percentage = 85.0
        # 5 and 10 are the values that fall off the detected 4px grid.
        self.misaligned_values = [5, 10]
        self.recommendation = 8
        self.recommendation_reason = "Simpler grid"
        self.current_values = [4, 8, 12, 16, 24, 32]
        self.suggested_scale = [0, 4, 8, 12, 16, 24, 32, 48]

    def to_dict(self):
        """Compact dict view: detected base and grid alignment percentage."""
        return {"detected_base": self.detected_base, "alignment_percentage": self.alignment_percentage}
284
+
285
+
286
class MockColorStats:
    """Canned color statistics: 143 unique colors out of 160 total."""

    def __init__(self):
        self.total_count = 160
        self.unique_count = 143
        self.duplicate_count = 17
        self.gray_count = 22
        self.saturated_count = 45
        # One near-duplicate pair: (color_a, color_b, distance).
        self.near_duplicates = [("#06b2c4", "#07b3c5", 0.01)]
        self.hue_distribution = {"cyan": 5, "gray": 22, "green": 3}

    def to_dict(self):
        """Compact dict view: total and unique color counts."""
        return {"total": self.total_count, "unique": self.unique_count}
297
+
298
+
299
class MockAccessibility:
    """Canned WCAG finding for brand primary: fails AA on white (2.57 < 4.5)."""

    # All values are immutable constants, so they can live at class level.
    hex_color = "#06b2c4"
    name = "brand-primary"
    passes_aa_normal = False
    contrast_on_white = 2.57
    contrast_on_black = 8.18
    # Darkened replacement that reaches the AA normal-text threshold.
    suggested_fix = "#048391"
    suggested_fix_contrast = 4.5

    def to_dict(self):
        """Compact dict view: the color and its AA-normal verdict."""
        return {"color": self.hex_color, "aa_normal": self.passes_aa_normal}
311
+
312
+
313
class MockRuleEngineResults:
    """Aggregated canned rule-engine output consumed by SENTINEL and NEXUS tests."""

    # Shared class-level fixture instances; the tests only read from them.
    typography = MockTypography()
    spacing = MockSpacing()
    color_stats = MockColorStats()
    accessibility = [MockAccessibility()]
    # Headline numbers echoed in the canned agent payloads above.
    aa_failures = 67
    consistency_score = 52

    def to_dict(self):
        # Flatten the nested mock sections into one plain, JSON-friendly dict.
        return {
            "typography": self.typography.to_dict(),
            "spacing": self.spacing.to_dict(),
            "color_stats": self.color_stats.to_dict(),
            "summary": {"aa_failures": self.aa_failures, "consistency_score": self.consistency_score},
        }
328
+
329
+
330
+ # =============================================================================
331
+ # SCHEMA COMPLIANCE TESTS
332
+ # =============================================================================
333
+
334
class TestAuroraSchemaCompliance:
    """AURORA (Brand Identifier) output schema validation."""

    @pytest.fixture
    def agent(self):
        """Brand identifier agent wired to the canned-response mock client."""
        return BrandIdentifierAgent(MockHFClient())

    async def _analyze(self, agent):
        """Run the agent once against the standard mock token fixtures."""
        return await agent.analyze(
            color_tokens=MOCK_COLOR_TOKENS,
            semantic_analysis=MOCK_SEMANTIC_ANALYSIS,
        )

    @pytest.mark.asyncio
    async def test_schema_compliance(self, agent):
        """AURORA output has all required BrandIdentification fields."""
        result = await self._analyze(agent)
        assert isinstance(result, BrandIdentification)
        # Required fields present
        for required in ("brand_primary", "palette_strategy", "cohesion_score", "self_evaluation"):
            assert hasattr(result, required)

    @pytest.mark.asyncio
    async def test_brand_primary_detected(self, agent):
        """AURORA correctly identifies brand primary from high-usage color."""
        primary = (await self._analyze(agent)).brand_primary
        assert isinstance(primary, dict)
        assert primary.get("color") == "#06b2c4"
        assert primary.get("confidence") in ("high", "medium", "low")

    @pytest.mark.asyncio
    async def test_palette_strategy_valid(self, agent):
        """Palette strategy is a recognized value."""
        result = await self._analyze(agent)
        assert result.palette_strategy in [
            "complementary", "analogous", "triadic",
            "monochromatic", "split-complementary", "random", "",
        ]

    @pytest.mark.asyncio
    async def test_to_dict_serializable(self, agent):
        """Output is JSON-serializable."""
        result = await self._analyze(agent)
        serialized = json.dumps(result.to_dict())
        assert len(serialized) > 10
387
+
388
+
389
class TestAtlasSchemaCompliance:
    """ATLAS (Benchmark Advisor) output schema validation."""

    @pytest.fixture
    def agent(self):
        """Benchmark advisor agent wired to the canned-response mock client."""
        return BenchmarkAdvisorAgent(MockHFClient())

    async def _advise(self, agent):
        """Run ATLAS once against the standard mock comparison fixtures."""
        return await agent.analyze(
            user_ratio=1.18,
            user_base=16,
            user_spacing=4,
            benchmark_comparisons=MOCK_BENCHMARK_COMPARISONS,
        )

    @pytest.mark.asyncio
    async def test_schema_compliance(self, agent):
        """ATLAS output has all required BenchmarkAdvice fields."""
        advice = await self._advise(agent)
        assert isinstance(advice, BenchmarkAdvice)
        for required in ("recommended_benchmark", "reasoning", "alignment_changes", "self_evaluation"):
            assert hasattr(advice, required)

    @pytest.mark.asyncio
    async def test_benchmark_recommended(self, agent):
        """ATLAS recommends a valid benchmark."""
        advice = await self._advise(agent)
        assert advice.recommended_benchmark != ""
        assert advice.reasoning != ""

    @pytest.mark.asyncio
    async def test_alignment_changes_structured(self, agent):
        """Alignment changes are structured dicts."""
        changes = (await self._advise(agent)).alignment_changes
        assert isinstance(changes, list)
        if changes:
            first_change = changes[0]
            assert isinstance(first_change, dict)
            assert "change" in first_change
437
+
438
+
439
class TestSentinelSchemaCompliance:
    """SENTINEL (Best Practices Validator) output schema validation."""

    @pytest.fixture
    def agent(self):
        """Validator agent wired to the canned-response mock client."""
        return BestPracticesValidatorAgent(MockHFClient())

    async def _validate(self, agent):
        """Run SENTINEL once against the canned rule-engine results."""
        return await agent.analyze(rule_engine_results=MockRuleEngineResults())

    @pytest.mark.asyncio
    async def test_schema_compliance(self, agent):
        """SENTINEL output has all required BestPracticesResult fields."""
        result = await self._validate(agent)
        assert isinstance(result, BestPracticesResult)
        for required in ("overall_score", "priority_fixes", "self_evaluation"):
            assert hasattr(result, required)

    @pytest.mark.asyncio
    async def test_score_in_range(self, agent):
        """Overall score is between 0-100."""
        result = await self._validate(agent)
        assert 0 <= result.overall_score <= 100

    @pytest.mark.asyncio
    async def test_priority_fixes_ranked(self, agent):
        """Priority fixes are a list with high-impact items first."""
        fixes = (await self._validate(agent)).priority_fixes
        assert isinstance(fixes, list)
        if len(fixes) >= 2:
            # First fix should be highest priority when an explicit rank exists.
            top = fixes[0]
            if isinstance(top, dict) and "rank" in top:
                assert top["rank"] == 1
477
+
478
+
479
class TestNexusSchemaCompliance:
    """NEXUS (Head Synthesizer) output schema validation."""

    @pytest.fixture
    def agent(self):
        """Synthesizer agent wired to the canned-response mock client."""
        return HeadSynthesizerAgent(MockHFClient())

    async def _synthesize(self, agent, brand, advice, practices):
        """Run NEXUS with the canned rule-engine/benchmark fixtures and the given agent outputs."""
        return await agent.synthesize(
            rule_engine_results=MockRuleEngineResults(),
            benchmark_comparisons=MOCK_BENCHMARK_COMPARISONS,
            brand_identification=brand,
            benchmark_advice=advice,
            best_practices=practices,
        )

    @pytest.mark.asyncio
    async def test_schema_compliance(self, agent):
        """NEXUS output has all required HeadSynthesis fields."""
        brand = BrandIdentification(
            brand_primary={"color": "#06b2c4", "confidence": "high"},
            palette_strategy="complementary",
            cohesion_score=6,
        )
        advice = BenchmarkAdvice(
            recommended_benchmark="shopify_polaris",
            reasoning="87% structural match",
        )
        practices = BestPracticesResult(
            overall_score=62,
            priority_fixes=[{"issue": "AA contrast", "impact": "high"}],
        )
        result = await self._synthesize(agent, brand, advice, practices)
        assert isinstance(result, HeadSynthesis)
        for required in ("executive_summary", "top_3_actions", "scores", "self_evaluation"):
            assert hasattr(result, required)

    @pytest.mark.asyncio
    async def test_executive_summary_non_empty(self, agent):
        """NEXUS produces a non-empty executive summary."""
        result = await self._synthesize(
            agent, BrandIdentification(), BenchmarkAdvice(), BestPracticesResult()
        )
        assert result.executive_summary != ""

    @pytest.mark.asyncio
    async def test_top_3_actions_present(self, agent):
        """NEXUS provides top 3 action items."""
        result = await self._synthesize(
            agent, BrandIdentification(), BenchmarkAdvice(), BestPracticesResult()
        )
        assert isinstance(result.top_3_actions, list)
        assert len(result.top_3_actions) >= 1
536
+
537
+
538
+ # =============================================================================
539
+ # SELF-EVALUATION TESTS
540
+ # =============================================================================
541
+
542
class TestSelfEvaluation:
    """All agents should include self_evaluation with confidence scoring."""

    @staticmethod
    def _check(self_eval, *required_keys):
        """Assert the self_evaluation payload is a dict carrying the given keys."""
        assert isinstance(self_eval, dict)
        for key in required_keys:
            assert key in self_eval

    @pytest.mark.asyncio
    async def test_aurora_self_evaluation(self):
        result = await BrandIdentifierAgent(MockHFClient()).analyze(
            color_tokens=MOCK_COLOR_TOKENS,
            semantic_analysis=MOCK_SEMANTIC_ANALYSIS,
        )
        self._check(result.self_evaluation, "confidence", "data_quality")

    @pytest.mark.asyncio
    async def test_atlas_self_evaluation(self):
        result = await BenchmarkAdvisorAgent(MockHFClient()).analyze(
            user_ratio=1.18,
            user_base=16,
            user_spacing=4,
            benchmark_comparisons=MOCK_BENCHMARK_COMPARISONS,
        )
        self._check(result.self_evaluation, "confidence")

    @pytest.mark.asyncio
    async def test_sentinel_self_evaluation(self):
        result = await BestPracticesValidatorAgent(MockHFClient()).analyze(
            rule_engine_results=MockRuleEngineResults(),
        )
        self._check(result.self_evaluation, "confidence")

    @pytest.mark.asyncio
    async def test_nexus_self_evaluation(self):
        result = await HeadSynthesizerAgent(MockHFClient()).synthesize(
            rule_engine_results=MockRuleEngineResults(),
            benchmark_comparisons=MOCK_BENCHMARK_COMPARISONS,
            brand_identification=BrandIdentification(),
            benchmark_advice=BenchmarkAdvice(),
            best_practices=BestPracticesResult(),
        )
        self._check(result.self_evaluation, "confidence")
593
+
594
+
595
+ # =============================================================================
596
+ # VALIDATION MODULE TESTS
597
+ # =============================================================================
598
+
599
class TestValidationModule:
    """Test the core/validation.py module."""

    def test_validate_aurora_output(self):
        from core.validation import validate_agent_output

        payload = {
            "brand_primary": {"color": "#06b2c4"},
            "palette_strategy": "complementary",
            "cohesion_score": 6,
        }
        ok, _err = validate_agent_output(payload, "aurora")
        assert ok

    def test_validate_aurora_missing_required(self):
        from core.validation import validate_agent_output

        # brand_primary and palette_strategy deliberately omitted.
        ok, err = validate_agent_output({"cohesion_score": 6}, "aurora")
        assert not ok
        assert err is not None

    def test_validate_nexus_output(self):
        from core.validation import validate_agent_output

        payload = {
            "executive_summary": "Test summary",
            "top_3_actions": [{"action": "Fix contrast"}],
            "scores": {"overall": 62},
        }
        ok, _err = validate_agent_output(payload, "nexus")
        assert ok

    def test_validate_unknown_agent_passes(self):
        from core.validation import validate_agent_output

        ok, _err = validate_agent_output({"anything": True}, "unknown_agent")
        assert ok  # No schema = pass

    def test_validate_dataclass(self):
        from core.validation import validate_agent_output

        fixture = BrandIdentification(
            brand_primary={"color": "#06b2c4"},
            palette_strategy="complementary",
        )
        ok, _err = validate_agent_output(fixture, "aurora")
        assert ok
647
+
648
+
649
if __name__ == "__main__":
    # Allow running this suite directly (python tests/test_agent_evals.py).
    pytest.main([__file__, "-v"])
tests/test_stage1_extraction.py ADDED
@@ -0,0 +1,716 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Stage 1 Test Suite — Extraction, Normalization & Rule Engine
4
+ =============================================================
5
+
6
+ Tests the deterministic (free) layer:
7
+ - Color utilities: hex normalization, deduplication, categorization
8
+ - Rule Engine: WCAG contrast, type scale detection, spacing grid, consistency score
9
+ - Edge cases and boundary conditions
10
+
11
+ Run: pytest tests/test_stage1_extraction.py -v
12
+ """
13
+
14
+ import os
15
+ import sys
16
+ import pytest
17
+
18
+ # Add parent directory to path
19
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
20
+
21
+ from core.color_utils import (
22
+ normalize_hex,
23
+ parse_color,
24
+ deduplicate_colors,
25
+ are_colors_similar,
26
+ color_distance,
27
+ categorize_color,
28
+ get_contrast_ratio,
29
+ check_wcag_compliance,
30
+ generate_color_ramp,
31
+ hex_to_rgb,
32
+ rgb_to_hex,
33
+ )
34
+ from core.rule_engine import (
35
+ analyze_type_scale,
36
+ analyze_accessibility,
37
+ analyze_spacing_grid,
38
+ analyze_color_statistics,
39
+ run_rule_engine,
40
+ get_contrast_ratio as re_get_contrast_ratio,
41
+ get_relative_luminance,
42
+ hex_to_rgb as re_hex_to_rgb,
43
+ is_gray,
44
+ color_distance as re_color_distance,
45
+ find_aa_compliant_color,
46
+ parse_size_to_px,
47
+ STANDARD_SCALES,
48
+ )
49
+
50
+
51
+ # =============================================================================
52
+ # TEST DATA
53
+ # =============================================================================
54
+
55
# Typography-token fixture: name -> {font_size, font_weight}, spanning a
# 12px caption up to a 48px heading with mixed weights.
MOCK_TYPOGRAPHY_TOKENS = {
    "heading-1": {"font_size": "48px", "font_weight": "700"},
    "heading-2": {"font_size": "36px", "font_weight": "600"},
    "heading-3": {"font_size": "28px", "font_weight": "600"},
    "heading-4": {"font_size": "22px", "font_weight": "500"},
    "body-large": {"font_size": "18px", "font_weight": "400"},
    "body": {"font_size": "16px", "font_weight": "400"},
    "body-small": {"font_size": "14px", "font_weight": "400"},
    "caption": {"font_size": "12px", "font_weight": "400"},
}
65
+
66
# Color-token fixture for the rule engine: includes two light colors that are
# expected to fail WCAG AA contrast against a white background.
MOCK_COLOR_TOKENS = {
    "brand-primary": {"value": "#06b2c4"},
    "brand-secondary": {"value": "#c1df1f"},
    "text-primary": {"value": "#1a1a1a"},
    "text-secondary": {"value": "#666666"},
    "background": {"value": "#ffffff"},
    "light-cyan": {"value": "#7dd3fc"},  # Fails AA on white
    "light-lime": {"value": "#d9f99d"},  # Fails AA on white
}
75
+
76
# Spacing fixture where every value is a multiple of 4px (clean grid).
MOCK_SPACING_TOKENS_ALIGNED = {
    "space-1": {"value_px": 4},
    "space-2": {"value_px": 8},
    "space-3": {"value_px": 16},
    "space-4": {"value_px": 24},
    "space-5": {"value_px": 32},
    "space-6": {"value_px": 48},
}
84
+
85
# Spacing fixture with values (5, 10, 15, 22, 33) that share no 4px/8px grid.
MOCK_SPACING_TOKENS_MISALIGNED = {
    "space-1": {"value_px": 5},
    "space-2": {"value_px": 10},
    "space-3": {"value_px": 15},
    "space-4": {"value_px": 22},
    "space-5": {"value_px": 33},
}
92
+
93
+
94
+ # =============================================================================
95
+ # TEST CLASS: Color Utilities — Normalization & Deduplication
96
+ # =============================================================================
97
+
98
class TestColorNormalization:
    """Test color parsing, normalization and deduplication."""

    def test_normalize_hex_6digit(self):
        """6-digit hex stays lowercase."""
        for given, expected in (("#FF0000", "#ff0000"), ("#ffffff", "#ffffff")):
            assert normalize_hex(given) == expected

    def test_normalize_hex_3digit(self):
        """3-digit hex expands to 6-digit."""
        for given, expected in (("#fff", "#ffffff"), ("#000", "#000000"), ("#f00", "#ff0000")):
            assert normalize_hex(given) == expected

    def test_parse_color_hex(self):
        """Parse hex color to ParsedColor."""
        red = parse_color("#ff0000")
        assert red is not None
        assert red.hex == "#ff0000"
        assert red.rgb == (255, 0, 0)

    def test_parse_color_rgb(self):
        """Parse rgb() string."""
        parsed = parse_color("rgb(0, 128, 255)")
        assert parsed is not None
        assert parsed.rgb == (0, 128, 255)

    def test_parse_color_invalid(self):
        """Invalid color returns None."""
        for bad_input in ("not-a-color", ""):
            assert parse_color(bad_input) is None

    def test_hex_to_rgb_and_back(self):
        """Round-trip hex → RGB → hex."""
        assert rgb_to_hex(*hex_to_rgb("#1a2b3c")) == "#1a2b3c"

    def test_deduplicate_exact_duplicates(self):
        """Exact same colors are deduplicated."""
        deduped = deduplicate_colors(
            ["#ff0000", "#ff0000", "#00ff00", "#00ff00", "#0000ff"], threshold=1.0
        )
        assert len(deduped) == 3

    def test_deduplicate_near_duplicates(self):
        """Near-duplicate colors (within threshold) are deduplicated."""
        deduped = deduplicate_colors(["#ff0000", "#fe0101", "#00ff00"], threshold=10.0)
        # #ff0000 and #fe0101 collapse into a single entry.
        assert len(deduped) == 2

    def test_deduplicate_preserves_distinct(self):
        """Distinct colors are preserved."""
        deduped = deduplicate_colors(["#ff0000", "#00ff00", "#0000ff"], threshold=10.0)
        assert len(deduped) == 3

    def test_are_colors_similar_identical(self):
        """Same color is similar."""
        assert are_colors_similar("#ff0000", "#ff0000")

    def test_are_colors_similar_different(self):
        """Very different colors are not similar."""
        assert not are_colors_similar("#ff0000", "#0000ff", threshold=10.0)

    def test_color_distance_identical(self):
        """Same color has distance 0."""
        assert color_distance("#ff0000", "#ff0000") == 0.0

    def test_color_distance_symmetric(self):
        """Distance is symmetric."""
        forward = color_distance("#ff0000", "#00ff00")
        backward = color_distance("#00ff00", "#ff0000")
        assert forward == backward
171
+
172
+
173
+ # =============================================================================
174
+ # TEST CLASS: Color Categorization
175
+ # =============================================================================
176
+
177
class TestColorCategorization:
    """Test semantic color classification."""

    def test_categorize_red(self):
        """Pure red maps to the 'red' category."""
        assert categorize_color("#ff0000") == "red"

    def test_categorize_blue(self):
        """Pure blue maps to the 'blue' category."""
        assert categorize_color("#0000ff") == "blue"

    def test_categorize_green(self):
        """Pure green maps to the 'green' category."""
        assert categorize_color("#00ff00") == "green"

    def test_categorize_neutral_white(self):
        """White is classified as neutral (no hue)."""
        assert categorize_color("#ffffff") == "neutral"

    def test_categorize_neutral_black(self):
        """Black is classified as neutral (no hue)."""
        assert categorize_color("#000000") == "neutral"

    def test_categorize_neutral_gray(self):
        """Mid gray is classified as neutral (zero saturation)."""
        assert categorize_color("#808080") == "neutral"

    def test_categorize_cyan(self):
        """Brand color #06b2c4 should be cyan."""
        assert categorize_color("#06b2c4") == "cyan"
201
+
202
+
203
+ # =============================================================================
204
+ # TEST CLASS: WCAG Contrast (Rule Engine)
205
+ # =============================================================================
206
+
207
class TestWCAGContrast:
    """Test WCAG contrast ratio calculations — core math.

    Contrast ratio is (L1 + 0.05) / (L2 + 0.05) with L the relative
    luminance, so the range is [1, 21].
    """

    def test_black_on_white_is_21(self):
        """Black on white should be 21:1 (maximum contrast)."""
        ratio = re_get_contrast_ratio("#000000", "#ffffff")
        assert abs(ratio - 21.0) < 0.1

    def test_white_on_black_is_21(self):
        """White on black is also 21:1 (symmetric)."""
        ratio = re_get_contrast_ratio("#ffffff", "#000000")
        assert abs(ratio - 21.0) < 0.1

    def test_same_color_is_1(self):
        """Same color on same color should be 1:1."""
        ratio = re_get_contrast_ratio("#ff0000", "#ff0000")
        assert abs(ratio - 1.0) < 0.01

    def test_contrast_ratio_symmetric(self):
        """Contrast ratio is symmetric."""
        r1 = re_get_contrast_ratio("#06b2c4", "#ffffff")
        r2 = re_get_contrast_ratio("#ffffff", "#06b2c4")
        assert abs(r1 - r2) < 0.01

    def test_brand_primary_fails_aa_on_white(self):
        """Brand color #06b2c4 fails AA on white (contrast ~2.6)."""
        ratio = re_get_contrast_ratio("#06b2c4", "#ffffff")
        assert ratio < 4.5  # Fails AA normal
        assert ratio > 2.0  # But has some contrast

    def test_dark_text_passes_aa(self):
        """Dark text #1a1a1a passes AA on white."""
        ratio = re_get_contrast_ratio("#1a1a1a", "#ffffff")
        assert ratio >= 4.5

    def test_luminance_black_is_zero(self):
        """Black has luminance ~0."""
        lum = get_relative_luminance("#000000")
        assert abs(lum) < 0.001

    def test_luminance_white_is_one(self):
        """White has luminance ~1."""
        lum = get_relative_luminance("#ffffff")
        assert abs(lum - 1.0) < 0.001

    def test_find_aa_compliant_preserves_passing(self):
        """Color already passing AA is returned unchanged."""
        result = find_aa_compliant_color("#1a1a1a", "#ffffff", 4.5)
        assert result == "#1a1a1a"

    def test_find_aa_compliant_fixes_failing(self):
        """Failing color gets a fix that passes AA."""
        fixed = find_aa_compliant_color("#06b2c4", "#ffffff", 4.5)
        fixed_ratio = re_get_contrast_ratio(fixed, "#ffffff")
        assert fixed_ratio >= 4.5

    def test_analyze_accessibility_finds_failures(self):
        """analyze_accessibility identifies colors that fail AA on BOTH white and black."""
        results = analyze_accessibility(MOCK_COLOR_TOKENS)
        # passes_aa_normal is True if contrast >= 4.5 on white OR black.
        # Light colors pass because they have good contrast on black.
        # Medium-contrast colors like #06b2c4 or #666666 may fail on both.
        # At minimum, all results should be analyzed
        assert len(results) >= 5  # At least the colors with valid hex
        # Check that brand-primary #06b2c4 has low contrast on white
        brand = [r for r in results if r.hex_color == "#06b2c4"]
        assert len(brand) == 1
        assert brand[0].contrast_on_white < 4.5

    def test_analyze_accessibility_suggests_fixes(self):
        """AA failures get suggested fixes."""
        results = analyze_accessibility(MOCK_COLOR_TOKENS)
        failures = [r for r in results if not r.passes_aa_normal]
        for f in failures:
            # Every failure must carry a concrete, AA-passing replacement
            assert f.suggested_fix is not None
            assert f.suggested_fix_contrast is not None
            assert f.suggested_fix_contrast >= 4.5

    def test_fg_bg_pair_check(self):
        """FG/BG pairs are checked for contrast."""
        pairs = [
            {"foreground": "#06b2c4", "background": "#ffffff", "element": "button"},
        ]
        results = analyze_accessibility({}, fg_bg_pairs=pairs)
        # #06b2c4 on white fails AA (contrast ~2.6)
        pair_failures = [r for r in results if r.name.startswith("fg:")]
        assert len(pair_failures) == 1

    def test_fg_bg_same_color_skipped(self):
        """Same-color FG/BG pairs are skipped (invisible text)."""
        pairs = [
            {"foreground": "#ffffff", "background": "#ffffff", "element": "hidden"},
        ]
        results = analyze_accessibility({}, fg_bg_pairs=pairs)
        assert len(results) == 0
302
+
303
+
304
+ # =============================================================================
305
+ # TEST CLASS: Type Scale Detection
306
+ # =============================================================================
307
+
308
class TestTypeScaleDetection:
    """Type scale ratio detection and recommendation behaviour."""

    def test_detect_ratio_from_tokens(self):
        """Mock typography yields a plausible ratio strictly inside (1, 2)."""
        analysis = analyze_type_scale(MOCK_TYPOGRAPHY_TOKENS)
        # Sizes 12..48 step through mixed ratios, all well under 2x
        assert 1.0 < analysis.detected_ratio < 2.0

    def test_consistent_scale(self):
        """A clean Major Third (1.25) progression is flagged consistent."""
        # Major Third (1.25): 12, 15, 18.75, 23.4, 29.3
        major_third = {}
        for step in range(5):
            major_third[f"size-{step}"] = {"font_size": f"{12 * (1.25 ** step):.1f}px"}
        analysis = analyze_type_scale(major_third)
        assert analysis.is_consistent
        assert abs(analysis.detected_ratio - 1.25) < 0.05

    def test_inconsistent_scale(self):
        """Arbitrary sizes produce high variance or an inconsistent verdict."""
        ragged = {
            "a": {"font_size": "10px"},
            "b": {"font_size": "17px"},
            "c": {"font_size": "31px"},
            "d": {"font_size": "42px"},
        }
        analysis = analyze_type_scale(ragged)
        assert analysis.variance > 0.15 or not analysis.is_consistent

    def test_single_size(self):
        """One size is not enough to infer a scale."""
        analysis = analyze_type_scale({"body": {"font_size": "16px"}})
        assert analysis.scale_name == "Unknown"
        # Falls back to the Major Third default recommendation
        assert analysis.recommendation == 1.25

    def test_no_sizes(self):
        """No tokens at all also yields the Unknown scale."""
        assert analyze_type_scale({}).scale_name == "Unknown"

    def test_rem_conversion(self):
        """rem sizes convert at the conventional 16px root."""
        tokens = {
            "body": {"font_size": "1rem"},
            "heading": {"font_size": "2rem"},
        }
        analysis = analyze_type_scale(tokens)
        assert 16.0 in analysis.sizes_px
        assert 32.0 in analysis.sizes_px

    def test_base_size_detection(self):
        """The detected base size sits near the conventional 16px."""
        analysis = analyze_type_scale(MOCK_TYPOGRAPHY_TOKENS)
        assert 14 <= analysis.base_size <= 18

    def test_standard_scales_defined(self):
        """Major Third, Perfect Fourth and Golden Ratio are all known."""
        for ratio in (1.25, 1.333, 1.618):
            assert ratio in STANDARD_SCALES

    def test_parse_size_to_px(self):
        """px / rem / em strings and bare numbers parse; junk returns None."""
        assert parse_size_to_px("16px") == 16.0
        assert parse_size_to_px("1rem") == 16.0
        assert parse_size_to_px("1.5em") == 24.0
        assert parse_size_to_px(16) == 16.0
        assert parse_size_to_px("abc") is None
381
+
382
+
383
+ # =============================================================================
384
+ # TEST CLASS: Spacing Grid Analysis
385
+ # =============================================================================
386
+
387
class TestSpacingGrid:
    """Spacing grid detection and the underlying GCD math."""

    def test_aligned_to_4px(self):
        """Values all divisible by 4 register as grid-aligned."""
        analysis = analyze_spacing_grid(MOCK_SPACING_TOKENS_ALIGNED)
        assert analysis.is_aligned
        # Every value (4..48) divides by both 4 and 8, so either base is fine
        assert analysis.recommendation in [4, 8]

    def test_8px_grid_detected(self):
        """Pure multiples of 8 detect an 8px base with full alignment."""
        eights = {f"s{n}": {"value_px": n * 8} for n in range(1, 5)}
        analysis = analyze_spacing_grid(eights)
        assert analysis.detected_base == 8
        assert analysis.is_aligned
        assert analysis.alignment_percentage == 100.0

    def test_misaligned_detected(self):
        """Values with GCD 1 are flagged as off-grid."""
        analysis = analyze_spacing_grid(MOCK_SPACING_TOKENS_MISALIGNED)
        # gcd(5, 10, 15, 22, 33) == 1, so no usable base exists
        assert analysis.detected_base == 1
        assert not analysis.is_aligned

    def test_empty_spacing(self):
        """No tokens: fall back to recommending an 8px grid."""
        analysis = analyze_spacing_grid({})
        assert analysis.recommendation == 8
        assert not analysis.is_aligned

    def test_single_value(self):
        """A lone value acts as its own base."""
        analysis = analyze_spacing_grid({"s1": {"value_px": 8}})
        assert analysis.detected_base == 8

    def test_gcd_calculation(self):
        """The base is the GCD of all values (12 for 12/24/36)."""
        twelves = {
            "s1": {"value_px": 12},
            "s2": {"value_px": 24},
            "s3": {"value_px": 36},
        }
        assert analyze_spacing_grid(twelves).detected_base == 12

    def test_suggested_scale_generated(self):
        """A non-empty suggested scale (starting at 0) is produced."""
        analysis = analyze_spacing_grid(MOCK_SPACING_TOKENS_ALIGNED)
        assert len(analysis.suggested_scale) > 0
        assert 0 in analysis.suggested_scale

    def test_string_values_parsed(self):
        """'16px'-style string values are parsed into numbers."""
        stringy = {
            "s1": {"value": "8px"},
            "s2": {"value": "16px"},
        }
        assert analyze_spacing_grid(stringy).current_values == [8, 16]
453
+
454
+
455
+ # =============================================================================
456
+ # TEST CLASS: Color Statistics
457
+ # =============================================================================
458
+
459
class TestColorStatistics:
    """Palette statistics: counts, grays, near-dupes, hue buckets."""

    def test_counts_correct(self):
        """Total, unique and duplicate counts add up."""
        palette = {
            "a": {"value": "#ff0000"},
            "b": {"value": "#ff0000"},  # exact duplicate of "a"
            "c": {"value": "#00ff00"},
        }
        stats = analyze_color_statistics(palette)
        assert stats.total_count == 3
        assert stats.unique_count == 2
        assert stats.duplicate_count == 1

    def test_gray_detection(self):
        """Low-saturation colors (white/gray/black) count as grays."""
        palette = {
            "white": {"value": "#ffffff"},
            "gray": {"value": "#808080"},
            "black": {"value": "#000000"},
            "red": {"value": "#ff0000"},
        }
        assert analyze_color_statistics(palette).gray_count >= 3

    def test_near_duplicates_found(self):
        """Perceptually-close pairs show up in near_duplicates."""
        palette = {
            "red1": {"value": "#ff0000"},
            "red2": {"value": "#fe0101"},  # within the similarity threshold
            "blue": {"value": "#0000ff"},
        }
        stats = analyze_color_statistics(palette, similarity_threshold=0.05)
        assert len(stats.near_duplicates) >= 1

    def test_hue_distribution(self):
        """Primary colors land in their own hue buckets."""
        palette = {
            "red": {"value": "#ff0000"},
            "blue": {"value": "#0000ff"},
            "green": {"value": "#00ff00"},
        }
        stats = analyze_color_statistics(palette)
        for hue in ("red", "blue", "green"):
            assert hue in stats.hue_distribution

    def test_empty_tokens(self):
        """Empty input produces zero counts, not an error."""
        stats = analyze_color_statistics({})
        assert stats.total_count == 0
        assert stats.unique_count == 0
512
+
513
+
514
+ # =============================================================================
515
+ # TEST CLASS: Rule Engine Integration
516
+ # =============================================================================
517
+
518
class TestRuleEngineIntegration:
    """Test the full run_rule_engine() function."""

    def test_returns_all_components(self):
        """Rule engine returns all analysis components."""
        result = run_rule_engine(
            typography_tokens=MOCK_TYPOGRAPHY_TOKENS,
            color_tokens=MOCK_COLOR_TOKENS,
            spacing_tokens=MOCK_SPACING_TOKENS_ALIGNED,
        )
        # Each sub-analysis must be populated, even if its findings are empty
        assert result.typography is not None
        assert result.accessibility is not None
        assert result.spacing is not None
        assert result.color_stats is not None

    def test_consistency_score_bounds(self):
        """Consistency score is between 0 and 100."""
        result = run_rule_engine(
            typography_tokens=MOCK_TYPOGRAPHY_TOKENS,
            color_tokens=MOCK_COLOR_TOKENS,
            spacing_tokens=MOCK_SPACING_TOKENS_ALIGNED,
        )
        assert 0 <= result.consistency_score <= 100

    def test_aa_failures_counted(self):
        """AA failures are counted in summary."""
        result = run_rule_engine(
            typography_tokens=MOCK_TYPOGRAPHY_TOKENS,
            color_tokens=MOCK_COLOR_TOKENS,
            spacing_tokens=MOCK_SPACING_TOKENS_ALIGNED,
        )
        assert result.aa_failures >= 0

    def test_to_dict_serializable(self):
        """to_dict() returns JSON-serializable data."""
        import json
        result = run_rule_engine(
            typography_tokens=MOCK_TYPOGRAPHY_TOKENS,
            color_tokens=MOCK_COLOR_TOKENS,
            spacing_tokens=MOCK_SPACING_TOKENS_ALIGNED,
        )
        d = result.to_dict()
        # json.dumps raises TypeError on non-serializable values, so a
        # successful round-trip is the whole assertion here
        json_str = json.dumps(d)
        assert len(json_str) > 0

    def test_log_callback_called(self):
        """Log callback receives messages."""
        logs = []
        run_rule_engine(
            typography_tokens=MOCK_TYPOGRAPHY_TOKENS,
            color_tokens=MOCK_COLOR_TOKENS,
            spacing_tokens=MOCK_SPACING_TOKENS_ALIGNED,
            log_callback=lambda msg: logs.append(msg),
        )
        assert len(logs) > 0
        # Should contain rule engine header
        assert any("RULE ENGINE" in log for log in logs)

    def test_with_fg_bg_pairs(self):
        """FG/BG pairs are analyzed when provided."""
        pairs = [
            {"foreground": "#06b2c4", "background": "#ffffff", "element": "button"},
            {"foreground": "#1a1a1a", "background": "#ffffff", "element": "heading"},
        ]
        result = run_rule_engine(
            typography_tokens=MOCK_TYPOGRAPHY_TOKENS,
            color_tokens=MOCK_COLOR_TOKENS,
            spacing_tokens=MOCK_SPACING_TOKENS_ALIGNED,
            fg_bg_pairs=pairs,
        )
        # Should have accessibility results including pair checks
        assert len(result.accessibility) > 0

    def test_empty_tokens_no_crash(self):
        """Empty tokens don't crash the rule engine."""
        result = run_rule_engine(
            typography_tokens={},
            color_tokens={},
            spacing_tokens={},
        )
        assert result.consistency_score >= 0

    def test_perfect_score_possible(self):
        """A well-organized design system scores high."""
        # All 8px-aligned spacing
        spacing = {f"s{i}": {"value_px": i * 8} for i in range(1, 7)}
        # Consistent type scale (Major Third 1.25)
        typo = {
            f"t{i}": {"font_size": f"{16 * (1.25 ** i):.0f}px"}
            for i in range(5)
        }
        # AA-passing colors only
        colors = {
            "dark": {"value": "#1a1a1a"},
            "medium": {"value": "#333333"},
        }
        result = run_rule_engine(
            typography_tokens=typo,
            color_tokens=colors,
            spacing_tokens=spacing,
        )
        assert result.consistency_score >= 50  # Should be reasonably high
620
+
621
+
622
+ # =============================================================================
623
+ # TEST CLASS: Color Ramp Generation
624
+ # =============================================================================
625
+
626
class TestColorRampGeneration:
    """Generating a 50-900 shade ramp from a single base color."""

    def test_ramp_has_all_shades(self):
        """All ten standard shade keys are present."""
        ramp = generate_color_ramp("#06b2c4")
        for shade in ("50", "500", "900"):
            assert shade in ramp
        assert len(ramp) == 10

    def test_ramp_500_is_base(self):
        """The 500 shade is the unmodified base color."""
        assert generate_color_ramp("#06b2c4")["500"] == "#06b2c4"

    def test_ramp_lightness_order(self):
        """Lower shade numbers are lighter (higher HSL lightness)."""
        ramp = generate_color_ramp("#06b2c4")
        lightest = parse_color(ramp["50"])
        darkest = parse_color(ramp["900"])
        # HSL lightness is the third component
        assert lightest.hsl[2] > darkest.hsl[2]

    def test_ramp_empty_on_invalid(self):
        """Unparseable input yields an empty ramp, not an exception."""
        assert generate_color_ramp("not-a-color") == {}
653
+
654
+
655
+ # =============================================================================
656
+ # TEST CLASS: Edge Cases
657
+ # =============================================================================
658
+
659
class TestEdgeCases:
    """Boundary conditions across the analysis helpers."""

    def test_is_gray_pure_white(self):
        """White has zero saturation, so it qualifies as gray."""
        assert is_gray("#ffffff")

    def test_is_gray_pure_black(self):
        """Black likewise has zero saturation."""
        assert is_gray("#000000")

    def test_is_gray_red_is_not(self):
        """Fully saturated red must not be classed as gray."""
        assert not is_gray("#ff0000")

    def test_color_distance_black_white(self):
        """Black-to-white is near the maximum normalized distance (~1.0)."""
        assert re_color_distance("#000000", "#ffffff") > 0.9

    def test_very_large_spacing(self):
        """A huge single spacing value is handled without error."""
        analysis = analyze_spacing_grid({"huge": {"value_px": 10000}})
        assert analysis.detected_base == 10000

    def test_typography_mixed_units(self):
        """px, rem and em sizes normalize to a common px list."""
        mixed = {
            "a": {"font_size": "16px"},
            "b": {"font_size": "1.5rem"},
            "c": {"font_size": "2em"},
        }
        analysis = analyze_type_scale(mixed)
        assert len(analysis.sizes_px) == 3
        for px in (16.0, 24.0, 32.0):
            assert px in analysis.sizes_px

    def test_duplicate_sizes_deduped(self):
        """Repeated font sizes collapse to a single entry."""
        repeated = {
            "a": {"font_size": "16px"},
            "b": {"font_size": "16px"},
            "c": {"font_size": "24px"},
        }
        # Only 16 and 24 remain after dedup
        assert len(analyze_type_scale(repeated).sizes_px) == 2

    def test_hex_to_rgb_shorthand(self):
        """3-digit shorthand expands before conversion."""
        expected = {
            "#fff": (255, 255, 255),
            "#000": (0, 0, 0),
            "#f00": (255, 0, 0),
        }
        for short, rgb in expected.items():
            assert re_hex_to_rgb(short) == rgb
713
+
714
+
715
if __name__ == "__main__":
    # Allow running this file directly (python test_agent_evals.py) in
    # addition to the usual pytest collection.
    pytest.main([__file__, "-v"])