# Spaces: riazmo / Design-System-Automation (status: Runtime error)
# File size: 33,393 Bytes
# Revisions: abab3e7, d041f14, abab3e7

"""
Agent 2: Token Normalizer & Structurer
Design System Automation v3

Persona: Design System Librarian

Responsibilities:
- Clean noisy extraction data
- Deduplicate similar tokens (colors within threshold, similar spacing)
- Assign ALL color names using NUMERIC shades only (50-900)
- Add role_hints based on CSS property/element context (absorbed from semantic_analyzer)
- Normalize radius values (parse, deduplicate, sort, name)
- Normalize shadow values (parse, sort by blur, name)
- Fix typography naming collisions (add weight suffix)
- Tag tokens as: detected | inferred | low-confidence
"""

import re
from typing import Optional
from collections import defaultdict

from core.token_schema import (
    ColorToken,
    TypographyToken,
    SpacingToken,
    RadiusToken,
    ShadowToken,
    ExtractedTokens,
    NormalizedTokens,
    Confidence,
    TokenSource,
)
from core.color_utils import (
    parse_color,
    normalize_hex,
    categorize_color,
)


class TokenNormalizer:
    """
    Normalizes and structures extracted tokens.

    This is Agent 2's job — taking raw extraction data and
    organizing it into a clean, deduplicated structure.

    v3 changes:
    - Color naming: ALWAYS numeric shades (50-900), NEVER words (light/dark/base)
    - Role hints: CSS-property-based metadata for AURORA to consume
    - Radius: Full normalization (parse, deduplicate, sort, name)
    - Shadows: Full normalization (parse, sort by blur, deduplicate, name)
    - Typography: Collision-proof naming with weight suffix
    """

    def __init__(self):
        # Thresholds for duplicate detection
        self.color_similarity_threshold = 10  # Delta in RGB space
        self.spacing_merge_threshold = 2  # px difference to merge

        # Radius semantic tiers (px -> name)
        self.radius_tiers = [
            (0, "none"),
            (2, "sm"),
            (4, "md"),
            (8, "lg"),
            (16, "xl"),
            (24, "2xl"),
            (9999, "full"),
        ]

        # Shadow elevation tiers (by count)
        self.shadow_tier_names = ["xs", "sm", "md", "lg", "xl", "2xl"]

    def normalize(self, extracted: ExtractedTokens) -> NormalizedTokens:
        """
        Normalize extracted tokens.

        Each token family is normalized by its own helper and then indexed by
        ``suggested_name``. Tokens without a name fall back to a key derived
        from their value; colliding keys get a ``_2``, ``_3``, ... suffix.

        Args:
            extracted: Raw extraction results from Agent 1

        Returns:
            NormalizedTokens with cleaned, deduplicated data
        """
        # Local import: zlib is only needed for the shadow fallback key below.
        import zlib

        def shadow_fallback_key(shadow) -> str:
            # Built-in hash() of a str is randomized per process, so it would
            # produce a different key on every run; crc32 is stable.
            checksum = zlib.crc32(str(shadow.value).encode("utf-8"))
            return f"shadow-{checksum % 1000}"

        # Process each token type (returns lists)
        colors_list = self._normalize_colors(extracted.colors)
        typography_list = self._normalize_typography(extracted.typography)
        spacing_list = self._normalize_spacing(extracted.spacing)
        radius_list = self._normalize_radius(extracted.radius)
        shadows_list = self._normalize_shadows(extracted.shadows)

        # Convert to dicts keyed by suggested_name
        return NormalizedTokens(
            viewport=extracted.viewport,
            source_url=extracted.source_url,
            colors=self._index_by_name(colors_list, lambda c: c.value),
            typography=self._index_by_name(
                typography_list, lambda t: f"{t.font_family}-{t.font_size}"
            ),
            spacing=self._index_by_name(spacing_list, lambda s: s.value),
            radius=self._index_by_name(radius_list, lambda r: f"radius-{r.value}"),
            shadows=self._index_by_name(shadows_list, shadow_fallback_key),
            font_families=extracted.font_families,
            detected_spacing_base=extracted.spacing_base,
            detected_naming_convention=extracted.naming_convention,
        )

    @staticmethod
    def _index_by_name(tokens, fallback_key) -> dict:
        """
        Key tokens by ``suggested_name``, disambiguating repeated names.

        Args:
            tokens: Iterable of tokens exposing ``suggested_name``.
            fallback_key: Callable producing a key for a token whose
                ``suggested_name`` is empty.

        Returns:
            Dict in input order; a repeated key becomes ``key_2``, ``key_3``, ...
        """
        indexed = {}
        for token in tokens:
            key = token.suggested_name or fallback_key(token)
            if key in indexed:
                suffix = 2
                while f"{key}_{suffix}" in indexed:
                    suffix += 1
                key = f"{key}_{suffix}"
            indexed[key] = token
        return indexed

    # =========================================================================
    # COLOR NORMALIZATION
    # =========================================================================

    def _normalize_colors(self, colors: list[ColorToken]) -> list[ColorToken]:
        """
        Normalize color tokens:
        - Deduplicate similar colors
        - Assign role_hints based on CSS context (absorbed from semantic_analyzer)
        - Assign suggested names using hue + NUMERIC shade (50-900)
        - Calculate confidence

        v3: Removed _infer_color_role() and _generate_color_name_from_value().
        ALL colors now get numeric shades via _generate_preliminary_name().
        Role hints are set for AURORA to consume (not used in naming).
        """
        if not colors:
            return []

        # Step 1: Deduplicate by exact hex value
        unique_colors = {}
        for color in colors:
            hex_val = normalize_hex(color.value)
            if hex_val in unique_colors:
                # Merge frequency and contexts
                existing = unique_colors[hex_val]
                existing.frequency += color.frequency
                existing.contexts = list(set(existing.contexts + color.contexts))
                existing.elements = list(set(existing.elements + color.elements))
                existing.css_properties = list(set(existing.css_properties + color.css_properties))
            else:
                color.value = hex_val
                unique_colors[hex_val] = color

        # Step 2: Merge visually similar colors
        merged_colors = self._merge_similar_colors(list(unique_colors.values()))

        # Step 3: Assign role_hints and preliminary names (ALL numeric)
        for color in merged_colors:
            # Set role_hint based on CSS property/element context
            color.role_hint = self._infer_role_hint(color)

            # Generate name: ALWAYS hue + numeric shade (50-900)
            color.suggested_name = self._generate_preliminary_name(color)

            # Update confidence based on frequency
            color.confidence = self._calculate_confidence(color.frequency)

        # Sort by frequency (most used first)
        merged_colors.sort(key=lambda c: -c.frequency)

        return merged_colors

    def _merge_similar_colors(self, colors: list[ColorToken]) -> list[ColorToken]:
        """Merge colors that are visually very similar."""
        if len(colors) <= 1:
            return colors

        merged = []
        used = set()

        for i, color1 in enumerate(colors):
            if i in used:
                continue

            # Find similar colors
            similar_group = [color1]
            for j, color2 in enumerate(colors[i+1:], i+1):
                if j in used:
                    continue
                if self._colors_are_similar(color1.value, color2.value):
                    similar_group.append(color2)
                    used.add(j)

            # Merge the group - keep the most frequent
            similar_group.sort(key=lambda c: -c.frequency)
            primary = similar_group[0]

            # Aggregate data from similar colors
            for other in similar_group[1:]:
                primary.frequency += other.frequency
                primary.contexts = list(set(primary.contexts + other.contexts))
                primary.elements = list(set(primary.elements + other.elements))
                primary.css_properties = list(set(primary.css_properties + other.css_properties))

            merged.append(primary)
            used.add(i)

        return merged

    def _colors_are_similar(self, hex1: str, hex2: str) -> bool:
        """Check if two colors are visually similar."""
        try:
            parsed1 = parse_color(hex1)
            parsed2 = parse_color(hex2)
            if parsed1 is None or parsed2 is None:
                return False
            if parsed1.rgb is None or parsed2.rgb is None:
                return False

            rgb1 = parsed1.rgb
            rgb2 = parsed2.rgb

            # Calculate Euclidean distance in RGB space
            distance = sum((a - b) ** 2 for a, b in zip(rgb1, rgb2)) ** 0.5
            return distance < self.color_similarity_threshold
        except Exception:
            return False

    def _infer_role_hint(self, color: ColorToken) -> Optional[str]:
        """
        Infer a role_hint for AURORA based on CSS property and element context.

        This replaces the old _infer_color_role() (which was used for naming)
        and absorbs the useful heuristics from semantic_analyzer.py.

        Role hints are metadata for AURORA — they do NOT affect the color name.

        Checks run in this order and the first match wins: brand, text,
        background, border, feedback, generic palette.

        Returns:
            "brand_candidate", "text_candidate", "bg_candidate",
            "border_candidate", "feedback_candidate", "palette", or None
            when no heuristic applies.
        """
        css_props = [p.lower() for p in color.css_properties]
        elements = [e.lower() for e in color.elements]
        contexts = [c.lower() for c in color.contexts]
        all_context = " ".join(css_props + elements + contexts)
        context_words = set(re.findall(r"[a-z0-9]+", all_context))

        def mentions(keywords) -> bool:
            # Keywords of one or two characters ("a", "p", "h1") must match a
            # whole word. As raw substrings they match nearly every context
            # ("a" is inside "background-color", "p" is inside "span").
            # Longer keywords keep substring matching so that e.g. "btn"
            # still matches "btn-primary" and "link" matches "navlink".
            return any(
                kw in context_words if len(kw) <= 2 else kw in all_context
                for kw in keywords
            )

        # Calculate color properties for additional heuristics
        # NOTE(review): the /255 scaling presumes 0-255 RGB channels — confirm
        # against parse_color.
        parsed = parse_color(color.value)
        if parsed and parsed.rgb:
            r, g, b = parsed.rgb
            luminance = (0.299 * r + 0.587 * g + 0.114 * b) / 255
            max_c = max(r, g, b)
            min_c = min(r, g, b)
            # Channel spread, used here as a cheap saturation proxy
            saturation = (max_c - min_c) / 255 if max_c > 0 else 0
        else:
            # Unparseable color: treat as a mid-luminance grey
            luminance = 0.5
            saturation = 0

        # --- BRAND/INTERACTIVE candidate ---
        is_interactive = mentions(
            ("button", "a", "input", "select", "submit", "btn", "cta", "link")
        )
        has_bg_prop = any("background" in p for p in css_props)

        # Interactive elements with background-color + saturated color
        if saturation > 0.25 and is_interactive and has_bg_prop:
            return "brand_candidate"
        # Highly saturated + high frequency
        if saturation > 0.35 and color.frequency > 15:
            return "brand_candidate"

        # --- TEXT candidate ---
        has_color_prop = any(
            p == "color" or (p.endswith("-color") and "background" not in p and "border" not in p)
            for p in css_props
        )
        is_text_element = mentions(
            ("p", "span", "h1", "h2", "h3", "h4", "h5", "h6", "label", "text")
        )

        if saturation < 0.15 and (has_color_prop or is_text_element):
            return "text_candidate"
        if saturation < 0.1 and luminance < 0.5 and color.frequency > 30:
            return "text_candidate"

        # --- BACKGROUND candidate ---
        is_container = mentions(
            ("div", "section", "main", "body", "article", "header", "footer", "card")
        )

        if has_bg_prop and is_container and saturation < 0.15:
            return "bg_candidate"
        if luminance > 0.9 and saturation < 0.1:
            return "bg_candidate"

        # --- BORDER candidate ---
        # css_props are part of all_context, so this also covers border-*
        # properties.
        if "border" in all_context:
            return "border_candidate"

        # --- FEEDBACK candidate ---
        # Error / success / warning / info keywords anywhere in the context
        feedback_keywords = (
            "error", "danger", "invalid", "negative",
            "success", "valid", "positive",
            "warning", "caution", "alert",
            "info", "notice",
        )
        if any(kw in all_context for kw in feedback_keywords):
            return "feedback_candidate"

        # --- Generic palette color (saturated but no clear role) ---
        if saturation > 0.2:
            return "palette"

        return None

    def _generate_preliminary_name(self, color: ColorToken) -> str:
        """
        Build the preliminary ``color.{hue_family}.{shade}`` name.

        This is the single naming path for ALL colors. The hue family comes
        from categorize_color(); the shade is always numeric (50-900) and is
        picked from the HSL lightness, never a word such as light/dark/base.
        Colors whose HSL cannot be determined get shade 500.

        AURORA may later replace these with semantic names
        (color.brand.primary); the normalizer only supplies hue + shade.
        """
        hue_family = categorize_color(color.value)
        parsed = parse_color(color.value)

        # (minimum lightness, shade) — lighter colors get lower shade numbers.
        # NOTE(review): the thresholds presume lightness on a 0-100 scale —
        # confirm against parse_color.
        shade_floors = (
            (95, "50"),
            (85, "100"),
            (75, "200"),
            (65, "300"),
            (55, "400"),
            (45, "500"),
            (35, "600"),
            (25, "700"),
            (15, "800"),
        )

        shade = "500"
        if parsed and parsed.hsl:
            _hue, _saturation, lightness = parsed.hsl
            shade = "900"  # darker than every floor above
            for floor, label in shade_floors:
                if lightness >= floor:
                    shade = label
                    break

        return f"color.{hue_family}.{shade}"

    # =========================================================================
    # TYPOGRAPHY NORMALIZATION
    # =========================================================================

    def _normalize_typography(self, typography: list[TypographyToken]) -> list[TypographyToken]:
        """
        Normalize typography tokens:
        - Deduplicate identical styles
        - Infer type scale categories
        - Assign suggested names with weight suffix to prevent collisions
        """
        if not typography:
            return []

        # Deduplicate by unique style combination
        unique_typo = {}
        for typo in typography:
            key = f"{typo.font_family}|{typo.font_size}|{typo.font_weight}|{typo.line_height}"
            if key in unique_typo:
                existing = unique_typo[key]
                existing.frequency += typo.frequency
                existing.elements = list(set(existing.elements + typo.elements))
            else:
                unique_typo[key] = typo

        result = list(unique_typo.values())

        # Infer names based on size, elements, AND weight (v3: collision fix)
        for typo in result:
            typo.suggested_name = self._generate_typography_name(typo)
            typo.confidence = self._calculate_confidence(typo.frequency)

        # Sort by font size (largest first)
        result.sort(key=lambda t: -self._parse_font_size(t.font_size))

        return result

    def _generate_typography_name(self, typo: TypographyToken) -> str:
        """
        Generate a semantic name for typography.

        v3: Includes font weight in name to prevent collisions.
        Two styles at 24px with weight 700 and 400 now produce
        font.heading.lg.700 and font.heading.lg.400 instead of both being font.heading.lg.
        """
        size_px = self._parse_font_size(typo.font_size)
        elements = " ".join(typo.elements).lower()

        # Determine category from elements
        if any(h in elements for h in ["h1", "hero", "display"]):
            category = "display"
        elif any(h in elements for h in ["h2", "h3", "h4", "h5", "h6", "heading", "title"]):
            category = "heading"
        elif any(h in elements for h in ["label", "caption", "small", "meta"]):
            category = "label"
        elif any(h in elements for h in ["body", "p", "paragraph", "text"]):
            category = "body"
        else:
            category = "text"

        # Determine size tier
        if size_px >= 32:
            size_tier = "xl"
        elif size_px >= 24:
            size_tier = "lg"
        elif size_px >= 18:
            size_tier = "md"
        elif size_px >= 14:
            size_tier = "sm"
        else:
            size_tier = "xs"

        # v3: Include weight to prevent collisions
        weight = typo.font_weight
        return f"font.{category}.{size_tier}.{weight}"

    def _parse_font_size(self, size: str) -> float:
        """Parse font size string to pixels."""
        if not size:
            return 16

        size = size.lower().strip()

        # Handle px
        if "px" in size:
            try:
                return float(size.replace("px", ""))
            except ValueError:
                return 16

        # Handle rem (assume 16px base)
        if "rem" in size:
            try:
                return float(size.replace("rem", "")) * 16
            except ValueError:
                return 16

        # Handle em (assume 16px base)
        if "em" in size:
            try:
                return float(size.replace("em", "")) * 16
            except ValueError:
                return 16

        # Try plain number
        try:
            return float(size)
        except ValueError:
            return 16

    # =========================================================================
    # SPACING NORMALIZATION
    # =========================================================================

    def _normalize_spacing(self, spacing: list[SpacingToken]) -> list[SpacingToken]:
        """
        Normalize spacing tokens:
        - Merge similar values
        - Align to base-8 grid if close
        - Assign suggested names
        """
        if not spacing:
            return []

        # Deduplicate by value
        unique_spacing = {}
        for space in spacing:
            key = space.value
            if key in unique_spacing:
                existing = unique_spacing[key]
                existing.frequency += space.frequency
                existing.contexts = list(set(existing.contexts + space.contexts))
            else:
                unique_spacing[key] = space

        result = list(unique_spacing.values())

        # Merge very similar values
        result = self._merge_similar_spacing(result)

        # Assign names
        for space in result:
            space.suggested_name = self._generate_spacing_name(space)
            space.confidence = self._calculate_confidence(space.frequency)

        # Sort by value
        result.sort(key=lambda s: s.value_px)

        return result

    def _merge_similar_spacing(self, spacing: list[SpacingToken]) -> list[SpacingToken]:
        """Merge spacing values that are very close."""
        if len(spacing) <= 1:
            return spacing

        # Sort by pixel value
        spacing.sort(key=lambda s: s.value_px)

        merged = []
        i = 0

        while i < len(spacing):
            current = spacing[i]
            group = [current]

            # Find adjacent similar values
            j = i + 1
            while j < len(spacing):
                if abs(spacing[j].value_px - current.value_px) <= self.spacing_merge_threshold:
                    group.append(spacing[j])
                    j += 1
                else:
                    break

            # Merge group - prefer base-8 aligned value or most frequent
            group.sort(key=lambda s: (-s.fits_base_8, -s.frequency))
            primary = group[0]

            for other in group[1:]:
                primary.frequency += other.frequency
                primary.contexts = list(set(primary.contexts + other.contexts))

            merged.append(primary)
            i = j

        return merged

    def _generate_spacing_name(self, space: SpacingToken) -> str:
        """Generate a semantic name for spacing."""
        px = space.value_px

        # Map to t-shirt sizes based on value
        if px <= 2:
            size = "px"
        elif px <= 4:
            size = "0.5"
        elif px <= 8:
            size = "1"
        elif px <= 12:
            size = "1.5"
        elif px <= 16:
            size = "2"
        elif px <= 20:
            size = "2.5"
        elif px <= 24:
            size = "3"
        elif px <= 32:
            size = "4"
        elif px <= 40:
            size = "5"
        elif px <= 48:
            size = "6"
        elif px <= 64:
            size = "8"
        elif px <= 80:
            size = "10"
        elif px <= 96:
            size = "12"
        else:
            size = str(int(px / 4))

        return f"space.{size}"

    # =========================================================================
    # RADIUS NORMALIZATION (NEW in v3)
    # =========================================================================

    def _normalize_radius(self, radius_tokens: list[RadiusToken]) -> list[RadiusToken]:
        """
        Normalize border radius tokens.

        v3: Full processing instead of just storing raw values.
        - Parse multi-value shorthand (take max single value)
        - Convert percentage values (50% -> 9999px for "full")
        - Convert rem/em to px
        - Deduplicate by resolved px value
        - Sort by size
        - Assign semantic names (none, sm, md, lg, xl, 2xl, full)
        """
        if not radius_tokens:
            return []

        # Step 1: Parse each radius to a single px value
        parsed_radii = []
        for token in radius_tokens:
            px_value = self._parse_radius_value(token.value)
            if px_value is not None:
                token.value_px = int(px_value)
                token.value = f"{int(px_value)}px"
                # Set grid alignment flags
                token.fits_base_4 = (px_value % 4 == 0) if px_value > 0 else True
                token.fits_base_8 = (px_value % 8 == 0) if px_value > 0 else True
                parsed_radii.append(token)

        # Step 2: Deduplicate by px value
        unique_radii = {}
        for token in parsed_radii:
            key = token.value_px
            if key in unique_radii:
                existing = unique_radii[key]
                existing.frequency += token.frequency
                existing.elements = list(set(existing.elements + token.elements))
            else:
                unique_radii[key] = token

        result = list(unique_radii.values())

        # Step 3: Sort by px value
        result.sort(key=lambda r: r.value_px or 0)

        # Step 4: Assign semantic names
        for token in result:
            token.suggested_name = self._generate_radius_name(token)
            token.confidence = self._calculate_confidence(token.frequency)

        return result

    def _parse_radius_value(self, value: str) -> Optional[int]:
        """
        Parse a CSS border-radius value to a single integer px value.

        Handles:
        - Single values: "8px", "0.5rem", "1em"
        - Multi-value shorthand: "0px 0px 16px 16px" -> take max (16)
        - Percentage: "50%" -> 9999 (treated as "full")
        - "none" / "0" -> 0
        """
        if not value:
            return None

        value = value.strip().lower()

        # Handle "none"
        if value == "none" or value == "0":
            return 0

        # Handle percentage — 50% means fully round, map to 9999
        if "%" in value:
            try:
                pct = float(value.replace("%", "").strip())
                if pct >= 50:
                    return 9999
                # For lower percentages, approximate (not exact, but reasonable)
                # Most radius percentages in practice are 50% for circles
                return int(pct)
            except ValueError:
                return None

        # Handle multi-value shorthand: "0px 0px 16px 16px"
        # Split by spaces and take the max value
        parts = value.split()
        if len(parts) > 1:
            max_px = 0
            for part in parts:
                px = self._parse_single_length(part)
                if px is not None and px > max_px:
                    max_px = px
            return int(max_px) if max_px > 0 else 0

        # Single value
        px = self._parse_single_length(value)
        return int(round(px)) if px is not None else None

    def _parse_single_length(self, value: str) -> Optional[float]:
        """Parse a single CSS length value to px."""
        value = value.strip().lower()

        if "px" in value:
            try:
                return float(value.replace("px", ""))
            except ValueError:
                return None

        if "rem" in value:
            try:
                return float(value.replace("rem", "")) * 16
            except ValueError:
                return None

        if "em" in value:
            try:
                return float(value.replace("em", "")) * 16
            except ValueError:
                return None

        # Try plain number (treat as px)
        try:
            return float(value)
        except ValueError:
            return None

    def _generate_radius_name(self, token: RadiusToken) -> str:
        """
        Generate a semantic name for a border radius token.

        Maps px values to semantic tiers:
        - 0 -> radius.none
        - 1-3 -> radius.sm
        - 4-7 -> radius.md
        - 8-15 -> radius.lg
        - 16-23 -> radius.xl
        - 24-9998 -> radius.2xl
        - 9999 -> radius.full
        """
        px = token.value_px or 0

        if px == 0:
            return "radius.none"
        elif px >= 9999:
            return "radius.full"
        elif px <= 3:
            return "radius.sm"
        elif px <= 7:
            return "radius.md"
        elif px <= 15:
            return "radius.lg"
        elif px <= 23:
            return "radius.xl"
        else:
            return "radius.2xl"

    # =========================================================================
    # SHADOW NORMALIZATION (NEW in v3)
    # =========================================================================

    def _normalize_shadows(self, shadow_tokens: list[ShadowToken]) -> list[ShadowToken]:
        """
        Normalize box shadow tokens.

        v3: Full processing instead of hash-based keys.
        - Parse shadow CSS into components (if not already parsed)
        - Compute blur_px and y_offset_px for sorting
        - Filter out spread-only shadows (border simulations)
        - Separate inset shadows into their own category
        - Sort by blur radius (elevation)
        - Deduplicate visually similar shadows
        - Assign semantic names (xs, sm, md, lg, xl)
        """
        if not shadow_tokens:
            return []

        # Step 1: Parse and compute numeric values
        parsed_shadows = []
        for token in shadow_tokens:
            self._ensure_shadow_parsed(token)

            # Skip spread-only shadows (border simulations)
            if (token.blur_px is None or token.blur_px == 0) and token.spread and token.spread != "0px":
                continue

            # Skip inset shadows (different semantic — handle separately if needed)
            if token.inset:
                continue

            # Skip shadows with no meaningful blur
            if token.blur_px is not None and token.blur_px <= 0:
                continue

            parsed_shadows.append(token)

        if not parsed_shadows:
            return []

        # Step 2: Deduplicate by visual similarity (same blur + y-offset range)
        unique_shadows = []
        seen_blur_values = set()
        for token in parsed_shadows:
            blur = token.blur_px or 0
            # Round to nearest 2px for dedup
            blur_bucket = round(blur / 2) * 2
            if blur_bucket not in seen_blur_values:
                seen_blur_values.add(blur_bucket)
                unique_shadows.append(token)
            else:
                # Merge frequency with existing
                for existing in unique_shadows:
                    existing_blur = round((existing.blur_px or 0) / 2) * 2
                    if existing_blur == blur_bucket:
                        existing.frequency += token.frequency
                        existing.elements = list(set(existing.elements + token.elements))
                        break

        # Step 3: Sort by blur radius (ascending = increasing elevation)
        unique_shadows.sort(key=lambda s: s.blur_px or 0)

        # Step 4: Assign semantic names based on sort order
        for i, token in enumerate(unique_shadows):
            if i < len(self.shadow_tier_names):
                tier_name = self.shadow_tier_names[i]
            else:
                tier_name = f"{i + 1}xl"
            token.suggested_name = f"shadow.{tier_name}"
            token.confidence = self._calculate_confidence(token.frequency)

        return unique_shadows

    def _ensure_shadow_parsed(self, token: ShadowToken):
        """
        Ensure shadow token has parsed components and computed px values.

        If offset_x/offset_y/blur/spread/color are None, attempt to parse
        from the raw CSS value string.
        """
        # Compute blur_px from blur string
        if token.blur is not None and token.blur_px is None:
            px = self._parse_single_length(token.blur)
            token.blur_px = px if px is not None else 0

        # Compute y_offset_px from offset_y string
        if token.offset_y is not None and token.y_offset_px is None:
            px = self._parse_single_length(token.offset_y)
            token.y_offset_px = px if px is not None else 0

        # If components are all None, try to parse from CSS value
        if token.blur is None and token.offset_x is None:
            self._parse_shadow_css(token)

    def _parse_shadow_css(self, token: ShadowToken):
        """
        Parse a CSS box-shadow value into components.

        Format: [inset] <offset-x> <offset-y> [blur] [spread] <color>
        Example: "0px 4px 8px 0px rgba(0,0,0,0.1)"
        """
        value = token.value.strip()

        # Check for inset
        if value.startswith("inset"):
            token.inset = True
            value = value[5:].strip()

        # Extract color (rgba/rgb/hex at the end or beginning)
        color_match = re.search(
            r'(rgba?\s*\([^)]+\)|#[0-9a-fA-F]{3,8})\s*$',
            value
        )
        if color_match:
            token.color = color_match.group(1).strip()
            value = value[:color_match.start()].strip()
        else:
            # Try color at the beginning
            color_match = re.search(
                r'^(rgba?\s*\([^)]+\)|#[0-9a-fA-F]{3,8})\s+',
                value
            )
            if color_match:
                token.color = color_match.group(1).strip()
                value = value[color_match.end():].strip()

        # Parse remaining length values
        length_pattern = r'(-?\d+(?:\.\d+)?(?:px|rem|em|%)?)'
        lengths = re.findall(length_pattern, value)

        if len(lengths) >= 2:
            token.offset_x = lengths[0]
            token.offset_y = lengths[1]
            px = self._parse_single_length(lengths[1])
            token.y_offset_px = px if px is not None else 0

        if len(lengths) >= 3:
            token.blur = lengths[2]
            px = self._parse_single_length(lengths[2])
            token.blur_px = px if px is not None else 0

        if len(lengths) >= 4:
            token.spread = lengths[3]

        # Default blur_px to 0 if still None
        if token.blur_px is None:
            token.blur_px = 0
        if token.y_offset_px is None:
            token.y_offset_px = 0

    # =========================================================================
    # SHARED UTILITIES
    # =========================================================================

    def _calculate_confidence(self, frequency: int) -> Confidence:
        """
        Map a usage count to a confidence level.

        10 or more occurrences -> HIGH, 3 to 9 -> MEDIUM, fewer -> LOW.
        """
        # (minimum frequency, level), highest bar first
        levels = ((10, Confidence.HIGH), (3, Confidence.MEDIUM))
        for minimum, level in levels:
            if frequency >= minimum:
                return level
        return Confidence.LOW


def normalize_tokens(extracted: ExtractedTokens) -> NormalizedTokens:
    """Run a fresh TokenNormalizer over ``extracted`` and return its result."""
    return TokenNormalizer().normalize(extracted)