| """ | |
| Agent 2: Token Normalizer & Structurer | |
| Design System Automation v3 | |
| Persona: Design System Librarian | |
| Responsibilities: | |
| - Clean noisy extraction data | |
| - Deduplicate similar tokens (colors within threshold, similar spacing) | |
| - Assign ALL color names using NUMERIC shades only (50-900) | |
| - Add role_hints based on CSS property/element context (absorbed from semantic_analyzer) | |
| - Normalize radius values (parse, deduplicate, sort, name) | |
| - Normalize shadow values (parse, sort by blur, name) | |
| - Fix typography naming collisions (add weight suffix) | |
| - Tag tokens as: detected | inferred | low-confidence | |
| """ | |
| import re | |
| from typing import Optional | |
| from collections import defaultdict | |
| from core.token_schema import ( | |
| ColorToken, | |
| TypographyToken, | |
| SpacingToken, | |
| RadiusToken, | |
| ShadowToken, | |
| ExtractedTokens, | |
| NormalizedTokens, | |
| Confidence, | |
| TokenSource, | |
| ) | |
| from core.color_utils import ( | |
| parse_color, | |
| normalize_hex, | |
| categorize_color, | |
| ) | |
class TokenNormalizer:
    """
    Normalizes and structures extracted tokens.
    This is Agent 2's job — taking raw extraction data and
    organizing it into a clean, deduplicated structure.
    v3 changes:
    - Color naming: ALWAYS numeric shades (50-900), NEVER words (light/dark/base)
    - Role hints: CSS-property-based metadata for AURORA to consume
    - Radius: Full normalization (parse, deduplicate, sort, name)
    - Shadows: Full normalization (parse, sort by blur, deduplicate, name)
    - Typography: Collision-proof naming with weight suffix
    """

    def __init__(self):
        # Thresholds for duplicate detection
        self.color_similarity_threshold = 10  # Euclidean delta in RGB space (used by _colors_are_similar)
        self.spacing_merge_threshold = 2  # px difference to merge (used by _merge_similar_spacing)
        # Radius semantic tiers (px -> name).
        # NOTE(review): this table is not referenced by _generate_radius_name,
        # which hard-codes its own thresholds — confirm whether it is still needed.
        self.radius_tiers = [
            (0, "none"),
            (2, "sm"),
            (4, "md"),
            (8, "lg"),
            (16, "xl"),
            (24, "2xl"),
            (9999, "full"),
        ]
        # Shadow elevation tier names, assigned in ascending blur order
        # by _normalize_shadows (index 0 = smallest blur).
        self.shadow_tier_names = ["xs", "sm", "md", "lg", "xl", "2xl"]
| def normalize(self, extracted: ExtractedTokens) -> NormalizedTokens: | |
| """ | |
| Normalize extracted tokens. | |
| Args: | |
| extracted: Raw extraction results from Agent 1 | |
| Returns: | |
| NormalizedTokens with cleaned, deduplicated data | |
| """ | |
| # Process each token type (returns lists) | |
| colors_list = self._normalize_colors(extracted.colors) | |
| typography_list = self._normalize_typography(extracted.typography) | |
| spacing_list = self._normalize_spacing(extracted.spacing) | |
| radius_list = self._normalize_radius(extracted.radius) | |
| shadows_list = self._normalize_shadows(extracted.shadows) | |
| # Convert to dicts keyed by suggested_name | |
| colors_dict = {} | |
| for c in colors_list: | |
| key = c.suggested_name or c.value | |
| # Handle duplicate names by appending a suffix | |
| if key in colors_dict: | |
| suffix = 2 | |
| while f"{key}_{suffix}" in colors_dict: | |
| suffix += 1 | |
| key = f"{key}_{suffix}" | |
| colors_dict[key] = c | |
| typography_dict = {} | |
| for t in typography_list: | |
| key = t.suggested_name or f"{t.font_family}-{t.font_size}" | |
| if key in typography_dict: | |
| suffix = 2 | |
| while f"{key}_{suffix}" in typography_dict: | |
| suffix += 1 | |
| key = f"{key}_{suffix}" | |
| typography_dict[key] = t | |
| spacing_dict = {} | |
| for s in spacing_list: | |
| key = s.suggested_name or s.value | |
| if key in spacing_dict: | |
| suffix = 2 | |
| while f"{key}_{suffix}" in spacing_dict: | |
| suffix += 1 | |
| key = f"{key}_{suffix}" | |
| spacing_dict[key] = s | |
| # Radius and shadows are already properly named | |
| radius_dict = {} | |
| for r in radius_list: | |
| key = r.suggested_name or f"radius-{r.value}" | |
| if key in radius_dict: | |
| suffix = 2 | |
| while f"{key}_{suffix}" in radius_dict: | |
| suffix += 1 | |
| key = f"{key}_{suffix}" | |
| radius_dict[key] = r | |
| shadows_dict = {} | |
| for s in shadows_list: | |
| key = s.suggested_name or f"shadow-{hash(s.value) % 1000}" | |
| if key in shadows_dict: | |
| suffix = 2 | |
| while f"{key}_{suffix}" in shadows_dict: | |
| suffix += 1 | |
| key = f"{key}_{suffix}" | |
| shadows_dict[key] = s | |
| # Create normalized result | |
| normalized = NormalizedTokens( | |
| viewport=extracted.viewport, | |
| source_url=extracted.source_url, | |
| colors=colors_dict, | |
| typography=typography_dict, | |
| spacing=spacing_dict, | |
| radius=radius_dict, | |
| shadows=shadows_dict, | |
| font_families=extracted.font_families, | |
| detected_spacing_base=extracted.spacing_base, | |
| detected_naming_convention=extracted.naming_convention, | |
| ) | |
| return normalized | |
| # ========================================================================= | |
| # COLOR NORMALIZATION | |
| # ========================================================================= | |
| def _normalize_colors(self, colors: list[ColorToken]) -> list[ColorToken]: | |
| """ | |
| Normalize color tokens: | |
| - Deduplicate similar colors | |
| - Assign role_hints based on CSS context (absorbed from semantic_analyzer) | |
| - Assign suggested names using hue + NUMERIC shade (50-900) | |
| - Calculate confidence | |
| v3: Removed _infer_color_role() and _generate_color_name_from_value(). | |
| ALL colors now get numeric shades via _generate_preliminary_name(). | |
| Role hints are set for AURORA to consume (not used in naming). | |
| """ | |
| if not colors: | |
| return [] | |
| # Step 1: Deduplicate by exact hex value | |
| unique_colors = {} | |
| for color in colors: | |
| hex_val = normalize_hex(color.value) | |
| if hex_val in unique_colors: | |
| # Merge frequency and contexts | |
| existing = unique_colors[hex_val] | |
| existing.frequency += color.frequency | |
| existing.contexts = list(set(existing.contexts + color.contexts)) | |
| existing.elements = list(set(existing.elements + color.elements)) | |
| existing.css_properties = list(set(existing.css_properties + color.css_properties)) | |
| else: | |
| color.value = hex_val | |
| unique_colors[hex_val] = color | |
| # Step 2: Merge visually similar colors | |
| merged_colors = self._merge_similar_colors(list(unique_colors.values())) | |
| # Step 3: Assign role_hints and preliminary names (ALL numeric) | |
| for color in merged_colors: | |
| # Set role_hint based on CSS property/element context | |
| color.role_hint = self._infer_role_hint(color) | |
| # Generate name: ALWAYS hue + numeric shade (50-900) | |
| color.suggested_name = self._generate_preliminary_name(color) | |
| # Update confidence based on frequency | |
| color.confidence = self._calculate_confidence(color.frequency) | |
| # Sort by frequency (most used first) | |
| merged_colors.sort(key=lambda c: -c.frequency) | |
| return merged_colors | |
| def _merge_similar_colors(self, colors: list[ColorToken]) -> list[ColorToken]: | |
| """Merge colors that are visually very similar.""" | |
| if len(colors) <= 1: | |
| return colors | |
| merged = [] | |
| used = set() | |
| for i, color1 in enumerate(colors): | |
| if i in used: | |
| continue | |
| # Find similar colors | |
| similar_group = [color1] | |
| for j, color2 in enumerate(colors[i+1:], i+1): | |
| if j in used: | |
| continue | |
| if self._colors_are_similar(color1.value, color2.value): | |
| similar_group.append(color2) | |
| used.add(j) | |
| # Merge the group - keep the most frequent | |
| similar_group.sort(key=lambda c: -c.frequency) | |
| primary = similar_group[0] | |
| # Aggregate data from similar colors | |
| for other in similar_group[1:]: | |
| primary.frequency += other.frequency | |
| primary.contexts = list(set(primary.contexts + other.contexts)) | |
| primary.elements = list(set(primary.elements + other.elements)) | |
| primary.css_properties = list(set(primary.css_properties + other.css_properties)) | |
| merged.append(primary) | |
| used.add(i) | |
| return merged | |
| def _colors_are_similar(self, hex1: str, hex2: str) -> bool: | |
| """Check if two colors are visually similar.""" | |
| try: | |
| parsed1 = parse_color(hex1) | |
| parsed2 = parse_color(hex2) | |
| if parsed1 is None or parsed2 is None: | |
| return False | |
| if parsed1.rgb is None or parsed2.rgb is None: | |
| return False | |
| rgb1 = parsed1.rgb | |
| rgb2 = parsed2.rgb | |
| # Calculate Euclidean distance in RGB space | |
| distance = sum((a - b) ** 2 for a, b in zip(rgb1, rgb2)) ** 0.5 | |
| return distance < self.color_similarity_threshold | |
| except Exception: | |
| return False | |
| def _infer_role_hint(self, color: ColorToken) -> Optional[str]: | |
| """ | |
| Infer a role_hint for AURORA based on CSS property and element context. | |
| This replaces the old _infer_color_role() (which was used for naming) | |
| and absorbs the useful heuristics from semantic_analyzer.py. | |
| Role hints are metadata for AURORA — they do NOT affect the color name. | |
| """ | |
| css_props = [p.lower() for p in color.css_properties] | |
| elements = [e.lower() for e in color.elements] | |
| contexts = [c.lower() for c in color.contexts] | |
| all_context = " ".join(css_props + elements + contexts) | |
| # Calculate color properties for additional heuristics | |
| parsed = parse_color(color.value) | |
| if parsed and parsed.rgb: | |
| r, g, b = parsed.rgb | |
| luminance = (0.299 * r + 0.587 * g + 0.114 * b) / 255 | |
| max_c = max(r, g, b) | |
| min_c = min(r, g, b) | |
| saturation = (max_c - min_c) / 255 if max_c > 0 else 0 | |
| else: | |
| luminance = 0.5 | |
| saturation = 0 | |
| # --- BRAND/INTERACTIVE candidate --- | |
| interactive_elements = ["button", "a", "input", "select", "submit", "btn", "cta", "link"] | |
| is_interactive = any(el in all_context for el in interactive_elements) | |
| has_bg_prop = any("background" in p for p in css_props) | |
| # Interactive elements with background-color + saturated color | |
| if saturation > 0.25 and is_interactive and has_bg_prop: | |
| return "brand_candidate" | |
| # Highly saturated + high frequency | |
| if saturation > 0.35 and color.frequency > 15: | |
| return "brand_candidate" | |
| # --- TEXT candidate --- | |
| has_color_prop = any( | |
| p == "color" or (p.endswith("-color") and "background" not in p and "border" not in p) | |
| for p in css_props | |
| ) | |
| text_elements = ["p", "span", "h1", "h2", "h3", "h4", "h5", "h6", "label", "text"] | |
| is_text_element = any(el in all_context for el in text_elements) | |
| if saturation < 0.15 and (has_color_prop or is_text_element): | |
| return "text_candidate" | |
| if saturation < 0.1 and luminance < 0.5 and color.frequency > 30: | |
| return "text_candidate" | |
| # --- BACKGROUND candidate --- | |
| container_elements = ["div", "section", "main", "body", "article", "header", "footer", "card"] | |
| is_container = any(el in all_context for el in container_elements) | |
| if has_bg_prop and is_container and saturation < 0.15: | |
| return "bg_candidate" | |
| if luminance > 0.9 and saturation < 0.1: | |
| return "bg_candidate" | |
| # --- BORDER candidate --- | |
| has_border_prop = any("border" in p for p in css_props) | |
| if has_border_prop or "border" in all_context: | |
| return "border_candidate" | |
| # --- FEEDBACK candidate --- | |
| # Check for error/success/warning keywords in context | |
| feedback_keywords = { | |
| "error": ["error", "danger", "invalid", "negative"], | |
| "success": ["success", "valid", "positive"], | |
| "warning": ["warning", "caution", "alert"], | |
| "info": ["info", "notice"], | |
| } | |
| for fb_type, keywords in feedback_keywords.items(): | |
| if any(kw in all_context for kw in keywords): | |
| return "feedback_candidate" | |
| # --- Generic palette color (saturated but no clear role) --- | |
| if saturation > 0.2: | |
| return "palette" | |
| return None | |
| def _generate_preliminary_name(self, color: ColorToken) -> str: | |
| """ | |
| Generate a preliminary name using hue family + numeric shade. | |
| This is the SINGLE naming path for ALL colors. | |
| Convention: color.{hue_family}.{shade} | |
| Shade is ALWAYS numeric (50-900) based on HSL lightness. | |
| NEVER uses words like light/dark/base. | |
| AURORA may later override these with semantic names (color.brand.primary), | |
| but the normalizer's job is just hue + shade. | |
| """ | |
| category = categorize_color(color.value) | |
| parsed = parse_color(color.value) | |
| if parsed and parsed.hsl: | |
| h, s, l = parsed.hsl | |
| # Map lightness to shade number (50-900) | |
| # Uses HSL lightness which is more perceptually accurate than | |
| # the old luminance-based approach | |
| if l >= 95: | |
| shade = "50" | |
| elif l >= 85: | |
| shade = "100" | |
| elif l >= 75: | |
| shade = "200" | |
| elif l >= 65: | |
| shade = "300" | |
| elif l >= 55: | |
| shade = "400" | |
| elif l >= 45: | |
| shade = "500" | |
| elif l >= 35: | |
| shade = "600" | |
| elif l >= 25: | |
| shade = "700" | |
| elif l >= 15: | |
| shade = "800" | |
| else: | |
| shade = "900" | |
| else: | |
| shade = "500" | |
| return f"color.{category}.{shade}" | |
| # ========================================================================= | |
| # TYPOGRAPHY NORMALIZATION | |
| # ========================================================================= | |
| def _normalize_typography(self, typography: list[TypographyToken]) -> list[TypographyToken]: | |
| """ | |
| Normalize typography tokens: | |
| - Deduplicate identical styles | |
| - Infer type scale categories | |
| - Assign suggested names with weight suffix to prevent collisions | |
| """ | |
| if not typography: | |
| return [] | |
| # Deduplicate by unique style combination | |
| unique_typo = {} | |
| for typo in typography: | |
| key = f"{typo.font_family}|{typo.font_size}|{typo.font_weight}|{typo.line_height}" | |
| if key in unique_typo: | |
| existing = unique_typo[key] | |
| existing.frequency += typo.frequency | |
| existing.elements = list(set(existing.elements + typo.elements)) | |
| else: | |
| unique_typo[key] = typo | |
| result = list(unique_typo.values()) | |
| # Infer names based on size, elements, AND weight (v3: collision fix) | |
| for typo in result: | |
| typo.suggested_name = self._generate_typography_name(typo) | |
| typo.confidence = self._calculate_confidence(typo.frequency) | |
| # Sort by font size (largest first) | |
| result.sort(key=lambda t: -self._parse_font_size(t.font_size)) | |
| return result | |
| def _generate_typography_name(self, typo: TypographyToken) -> str: | |
| """ | |
| Generate a semantic name for typography. | |
| v3: Includes font weight in name to prevent collisions. | |
| Two styles at 24px with weight 700 and 400 now produce | |
| font.heading.lg.700 and font.heading.lg.400 instead of both being font.heading.lg. | |
| """ | |
| size_px = self._parse_font_size(typo.font_size) | |
| elements = " ".join(typo.elements).lower() | |
| # Determine category from elements | |
| if any(h in elements for h in ["h1", "hero", "display"]): | |
| category = "display" | |
| elif any(h in elements for h in ["h2", "h3", "h4", "h5", "h6", "heading", "title"]): | |
| category = "heading" | |
| elif any(h in elements for h in ["label", "caption", "small", "meta"]): | |
| category = "label" | |
| elif any(h in elements for h in ["body", "p", "paragraph", "text"]): | |
| category = "body" | |
| else: | |
| category = "text" | |
| # Determine size tier | |
| if size_px >= 32: | |
| size_tier = "xl" | |
| elif size_px >= 24: | |
| size_tier = "lg" | |
| elif size_px >= 18: | |
| size_tier = "md" | |
| elif size_px >= 14: | |
| size_tier = "sm" | |
| else: | |
| size_tier = "xs" | |
| # v3: Include weight to prevent collisions | |
| weight = typo.font_weight | |
| return f"font.{category}.{size_tier}.{weight}" | |
| def _parse_font_size(self, size: str) -> float: | |
| """Parse font size string to pixels.""" | |
| if not size: | |
| return 16 | |
| size = size.lower().strip() | |
| # Handle px | |
| if "px" in size: | |
| try: | |
| return float(size.replace("px", "")) | |
| except ValueError: | |
| return 16 | |
| # Handle rem (assume 16px base) | |
| if "rem" in size: | |
| try: | |
| return float(size.replace("rem", "")) * 16 | |
| except ValueError: | |
| return 16 | |
| # Handle em (assume 16px base) | |
| if "em" in size: | |
| try: | |
| return float(size.replace("em", "")) * 16 | |
| except ValueError: | |
| return 16 | |
| # Try plain number | |
| try: | |
| return float(size) | |
| except ValueError: | |
| return 16 | |
| # ========================================================================= | |
| # SPACING NORMALIZATION | |
| # ========================================================================= | |
| def _normalize_spacing(self, spacing: list[SpacingToken]) -> list[SpacingToken]: | |
| """ | |
| Normalize spacing tokens: | |
| - Merge similar values | |
| - Align to base-8 grid if close | |
| - Assign suggested names | |
| """ | |
| if not spacing: | |
| return [] | |
| # Deduplicate by value | |
| unique_spacing = {} | |
| for space in spacing: | |
| key = space.value | |
| if key in unique_spacing: | |
| existing = unique_spacing[key] | |
| existing.frequency += space.frequency | |
| existing.contexts = list(set(existing.contexts + space.contexts)) | |
| else: | |
| unique_spacing[key] = space | |
| result = list(unique_spacing.values()) | |
| # Merge very similar values | |
| result = self._merge_similar_spacing(result) | |
| # Assign names | |
| for space in result: | |
| space.suggested_name = self._generate_spacing_name(space) | |
| space.confidence = self._calculate_confidence(space.frequency) | |
| # Sort by value | |
| result.sort(key=lambda s: s.value_px) | |
| return result | |
| def _merge_similar_spacing(self, spacing: list[SpacingToken]) -> list[SpacingToken]: | |
| """Merge spacing values that are very close.""" | |
| if len(spacing) <= 1: | |
| return spacing | |
| # Sort by pixel value | |
| spacing.sort(key=lambda s: s.value_px) | |
| merged = [] | |
| i = 0 | |
| while i < len(spacing): | |
| current = spacing[i] | |
| group = [current] | |
| # Find adjacent similar values | |
| j = i + 1 | |
| while j < len(spacing): | |
| if abs(spacing[j].value_px - current.value_px) <= self.spacing_merge_threshold: | |
| group.append(spacing[j]) | |
| j += 1 | |
| else: | |
| break | |
| # Merge group - prefer base-8 aligned value or most frequent | |
| group.sort(key=lambda s: (-s.fits_base_8, -s.frequency)) | |
| primary = group[0] | |
| for other in group[1:]: | |
| primary.frequency += other.frequency | |
| primary.contexts = list(set(primary.contexts + other.contexts)) | |
| merged.append(primary) | |
| i = j | |
| return merged | |
| def _generate_spacing_name(self, space: SpacingToken) -> str: | |
| """Generate a semantic name for spacing.""" | |
| px = space.value_px | |
| # Map to t-shirt sizes based on value | |
| if px <= 2: | |
| size = "px" | |
| elif px <= 4: | |
| size = "0.5" | |
| elif px <= 8: | |
| size = "1" | |
| elif px <= 12: | |
| size = "1.5" | |
| elif px <= 16: | |
| size = "2" | |
| elif px <= 20: | |
| size = "2.5" | |
| elif px <= 24: | |
| size = "3" | |
| elif px <= 32: | |
| size = "4" | |
| elif px <= 40: | |
| size = "5" | |
| elif px <= 48: | |
| size = "6" | |
| elif px <= 64: | |
| size = "8" | |
| elif px <= 80: | |
| size = "10" | |
| elif px <= 96: | |
| size = "12" | |
| else: | |
| size = str(int(px / 4)) | |
| return f"space.{size}" | |
| # ========================================================================= | |
| # RADIUS NORMALIZATION (NEW in v3) | |
| # ========================================================================= | |
| def _normalize_radius(self, radius_tokens: list[RadiusToken]) -> list[RadiusToken]: | |
| """ | |
| Normalize border radius tokens. | |
| v3: Full processing instead of just storing raw values. | |
| - Parse multi-value shorthand (take max single value) | |
| - Convert percentage values (50% -> 9999px for "full") | |
| - Convert rem/em to px | |
| - Deduplicate by resolved px value | |
| - Sort by size | |
| - Assign semantic names (none, sm, md, lg, xl, 2xl, full) | |
| """ | |
| if not radius_tokens: | |
| return [] | |
| # Step 1: Parse each radius to a single px value | |
| parsed_radii = [] | |
| for token in radius_tokens: | |
| px_value = self._parse_radius_value(token.value) | |
| if px_value is not None: | |
| token.value_px = int(px_value) | |
| token.value = f"{int(px_value)}px" | |
| # Set grid alignment flags | |
| token.fits_base_4 = (px_value % 4 == 0) if px_value > 0 else True | |
| token.fits_base_8 = (px_value % 8 == 0) if px_value > 0 else True | |
| parsed_radii.append(token) | |
| # Step 2: Deduplicate by px value | |
| unique_radii = {} | |
| for token in parsed_radii: | |
| key = token.value_px | |
| if key in unique_radii: | |
| existing = unique_radii[key] | |
| existing.frequency += token.frequency | |
| existing.elements = list(set(existing.elements + token.elements)) | |
| else: | |
| unique_radii[key] = token | |
| result = list(unique_radii.values()) | |
| # Step 3: Sort by px value | |
| result.sort(key=lambda r: r.value_px or 0) | |
| # Step 4: Assign semantic names | |
| for token in result: | |
| token.suggested_name = self._generate_radius_name(token) | |
| token.confidence = self._calculate_confidence(token.frequency) | |
| return result | |
| def _parse_radius_value(self, value: str) -> Optional[int]: | |
| """ | |
| Parse a CSS border-radius value to a single integer px value. | |
| Handles: | |
| - Single values: "8px", "0.5rem", "1em" | |
| - Multi-value shorthand: "0px 0px 16px 16px" -> take max (16) | |
| - Percentage: "50%" -> 9999 (treated as "full") | |
| - "none" / "0" -> 0 | |
| """ | |
| if not value: | |
| return None | |
| value = value.strip().lower() | |
| # Handle "none" | |
| if value == "none" or value == "0": | |
| return 0 | |
| # Handle percentage — 50% means fully round, map to 9999 | |
| if "%" in value: | |
| try: | |
| pct = float(value.replace("%", "").strip()) | |
| if pct >= 50: | |
| return 9999 | |
| # For lower percentages, approximate (not exact, but reasonable) | |
| # Most radius percentages in practice are 50% for circles | |
| return int(pct) | |
| except ValueError: | |
| return None | |
| # Handle multi-value shorthand: "0px 0px 16px 16px" | |
| # Split by spaces and take the max value | |
| parts = value.split() | |
| if len(parts) > 1: | |
| max_px = 0 | |
| for part in parts: | |
| px = self._parse_single_length(part) | |
| if px is not None and px > max_px: | |
| max_px = px | |
| return int(max_px) if max_px > 0 else 0 | |
| # Single value | |
| px = self._parse_single_length(value) | |
| return int(round(px)) if px is not None else None | |
| def _parse_single_length(self, value: str) -> Optional[float]: | |
| """Parse a single CSS length value to px.""" | |
| value = value.strip().lower() | |
| if "px" in value: | |
| try: | |
| return float(value.replace("px", "")) | |
| except ValueError: | |
| return None | |
| if "rem" in value: | |
| try: | |
| return float(value.replace("rem", "")) * 16 | |
| except ValueError: | |
| return None | |
| if "em" in value: | |
| try: | |
| return float(value.replace("em", "")) * 16 | |
| except ValueError: | |
| return None | |
| # Try plain number (treat as px) | |
| try: | |
| return float(value) | |
| except ValueError: | |
| return None | |
| def _generate_radius_name(self, token: RadiusToken) -> str: | |
| """ | |
| Generate a semantic name for a border radius token. | |
| Maps px values to semantic tiers: | |
| - 0 -> radius.none | |
| - 1-3 -> radius.sm | |
| - 4-7 -> radius.md | |
| - 8-15 -> radius.lg | |
| - 16-23 -> radius.xl | |
| - 24-9998 -> radius.2xl | |
| - 9999 -> radius.full | |
| """ | |
| px = token.value_px or 0 | |
| if px == 0: | |
| return "radius.none" | |
| elif px >= 9999: | |
| return "radius.full" | |
| elif px <= 3: | |
| return "radius.sm" | |
| elif px <= 7: | |
| return "radius.md" | |
| elif px <= 15: | |
| return "radius.lg" | |
| elif px <= 23: | |
| return "radius.xl" | |
| else: | |
| return "radius.2xl" | |
| # ========================================================================= | |
| # SHADOW NORMALIZATION (NEW in v3) | |
| # ========================================================================= | |
| def _normalize_shadows(self, shadow_tokens: list[ShadowToken]) -> list[ShadowToken]: | |
| """ | |
| Normalize box shadow tokens. | |
| v3: Full processing instead of hash-based keys. | |
| - Parse shadow CSS into components (if not already parsed) | |
| - Compute blur_px and y_offset_px for sorting | |
| - Filter out spread-only shadows (border simulations) | |
| - Separate inset shadows into their own category | |
| - Sort by blur radius (elevation) | |
| - Deduplicate visually similar shadows | |
| - Assign semantic names (xs, sm, md, lg, xl) | |
| """ | |
| if not shadow_tokens: | |
| return [] | |
| # Step 1: Parse and compute numeric values | |
| parsed_shadows = [] | |
| for token in shadow_tokens: | |
| self._ensure_shadow_parsed(token) | |
| # Skip spread-only shadows (border simulations) | |
| if (token.blur_px is None or token.blur_px == 0) and token.spread and token.spread != "0px": | |
| continue | |
| # Skip inset shadows (different semantic — handle separately if needed) | |
| if token.inset: | |
| continue | |
| # Skip shadows with no meaningful blur | |
| if token.blur_px is not None and token.blur_px <= 0: | |
| continue | |
| parsed_shadows.append(token) | |
| if not parsed_shadows: | |
| return [] | |
| # Step 2: Deduplicate by visual similarity (same blur + y-offset range) | |
| unique_shadows = [] | |
| seen_blur_values = set() | |
| for token in parsed_shadows: | |
| blur = token.blur_px or 0 | |
| # Round to nearest 2px for dedup | |
| blur_bucket = round(blur / 2) * 2 | |
| if blur_bucket not in seen_blur_values: | |
| seen_blur_values.add(blur_bucket) | |
| unique_shadows.append(token) | |
| else: | |
| # Merge frequency with existing | |
| for existing in unique_shadows: | |
| existing_blur = round((existing.blur_px or 0) / 2) * 2 | |
| if existing_blur == blur_bucket: | |
| existing.frequency += token.frequency | |
| existing.elements = list(set(existing.elements + token.elements)) | |
| break | |
| # Step 3: Sort by blur radius (ascending = increasing elevation) | |
| unique_shadows.sort(key=lambda s: s.blur_px or 0) | |
| # Step 4: Assign semantic names based on sort order | |
| for i, token in enumerate(unique_shadows): | |
| if i < len(self.shadow_tier_names): | |
| tier_name = self.shadow_tier_names[i] | |
| else: | |
| tier_name = f"{i + 1}xl" | |
| token.suggested_name = f"shadow.{tier_name}" | |
| token.confidence = self._calculate_confidence(token.frequency) | |
| return unique_shadows | |
| def _ensure_shadow_parsed(self, token: ShadowToken): | |
| """ | |
| Ensure shadow token has parsed components and computed px values. | |
| If offset_x/offset_y/blur/spread/color are None, attempt to parse | |
| from the raw CSS value string. | |
| """ | |
| # Compute blur_px from blur string | |
| if token.blur is not None and token.blur_px is None: | |
| px = self._parse_single_length(token.blur) | |
| token.blur_px = px if px is not None else 0 | |
| # Compute y_offset_px from offset_y string | |
| if token.offset_y is not None and token.y_offset_px is None: | |
| px = self._parse_single_length(token.offset_y) | |
| token.y_offset_px = px if px is not None else 0 | |
| # If components are all None, try to parse from CSS value | |
| if token.blur is None and token.offset_x is None: | |
| self._parse_shadow_css(token) | |
    def _parse_shadow_css(self, token: ShadowToken):
        """
        Parse a CSS box-shadow value into components, mutating the token.

        Format: [inset] <offset-x> <offset-y> [blur] [spread] <color>
        Example: "0px 4px 8px 0px rgba(0,0,0,0.1)"

        Sets: inset, color, offset_x, offset_y, blur, spread (strings),
        plus numeric blur_px / y_offset_px (defaulted to 0 when absent).
        NOTE(review): the color regexes only match rgb()/rgba()/hex; a named
        color like "black" would be left in the length-parsing input —
        confirm upstream always emits rgb()/rgba()/hex.
        """
        value = token.value.strip()
        # Check for inset
        if value.startswith("inset"):
            token.inset = True
            value = value[5:].strip()
        # Extract color (rgba/rgb/hex at the end or beginning)
        color_match = re.search(
            r'(rgba?\s*\([^)]+\)|#[0-9a-fA-F]{3,8})\s*$',
            value
        )
        if color_match:
            token.color = color_match.group(1).strip()
            value = value[:color_match.start()].strip()
        else:
            # Try color at the beginning
            color_match = re.search(
                r'^(rgba?\s*\([^)]+\)|#[0-9a-fA-F]{3,8})\s+',
                value
            )
            if color_match:
                token.color = color_match.group(1).strip()
                value = value[color_match.end():].strip()
        # Parse remaining length values (positional: x, y, blur, spread)
        length_pattern = r'(-?\d+(?:\.\d+)?(?:px|rem|em|%)?)'
        lengths = re.findall(length_pattern, value)
        if len(lengths) >= 2:
            token.offset_x = lengths[0]
            token.offset_y = lengths[1]
            px = self._parse_single_length(lengths[1])
            token.y_offset_px = px if px is not None else 0
        if len(lengths) >= 3:
            token.blur = lengths[2]
            px = self._parse_single_length(lengths[2])
            token.blur_px = px if px is not None else 0
        if len(lengths) >= 4:
            token.spread = lengths[3]
        # Default blur_px to 0 if still None
        if token.blur_px is None:
            token.blur_px = 0
        if token.y_offset_px is None:
            token.y_offset_px = 0
| # ========================================================================= | |
| # SHARED UTILITIES | |
| # ========================================================================= | |
| def _calculate_confidence(self, frequency: int) -> Confidence: | |
| """Calculate confidence based on frequency.""" | |
| if frequency >= 10: | |
| return Confidence.HIGH | |
| elif frequency >= 3: | |
| return Confidence.MEDIUM | |
| else: | |
| return Confidence.LOW | |
def normalize_tokens(extracted: ExtractedTokens) -> NormalizedTokens:
    """Convenience wrapper: normalize *extracted* with a fresh TokenNormalizer."""
    return TokenNormalizer().normalize(extracted)