riazmo committed on
Commit
98da421
·
verified ·
1 Parent(s): 7ca38c3

Delete agents/normalizer.py

Browse files
Files changed (1) hide show
  1. agents/normalizer.py +0 -469
agents/normalizer.py DELETED
@@ -1,469 +0,0 @@
1
- """
2
- Agent 2: Token Normalizer & Structurer
3
- Design System Extractor v2
4
-
5
- Persona: Design System Librarian
6
-
7
- Responsibilities:
8
- - Clean noisy extraction data
9
- - Deduplicate similar tokens (colors within threshold, similar spacing)
10
- - Infer naming patterns from class names and contexts
11
- - Tag tokens as: detected | inferred | low-confidence
12
- - Group colors by role (primary, secondary, neutral, etc.)
13
- """
14
-
15
- import re
16
- from typing import Optional
17
- from collections import defaultdict
18
-
19
- from core.token_schema import (
20
- ColorToken,
21
- TypographyToken,
22
- SpacingToken,
23
- ExtractedTokens,
24
- NormalizedTokens,
25
- Confidence,
26
- TokenSource,
27
- )
28
- from core.color_utils import (
29
- parse_color,
30
- normalize_hex,
31
- categorize_color,
32
- )
33
-
34
-
35
class TokenNormalizer:
    """
    Normalizes and structures extracted tokens.

    This is Agent 2's job — taking raw extraction data and
    organizing it into a clean, deduplicated structure.

    NOTE(review): normalization mutates the incoming token objects in
    place (frequencies are summed, context lists merged, color values
    rewritten to canonical hex) rather than copying them — callers
    should not reuse the raw extraction afterwards.
    """

    def __init__(self) -> None:
        # Thresholds for duplicate detection
        self.color_similarity_threshold = 10  # Euclidean delta in RGB space
        self.spacing_merge_threshold = 2  # px difference to merge

        # Naming patterns: role -> keyword substrings searched for in a
        # color's contexts/elements. Matching is plain substring search
        # (see _infer_color_role), not word-boundary matching.
        self.color_role_keywords = {
            "primary": ["primary", "brand", "main", "accent"],
            "secondary": ["secondary", "alt", "alternate"],
            "success": ["success", "green", "positive", "valid"],
            "warning": ["warning", "yellow", "caution", "alert"],
            "error": ["error", "red", "danger", "invalid", "negative"],
            "info": ["info", "blue", "notice"],
            "neutral": ["gray", "grey", "neutral", "muted", "subtle"],
            "background": ["bg", "background", "surface"],
            "text": ["text", "foreground", "content", "body"],
            "border": ["border", "divider", "separator", "line"],
        }

    def normalize(self, extracted: ExtractedTokens) -> NormalizedTokens:
        """
        Normalize extracted tokens.

        Args:
            extracted: Raw extraction results from Agent 1

        Returns:
            NormalizedTokens with cleaned, deduplicated data
        """
        # Process each token type
        colors = self._normalize_colors(extracted.colors)
        typography = self._normalize_typography(extracted.typography)
        spacing = self._normalize_spacing(extracted.spacing)

        # Create normalized result. Radius/shadow tokens are not yet
        # normalized — they are copied through unchanged.
        normalized = NormalizedTokens(
            viewport=extracted.viewport,
            colors=colors,
            typography=typography,
            spacing=spacing,
            radius=extracted.radius,  # Pass through for now
            shadows=extracted.shadows,  # Pass through for now
            font_families=extracted.font_families,
            pages_crawled=extracted.pages_crawled,
            total_elements=extracted.total_elements,
        )

        return normalized

    def _normalize_colors(self, colors: list[ColorToken]) -> list[ColorToken]:
        """
        Normalize color tokens:
        - Deduplicate similar colors
        - Infer color roles
        - Assign suggested names
        - Calculate confidence

        Returns the surviving tokens sorted by descending frequency.
        """
        if not colors:
            return []

        # Step 1: Deduplicate by exact hex value.
        # normalize_hex presumably canonicalizes the color string (case,
        # shorthand expansion) — TODO confirm against core.color_utils.
        unique_colors = {}
        for color in colors:
            hex_val = normalize_hex(color.value)
            if hex_val in unique_colors:
                # Merge frequency and contexts into the first-seen token.
                # set() dedup means merged list order is arbitrary.
                existing = unique_colors[hex_val]
                existing.frequency += color.frequency
                existing.contexts = list(set(existing.contexts + color.contexts))
                existing.elements = list(set(existing.elements + color.elements))
                existing.css_properties = list(set(existing.css_properties + color.css_properties))
            else:
                # Rewrite the token's value to its canonical form in place.
                color.value = hex_val
                unique_colors[hex_val] = color

        # Step 2: Merge visually similar colors
        merged_colors = self._merge_similar_colors(list(unique_colors.values()))

        # Step 3: Infer roles and names
        for color in merged_colors:
            role = self._infer_color_role(color)
            if role:
                color.suggested_name = self._generate_color_name(color, role)
            else:
                color.suggested_name = self._generate_color_name_from_value(color)

            # Update confidence based on frequency
            color.confidence = self._calculate_confidence(color.frequency)

        # Sort by frequency (most used first)
        merged_colors.sort(key=lambda c: -c.frequency)

        return merged_colors

    def _merge_similar_colors(self, colors: list[ColorToken]) -> list[ColorToken]:
        """Merge colors that are visually very similar.

        O(n^2) pairwise pass: each unmerged color anchors a group of
        colors within the similarity threshold of *it* (similarity is
        tested against the anchor only, not transitively). The most
        frequent member of each group survives and absorbs the rest.
        """
        if len(colors) <= 1:
            return colors

        merged = []
        used = set()  # indices already absorbed into an earlier group

        for i, color1 in enumerate(colors):
            if i in used:
                continue

            # Find similar colors (compared against the anchor color1)
            similar_group = [color1]
            for j, color2 in enumerate(colors[i+1:], i+1):
                if j in used:
                    continue
                if self._colors_are_similar(color1.value, color2.value):
                    similar_group.append(color2)
                    used.add(j)

            # Merge the group - keep the most frequent
            similar_group.sort(key=lambda c: -c.frequency)
            primary = similar_group[0]

            # Aggregate data from similar colors into the survivor
            for other in similar_group[1:]:
                primary.frequency += other.frequency
                primary.contexts = list(set(primary.contexts + other.contexts))
                primary.elements = list(set(primary.elements + other.elements))

            merged.append(primary)
            used.add(i)

        return merged

    def _colors_are_similar(self, hex1: str, hex2: str) -> bool:
        """Check if two colors are visually similar.

        Returns False (rather than raising) for any unparseable input.
        NOTE(review): the broad `except Exception` also swallows
        unexpected errors — consider narrowing.
        """
        try:
            parsed1 = parse_color(hex1)
            parsed2 = parse_color(hex2)
            if parsed1 is None or parsed2 is None:
                return False
            if parsed1.rgb is None or parsed2.rgb is None:
                return False

            rgb1 = parsed1.rgb
            rgb2 = parsed2.rgb

            # Calculate Euclidean distance in RGB space
            distance = sum((a - b) ** 2 for a, b in zip(rgb1, rgb2)) ** 0.5
            return distance < self.color_similarity_threshold
        except Exception:
            return False

    def _infer_color_role(self, color: ColorToken) -> Optional[str]:
        """Infer the semantic role of a color from its contexts.

        Keyword matching is substring-based, so e.g. "alt" also matches
        inside longer words; first matching role (dict order) wins.
        Falls back to the color's own category for neutrals; returns
        None if no role can be inferred.
        """
        all_context = " ".join(color.contexts + color.elements).lower()

        for role, keywords in self.color_role_keywords.items():
            for keyword in keywords:
                if keyword in all_context:
                    return role

        # Try to infer from color category
        category = categorize_color(color.value)
        if category in ["gray", "white", "black"]:
            return "neutral"

        return None

    def _generate_color_name(self, color: ColorToken, role: str) -> str:
        """Generate a semantic name for a color, e.g. "color.primary.500"."""
        # Determine shade level based on luminance (Rec. 601 luma weights);
        # higher luminance maps to lighter (lower-numbered) shades.
        parsed = parse_color(color.value)
        if parsed and parsed.rgb:
            rgb = parsed.rgb
            luminance = (0.299 * rgb[0] + 0.587 * rgb[1] + 0.114 * rgb[2]) / 255
            if luminance > 0.8:
                shade = "50"
            elif luminance > 0.6:
                shade = "200"
            elif luminance > 0.4:
                shade = "500"
            elif luminance > 0.2:
                shade = "700"
            else:
                shade = "900"
        else:
            # Unparseable color: default to the mid shade.
            shade = "500"

        return f"color.{role}.{shade}"

    def _generate_color_name_from_value(self, color: ColorToken) -> str:
        """Generate a name based on the color value itself,
        e.g. "color.blue.light", when no semantic role was inferred."""
        category = categorize_color(color.value)
        parsed = parse_color(color.value)

        if parsed and parsed.rgb:
            rgb = parsed.rgb
            # Same Rec. 601 luma formula as _generate_color_name,
            # but mapped to coarse light/base/dark tiers.
            luminance = (0.299 * rgb[0] + 0.587 * rgb[1] + 0.114 * rgb[2]) / 255
            if luminance > 0.6:
                shade = "light"
            elif luminance > 0.3:
                shade = "base"
            else:
                shade = "dark"
        else:
            shade = "base"

        return f"color.{category}.{shade}"

    def _normalize_typography(self, typography: list[TypographyToken]) -> list[TypographyToken]:
        """
        Normalize typography tokens:
        - Deduplicate identical styles
        - Infer type scale categories
        - Assign suggested names

        Returns tokens sorted by descending font size.
        """
        if not typography:
            return []

        # Deduplicate by unique style combination
        # (family | size | weight | line-height).
        unique_typo = {}
        for typo in typography:
            key = f"{typo.font_family}|{typo.font_size}|{typo.font_weight}|{typo.line_height}"
            if key in unique_typo:
                existing = unique_typo[key]
                existing.frequency += typo.frequency
                existing.elements = list(set(existing.elements + typo.elements))
            else:
                unique_typo[key] = typo

        result = list(unique_typo.values())

        # Infer names based on size and elements
        for typo in result:
            typo.suggested_name = self._generate_typography_name(typo)
            typo.confidence = self._calculate_confidence(typo.frequency)

        # Sort by font size (largest first)
        result.sort(key=lambda t: -self._parse_font_size(t.font_size))

        return result

    def _generate_typography_name(self, typo: TypographyToken) -> str:
        """Generate a semantic name for typography, e.g. "font.heading.lg".

        Category comes from the token's source elements (substring
        match), size tier from the parsed pixel size.
        """
        size_px = self._parse_font_size(typo.font_size)
        elements = " ".join(typo.elements).lower()

        # Determine category from elements; first matching bucket wins.
        if any(h in elements for h in ["h1", "hero", "display"]):
            category = "display"
        elif any(h in elements for h in ["h2", "h3", "h4", "h5", "h6", "heading", "title"]):
            category = "heading"
        elif any(h in elements for h in ["label", "caption", "small", "meta"]):
            category = "label"
        elif any(h in elements for h in ["body", "p", "paragraph", "text"]):
            category = "body"
        else:
            category = "text"

        # Determine size tier
        if size_px >= 32:
            size_tier = "xl"
        elif size_px >= 24:
            size_tier = "lg"
        elif size_px >= 18:
            size_tier = "md"
        elif size_px >= 14:
            size_tier = "sm"
        else:
            size_tier = "xs"

        return f"font.{category}.{size_tier}"

    def _parse_font_size(self, size: str) -> float:
        """Parse font size string to pixels.

        Supports px, rem, em (both assuming a 16px base) and bare
        numbers; any unparseable or empty input falls back to 16.
        """
        if not size:
            return 16

        size = size.lower().strip()

        # Handle px
        if "px" in size:
            try:
                return float(size.replace("px", ""))
            except ValueError:
                return 16

        # Handle rem (assume 16px base).
        # Must be checked before "em" since "rem" contains "em".
        if "rem" in size:
            try:
                return float(size.replace("rem", "")) * 16
            except ValueError:
                return 16

        # Handle em (assume 16px base)
        if "em" in size:
            try:
                return float(size.replace("em", "")) * 16
            except ValueError:
                return 16

        # Try plain number
        try:
            return float(size)
        except ValueError:
            return 16

    def _normalize_spacing(self, spacing: list[SpacingToken]) -> list[SpacingToken]:
        """
        Normalize spacing tokens:
        - Merge similar values
        - Align to base-8 grid if close
        - Assign suggested names

        Returns tokens sorted by ascending pixel value.
        """
        if not spacing:
            return []

        # Deduplicate by value
        unique_spacing = {}
        for space in spacing:
            key = space.value
            if key in unique_spacing:
                existing = unique_spacing[key]
                existing.frequency += space.frequency
                existing.contexts = list(set(existing.contexts + space.contexts))
            else:
                unique_spacing[key] = space

        result = list(unique_spacing.values())

        # Merge very similar values
        result = self._merge_similar_spacing(result)

        # Assign names
        for space in result:
            space.suggested_name = self._generate_spacing_name(space)
            space.confidence = self._calculate_confidence(space.frequency)

        # Sort by value
        result.sort(key=lambda s: s.value_px)

        return result

    def _merge_similar_spacing(self, spacing: list[SpacingToken]) -> list[SpacingToken]:
        """Merge spacing values that are very close.

        Sorts by pixel value, then groups consecutive values within the
        merge threshold of the group's *first* member (the anchor), so a
        long run of closely spaced values does not chain indefinitely.
        """
        if len(spacing) <= 1:
            return spacing

        # Sort by pixel value (in-place on the caller-local list)
        spacing.sort(key=lambda s: s.value_px)

        merged = []
        i = 0

        while i < len(spacing):
            current = spacing[i]
            group = [current]

            # Find adjacent similar values (compared to the anchor only)
            j = i + 1
            while j < len(spacing):
                if abs(spacing[j].value_px - current.value_px) <= self.spacing_merge_threshold:
                    group.append(spacing[j])
                    j += 1
                else:
                    break

            # Merge group - prefer base-8 aligned value or most frequent.
            # fits_base_8 looks like a bool flag (True sorts first via
            # negation) — TODO confirm against SpacingToken's schema.
            group.sort(key=lambda s: (-s.fits_base_8, -s.frequency))
            primary = group[0]

            for other in group[1:]:
                primary.frequency += other.frequency
                primary.contexts = list(set(primary.contexts + other.contexts))

            merged.append(primary)
            i = j

        return merged

    def _generate_spacing_name(self, space: SpacingToken) -> str:
        """Generate a semantic name for spacing, e.g. "space.4".

        Buckets follow a Tailwind-style scale (unit ≈ 4px); values
        above 96px fall through to px/4 rounded down.
        """
        px = space.value_px

        # Map to t-shirt sizes based on value
        if px <= 2:
            size = "px"
        elif px <= 4:
            size = "0.5"
        elif px <= 8:
            size = "1"
        elif px <= 12:
            size = "1.5"
        elif px <= 16:
            size = "2"
        elif px <= 20:
            size = "2.5"
        elif px <= 24:
            size = "3"
        elif px <= 32:
            size = "4"
        elif px <= 40:
            size = "5"
        elif px <= 48:
            size = "6"
        elif px <= 64:
            size = "8"
        elif px <= 80:
            size = "10"
        elif px <= 96:
            size = "12"
        else:
            size = str(int(px / 4))

        return f"space.{size}"

    def _calculate_confidence(self, frequency: int) -> Confidence:
        """Calculate confidence based on frequency:
        >= 10 occurrences -> HIGH, >= 3 -> MEDIUM, otherwise LOW."""
        if frequency >= 10:
            return Confidence.HIGH
        elif frequency >= 3:
            return Confidence.MEDIUM
        else:
            return Confidence.LOW
464
-
465
-
466
def normalize_tokens(extracted: ExtractedTokens) -> NormalizedTokens:
    """Normalize *extracted* tokens with a throwaway TokenNormalizer.

    Convenience wrapper for callers that don't need to configure or
    reuse the normalizer instance.
    """
    return TokenNormalizer().normalize(extracted)