""" Token Schema Definitions Design System Extractor v2 Pydantic models for all token types and extraction results. These are the core data structures used throughout the application. """ from datetime import datetime from enum import Enum from typing import Optional, Any from pydantic import BaseModel, Field, field_validator # ============================================================================= # ENUMS # ============================================================================= class TokenSource(str, Enum): """Origin of a token value.""" DETECTED = "detected" # Directly found in CSS INFERRED = "inferred" # Derived from patterns UPGRADED = "upgraded" # User-selected improvement MANUAL = "manual" # User manually added class Confidence(str, Enum): """Confidence level for extracted tokens.""" HIGH = "high" # 10+ occurrences, consistent usage MEDIUM = "medium" # 3-9 occurrences LOW = "low" # 1-2 occurrences or conflicting class Viewport(str, Enum): """Viewport type.""" DESKTOP = "desktop" # 1440px width MOBILE = "mobile" # 375px width class PageType(str, Enum): """Type of page template.""" HOMEPAGE = "homepage" LISTING = "listing" DETAIL = "detail" FORM = "form" MARKETING = "marketing" AUTH = "auth" CHECKOUT = "checkout" ABOUT = "about" CONTACT = "contact" OTHER = "other" # ============================================================================= # BASE TOKEN MODEL # ============================================================================= class BaseToken(BaseModel): """Base class for all tokens.""" source: TokenSource = TokenSource.DETECTED confidence: Confidence = Confidence.MEDIUM frequency: int = 0 suggested_name: Optional[str] = None # For tracking user decisions accepted: bool = True flagged: bool = False notes: Optional[str] = None # ============================================================================= # COLOR TOKENS # ============================================================================= class ColorToken(BaseToken): 
"""Extracted color token.""" value: str # hex value (e.g., "#007bff") value_rgb: Optional[str] = None # "rgb(0, 123, 255)" value_hsl: Optional[str] = None # "hsl(211, 100%, 50%)" # Context information contexts: list[str] = Field(default_factory=list) # ["background", "text", "border"] elements: list[str] = Field(default_factory=list) # ["button", "header", "link"] css_properties: list[str] = Field(default_factory=list) # ["background-color", "color"] # Accessibility contrast_white: Optional[float] = None # Contrast ratio against white contrast_black: Optional[float] = None # Contrast ratio against black wcag_aa_large_text: bool = False wcag_aa_small_text: bool = False wcag_aaa_large_text: bool = False wcag_aaa_small_text: bool = False @field_validator("value") @classmethod def validate_hex(cls, v: str) -> str: """Ensure hex color is properly formatted.""" v = v.strip().lower() if not v.startswith("#"): v = f"#{v}" # Convert 3-digit hex to 6-digit if len(v) == 4: v = f"#{v[1]}{v[1]}{v[2]}{v[2]}{v[3]}{v[3]}" return v class ColorRamp(BaseModel): """Generated color ramp with shades.""" base_color: str # Original extracted color name: str # e.g., "primary", "neutral" shades: dict[str, str] = Field(default_factory=dict) # {"50": "#e6f2ff", "500": "#007bff", ...} source: TokenSource = TokenSource.UPGRADED # ============================================================================= # TYPOGRAPHY TOKENS # ============================================================================= class TypographyToken(BaseToken): """Extracted typography token.""" font_family: str font_size: str # "16px" or "1rem" font_size_px: Optional[float] = None # Computed px value font_weight: int = 400 line_height: str = "1.5" # "1.5" or "24px" line_height_computed: Optional[float] = None # Computed ratio letter_spacing: Optional[str] = None text_transform: Optional[str] = None # "uppercase", "lowercase", etc. 
# Context elements: list[str] = Field(default_factory=list) # ["h1", "p", "button"] css_selectors: list[str] = Field(default_factory=list) # [".heading", ".body-text"] class TypeScale(BaseModel): """Typography scale configuration.""" name: str # "Major Third", "Perfect Fourth" ratio: float # 1.25, 1.333 base_size: int = 16 # px sizes: dict[str, str] = Field(default_factory=dict) # {"xs": "12px", "sm": "14px", ...} source: TokenSource = TokenSource.UPGRADED class FontFamily(BaseModel): """Font family information.""" name: str # "Inter" fallbacks: list[str] = Field(default_factory=list) # ["system-ui", "sans-serif"] category: str = "sans-serif" # "serif", "sans-serif", "monospace" frequency: int = 0 usage: str = "primary" # "primary", "secondary", "accent", "monospace" # ============================================================================= # SPACING TOKENS # ============================================================================= class SpacingToken(BaseToken): """Extracted spacing token.""" value: str # "16px" value_px: int # 16 # Context contexts: list[str] = Field(default_factory=list) # ["margin", "padding", "gap"] properties: list[str] = Field(default_factory=list) # ["margin-top", "padding-left"] # Analysis fits_base_4: bool = False # Divisible by 4 fits_base_8: bool = False # Divisible by 8 is_outlier: bool = False # Doesn't fit common patterns class SpacingScale(BaseModel): """Spacing scale configuration.""" name: str # "8px base" base: int # 8 scale: list[int] = Field(default_factory=list) # [4, 8, 16, 24, 32, 48, 64] names: dict[int, str] = Field(default_factory=dict) # {4: "xs", 8: "sm", 16: "md"} source: TokenSource = TokenSource.UPGRADED # ============================================================================= # BORDER RADIUS TOKENS # ============================================================================= class RadiusToken(BaseToken): """Extracted border radius token.""" value: str # "8px" or "50%" value_px: Optional[int] = None # 
If px value # Context elements: list[str] = Field(default_factory=list) # ["button", "card", "input"] # Analysis fits_base_4: bool = False fits_base_8: bool = False # ============================================================================= # SHADOW TOKENS # ============================================================================= class ShadowToken(BaseToken): """Extracted box shadow token.""" value: str # Full CSS shadow value # Parsed components offset_x: Optional[str] = None offset_y: Optional[str] = None blur: Optional[str] = None spread: Optional[str] = None color: Optional[str] = None inset: bool = False # Context elements: list[str] = Field(default_factory=list) # ============================================================================= # PAGE & CRAWL MODELS # ============================================================================= class DiscoveredPage(BaseModel): """A page discovered during crawling.""" url: str title: Optional[str] = None page_type: PageType = PageType.OTHER depth: int = 0 # Distance from homepage selected: bool = True # User can deselect pages # Crawl status crawled: bool = False error: Optional[str] = None class CrawlResult(BaseModel): """Result of crawling a single page.""" url: str viewport: Viewport success: bool # Timing started_at: datetime completed_at: Optional[datetime] = None duration_ms: Optional[int] = None # Results colors_found: int = 0 typography_found: int = 0 spacing_found: int = 0 # Errors error: Optional[str] = None warnings: list[str] = Field(default_factory=list) # ============================================================================= # EXTRACTION RESULT # ============================================================================= class ExtractedTokens(BaseModel): """Complete extraction result for one viewport.""" viewport: Viewport source_url: str pages_crawled: list[str] = Field(default_factory=list) # Extracted tokens colors: list[ColorToken] = Field(default_factory=list) typography: 
list[TypographyToken] = Field(default_factory=list) spacing: list[SpacingToken] = Field(default_factory=list) radius: list[RadiusToken] = Field(default_factory=list) shadows: list[ShadowToken] = Field(default_factory=list) # Detected patterns font_families: list[FontFamily] = Field(default_factory=list) base_font_size: Optional[str] = None spacing_base: Optional[int] = None # Detected: 4 or 8 naming_convention: Optional[str] = None # "bem", "utility", "none" # Metadata extraction_timestamp: datetime = Field(default_factory=datetime.now) extraction_duration_ms: Optional[int] = None # Quality indicators total_elements_analyzed: int = 0 unique_colors: int = 0 unique_font_sizes: int = 0 unique_spacing_values: int = 0 # Issues errors: list[str] = Field(default_factory=list) warnings: list[str] = Field(default_factory=list) def summary(self) -> dict: """Get extraction summary.""" return { "viewport": self.viewport.value, "pages_crawled": len(self.pages_crawled), "colors": len(self.colors), "typography": len(self.typography), "spacing": len(self.spacing), "radius": len(self.radius), "shadows": len(self.shadows), "font_families": len(self.font_families), "errors": len(self.errors), "warnings": len(self.warnings), } # ============================================================================= # NORMALIZED TOKENS (Agent 2 Output) # ============================================================================= class NormalizedTokens(BaseModel): """Normalized and structured tokens from Agent 2.""" viewport: Viewport source_url: str # Normalized tokens with suggested names colors: dict[str, ColorToken] = Field(default_factory=dict) # {"primary-500": ColorToken, ...} typography: dict[str, TypographyToken] = Field(default_factory=dict) spacing: dict[str, SpacingToken] = Field(default_factory=dict) radius: dict[str, RadiusToken] = Field(default_factory=dict) shadows: dict[str, ShadowToken] = Field(default_factory=dict) # Detected info font_families: list[FontFamily] = 
Field(default_factory=list) detected_spacing_base: Optional[int] = None detected_naming_convention: Optional[str] = None # Duplicates & conflicts duplicate_colors: list[tuple[str, str]] = Field(default_factory=list) # [("#1a1a1a", "#1b1b1b"), ...] conflicting_tokens: list[str] = Field(default_factory=list) # Metadata normalized_at: datetime = Field(default_factory=datetime.now) # ============================================================================= # UPGRADE OPTIONS (Agent 3 Output) # ============================================================================= class UpgradeOption(BaseModel): """A single upgrade option.""" id: str name: str description: str category: str # "typography", "spacing", "colors", "naming" # The actual values values: dict[str, Any] = Field(default_factory=dict) # Metadata pros: list[str] = Field(default_factory=list) cons: list[str] = Field(default_factory=list) effort: str = "low" # "low", "medium", "high" recommended: bool = False # Selection state selected: bool = False class UpgradeRecommendations(BaseModel): """All upgrade recommendations from Agent 3.""" # Options by category typography_scales: list[UpgradeOption] = Field(default_factory=list) spacing_systems: list[UpgradeOption] = Field(default_factory=list) color_ramps: list[UpgradeOption] = Field(default_factory=list) naming_conventions: list[UpgradeOption] = Field(default_factory=list) # LLM analysis results llm_rationale: str = "" detected_patterns: list[str] = Field(default_factory=list) brand_analysis: list[dict] = Field(default_factory=list) # From LLM research color_observations: str = "" # Accessibility accessibility_issues: list[str] = Field(default_factory=list) accessibility_fixes: list[UpgradeOption] = Field(default_factory=list) # Metadata generated_at: datetime = Field(default_factory=datetime.now) # ============================================================================= # FINAL OUTPUT (Agent 4 Output) # 
class TokenMetadata(BaseModel):
    """Metadata for exported tokens."""

    source_url: str
    extracted_at: datetime
    version: str
    viewport: Viewport
    generator: str = "Design System Extractor v2"


class FinalTokens(BaseModel):
    """Final exported token set."""

    metadata: TokenMetadata

    # Token collections
    colors: dict[str, dict] = Field(default_factory=dict)
    typography: dict[str, dict] = Field(default_factory=dict)
    spacing: dict[str, dict] = Field(default_factory=dict)
    radius: dict[str, dict] = Field(default_factory=dict)
    shadows: dict[str, dict] = Field(default_factory=dict)

    def to_tokens_studio_format(self) -> dict:
        """Convert to Tokens Studio compatible format."""
        return {
            "$metadata": {
                "source": self.metadata.source_url,
                "version": self.metadata.version,
            },
            "color": self.colors,
            "typography": self.typography,
            "spacing": self.spacing,
            "borderRadius": self.radius,
            "boxShadow": self.shadows,
        }

    def to_css_variables(self) -> str:
        """Convert to CSS custom properties.

        Emits one ``--color-*`` variable per color and one ``--space-*``
        variable per spacing entry. Entries may be raw values or dicts
        with a "value" key.
        NOTE(review): radius/shadows/typography are not emitted here —
        confirm whether that is intentional.
        """
        lines = [":root {"]
        for name, data in self.colors.items():
            value = data.get("value", data) if isinstance(data, dict) else data
            lines.append(f"  --color-{name}: {value};")
        for name, data in self.spacing.items():
            value = data.get("value", data) if isinstance(data, dict) else data
            lines.append(f"  --space-{name}: {value};")
        lines.append("}")
        return "\n".join(lines)


# =============================================================================
# LANGGRAPH STATE
# =============================================================================


class WorkflowState(BaseModel):
    """LangGraph workflow state."""

    # Pydantic v2 configuration (the file already targets v2 via
    # `field_validator`); replaces the deprecated `class Config` form.
    model_config = {"arbitrary_types_allowed": True}

    # Input
    base_url: str

    # Discovery phase
    discovered_pages: list[DiscoveredPage] = Field(default_factory=list)
    confirmed_pages: list[str] = Field(default_factory=list)

    # Extraction phase
    desktop_tokens: Optional[ExtractedTokens] = None
    mobile_tokens: Optional[ExtractedTokens] = None

    # Normalization phase
    desktop_normalized: Optional[NormalizedTokens] = None
    mobile_normalized: Optional[NormalizedTokens] = None

    # Upgrade phase
    upgrade_recommendations: Optional[UpgradeRecommendations] = None
    selected_upgrades: dict[str, str] = Field(default_factory=dict)  # {"typography_scale": "major_third", ...}

    # Generation phase
    desktop_final: Optional[FinalTokens] = None
    mobile_final: Optional[FinalTokens] = None

    # Workflow status
    current_stage: str = "init"  # "init", "discover", "confirm", "extract", "normalize", "review", "upgrade", "generate", "export"
    errors: list[str] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)

    # Timestamps
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None