riazmo committed on
Commit
222ce98
·
verified ·
1 Parent(s): dfc9d31

Upload token_schema.py

Browse files
Files changed (1) hide show
  1. core/token_schema.py +479 -0
core/token_schema.py ADDED
@@ -0,0 +1,479 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Token Schema Definitions
3
+ Design System Extractor v2
4
+
5
+ Pydantic models for all token types and extraction results.
6
+ These are the core data structures used throughout the application.
7
+ """
8
+
9
+ from datetime import datetime
10
+ from enum import Enum
11
+ from typing import Optional, Any
12
+ from pydantic import BaseModel, Field, field_validator
13
+
14
+
15
+ # =============================================================================
16
+ # ENUMS
17
+ # =============================================================================
18
+
19
class TokenSource(str, Enum):
    """Where a token value came from."""

    # Found directly in the CSS
    DETECTED = "detected"
    # Derived from observed patterns
    INFERRED = "inferred"
    # Improvement chosen by the user
    UPGRADED = "upgraded"
    # Added by hand by the user
    MANUAL = "manual"
25
+
26
+
27
class Confidence(str, Enum):
    """How confident the extractor is in a token.

    The occurrence ranges noted on each member describe the intended
    meaning; nothing in this enum enforces them.
    """

    # 10+ occurrences with consistent usage
    HIGH = "high"
    # 3-9 occurrences
    MEDIUM = "medium"
    # 1-2 occurrences, or conflicting usage
    LOW = "low"
32
+
33
+
34
class Viewport(str, Enum):
    """Viewport a page was rendered at."""

    # 1440px wide
    DESKTOP = "desktop"
    # 375px wide
    MOBILE = "mobile"
38
+
39
+
40
class PageType(str, Enum):
    """Kind of page template a URL represents."""

    HOMEPAGE = "homepage"
    LISTING = "listing"
    DETAIL = "detail"
    FORM = "form"
    MARKETING = "marketing"
    AUTH = "auth"
    CHECKOUT = "checkout"
    ABOUT = "about"
    CONTACT = "contact"
    # Fallback for pages that match none of the types above
    OTHER = "other"
52
+
53
+
54
+ # =============================================================================
55
+ # BASE TOKEN MODEL
56
+ # =============================================================================
57
+
58
class BaseToken(BaseModel):
    """Fields shared by every token model.

    Holds the token's origin, confidence rating, frequency and an optional
    suggested name, followed by the state used to track user decisions.
    """

    # Origin and scoring
    source: TokenSource = TokenSource.DETECTED
    confidence: Confidence = Confidence.MEDIUM
    frequency: int = 0
    suggested_name: Optional[str] = None

    # User decisions: tokens start out accepted and unflagged
    accepted: bool = True
    flagged: bool = False
    notes: Optional[str] = None
69
+
70
+
71
+ # =============================================================================
72
+ # COLOR TOKENS
73
+ # =============================================================================
74
+
75
class ColorToken(BaseToken):
    """Extracted color token.

    ``value`` holds the hex form of the color and is normalized by
    :meth:`validate_hex`; the RGB/HSL strings, usage context and
    accessibility figures are optional companions filled in by callers.
    """

    # Hex value, e.g. "#007bff"
    value: str
    # e.g. "rgb(0, 123, 255)"
    value_rgb: Optional[str] = None
    # e.g. "hsl(211, 100%, 50%)"
    value_hsl: Optional[str] = None

    # Context information
    # e.g. ["background", "text", "border"]
    contexts: list[str] = Field(default_factory=list)
    # e.g. ["button", "header", "link"]
    elements: list[str] = Field(default_factory=list)
    # e.g. ["background-color", "color"]
    css_properties: list[str] = Field(default_factory=list)

    # Accessibility
    # Contrast ratio against white
    contrast_white: Optional[float] = None
    # Contrast ratio against black
    contrast_black: Optional[float] = None
    wcag_aa_large_text: bool = False
    wcag_aa_small_text: bool = False
    wcag_aaa_large_text: bool = False
    wcag_aaa_small_text: bool = False

    @field_validator("value")
    @classmethod
    def validate_hex(cls, v: str) -> str:
        """Normalize a hex color string.

        The input is stripped and lower-cased, and a leading ``#`` is added
        when missing. CSS shorthand forms are then expanded by doubling each
        digit: ``#rgb`` becomes ``#rrggbb`` and ``#rgba`` becomes
        ``#rrggbbaa``.

        Shorthand expansion only happens when every character after the
        ``#`` is a hex digit, so a non-hex string that merely has a
        shorthand length (e.g. ``"red"``) is not doubled. No error is raised
        for non-hex input; it is returned with just the strip / lower-case /
        ``#`` steps applied.

        Args:
            v: Raw color value supplied for the ``value`` field.

        Returns:
            The normalized color string.
        """
        v = v.strip().lower()
        if not v.startswith("#"):
            v = f"#{v}"

        digits = v[1:]
        # ``v`` is already lower-cased, so only lower-case hex digits occur.
        is_hex = all(ch in "0123456789abcdef" for ch in digits)
        if is_hex and len(digits) in (3, 4):
            # Expand #rgb / #rgba shorthand to the full-length form.
            v = "#" + "".join(ch * 2 for ch in digits)
        return v
105
+
106
+
107
class ColorRamp(BaseModel):
    """A named color ramp of shades generated from one base color."""

    # The originally extracted color the ramp is built from
    base_color: str
    # Ramp name, e.g. "primary" or "neutral"
    name: str
    # Shade key -> color, e.g. {"50": "#e6f2ff", "500": "#007bff", ...}
    shades: dict[str, str] = Field(default_factory=dict)
    source: TokenSource = TokenSource.UPGRADED
113
+
114
+
115
+ # =============================================================================
116
+ # TYPOGRAPHY TOKENS
117
+ # =============================================================================
118
+
119
class TypographyToken(BaseToken):
    """Extracted typography token (one font family/size/weight combination)."""

    font_family: str
    # Size as written, e.g. "16px" or "1rem"
    font_size: str
    # Size computed in px, when available
    font_size_px: Optional[float] = None
    font_weight: int = 400
    # Line height as written, e.g. "1.5" or "24px"
    line_height: str = "1.5"
    # Line height computed as a ratio, when available
    line_height_computed: Optional[float] = None
    letter_spacing: Optional[str] = None
    # e.g. "uppercase", "lowercase"
    text_transform: Optional[str] = None

    # Context
    # e.g. ["h1", "p", "button"]
    elements: list[str] = Field(default_factory=list)
    # e.g. [".heading", ".body-text"]
    css_selectors: list[str] = Field(default_factory=list)
133
+
134
+
135
class TypeScale(BaseModel):
    """Configuration of a typography scale."""

    # Scale name, e.g. "Major Third" or "Perfect Fourth"
    name: str
    # Scale ratio, e.g. 1.25 or 1.333
    ratio: float
    # Base size in px
    base_size: int = 16
    # Step name -> size, e.g. {"xs": "12px", "sm": "14px", ...}
    sizes: dict[str, str] = Field(default_factory=dict)
    source: TokenSource = TokenSource.UPGRADED
142
+
143
+
144
class FontFamily(BaseModel):
    """Information about one font family."""

    # Family name, e.g. "Inter"
    name: str
    # Fallback stack, e.g. ["system-ui", "sans-serif"]
    fallbacks: list[str] = Field(default_factory=list)
    # "serif", "sans-serif" or "monospace"
    category: str = "sans-serif"
    frequency: int = 0
    # "primary", "secondary", "accent" or "monospace"
    usage: str = "primary"
151
+
152
+
153
+ # =============================================================================
154
+ # SPACING TOKENS
155
+ # =============================================================================
156
+
157
class SpacingToken(BaseToken):
    """Extracted spacing token."""

    # Value as written, e.g. "16px"
    value: str
    # Same value as an integer number of px, e.g. 16
    value_px: int

    # Context
    # e.g. ["margin", "padding", "gap"]
    contexts: list[str] = Field(default_factory=list)
    # e.g. ["margin-top", "padding-left"]
    properties: list[str] = Field(default_factory=list)

    # Analysis flags (set by callers; nothing here computes them)
    # Divisible by 4
    fits_base_4: bool = False
    # Divisible by 8
    fits_base_8: bool = False
    # Doesn't fit common patterns
    is_outlier: bool = False
170
+
171
+
172
class SpacingScale(BaseModel):
    """Configuration of a spacing scale."""

    # Scale name, e.g. "8px base"
    name: str
    # Base unit, e.g. 8
    base: int
    # Scale steps, e.g. [4, 8, 16, 24, 32, 48, 64]
    scale: list[int] = Field(default_factory=list)
    # Step value -> step name, e.g. {4: "xs", 8: "sm", 16: "md"}
    names: dict[int, str] = Field(default_factory=dict)
    source: TokenSource = TokenSource.UPGRADED
179
+
180
+
181
+ # =============================================================================
182
+ # BORDER RADIUS TOKENS
183
+ # =============================================================================
184
+
185
class RadiusToken(BaseToken):
    """Extracted border-radius token."""

    # Value as written, e.g. "8px" or "50%"
    value: str
    # Integer px value, set only when the radius is a px value
    value_px: Optional[int] = None

    # Context
    # e.g. ["button", "card", "input"]
    elements: list[str] = Field(default_factory=list)

    # Analysis flags
    fits_base_4: bool = False
    fits_base_8: bool = False
196
+
197
+
198
+ # =============================================================================
199
+ # SHADOW TOKENS
200
+ # =============================================================================
201
+
202
class ShadowToken(BaseToken):
    """Extracted box-shadow token."""

    # The full CSS shadow value
    value: str

    # Parsed components of ``value``; each stays None until supplied
    offset_x: Optional[str] = None
    offset_y: Optional[str] = None
    blur: Optional[str] = None
    spread: Optional[str] = None
    color: Optional[str] = None
    inset: bool = False

    # Context
    elements: list[str] = Field(default_factory=list)
216
+
217
+
218
+ # =============================================================================
219
+ # PAGE & CRAWL MODELS
220
+ # =============================================================================
221
+
222
class DiscoveredPage(BaseModel):
    """One page found while crawling."""

    url: str
    title: Optional[str] = None
    page_type: PageType = PageType.OTHER
    # Distance from the homepage
    depth: int = 0
    # Pages start selected; the user can deselect them
    selected: bool = True

    # Crawl status
    crawled: bool = False
    error: Optional[str] = None
233
+
234
+
235
class CrawlResult(BaseModel):
    """Outcome of crawling a single page at one viewport."""

    url: str
    viewport: Viewport
    success: bool

    # Timing
    started_at: datetime
    completed_at: Optional[datetime] = None
    duration_ms: Optional[int] = None

    # Result counts
    colors_found: int = 0
    typography_found: int = 0
    spacing_found: int = 0

    # Problems reported for the crawl
    error: Optional[str] = None
    warnings: list[str] = Field(default_factory=list)
254
+
255
+
256
+ # =============================================================================
257
+ # EXTRACTION RESULT
258
+ # =============================================================================
259
+
260
class ExtractedTokens(BaseModel):
    """Complete extraction result for one viewport."""

    viewport: Viewport
    source_url: str
    pages_crawled: list[str] = Field(default_factory=list)

    # Extracted tokens, one list per token type
    colors: list[ColorToken] = Field(default_factory=list)
    typography: list[TypographyToken] = Field(default_factory=list)
    spacing: list[SpacingToken] = Field(default_factory=list)
    radius: list[RadiusToken] = Field(default_factory=list)
    shadows: list[ShadowToken] = Field(default_factory=list)

    # Detected patterns
    font_families: list[FontFamily] = Field(default_factory=list)
    base_font_size: Optional[str] = None
    # Detected base: 4 or 8
    spacing_base: Optional[int] = None
    # "bem", "utility" or "none"
    naming_convention: Optional[str] = None

    # Metadata
    extraction_timestamp: datetime = Field(default_factory=datetime.now)
    extraction_duration_ms: Optional[int] = None

    # Quality indicators
    total_elements_analyzed: int = 0
    unique_colors: int = 0
    unique_font_sizes: int = 0
    unique_spacing_values: int = 0

    # Issues
    errors: list[str] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)

    def summary(self) -> dict:
        """Return a compact overview of this extraction.

        Returns:
            A dict with the viewport's string value under ``"viewport"``,
            followed by the length of each list-valued collection keyed by
            its attribute name.
        """
        # Every key after "viewport" is the length of the same-named attribute.
        counted_attrs = (
            "pages_crawled",
            "colors",
            "typography",
            "spacing",
            "radius",
            "shadows",
            "font_families",
            "errors",
            "warnings",
        )
        overview = {"viewport": self.viewport.value}
        overview.update((attr, len(getattr(self, attr))) for attr in counted_attrs)
        return overview
307
+
308
+
309
+ # =============================================================================
310
+ # NORMALIZED TOKENS (Agent 2 Output)
311
+ # =============================================================================
312
+
313
class NormalizedTokens(BaseModel):
    """Normalized, structured tokens produced by Agent 2."""

    viewport: Viewport
    source_url: str

    # Normalized tokens keyed by suggested name, e.g. {"primary-500": ColorToken}
    colors: dict[str, ColorToken] = Field(default_factory=dict)
    typography: dict[str, TypographyToken] = Field(default_factory=dict)
    spacing: dict[str, SpacingToken] = Field(default_factory=dict)
    radius: dict[str, RadiusToken] = Field(default_factory=dict)
    shadows: dict[str, ShadowToken] = Field(default_factory=dict)

    # Detected info
    font_families: list[FontFamily] = Field(default_factory=list)
    detected_spacing_base: Optional[int] = None
    detected_naming_convention: Optional[str] = None

    # Duplicates & conflicts
    # Pairs of colors, e.g. [("#1a1a1a", "#1b1b1b"), ...]
    duplicate_colors: list[tuple[str, str]] = Field(default_factory=list)
    conflicting_tokens: list[str] = Field(default_factory=list)

    # Metadata
    normalized_at: datetime = Field(default_factory=datetime.now)
336
+
337
+
338
+ # =============================================================================
339
+ # UPGRADE OPTIONS (Agent 3 Output)
340
+ # =============================================================================
341
+
342
class UpgradeOption(BaseModel):
    """One upgrade option that can be offered to the user."""

    id: str
    name: str
    description: str
    # "typography", "spacing", "colors" or "naming"
    category: str

    # The actual values this option would apply
    values: dict[str, Any] = Field(default_factory=dict)

    # Metadata
    pros: list[str] = Field(default_factory=list)
    cons: list[str] = Field(default_factory=list)
    # "low", "medium" or "high"
    effort: str = "low"
    recommended: bool = False

    # Selection state
    selected: bool = False
360
+
361
+
362
class UpgradeRecommendations(BaseModel):
    """Every upgrade recommendation produced by Agent 3."""

    # Options grouped by category
    typography_scales: list[UpgradeOption] = Field(default_factory=list)
    spacing_systems: list[UpgradeOption] = Field(default_factory=list)
    color_ramps: list[UpgradeOption] = Field(default_factory=list)
    naming_conventions: list[UpgradeOption] = Field(default_factory=list)

    # LLM analysis results
    llm_rationale: str = ""
    detected_patterns: list[str] = Field(default_factory=list)
    # From LLM research
    brand_analysis: list[dict] = Field(default_factory=list)
    color_observations: str = ""

    # Accessibility
    accessibility_issues: list[str] = Field(default_factory=list)
    accessibility_fixes: list[UpgradeOption] = Field(default_factory=list)

    # Metadata
    generated_at: datetime = Field(default_factory=datetime.now)
383
+
384
+
385
+ # =============================================================================
386
+ # FINAL OUTPUT (Agent 4 Output)
387
+ # =============================================================================
388
+
389
class TokenMetadata(BaseModel):
    """Metadata attached to an exported token set."""

    source_url: str
    extracted_at: datetime
    version: str
    viewport: Viewport
    # Name of the tool recorded as the generator of the export
    generator: str = "Design System Extractor v2"
396
+
397
+
398
class FinalTokens(BaseModel):
    """Final exported token set."""

    metadata: TokenMetadata

    # Token collections, each mapping a token name to its data dict
    colors: dict[str, dict] = Field(default_factory=dict)
    typography: dict[str, dict] = Field(default_factory=dict)
    spacing: dict[str, dict] = Field(default_factory=dict)
    radius: dict[str, dict] = Field(default_factory=dict)
    shadows: dict[str, dict] = Field(default_factory=dict)

    def to_tokens_studio_format(self) -> dict:
        """Return the tokens as a Tokens Studio compatible dict.

        Returns:
            A dict with a ``"$metadata"`` entry (source URL and version)
            followed by the five token collections under the keys
            ``"color"``, ``"typography"``, ``"spacing"``, ``"borderRadius"``
            and ``"boxShadow"``.
        """
        meta = self.metadata
        header = {"source": meta.source_url, "version": meta.version}
        return {
            "$metadata": header,
            "color": self.colors,
            "typography": self.typography,
            "spacing": self.spacing,
            "borderRadius": self.radius,
            "boxShadow": self.shadows,
        }

    def to_css_variables(self) -> str:
        """Render colors and spacing as CSS custom properties.

        Colors are emitted first as ``--color-<name>`` and spacing second as
        ``--space-<name>``, all inside a single ``:root`` rule. Typography,
        radius and shadows are not emitted.

        Returns:
            The CSS text, one declaration per line.
        """
        declarations = []
        groups = (("color", self.colors), ("space", self.spacing))
        for prefix, collection in groups:
            for token_name, payload in collection.items():
                if isinstance(payload, dict):
                    # A dict without a "value" key falls back to the dict itself.
                    rendered = payload.get("value", payload)
                else:
                    rendered = payload
                declarations.append(f" --{prefix}-{token_name}: {rendered};")
        return "\n".join([":root {", *declarations, "}"])
437
+
438
+
439
+ # =============================================================================
440
+ # LANGGRAPH STATE
441
+ # =============================================================================
442
+
443
class WorkflowState(BaseModel):
    """LangGraph workflow state.

    Holds the input URL plus the output of each phase; every per-phase
    result stays ``None`` (or an empty collection) until it is supplied.
    """

    # Pydantic v2 configuration. The nested ``class Config`` form is
    # deprecated in v2 and emits a deprecation warning; a plain dict is
    # accepted for ``model_config`` and needs no extra import.
    model_config = {"arbitrary_types_allowed": True}

    # Input
    base_url: str

    # Discovery phase
    discovered_pages: list[DiscoveredPage] = Field(default_factory=list)
    confirmed_pages: list[str] = Field(default_factory=list)

    # Extraction phase
    desktop_tokens: Optional[ExtractedTokens] = None
    mobile_tokens: Optional[ExtractedTokens] = None

    # Normalization phase
    desktop_normalized: Optional[NormalizedTokens] = None
    mobile_normalized: Optional[NormalizedTokens] = None

    # Upgrade phase
    upgrade_recommendations: Optional[UpgradeRecommendations] = None
    # e.g. {"typography_scale": "major_third", ...}
    selected_upgrades: dict[str, str] = Field(default_factory=dict)

    # Generation phase
    desktop_final: Optional[FinalTokens] = None
    mobile_final: Optional[FinalTokens] = None

    # Workflow status
    # One of "init", "discover", "confirm", "extract", "normalize",
    # "review", "upgrade", "generate", "export"
    current_stage: str = "init"
    errors: list[str] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)

    # Timestamps
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None