riazmo commited on
Commit
dfc9d31
·
verified ·
1 Parent(s): dcc5281

Delete core/token_schema.py

Browse files
Files changed (1) hide show
  1. core/token_schema.py +0 -477
core/token_schema.py DELETED
@@ -1,477 +0,0 @@
1
- """
2
- Token Schema Definitions
3
- Design System Extractor v2
4
-
5
- Pydantic models for all token types and extraction results.
6
- These are the core data structures used throughout the application.
7
- """
8
-
9
- from datetime import datetime
10
- from enum import Enum
11
- from typing import Optional, Any
12
- from pydantic import BaseModel, Field, field_validator
13
-
14
-
15
- # =============================================================================
16
- # ENUMS
17
- # =============================================================================
18
-
19
class TokenSource(str, Enum):
    """Origin of a token value.

    Subclasses ``str`` so members serialize as plain strings in
    pydantic models and JSON exports.
    """
    DETECTED = "detected"  # Directly found in CSS
    INFERRED = "inferred"  # Derived from patterns
    UPGRADED = "upgraded"  # User-selected improvement
    MANUAL = "manual"      # User manually added
25
-
26
-
27
class Confidence(str, Enum):
    """Confidence level for extracted tokens, based on occurrence counts."""
    HIGH = "high"      # 10+ occurrences, consistent usage
    MEDIUM = "medium"  # 3-9 occurrences
    LOW = "low"        # 1-2 occurrences or conflicting
32
-
33
-
34
class Viewport(str, Enum):
    """Viewport type used during crawling/extraction."""
    DESKTOP = "desktop"  # 1440px width
    MOBILE = "mobile"    # 375px width
38
-
39
-
40
class PageType(str, Enum):
    """Type of page template a discovered page is classified as."""
    HOMEPAGE = "homepage"
    LISTING = "listing"
    DETAIL = "detail"
    FORM = "form"
    MARKETING = "marketing"
    AUTH = "auth"
    CHECKOUT = "checkout"
    ABOUT = "about"
    CONTACT = "contact"
    OTHER = "other"  # Fallback when no specific template matches
52
-
53
-
54
- # =============================================================================
55
- # BASE TOKEN MODEL
56
- # =============================================================================
57
-
58
class BaseToken(BaseModel):
    """Base class for all tokens.

    Carries provenance (source/confidence/frequency) plus the
    user-review state shared by every concrete token type.
    """
    source: TokenSource = TokenSource.DETECTED   # How the value was obtained
    confidence: Confidence = Confidence.MEDIUM   # Extraction confidence bucket
    frequency: int = 0                           # Occurrence count in the crawl
    suggested_name: Optional[str] = None         # Proposed token name, if any

    # For tracking user decisions
    accepted: bool = True    # User kept this token
    flagged: bool = False    # User marked it for follow-up
    notes: Optional[str] = None
69
-
70
-
71
- # =============================================================================
72
- # COLOR TOKENS
73
- # =============================================================================
74
-
75
class ColorToken(BaseToken):
    """Extracted color token.

    ``value`` is normalized to a lowercase ``#rrggbb`` (or ``#rrggbbaa``)
    hex string by the validator below.
    """
    value: str                       # hex value (e.g., "#007bff")
    value_rgb: Optional[str] = None  # "rgb(0, 123, 255)"
    value_hsl: Optional[str] = None  # "hsl(211, 100%, 50%)"

    # Context information
    contexts: list[str] = Field(default_factory=list)        # ["background", "text", "border"]
    elements: list[str] = Field(default_factory=list)        # ["button", "header", "link"]
    css_properties: list[str] = Field(default_factory=list)  # ["background-color", "color"]

    # Accessibility
    contrast_white: Optional[float] = None  # Contrast ratio against white
    contrast_black: Optional[float] = None  # Contrast ratio against black
    wcag_aa_large_text: bool = False
    wcag_aa_small_text: bool = False
    wcag_aaa_large_text: bool = False
    wcag_aaa_small_text: bool = False

    @field_validator("value")
    @classmethod
    def validate_hex(cls, v: str) -> str:
        """Normalize a hex color: lowercase, leading '#', shorthand expanded.

        Accepts 3/4-digit shorthand (#rgb / #rgba) and full 6/8-digit
        forms. Raises ValueError for non-hex characters or unsupported
        lengths instead of silently storing a malformed value.
        """
        v = v.strip().lower()
        if not v.startswith("#"):
            v = f"#{v}"
        digits = v[1:]
        # Reject malformed input early (the previous version passed it through).
        if len(digits) not in (3, 4, 6, 8) or any(
            c not in "0123456789abcdef" for c in digits
        ):
            raise ValueError(f"invalid hex color: {v!r}")
        # Expand shorthand: #rgb -> #rrggbb, #rgba -> #rrggbbaa.
        if len(digits) in (3, 4):
            v = "#" + "".join(c * 2 for c in digits)
        return v
105
-
106
-
107
class ColorRamp(BaseModel):
    """Generated color ramp with shades derived from one base color."""
    base_color: str  # Original extracted color
    name: str        # e.g., "primary", "neutral"
    # Shade step -> hex value, e.g. {"50": "#e6f2ff", "500": "#007bff", ...}
    shades: dict[str, str] = Field(default_factory=dict)
    # Ramps are generated, so they default to the UPGRADED provenance
    source: TokenSource = TokenSource.UPGRADED
113
-
114
-
115
- # =============================================================================
116
- # TYPOGRAPHY TOKENS
117
- # =============================================================================
118
-
119
class TypographyToken(BaseToken):
    """Extracted typography token (one observed font style combination)."""
    font_family: str
    font_size: str                         # "16px" or "1rem"
    font_size_px: Optional[float] = None   # Computed px value
    font_weight: int = 400
    line_height: str = "1.5"               # "1.5" or "24px"
    line_height_computed: Optional[float] = None  # Computed ratio
    letter_spacing: Optional[str] = None
    text_transform: Optional[str] = None   # "uppercase", "lowercase", etc.

    # Context
    elements: list[str] = Field(default_factory=list)       # ["h1", "p", "button"]
    css_selectors: list[str] = Field(default_factory=list)  # [".heading", ".body-text"]
133
-
134
-
135
class TypeScale(BaseModel):
    """Typography scale configuration (a named modular scale)."""
    name: str            # "Major Third", "Perfect Fourth"
    ratio: float         # 1.25, 1.333
    base_size: int = 16  # px
    # Size name -> CSS size, e.g. {"xs": "12px", "sm": "14px", ...}
    sizes: dict[str, str] = Field(default_factory=dict)
    # Scales are generated, so they default to the UPGRADED provenance
    source: TokenSource = TokenSource.UPGRADED
142
-
143
-
144
class FontFamily(BaseModel):
    """Font family information detected on the crawled pages."""
    name: str                       # "Inter"
    fallbacks: list[str] = Field(default_factory=list)  # ["system-ui", "sans-serif"]
    category: str = "sans-serif"    # "serif", "sans-serif", "monospace"
    frequency: int = 0              # Occurrence count in the crawl
    usage: str = "primary"          # "primary", "secondary", "accent", "monospace"
151
-
152
-
153
- # =============================================================================
154
- # SPACING TOKENS
155
- # =============================================================================
156
-
157
class SpacingToken(BaseToken):
    """Extracted spacing token (margin/padding/gap value)."""
    value: str     # "16px"
    value_px: int  # 16

    # Context
    contexts: list[str] = Field(default_factory=list)    # ["margin", "padding", "gap"]
    properties: list[str] = Field(default_factory=list)  # ["margin-top", "padding-left"]

    # Analysis flags for scale fitting
    fits_base_4: bool = False  # Divisible by 4
    fits_base_8: bool = False  # Divisible by 8
    is_outlier: bool = False   # Doesn't fit common patterns
170
-
171
-
172
class SpacingScale(BaseModel):
    """Spacing scale configuration (a named base-N step ladder)."""
    name: str  # "8px base"
    base: int  # 8
    scale: list[int] = Field(default_factory=list)    # [4, 8, 16, 24, 32, 48, 64]
    # px value -> token name, e.g. {4: "xs", 8: "sm", 16: "md"}
    names: dict[int, str] = Field(default_factory=dict)
    # Scales are generated, so they default to the UPGRADED provenance
    source: TokenSource = TokenSource.UPGRADED
179
-
180
-
181
- # =============================================================================
182
- # BORDER RADIUS TOKENS
183
- # =============================================================================
184
-
185
class RadiusToken(BaseToken):
    """Extracted border radius token."""
    value: str                      # "8px" or "50%"
    value_px: Optional[int] = None  # Set only when the value is a px length

    # Context
    elements: list[str] = Field(default_factory=list)  # ["button", "card", "input"]

    # Analysis flags for scale fitting
    fits_base_4: bool = False
    fits_base_8: bool = False
196
-
197
-
198
- # =============================================================================
199
- # SHADOW TOKENS
200
- # =============================================================================
201
-
202
class ShadowToken(BaseToken):
    """Extracted box shadow token."""
    value: str  # Full CSS shadow value

    # Parsed components (None when the component was not parsed out)
    offset_x: Optional[str] = None
    offset_y: Optional[str] = None
    blur: Optional[str] = None
    spread: Optional[str] = None
    color: Optional[str] = None
    inset: bool = False

    # Context: elements the shadow was observed on
    elements: list[str] = Field(default_factory=list)
216
-
217
-
218
- # =============================================================================
219
- # PAGE & CRAWL MODELS
220
- # =============================================================================
221
-
222
class DiscoveredPage(BaseModel):
    """A page discovered during crawling."""
    url: str
    title: Optional[str] = None
    page_type: PageType = PageType.OTHER
    depth: int = 0         # Distance from homepage
    selected: bool = True  # User can deselect pages

    # Crawl status
    crawled: bool = False
    error: Optional[str] = None  # Crawl error message, if the page failed
233
-
234
-
235
class CrawlResult(BaseModel):
    """Result of crawling a single page at one viewport."""
    url: str
    viewport: Viewport
    success: bool

    # Timing
    started_at: datetime
    completed_at: Optional[datetime] = None
    duration_ms: Optional[int] = None

    # Result counts per token category
    colors_found: int = 0
    typography_found: int = 0
    spacing_found: int = 0

    # Errors
    error: Optional[str] = None  # Fatal error (success is False)
    warnings: list[str] = Field(default_factory=list)  # Non-fatal issues
254
-
255
-
256
- # =============================================================================
257
- # EXTRACTION RESULT
258
- # =============================================================================
259
-
260
class ExtractedTokens(BaseModel):
    """Complete extraction result for one viewport.

    Aggregates every raw token found across the crawled pages, plus
    detected site-wide patterns, quality counters, and any issues hit
    during extraction.
    """
    viewport: Viewport
    source_url: str
    pages_crawled: list[str] = Field(default_factory=list)

    # Extracted tokens
    colors: list[ColorToken] = Field(default_factory=list)
    typography: list[TypographyToken] = Field(default_factory=list)
    spacing: list[SpacingToken] = Field(default_factory=list)
    radius: list[RadiusToken] = Field(default_factory=list)
    shadows: list[ShadowToken] = Field(default_factory=list)

    # Detected patterns
    font_families: list[FontFamily] = Field(default_factory=list)
    base_font_size: Optional[str] = None
    spacing_base: Optional[int] = None  # Detected: 4 or 8
    naming_convention: Optional[str] = None  # "bem", "utility", "none"

    # Metadata
    extraction_timestamp: datetime = Field(default_factory=datetime.now)
    extraction_duration_ms: Optional[int] = None

    # Quality indicators
    total_elements_analyzed: int = 0
    unique_colors: int = 0
    unique_font_sizes: int = 0
    unique_spacing_values: int = 0

    # Issues
    errors: list[str] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)

    def summary(self) -> dict:
        """Return a compact count-based overview of this extraction."""
        counted_fields = (
            "pages_crawled",
            "colors",
            "typography",
            "spacing",
            "radius",
            "shadows",
            "font_families",
            "errors",
            "warnings",
        )
        # Viewport first, then one count per collection, in a stable order.
        return {
            "viewport": self.viewport.value,
            **{name: len(getattr(self, name)) for name in counted_fields},
        }
307
-
308
-
309
- # =============================================================================
310
- # NORMALIZED TOKENS (Agent 2 Output)
311
- # =============================================================================
312
-
313
class NormalizedTokens(BaseModel):
    """Normalized and structured tokens from Agent 2.

    Tokens are keyed by their suggested names rather than stored as
    flat lists.
    """
    viewport: Viewport
    source_url: str

    # Normalized tokens keyed by suggested name,
    # e.g. {"primary-500": ColorToken, ...}
    colors: dict[str, ColorToken] = Field(default_factory=dict)
    typography: dict[str, TypographyToken] = Field(default_factory=dict)
    spacing: dict[str, SpacingToken] = Field(default_factory=dict)
    radius: dict[str, RadiusToken] = Field(default_factory=dict)
    shadows: dict[str, ShadowToken] = Field(default_factory=dict)

    # Detected info
    font_families: list[FontFamily] = Field(default_factory=list)
    detected_spacing_base: Optional[int] = None
    detected_naming_convention: Optional[str] = None

    # Duplicates & conflicts
    # Near-duplicate color pairs, e.g. [("#1a1a1a", "#1b1b1b"), ...]
    duplicate_colors: list[tuple[str, str]] = Field(default_factory=list)
    conflicting_tokens: list[str] = Field(default_factory=list)

    # Metadata
    normalized_at: datetime = Field(default_factory=datetime.now)
336
-
337
-
338
- # =============================================================================
339
- # UPGRADE OPTIONS (Agent 3 Output)
340
- # =============================================================================
341
-
342
class UpgradeOption(BaseModel):
    """A single upgrade option presented to the user."""
    id: str
    name: str
    description: str
    category: str  # "typography", "spacing", "colors", "naming"

    # The actual values applied when this option is selected
    values: dict[str, Any] = Field(default_factory=dict)

    # Metadata shown alongside the option
    pros: list[str] = Field(default_factory=list)
    cons: list[str] = Field(default_factory=list)
    effort: str = "low"  # "low", "medium", "high"
    recommended: bool = False

    # Selection state (set by the user)
    selected: bool = False
360
-
361
-
362
class UpgradeRecommendations(BaseModel):
    """All upgrade recommendations produced by Agent 3."""

    # Options grouped by category
    typography_scales: list[UpgradeOption] = Field(default_factory=list)
    spacing_systems: list[UpgradeOption] = Field(default_factory=list)
    color_ramps: list[UpgradeOption] = Field(default_factory=list)
    naming_conventions: list[UpgradeOption] = Field(default_factory=list)

    # LLM analysis results
    llm_rationale: str = ""
    detected_patterns: list[str] = Field(default_factory=list)

    # Accessibility findings and proposed fixes
    accessibility_issues: list[str] = Field(default_factory=list)
    accessibility_fixes: list[UpgradeOption] = Field(default_factory=list)

    # Metadata
    generated_at: datetime = Field(default_factory=datetime.now)
381
-
382
-
383
- # =============================================================================
384
- # FINAL OUTPUT (Agent 4 Output)
385
- # =============================================================================
386
-
387
class TokenMetadata(BaseModel):
    """Metadata attached to an exported token set."""
    source_url: str
    extracted_at: datetime
    version: str
    viewport: Viewport
    generator: str = "Design System Extractor v2"
394
-
395
-
396
class FinalTokens(BaseModel):
    """Final exported token set, with converters for common formats."""
    metadata: TokenMetadata

    # Token collections, keyed by token name
    colors: dict[str, dict] = Field(default_factory=dict)
    typography: dict[str, dict] = Field(default_factory=dict)
    spacing: dict[str, dict] = Field(default_factory=dict)
    radius: dict[str, dict] = Field(default_factory=dict)
    shadows: dict[str, dict] = Field(default_factory=dict)

    def to_tokens_studio_format(self) -> dict:
        """Convert to Tokens Studio compatible format."""
        meta = {
            "source": self.metadata.source_url,
            "version": self.metadata.version,
        }
        return {
            "$metadata": meta,
            "color": self.colors,
            "typography": self.typography,
            "spacing": self.spacing,
            "borderRadius": self.radius,
            "boxShadow": self.shadows,
        }

    def to_css_variables(self) -> str:
        """Convert colors and spacing to CSS custom properties on :root."""

        def unwrap(entry):
            # Entries may be {"value": ...} dicts or bare values; a dict
            # missing "value" falls back to the dict itself.
            return entry.get("value", entry) if isinstance(entry, dict) else entry

        body = [
            f"  --color-{name}: {unwrap(data)};"
            for name, data in self.colors.items()
        ]
        body += [
            f"  --space-{name}: {unwrap(data)};"
            for name, data in self.spacing.items()
        ]
        return "\n".join([":root {", *body, "}"])
435
-
436
-
437
- # =============================================================================
438
- # LANGGRAPH STATE
439
- # =============================================================================
440
-
441
class WorkflowState(BaseModel):
    """LangGraph workflow state carried between pipeline stages."""

    # Pydantic v2 configuration — consistent with the v2 `field_validator`
    # API used elsewhere in this module (the v1-style `class Config` is
    # deprecated in pydantic v2).
    model_config = {"arbitrary_types_allowed": True}

    # Input
    base_url: str

    # Discovery phase
    discovered_pages: list[DiscoveredPage] = Field(default_factory=list)
    confirmed_pages: list[str] = Field(default_factory=list)

    # Extraction phase (one result per viewport)
    desktop_tokens: Optional[ExtractedTokens] = None
    mobile_tokens: Optional[ExtractedTokens] = None

    # Normalization phase
    desktop_normalized: Optional[NormalizedTokens] = None
    mobile_normalized: Optional[NormalizedTokens] = None

    # Upgrade phase
    upgrade_recommendations: Optional[UpgradeRecommendations] = None
    # Selected upgrade per category, e.g. {"typography_scale": "major_third", ...}
    selected_upgrades: dict[str, str] = Field(default_factory=dict)

    # Generation phase
    desktop_final: Optional[FinalTokens] = None
    mobile_final: Optional[FinalTokens] = None

    # Workflow status
    # One of: "init", "discover", "confirm", "extract", "normalize",
    # "review", "upgrade", "generate", "export"
    current_stage: str = "init"
    errors: list[str] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)

    # Timestamps
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None