riazmo committed on
Commit
92ac926
·
verified ·
1 Parent(s): 9e4c900

Delete agents/normalizer.py

Browse files
Files changed (1) hide show
  1. agents/normalizer.py +0 -462
agents/normalizer.py DELETED
@@ -1,462 +0,0 @@
1
- """
2
- Agent 2: Token Normalizer & Structurer
3
- Design System Extractor v2
4
-
5
- Persona: Design System Librarian
6
-
7
- Responsibilities:
8
- - Clean noisy extraction data
9
- - Deduplicate similar tokens (colors within threshold, similar spacing)
10
- - Infer naming patterns from class names and contexts
11
- - Tag tokens as: detected | inferred | low-confidence
12
- - Group colors by role (primary, secondary, neutral, etc.)
13
- """
14
-
15
- import re
16
- from typing import Optional
17
- from collections import defaultdict
18
-
19
- from core.token_schema import (
20
- ColorToken,
21
- TypographyToken,
22
- SpacingToken,
23
- ExtractedTokens,
24
- NormalizedTokens,
25
- Confidence,
26
- TokenSource,
27
- )
28
- from core.color_utils import (
29
- parse_color,
30
- normalize_hex,
31
- categorize_color,
32
- )
33
-
34
-
35
class TokenNormalizer:
    """
    Normalizes and structures extracted design tokens.

    Agent 2 ("Design System Librarian"): takes raw extraction output and
    organizes it into a clean, deduplicated token structure.
    """

    def __init__(self):
        # Maximum Euclidean RGB distance for two colors to count as duplicates.
        self.color_similarity_threshold = 10
        # Maximum px difference for two spacing values to be merged.
        self.spacing_merge_threshold = 2

        # Keyword hints mapping a color's usage context to a semantic role.
        # Order matters: roles are checked in this order during inference.
        role_hints = (
            ("primary", ("primary", "brand", "main", "accent")),
            ("secondary", ("secondary", "alt", "alternate")),
            ("success", ("success", "green", "positive", "valid")),
            ("warning", ("warning", "yellow", "caution", "alert")),
            ("error", ("error", "red", "danger", "invalid", "negative")),
            ("info", ("info", "blue", "notice")),
            ("neutral", ("gray", "grey", "neutral", "muted", "subtle")),
            ("background", ("bg", "background", "surface")),
            ("text", ("text", "foreground", "content", "body")),
            ("border", ("border", "divider", "separator", "line")),
        )
        self.color_role_keywords = {role: list(words) for role, words in role_hints}
61
-
62
def normalize(self, extracted: ExtractedTokens) -> NormalizedTokens:
    """
    Clean and structure raw extraction results.

    Args:
        extracted: Raw extraction results from Agent 1.

    Returns:
        NormalizedTokens with deduplicated colors, typography and spacing;
        radius, shadows and crawl metadata are forwarded unchanged.
    """
    return NormalizedTokens(
        viewport=extracted.viewport,
        colors=self._normalize_colors(extracted.colors),
        typography=self._normalize_typography(extracted.typography),
        spacing=self._normalize_spacing(extracted.spacing),
        # Radius and shadow tokens are not yet normalized; passed through.
        radius=extracted.radius,
        shadows=extracted.shadows,
        font_families=extracted.font_families,
        pages_crawled=extracted.pages_crawled,
        total_elements=extracted.total_elements,
    )
91
-
92
def _normalize_colors(self, colors: list[ColorToken]) -> list[ColorToken]:
    """
    Deduplicate, merge and name color tokens.

    Pipeline: exact-hex dedup -> merge of visually similar colors ->
    role inference and name suggestion -> frequency-based confidence.

    Returns:
        Color tokens sorted by descending frequency.
    """
    if not colors:
        return []

    # Step 1: exact dedup keyed on the normalized hex string.
    by_hex: dict[str, ColorToken] = {}
    for token in colors:
        hex_val = normalize_hex(token.value)
        existing = by_hex.get(hex_val)
        if existing is None:
            token.value = hex_val
            by_hex[hex_val] = token
        else:
            # Fold duplicate's usage data into the first-seen token.
            existing.frequency += token.frequency
            existing.contexts = list(set(existing.contexts) | set(token.contexts))
            existing.elements = list(set(existing.elements) | set(token.elements))
            existing.css_properties = list(set(existing.css_properties) | set(token.css_properties))

    # Step 2: collapse visually indistinguishable colors.
    merged = self._merge_similar_colors(list(by_hex.values()))

    # Step 3: name each survivor and score its confidence.
    for token in merged:
        role = self._infer_color_role(token)
        if role:
            token.suggested_name = self._generate_color_name(token, role)
        else:
            token.suggested_name = self._generate_color_name_from_value(token)
        # More frequently seen colors are more trustworthy tokens.
        token.confidence = self._calculate_confidence(token.frequency)

    merged.sort(key=lambda c: c.frequency, reverse=True)
    return merged
136
-
137
- def _merge_similar_colors(self, colors: list[ColorToken]) -> list[ColorToken]:
138
- """Merge colors that are visually very similar."""
139
- if len(colors) <= 1:
140
- return colors
141
-
142
- merged = []
143
- used = set()
144
-
145
- for i, color1 in enumerate(colors):
146
- if i in used:
147
- continue
148
-
149
- # Find similar colors
150
- similar_group = [color1]
151
- for j, color2 in enumerate(colors[i+1:], i+1):
152
- if j in used:
153
- continue
154
- if self._colors_are_similar(color1.value, color2.value):
155
- similar_group.append(color2)
156
- used.add(j)
157
-
158
- # Merge the group - keep the most frequent
159
- similar_group.sort(key=lambda c: -c.frequency)
160
- primary = similar_group[0]
161
-
162
- # Aggregate data from similar colors
163
- for other in similar_group[1:]:
164
- primary.frequency += other.frequency
165
- primary.contexts = list(set(primary.contexts + other.contexts))
166
- primary.elements = list(set(primary.elements + other.elements))
167
-
168
- merged.append(primary)
169
- used.add(i)
170
-
171
- return merged
172
-
173
def _colors_are_similar(self, hex1: str, hex2: str) -> bool:
    """
    Return True when two colors sit within the RGB-distance threshold.

    Unparseable colors (or any parsing error) are treated as not similar.
    """
    try:
        rgb1 = parse_color(hex1)
        rgb2 = parse_color(hex2)
        if rgb1 is None or rgb2 is None:
            return False
        # Euclidean distance in RGB space against the configured threshold.
        squared = sum((a - b) ** 2 for a, b in zip(rgb1, rgb2))
        return squared ** 0.5 < self.color_similarity_threshold
    except Exception:
        # Best-effort comparison: never raise for malformed input.
        return False
186
-
187
- def _infer_color_role(self, color: ColorToken) -> Optional[str]:
188
- """Infer the semantic role of a color from its contexts."""
189
- all_context = " ".join(color.contexts + color.elements).lower()
190
-
191
- for role, keywords in self.color_role_keywords.items():
192
- for keyword in keywords:
193
- if keyword in all_context:
194
- return role
195
-
196
- # Try to infer from color category
197
- category = categorize_color(color.value)
198
- if category in ["gray", "white", "black"]:
199
- return "neutral"
200
-
201
- return None
202
-
203
def _generate_color_name(self, color: ColorToken, role: str) -> str:
    """
    Build a semantic token name like "color.primary.500".

    The shade step is chosen from perceived luminance (Rec. 601 weights);
    colors that fail to parse default to the mid shade "500".
    """
    shade = "500"
    rgb = parse_color(color.value)
    if rgb:
        # Rec. 601 luma, normalized to 0..1.
        luminance = (0.299 * rgb[0] + 0.587 * rgb[1] + 0.114 * rgb[2]) / 255
        for cutoff, step in ((0.8, "50"), (0.6, "200"), (0.4, "500"), (0.2, "700")):
            if luminance > cutoff:
                shade = step
                break
        else:
            # Very dark colors fall through every cutoff.
            shade = "900"

    return f"color.{role}.{shade}"
223
-
224
def _generate_color_name_from_value(self, color: ColorToken) -> str:
    """
    Build a descriptive name like "color.blue.light" from the value alone.

    Used when no semantic role could be inferred. The shade is
    "light"/"base"/"dark" by luminance; unparseable colors get "base".
    """
    category = categorize_color(color.value)
    rgb = parse_color(color.value)

    shade = "base"
    if rgb:
        luminance = (0.299 * rgb[0] + 0.587 * rgb[1] + 0.114 * rgb[2]) / 255
        if luminance > 0.6:
            shade = "light"
        elif luminance <= 0.3:
            shade = "dark"
        # 0.3 < luminance <= 0.6 keeps the default "base".

    return f"color.{category}.{shade}"
241
-
242
- def _normalize_typography(self, typography: list[TypographyToken]) -> list[TypographyToken]:
243
- """
244
- Normalize typography tokens:
245
- - Deduplicate identical styles
246
- - Infer type scale categories
247
- - Assign suggested names
248
- """
249
- if not typography:
250
- return []
251
-
252
- # Deduplicate by unique style combination
253
- unique_typo = {}
254
- for typo in typography:
255
- key = f"{typo.font_family}|{typo.font_size}|{typo.font_weight}|{typo.line_height}"
256
- if key in unique_typo:
257
- existing = unique_typo[key]
258
- existing.frequency += typo.frequency
259
- existing.elements = list(set(existing.elements + typo.elements))
260
- else:
261
- unique_typo[key] = typo
262
-
263
- result = list(unique_typo.values())
264
-
265
- # Infer names based on size and elements
266
- for typo in result:
267
- typo.suggested_name = self._generate_typography_name(typo)
268
- typo.confidence = self._calculate_confidence(typo.frequency)
269
-
270
- # Sort by font size (largest first)
271
- result.sort(key=lambda t: -self._parse_font_size(t.font_size))
272
-
273
- return result
274
-
275
- def _generate_typography_name(self, typo: TypographyToken) -> str:
276
- """Generate a semantic name for typography."""
277
- size_px = self._parse_font_size(typo.font_size)
278
- elements = " ".join(typo.elements).lower()
279
-
280
- # Determine category from elements
281
- if any(h in elements for h in ["h1", "hero", "display"]):
282
- category = "display"
283
- elif any(h in elements for h in ["h2", "h3", "h4", "h5", "h6", "heading", "title"]):
284
- category = "heading"
285
- elif any(h in elements for h in ["label", "caption", "small", "meta"]):
286
- category = "label"
287
- elif any(h in elements for h in ["body", "p", "paragraph", "text"]):
288
- category = "body"
289
- else:
290
- category = "text"
291
-
292
- # Determine size tier
293
- if size_px >= 32:
294
- size_tier = "xl"
295
- elif size_px >= 24:
296
- size_tier = "lg"
297
- elif size_px >= 18:
298
- size_tier = "md"
299
- elif size_px >= 14:
300
- size_tier = "sm"
301
- else:
302
- size_tier = "xs"
303
-
304
- return f"font.{category}.{size_tier}"
305
-
306
- def _parse_font_size(self, size: str) -> float:
307
- """Parse font size string to pixels."""
308
- if not size:
309
- return 16
310
-
311
- size = size.lower().strip()
312
-
313
- # Handle px
314
- if "px" in size:
315
- try:
316
- return float(size.replace("px", ""))
317
- except ValueError:
318
- return 16
319
-
320
- # Handle rem (assume 16px base)
321
- if "rem" in size:
322
- try:
323
- return float(size.replace("rem", "")) * 16
324
- except ValueError:
325
- return 16
326
-
327
- # Handle em (assume 16px base)
328
- if "em" in size:
329
- try:
330
- return float(size.replace("em", "")) * 16
331
- except ValueError:
332
- return 16
333
-
334
- # Try plain number
335
- try:
336
- return float(size)
337
- except ValueError:
338
- return 16
339
-
340
- def _normalize_spacing(self, spacing: list[SpacingToken]) -> list[SpacingToken]:
341
- """
342
- Normalize spacing tokens:
343
- - Merge similar values
344
- - Align to base-8 grid if close
345
- - Assign suggested names
346
- """
347
- if not spacing:
348
- return []
349
-
350
- # Deduplicate by value
351
- unique_spacing = {}
352
- for space in spacing:
353
- key = space.value
354
- if key in unique_spacing:
355
- existing = unique_spacing[key]
356
- existing.frequency += space.frequency
357
- existing.contexts = list(set(existing.contexts + space.contexts))
358
- else:
359
- unique_spacing[key] = space
360
-
361
- result = list(unique_spacing.values())
362
-
363
- # Merge very similar values
364
- result = self._merge_similar_spacing(result)
365
-
366
- # Assign names
367
- for space in result:
368
- space.suggested_name = self._generate_spacing_name(space)
369
- space.confidence = self._calculate_confidence(space.frequency)
370
-
371
- # Sort by value
372
- result.sort(key=lambda s: s.value_px)
373
-
374
- return result
375
-
376
- def _merge_similar_spacing(self, spacing: list[SpacingToken]) -> list[SpacingToken]:
377
- """Merge spacing values that are very close."""
378
- if len(spacing) <= 1:
379
- return spacing
380
-
381
- # Sort by pixel value
382
- spacing.sort(key=lambda s: s.value_px)
383
-
384
- merged = []
385
- i = 0
386
-
387
- while i < len(spacing):
388
- current = spacing[i]
389
- group = [current]
390
-
391
- # Find adjacent similar values
392
- j = i + 1
393
- while j < len(spacing):
394
- if abs(spacing[j].value_px - current.value_px) <= self.spacing_merge_threshold:
395
- group.append(spacing[j])
396
- j += 1
397
- else:
398
- break
399
-
400
- # Merge group - prefer base-8 aligned value or most frequent
401
- group.sort(key=lambda s: (-s.fits_base_8, -s.frequency))
402
- primary = group[0]
403
-
404
- for other in group[1:]:
405
- primary.frequency += other.frequency
406
- primary.contexts = list(set(primary.contexts + other.contexts))
407
-
408
- merged.append(primary)
409
- i = j
410
-
411
- return merged
412
-
413
- def _generate_spacing_name(self, space: SpacingToken) -> str:
414
- """Generate a semantic name for spacing."""
415
- px = space.value_px
416
-
417
- # Map to t-shirt sizes based on value
418
- if px <= 2:
419
- size = "px"
420
- elif px <= 4:
421
- size = "0.5"
422
- elif px <= 8:
423
- size = "1"
424
- elif px <= 12:
425
- size = "1.5"
426
- elif px <= 16:
427
- size = "2"
428
- elif px <= 20:
429
- size = "2.5"
430
- elif px <= 24:
431
- size = "3"
432
- elif px <= 32:
433
- size = "4"
434
- elif px <= 40:
435
- size = "5"
436
- elif px <= 48:
437
- size = "6"
438
- elif px <= 64:
439
- size = "8"
440
- elif px <= 80:
441
- size = "10"
442
- elif px <= 96:
443
- size = "12"
444
- else:
445
- size = str(int(px / 4))
446
-
447
- return f"space.{size}"
448
-
449
def _calculate_confidence(self, frequency: int) -> Confidence:
    """
    Map an occurrence count to a confidence bucket.

    10+ sightings -> HIGH, 3-9 -> MEDIUM, fewer -> LOW.
    """
    if frequency >= 10:
        return Confidence.HIGH
    if frequency >= 3:
        return Confidence.MEDIUM
    return Confidence.LOW
457
-
458
-
459
def normalize_tokens(extracted: ExtractedTokens) -> NormalizedTokens:
    """Normalize *extracted* with a fresh TokenNormalizer (convenience wrapper)."""
    return TokenNormalizer().normalize(extracted)