riazmo committed on
Commit
b1b0b33
·
verified ·
1 Parent(s): bfc258b

Upload semantic_analyzer.py

Browse files
Files changed (1) hide show
  1. agents/semantic_analyzer.py +877 -0
agents/semantic_analyzer.py ADDED
@@ -0,0 +1,877 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agent 1C: Semantic Color Analyzer
3
+ Design System Extractor v2
4
+
5
+ Persona: Design System Semanticist
6
+
7
+ Responsibilities:
8
+ - Analyze colors based on their actual CSS usage
9
+ - Categorize into semantic roles (brand, text, background, border, feedback)
10
+ - Use LLM to understand color relationships and hierarchy
11
+ - Provide structured output for Stage 1 UI and Stage 2 analysis
12
+ """
13
+
14
+ import json
15
+ import re
16
+ from typing import Optional, Callable
17
+ from datetime import datetime
18
+
19
+ from core.color_utils import (
20
+ parse_color,
21
+ get_contrast_with_white,
22
+ get_contrast_with_black,
23
+ check_wcag_compliance,
24
+ )
25
+
26
+
27
class SemanticColorAnalyzer:
    """
    Analyze extracted colors and categorize them by semantic role.

    Roles are grouped into brand, text, background, border and feedback.
    When an LLM provider is configured, ``analyze_with_llm`` asks it for the
    categorization and falls back to the rule-based heuristics on any
    failure; ``analyze_sync`` always uses the heuristics.
    """

    # Needles searched for in the *stringified* element list of a token.
    # NOTE(review): matching is by substring, so short needles such as 'a' or
    # 'p' also hit inside longer names (e.g. 'span'). Left unchanged because
    # the element string format is not visible here -- confirm before
    # tightening.
    _INTERACTIVE_ELEMENTS = ('button', 'a', 'input', 'select', 'submit', 'btn', 'cta')
    _TEXT_ELEMENTS = ('p', 'span', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'label', 'div', 'text')
    _CONTAINER_ELEMENTS = ('div', 'section', 'main', 'body', 'article', 'header', 'footer', 'card')

    def __init__(self, llm_provider=None):
        """
        Initialize the semantic analyzer.

        Args:
            llm_provider: Optional LLM provider for AI analysis.
                If None, uses rule-based fallback.
        """
        self.llm_provider = llm_provider
        self.analysis_result = {}
        self.logs = []

    def log(self, message: str):
        """Append ``message`` to the internal log, prefixed with HH:MM:SS."""
        timestamp = datetime.now().strftime("%H:%M:%S")
        self.logs.append(f"[{timestamp}] {message}")

    def get_logs(self) -> str:
        """Return all log lines joined with newlines."""
        return "\n".join(self.logs)

    @staticmethod
    def _token_fields(token):
        """Return (hex, frequency, contexts, elements, css_properties) for a token.

        Tokens may be objects exposing a ``value`` attribute or plain dicts;
        missing optional fields default to empty lists.
        """
        if hasattr(token, 'value'):
            return (
                token.value,
                token.frequency,
                getattr(token, 'contexts', []),
                getattr(token, 'elements', []),
                getattr(token, 'css_properties', []),
            )
        return (
            token.get('value', '#000000'),
            token.get('frequency', 0),
            token.get('contexts', []),
            token.get('elements', []),
            token.get('css_properties', []),
        )

    @staticmethod
    def _color_metrics(hex_val):
        """Return (luminance, saturation, hue) for a hex color string.

        Accepts ``#rgb``/``#rgba`` shorthand as well as ``#rrggbb``; any
        trailing alpha digits are ignored. Values that cannot be parsed yield
        the neutral fallback ``(0.5, 0, 0)``.
        """
        try:
            digits = hex_val[1:] if hex_val.startswith('#') else hex_val
            if len(digits) in (3, 4):
                # Expand CSS shorthand: "abc" -> "aabbcc".
                digits = ''.join(ch * 2 for ch in digits)
            if len(digits) < 6:
                raise ValueError(f"not a hex color: {hex_val!r}")
            r = int(digits[0:2], 16)
            g = int(digits[2:4], 16)
            b = int(digits[4:6], 16)
        except (AttributeError, TypeError, ValueError):
            return 0.5, 0, 0

        luminance = (0.299 * r + 0.587 * g + 0.114 * b) / 255
        max_c = max(r, g, b)
        min_c = min(r, g, b)
        saturation = (max_c - min_c) / 255 if max_c > 0 else 0

        spread = max_c - min_c
        if spread == 0:
            hue = 0
        elif max_c == r:
            hue = 60 * (((g - b) / spread) % 6)
        elif max_c == g:
            hue = 60 * ((b - r) / spread + 2)
        else:
            hue = 60 * ((r - g) / spread + 4)
        return luminance, saturation, hue

    def _prepare_color_data_for_llm(self, colors: dict) -> str:
        """
        Prepare color data in a format optimized for LLM analysis.

        Args:
            colors: Dict of color tokens with metadata

        Returns:
            JSON string with at most the 50 most frequent colors, each
            carrying usage metadata plus luminance, saturation and contrast.
        """
        color_entries = []

        for name, token in colors.items():
            hex_val, frequency, contexts, elements, css_props = self._token_fields(token)

            contrast_white = get_contrast_with_white(hex_val)
            contrast_black = get_contrast_with_black(hex_val)
            luminance, saturation, _hue = self._color_metrics(hex_val)

            color_entries.append({
                "hex": hex_val,
                "name": name,
                "frequency": frequency,
                "css_properties": css_props[:5],  # Limit for prompt size
                "elements": elements[:5],
                "contexts": contexts[:3],
                "luminance": round(luminance, 2),
                "saturation": round(saturation, 2),
                "contrast_on_white": round(contrast_white, 2),
                "contrast_on_black": round(contrast_black, 2),
                "aa_compliant_on_white": contrast_white >= 4.5,
            })

        # Most frequent first so the LLM sees the most important colors first.
        color_entries.sort(key=lambda entry: -entry['frequency'])

        # Limit to top 50 colors for LLM (avoid token limits)
        return json.dumps(color_entries[:50], indent=2)

    def _build_llm_prompt(self, color_data: str) -> str:
        """Build the prompt for LLM semantic analysis around ``color_data``."""

        return f"""You are a Design System Analyst specializing in color semantics.

TASK: Analyze these extracted colors and categorize them by their semantic role in the UI.

EXTRACTED COLORS (sorted by frequency):
{color_data}

ANALYSIS RULES:
1. BRAND/PRIMARY colors are typically:
- Used on buttons, links, CTAs (elements: button, a, input[type=submit])
- Applied via background-color on interactive elements
- Saturated (saturation > 0.3) and not gray
- High frequency on interactive elements

2. TEXT colors are typically:
- Applied via "color" CSS property (not background-color)
- Used on text elements (p, span, h1-h6, label)
- Form a hierarchy: primary (darkest), secondary (medium), muted (lightest)
- Low saturation (grays)

3. BACKGROUND colors are typically:
- Applied via "background-color" on containers
- Used on div, section, main, body, card elements
- Light colors (luminance > 0.8) for light themes
- May include dark backgrounds for inverse sections

4. BORDER colors are typically:
- Applied via border-color properties
- Often gray/neutral
- Lower frequency than text/background

5. FEEDBACK colors are:
- Red variants = error
- Green variants = success
- Yellow/orange = warning
- Blue variants = info
- Often used with specific class contexts

OUTPUT FORMAT (JSON):
{{
"brand": {{
"primary": {{"hex": "#xxx", "confidence": "high|medium|low", "reason": "..."}},
"secondary": {{"hex": "#xxx", "confidence": "high|medium|low", "reason": "..."}},
"accent": {{"hex": "#xxx", "confidence": "high|medium|low", "reason": "..."}}
}},
"text": {{
"primary": {{"hex": "#xxx", "confidence": "high|medium|low", "reason": "..."}},
"secondary": {{"hex": "#xxx", "confidence": "high|medium|low", "reason": "..."}},
"muted": {{"hex": "#xxx", "confidence": "high|medium|low", "reason": "..."}},
"inverse": {{"hex": "#xxx", "confidence": "high|medium|low", "reason": "..."}}
}},
"background": {{
"primary": {{"hex": "#xxx", "confidence": "high|medium|low", "reason": "..."}},
"secondary": {{"hex": "#xxx", "confidence": "high|medium|low", "reason": "..."}},
"tertiary": {{"hex": "#xxx", "confidence": "high|medium|low", "reason": "..."}},
"inverse": {{"hex": "#xxx", "confidence": "high|medium|low", "reason": "..."}}
}},
"border": {{
"default": {{"hex": "#xxx", "confidence": "high|medium|low", "reason": "..."}},
"strong": {{"hex": "#xxx", "confidence": "high|medium|low", "reason": "..."}}
}},
"feedback": {{
"error": {{"hex": "#xxx", "confidence": "high|medium|low", "reason": "..."}},
"success": {{"hex": "#xxx", "confidence": "high|medium|low", "reason": "..."}},
"warning": {{"hex": "#xxx", "confidence": "high|medium|low", "reason": "..."}},
"info": {{"hex": "#xxx", "confidence": "high|medium|low", "reason": "..."}}
}},
"summary": {{
"total_colors_analyzed": 50,
"brand_colors_found": 2,
"has_clear_hierarchy": true,
"accessibility_notes": "..."
}}
}}

IMPORTANT:
- Only include roles where you found a matching color
- Set confidence based on how certain you are
- Provide brief reasoning for each categorization
- If no color fits a role, omit that key

Return ONLY valid JSON, no other text."""

    def _rule_based_analysis(self, colors: dict) -> dict:
        """
        Fallback rule-based analysis when LLM is not available.

        Uses heuristics based on:
        - CSS properties (color vs background-color vs border-color)
        - Element types (button, a, p, div, etc.)
        - Color characteristics (saturation, luminance, hue)
        - Frequency

        Args:
            colors: Dict of color tokens (objects or dicts).

        Returns:
            Dict with "brand", "text", "background", "border", "feedback"
            and "summary" keys; roles without a matching color are omitted.
        """
        self.log(" Using rule-based analysis (no LLM)")

        result = {
            "brand": {},
            "text": {},
            "background": {},
            "border": {},
            "feedback": {},
            "summary": {
                "total_colors_analyzed": len(colors),
                "brand_colors_found": 0,
                "has_clear_hierarchy": False,
                "accessibility_notes": "",
                "method": "rule-based"
            }
        }

        brand_candidates = []
        text_candidates = []
        background_candidates = []
        border_candidates = []
        feedback_candidates = {"error": [], "success": [], "warning": [], "info": []}

        for name, token in colors.items():
            hex_val, frequency, contexts, elements, css_props = self._token_fields(token)
            luminance, saturation, hue = self._color_metrics(hex_val)

            color_info = {
                "hex": hex_val,
                "name": name,
                "frequency": frequency,
                "luminance": luminance,
                "saturation": saturation,
                "hue": hue,
                "css_props": css_props,
                "elements": elements,
                "contexts": contexts,
            }

            # Stringify once; every heuristic below does substring checks.
            elements_text = str(elements).lower()
            contexts_text = str(contexts).lower()

            # --- BRAND: saturated colors, several detection methods ---
            is_interactive = any(el in elements_text for el in self._INTERACTIVE_ELEMENTS)
            has_bg_prop = any('background' in str(p).lower() for p in css_props)

            if (
                # Interactive elements with background
                (saturation > 0.25 and is_interactive and has_bg_prop)
                # Highly saturated + high frequency (works for Firecrawl)
                or (saturation > 0.35 and frequency > 15)
                # Very saturated colors with modest frequency
                or (saturation > 0.5 and frequency > 5)
                # Cyan/Teal range (common brand colors)
                or (160 <= hue <= 200 and saturation > 0.4 and frequency > 10)
                # Lime/Green-Yellow (secondary brand colors)
                or (60 <= hue <= 90 and saturation > 0.5 and frequency > 5)
            ):
                brand_candidates.append(color_info)

            # --- TEXT: low saturation, used with the 'color' property ---
            has_color_prop = any(
                p == 'color' or (p.endswith('-color') and 'background' not in p and 'border' not in p)
                for p in css_props
            )
            is_text_element = any(el in elements_text for el in self._TEXT_ELEMENTS)

            # A color qualifies when ANY rule matches, but is recorded only
            # once: duplicates would skew the primary/secondary/muted picks
            # and the hierarchy flag below.
            if (
                (saturation < 0.15 and (has_color_prop or 'text' in contexts_text))
                or (saturation < 0.1 and 0.1 < luminance < 0.8)  # Gray range
                or (saturation < 0.1 and luminance < 0.5 and frequency > 50)  # Dark grays used a lot
                or (saturation < 0.1 and luminance < 0.7 and is_text_element)
            ):
                text_candidates.append(color_info)

            # --- BACKGROUND: background-color on containers, mostly neutral ---
            is_container = any(el in elements_text for el in self._CONTAINER_ELEMENTS)
            if has_bg_prop and (is_container or 'background' in contexts_text):
                if saturation < 0.15:
                    background_candidates.append(color_info)

            # --- BORDER: used with border-color properties ---
            has_border_prop = any('border' in str(p).lower() for p in css_props)
            if has_border_prop or 'border' in contexts_text:
                border_candidates.append(color_info)

            # --- FEEDBACK: based on hue ---
            if saturation > 0.3:
                if 0 <= hue <= 30 or 330 <= hue <= 360:  # Red
                    feedback_candidates["error"].append(color_info)
                elif 90 <= hue <= 150:  # Green
                    feedback_candidates["success"].append(color_info)
                elif 30 <= hue <= 60:  # Yellow/Orange
                    feedback_candidates["warning"].append(color_info)
                elif 180 <= hue <= 250:  # Blue
                    feedback_candidates["info"].append(color_info)

        # --- SELECT BEST CANDIDATES ---

        # Brand: rank by frequency * saturation
        brand_candidates.sort(key=lambda c: -(c['frequency'] * c['saturation']))
        if brand_candidates:
            result["brand"]["primary"] = {
                "hex": brand_candidates[0]["hex"],
                "confidence": "high" if brand_candidates[0]["frequency"] > 20 else "medium",
                "reason": f"Most frequent saturated color on interactive elements (freq: {brand_candidates[0]['frequency']})"
            }
            result["summary"]["brand_colors_found"] += 1
        if len(brand_candidates) > 1:
            result["brand"]["secondary"] = {
                "hex": brand_candidates[1]["hex"],
                "confidence": "medium",
                "reason": f"Second most frequent brand color (freq: {brand_candidates[1]['frequency']})"
            }
            result["summary"]["brand_colors_found"] += 1

        # Text: darkest first for primary
        text_candidates.sort(key=lambda c: c['luminance'])
        if text_candidates:
            result["text"]["primary"] = {
                "hex": text_candidates[0]["hex"],
                "confidence": "high" if text_candidates[0]["luminance"] < 0.3 else "medium",
                "reason": f"Darkest text color (luminance: {text_candidates[0]['luminance']:.2f})"
            }
        if len(text_candidates) > 1:
            # Secondary is the mid-luminance candidate
            mid_idx = len(text_candidates) // 2
            result["text"]["secondary"] = {
                "hex": text_candidates[mid_idx]["hex"],
                "confidence": "medium",
                "reason": f"Mid-tone text color (luminance: {text_candidates[mid_idx]['luminance']:.2f})"
            }
        if len(text_candidates) > 2:
            result["text"]["muted"] = {
                "hex": text_candidates[-1]["hex"],
                "confidence": "medium",
                "reason": f"Lightest text color (luminance: {text_candidates[-1]['luminance']:.2f})"
            }

        # Three or more distinct text colors count as a clear hierarchy
        if len(text_candidates) >= 3:
            result["summary"]["has_clear_hierarchy"] = True

        # Background: lightest first for primary
        background_candidates.sort(key=lambda c: -c['luminance'])
        if background_candidates:
            result["background"]["primary"] = {
                "hex": background_candidates[0]["hex"],
                "confidence": "high" if background_candidates[0]["luminance"] > 0.9 else "medium",
                "reason": f"Lightest background (luminance: {background_candidates[0]['luminance']:.2f})"
            }
        if len(background_candidates) > 1:
            result["background"]["secondary"] = {
                "hex": background_candidates[1]["hex"],
                "confidence": "medium",
                "reason": f"Secondary background (luminance: {background_candidates[1]['luminance']:.2f})"
            }
        # Dark background for inverse sections
        dark_bgs = [c for c in background_candidates if c['luminance'] < 0.2]
        if dark_bgs:
            result["background"]["inverse"] = {
                "hex": dark_bgs[0]["hex"],
                "confidence": "medium",
                "reason": f"Dark background for inverse sections (luminance: {dark_bgs[0]['luminance']:.2f})"
            }

        # Border: most frequent wins
        border_candidates.sort(key=lambda c: -c['frequency'])
        if border_candidates:
            result["border"]["default"] = {
                "hex": border_candidates[0]["hex"],
                "confidence": "medium",
                "reason": f"Most common border color (freq: {border_candidates[0]['frequency']})"
            }

        # Feedback: highest frequency for each type
        for feedback_type, candidates in feedback_candidates.items():
            if candidates:
                candidates.sort(key=lambda c: -c['frequency'])
                result["feedback"][feedback_type] = {
                    "hex": candidates[0]["hex"],
                    "confidence": "medium",
                    "reason": f"Detected {feedback_type} color by hue analysis"
                }

        return result

    @staticmethod
    def _parse_llm_response(response) -> dict:
        """Extract the JSON object from an LLM reply and tag it as LLM-made.

        Raises:
            ValueError: if no ``{...}`` span is found or it is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        json_match = re.search(r'\{[\s\S]*\}', response)
        if not json_match:
            raise ValueError("No JSON found in response")
        parsed = json.loads(json_match.group())
        # The LLM may omit "summary"; create it rather than discarding an
        # otherwise usable categorization.
        summary = parsed.get("summary")
        if not isinstance(summary, dict):
            summary = {}
            parsed["summary"] = summary
        summary["method"] = "llm"
        return parsed

    async def analyze_with_llm(self, colors: dict, log_callback: Optional[Callable] = None) -> dict:
        """
        Analyze colors using LLM for semantic categorization.

        Falls back to the rule-based analysis when no provider is configured,
        when the provider call fails, or when its reply cannot be parsed.

        Args:
            colors: Dict of color tokens
            log_callback: Optional callback invoked with every log message

        Returns:
            Semantic analysis result (also stored on ``self.analysis_result``)
        """
        def log(msg):
            self.log(msg)
            if log_callback:
                log_callback(msg)

        log("")
        log("=" * 60)
        log("🧠 SEMANTIC COLOR ANALYSIS (LLM)")
        log("=" * 60)
        log("")

        # Prepare data for LLM
        log(" πŸ“Š Preparing color data for analysis...")
        color_data = self._prepare_color_data_for_llm(colors)
        log(f" βœ… Prepared {min(50, len(colors))} colors for analysis")

        if self.llm_provider is None:
            log(" ⚠️ No LLM provider configured, using rule-based analysis")
            self.analysis_result = self._rule_based_analysis(colors)
        else:
            try:
                log(" πŸ€– Calling LLM for semantic analysis...")

                prompt = self._build_llm_prompt(color_data)

                response = await self.llm_provider.generate(
                    prompt=prompt,
                    max_tokens=2000,
                    temperature=0.3,  # Low temperature for consistent categorization
                )

                log(" βœ… LLM response received")

                try:
                    self.analysis_result = self._parse_llm_response(response)
                    log(" βœ… Successfully parsed LLM analysis")
                except ValueError as e:
                    log(f" ⚠️ Failed to parse LLM response: {e}")
                    log(" πŸ”„ Falling back to rule-based analysis")
                    self.analysis_result = self._rule_based_analysis(colors)

            except Exception as e:
                # Best-effort boundary: any provider failure degrades to the
                # heuristics instead of aborting the pipeline.
                log(f" ❌ LLM analysis failed: {str(e)}")
                log(" πŸ”„ Falling back to rule-based analysis")
                self.analysis_result = self._rule_based_analysis(colors)

        self._log_analysis_results(log)

        return self.analysis_result

    def analyze_sync(self, colors: dict, log_callback: Optional[Callable] = None) -> dict:
        """
        Synchronous analysis using rule-based approach.

        Args:
            colors: Dict of color tokens
            log_callback: Optional callback invoked with every log message

        Returns:
            Semantic analysis result (also stored on ``self.analysis_result``)
        """
        def log(msg):
            self.log(msg)
            if log_callback:
                log_callback(msg)

        log("")
        log("=" * 60)
        log("🧠 SEMANTIC COLOR ANALYSIS")
        log("=" * 60)
        log("")

        log(f" πŸ“Š Analyzing {len(colors)} colors...")

        self.analysis_result = self._rule_based_analysis(colors)

        self._log_analysis_results(log)

        return self.analysis_result

    def _log_analysis_results(self, log: Callable):
        """Log ``self.analysis_result`` section by section through ``log``.

        Tolerates incomplete LLM output: categories that are not dicts are
        skipped and missing per-color fields are replaced by placeholders.
        """
        result = self.analysis_result

        log("")
        log("πŸ“Š SEMANTIC ANALYSIS RESULTS:")
        log("")

        # (result key, heading, blank line before heading, log the reason)
        sections = (
            ("brand", " 🎨 BRAND COLORS:", False, True),
            ("text", " πŸ“ TEXT COLORS:", True, False),
            ("background", " πŸ–ΌοΈ BACKGROUND COLORS:", True, False),
            ("border", " πŸ“ BORDER COLORS:", True, False),
            ("feedback", " 🚨 FEEDBACK COLORS:", True, False),
        )
        for key, heading, leading_blank, show_reason in sections:
            roles = result.get(key)
            if not roles or not isinstance(roles, dict):
                continue
            if leading_blank:
                log("")
            log(heading)
            for role, data in roles.items():
                if not data or not isinstance(data, dict):
                    continue
                log(f" {role}: {data.get('hex', '?')} ({data.get('confidence', 'unknown')})")
                if show_reason and 'reason' in data:
                    log(f" └─ {data['reason']}")

        summary = result.get("summary", {})
        if not isinstance(summary, dict):
            summary = {}
        log("")
        log(" πŸ“ˆ SUMMARY:")
        log(f" Total colors analyzed: {summary.get('total_colors_analyzed', 0)}")
        log(f" Brand colors found: {summary.get('brand_colors_found', 0)}")
        log(f" Clear hierarchy: {'Yes' if summary.get('has_clear_hierarchy') else 'No'}")
        log(f" Analysis method: {summary.get('method', 'unknown')}")
        log("")
616
+
617
+
618
def generate_semantic_preview_html(analysis_result: dict) -> str:
    """
    Generate HTML preview showing colors organized by semantic role.

    Values taken from ``analysis_result`` may come from an LLM, so they are
    HTML-escaped before being placed in the markup.

    Args:
        analysis_result: Output from SemanticColorAnalyzer

    Returns:
        HTML string for Gradio HTML component. A warning box is returned
        when the result is empty or contains no usable color category.
    """
    # Local import keeps this function self-contained.
    from html import escape

    # Handle empty or invalid result
    if not analysis_result:
        return '''
<div style="padding: 40px; text-align: center; background: #fff3cd !important; border-radius: 8px; border: 1px solid #ffc107;">
<p style="color: #856404 !important; font-size: 14px; margin: 0;">
⚠️ Semantic analysis did not produce results. Check the logs for errors.
</p>
</div>
'''

    def label_color_for(hex_val) -> str:
        """Pick dark or white label text depending on the swatch luminance."""
        try:
            digits = hex_val[1:] if hex_val.startswith('#') else hex_val
            if len(digits) in (3, 4):
                # Expand CSS shorthand: "abc" -> "aabbcc".
                digits = ''.join(ch * 2 for ch in digits)
            if len(digits) < 6:
                raise ValueError(hex_val)
            r = int(digits[0:2], 16)
            g = int(digits[2:4], 16)
            b = int(digits[4:6], 16)
        except (AttributeError, TypeError, ValueError):
            # Unparseable value: fall back to dark text.
            return "#1a1a1a"
        luminance = (0.299 * r + 0.587 * g + 0.114 * b) / 255
        return "#1a1a1a" if luminance > 0.5 else "#ffffff"

    badges = {
        "high": '<span class="confidence high">High</span>',
        "medium": '<span class="confidence medium">Medium</span>',
        "low": '<span class="confidence low">Low</span>',
    }

    def color_card(hex_val: str, role: str, confidence: str, reason: str = "") -> str:
        """Generate HTML for a single color card.

        ``reason`` is accepted for call compatibility but is not rendered.
        """
        text_color = label_color_for(hex_val)
        # Unknown or non-string confidence values render no badge.
        confidence_badge = badges.get(confidence, "") if isinstance(confidence, str) else ""
        safe_hex = escape(str(hex_val), quote=True)
        safe_role = escape(str(role).replace("_", " ").title(), quote=True)

        return f'''
<div class="sem-color-card">
<div class="sem-color-swatch" style="background-color: {safe_hex};">
<span class="sem-hex-label" style="color: {text_color};">{safe_hex}</span>
</div>
<div class="sem-color-details">
<div class="sem-role-name">{safe_role}</div>
{confidence_badge}
</div>
</div>
'''

    def category_section(title: str, icon: str, colors: dict) -> str:
        """Generate HTML for a category section, or "" when it has no cards."""
        if not colors or not isinstance(colors, dict):
            return ""

        cards_html = "".join(
            color_card(
                data["hex"],
                role,
                data.get("confidence", "medium"),
                data.get("reason", ""),
            )
            for role, data in colors.items()
            if data and isinstance(data, dict) and "hex" in data
        )

        if not cards_html:
            return ""

        return f'''
<div class="sem-category-section">
<h3 class="sem-category-title">{icon} {title}</h3>
<div class="sem-color-grid">
{cards_html}
</div>
</div>
'''

    # Build sections
    categories = (
        ("Brand Colors", "🎨", "brand"),
        ("Text Colors", "πŸ“", "text"),
        ("Background Colors", "πŸ–ΌοΈ", "background"),
        ("Border Colors", "πŸ“", "border"),
        ("Feedback Colors", "🚨", "feedback"),
    )
    sections_html = "".join(
        category_section(title, icon, analysis_result.get(key, {}))
        for title, icon, key in categories
    )

    # Check if any sections were created
    if not sections_html.strip():
        return '''
<div style="padding: 40px; text-align: center; background: #fff3cd !important; border-radius: 8px; border: 1px solid #ffc107;">
<p style="color: #856404 !important; font-size: 14px; margin: 0;">
⚠️ No semantic color categories were detected. The colors may not have enough context data (elements, CSS properties) for classification.
</p>
</div>
'''

    # Summary
    summary = analysis_result.get("summary", {})
    if not isinstance(summary, dict):
        summary = {}
    total_analyzed = escape(str(summary.get("total_colors_analyzed", 0)))
    brand_found = escape(str(summary.get("brand_colors_found", 0)))
    hierarchy_mark = "βœ“" if summary.get("has_clear_hierarchy") else "βœ—"
    # ``method`` may be missing or None; never call .upper() on a non-string.
    method_label = escape(str(summary.get("method") or "rule-based").upper())

    summary_html = f'''
<div class="sem-summary-section">
<h3 class="sem-summary-title">πŸ“ˆ Analysis Summary</h3>
<div class="sem-summary-stats">
<div class="sem-stat">
<span class="sem-stat-value">{total_analyzed}</span>
<span class="sem-stat-label">Colors Analyzed</span>
</div>
<div class="sem-stat">
<span class="sem-stat-value">{brand_found}</span>
<span class="sem-stat-label">Brand Colors</span>
</div>
<div class="sem-stat">
<span class="sem-stat-value">{hierarchy_mark}</span>
<span class="sem-stat-label">Clear Hierarchy</span>
</div>
<div class="sem-stat">
<span class="sem-stat-value">{method_label}</span>
<span class="sem-stat-label">Analysis Method</span>
</div>
</div>
</div>
'''

    page = f'''
<style>
.sem-preview {{
font-family: system-ui, -apple-system, sans-serif;
padding: 20px;
background: #f5f5f5 !important;
border-radius: 12px;
}}

.sem-category-section {{
margin-bottom: 24px;
background: #ffffff !important;
border-radius: 8px;
padding: 16px;
border: 1px solid #d0d0d0 !important;
}}

.sem-category-title {{
font-size: 16px;
font-weight: 700;
color: #1a1a1a !important;
margin: 0 0 16px 0;
padding-bottom: 8px;
border-bottom: 2px solid #e0e0e0 !important;
}}

.sem-color-grid {{
display: grid;
grid-template-columns: repeat(auto-fill, minmax(140px, 1fr));
gap: 12px;
}}

.sem-color-card {{
background: #f0f0f0 !important;
border-radius: 8px;
overflow: hidden;
border: 1px solid #d0d0d0 !important;
}}

.sem-color-swatch {{
height: 80px;
display: flex;
align-items: center;
justify-content: center;
}}

.sem-hex-label {{
font-family: 'SF Mono', Monaco, monospace;
font-size: 12px;
font-weight: 600;
text-shadow: 0 1px 2px rgba(0,0,0,0.3);
}}

.sem-color-details {{
padding: 10px;
text-align: center;
background: #ffffff !important;
}}

.sem-role-name {{
font-size: 12px;
font-weight: 600;
color: #1a1a1a !important;
margin-bottom: 4px;
}}

.sem-preview .confidence {{
font-size: 10px;
padding: 2px 8px;
border-radius: 10px;
font-weight: 500;
display: inline-block;
}}

.sem-preview .confidence.high {{
background: #dcfce7 !important;
color: #166534 !important;
}}

.sem-preview .confidence.medium {{
background: #fef9c3 !important;
color: #854d0e !important;
}}

.sem-preview .confidence.low {{
background: #fee2e2 !important;
color: #991b1b !important;
}}

.sem-summary-section {{
background: #ffffff !important;
border-radius: 8px;
padding: 16px;
border: 1px solid #d0d0d0 !important;
}}

.sem-summary-title {{
font-size: 16px;
font-weight: 700;
color: #1a1a1a !important;
margin: 0 0 16px 0;
}}

.sem-summary-stats {{
display: grid;
grid-template-columns: repeat(4, 1fr);
gap: 16px;
}}

.sem-stat {{
text-align: center;
padding: 12px;
background: #f0f0f0 !important;
border-radius: 8px;
}}

.sem-stat-value {{
display: block;
font-size: 24px;
font-weight: 700;
color: #1a1a1a !important;
}}

.sem-stat-label {{
display: block;
font-size: 11px;
color: #555 !important;
margin-top: 4px;
}}
</style>

<div class="sem-preview">
{sections_html}
{summary_html}
</div>
'''

    return page