Upload extractor.py
Browse files- agents/extractor.py +78 -5
agents/extractor.py
CHANGED
|
@@ -68,13 +68,16 @@ class TokenExtractor:
|
|
| 68 |
self.spacing: dict[str, SpacingToken] = {}
|
| 69 |
self.radius: dict[str, RadiusToken] = {}
|
| 70 |
self.shadows: dict[str, ShadowToken] = {}
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
| 72 |
# CSS Variables collection
|
| 73 |
self.css_variables: dict[str, str] = {}
|
| 74 |
-
|
| 75 |
# Font tracking
|
| 76 |
self.font_families: dict[str, FontFamily] = {}
|
| 77 |
-
|
| 78 |
# Statistics
|
| 79 |
self.total_elements = 0
|
| 80 |
self.errors: list[str] = []
|
|
@@ -258,7 +261,70 @@ class TokenExtractor:
|
|
| 258 |
""")
|
| 259 |
|
| 260 |
return styles_data
|
| 261 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
async def _extract_css_variables(self, page: Page) -> dict:
|
| 263 |
"""
|
| 264 |
Extract CSS custom properties (variables) from :root and stylesheets.
|
|
@@ -1038,7 +1104,14 @@ class TokenExtractor:
|
|
| 1038 |
self._aggregate_colors(page_colors)
|
| 1039 |
except Exception as e:
|
| 1040 |
self.warnings.append(f"Page scan failed: {str(e)}")
|
| 1041 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1042 |
# =========================================================
|
| 1043 |
# Log extraction results for this page
|
| 1044 |
# =========================================================
|
|
|
|
| 68 |
self.spacing: dict[str, SpacingToken] = {}
|
| 69 |
self.radius: dict[str, RadiusToken] = {}
|
| 70 |
self.shadows: dict[str, ShadowToken] = {}
|
| 71 |
+
|
| 72 |
+
# Foreground-background pairs extracted from actual DOM elements
|
| 73 |
+
self.fg_bg_pairs: list[dict] = []
|
| 74 |
+
|
| 75 |
# CSS Variables collection
|
| 76 |
self.css_variables: dict[str, str] = {}
|
| 77 |
+
|
| 78 |
# Font tracking
|
| 79 |
self.font_families: dict[str, FontFamily] = {}
|
| 80 |
+
|
| 81 |
# Statistics
|
| 82 |
self.total_elements = 0
|
| 83 |
self.errors: list[str] = []
|
|
|
|
| 261 |
""")
|
| 262 |
|
| 263 |
return styles_data
|
| 264 |
+
|
| 265 |
+
async def _extract_fg_bg_pairs(self, page: Page) -> list[dict]:
    """
    Extract actual foreground-background color pairs from visible DOM elements.

    For each element that is not ``display: none`` / ``visibility: hidden``
    and has a non-transparent text color, walk up the ancestor chain
    (including the root ``<html>`` element) to find the nearest
    non-transparent background color. This yields real foreground/background
    pairs so WCAG AA checks can be run on them instead of only comparing
    every color against white/black.

    Args:
        page: Page object exposing an awaitable ``evaluate(script)``; the
            script below is executed in the page's JavaScript context.

    Returns:
        A list of dicts with the keys ``foreground`` and ``background``
        (``#rrggbb`` strings) and ``element`` (lower-cased tag name of the
        first element seen with that pair). Each foreground/background
        combination appears once. An empty list is returned when the
        script yields nothing.
    """
    # Raw string so the JavaScript regex can be written with single
    # backslashes instead of Python-escaped double backslashes.
    pairs = await page.evaluate(r"""
        () => {
            const pairs = [];
            const seen = new Set();

            // Parse a computed rgb()/rgba() value into channels.
            // Returns null when the value is not in that comma form.
            function parseColor(value) {
                if (!value || value === 'transparent') return null;
                const match = value.match(
                    /rgba?\((\d+),\s*(\d+),\s*(\d+)(?:,\s*([\d.]+))?/
                );
                if (!match) return null;
                return {
                    r: parseInt(match[1], 10),
                    g: parseInt(match[2], 10),
                    b: parseInt(match[3], 10),
                    // rgb() carries no alpha component: treat as opaque.
                    a: match[4] === undefined ? 1 : parseFloat(match[4]),
                };
            }

            // True for empty values, the 'transparent' keyword and any
            // rgba() value whose alpha is 0 (not just 'rgba(0, 0, 0, 0)').
            function isTransparent(value) {
                if (!value || value === 'transparent') return true;
                const color = parseColor(value);
                return color !== null && color.a === 0;
            }

            // Convert a computed color to '#rrggbb'; null when it is
            // transparent or cannot be parsed. Partially transparent
            // colors are reported by their RGB channels only (no
            // compositing against what lies underneath).
            function rgbToHex(value) {
                const color = parseColor(value);
                if (!color || color.a === 0) return null;
                return '#' + [color.r, color.g, color.b]
                    .map(c => c.toString(16).padStart(2, '0'))
                    .join('');
            }

            function getEffectiveBackground(el) {
                // Walk up to and including <html>: its parentElement is
                // null, which ends the loop. Stopping earlier would miss
                // a background declared on the root element.
                for (let current = el; current; current = current.parentElement) {
                    const bg = window.getComputedStyle(current).backgroundColor;
                    if (!isTransparent(bg)) {
                        return rgbToHex(bg);
                    }
                }
                return '#ffffff';  // default page background
            }

            const elements = document.querySelectorAll('*');
            elements.forEach(el => {
                const styles = window.getComputedStyle(el);
                if (styles.display === 'none' || styles.visibility === 'hidden') return;

                const fg = rgbToHex(styles.color);
                if (!fg) return;

                const bg = getEffectiveBackground(el);
                if (!bg) return;

                // Report every distinct combination only once.
                const key = fg + '|' + bg;
                if (seen.has(key)) return;
                seen.add(key);

                pairs.push({
                    foreground: fg,
                    background: bg,
                    element: el.tagName.toLowerCase(),
                });
            });

            return pairs;
        }
    """)
    return pairs or []
|
| 327 |
+
|
| 328 |
async def _extract_css_variables(self, page: Page) -> dict:
|
| 329 |
"""
|
| 330 |
Extract CSS custom properties (variables) from :root and stylesheets.
|
|
|
|
| 1104 |
self._aggregate_colors(page_colors)
|
| 1105 |
except Exception as e:
|
| 1106 |
self.warnings.append(f"Page scan failed: {str(e)}")
|
| 1107 |
+
|
| 1108 |
+
# 8. Extract foreground-background color pairs for real AA checks
|
| 1109 |
+
try:
|
| 1110 |
+
fg_bg = await self._extract_fg_bg_pairs(page)
|
| 1111 |
+
self.fg_bg_pairs.extend(fg_bg)
|
| 1112 |
+
except Exception as e:
|
| 1113 |
+
self.warnings.append(f"FG/BG pair extraction failed: {str(e)}")
|
| 1114 |
+
|
| 1115 |
# =========================================================
|
| 1116 |
# Log extraction results for this page
|
| 1117 |
# =========================================================
|