Spaces:
Sleeping
Sleeping
| """ | |
| Test Verification Script | |
| Verifies that all 13 user-annotated differences are detected | |
| """ | |
| import os | |
| import sys | |
| from typing import Dict, List, Any | |
| sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) | |
| from state_schema import VisualDifference | |
class TestVerification:
    """Verify that detected differences cover the user-annotated ones.

    The class holds a fixed list of manually annotated differences and
    compares a list of detected differences against it using a loose,
    heuristic matcher (shared keywords in the names, identical names, or
    same category/severity plus a shared descriptive word).
    """

    # The class name starts with "Test"; tell pytest it is not a test case.
    __test__ = False

    # User's 13 manually annotated differences
    USER_ANNOTATED_DIFFERENCES = [
        {
            "id": 1,
            "name": "Header height difference",
            "category": "Layout & Structure",
            "severity": "High",
            "description": "Header height differs between design and development",
        },
        {
            "id": 2,
            "name": "Container width differs",
            "category": "Layout & Structure",
            "severity": "High",
            "description": "Main container width is different",
        },
        {
            "id": 3,
            "name": "Checkout placement difference",
            "category": "Components & Elements",
            "severity": "High",
            "description": "Checkout heading is positioned differently",
        },
        {
            "id": 4,
            "name": "Font family, size, letter spacing differs",
            "category": "Typography",
            "severity": "High",
            "description": "Typography properties changed",
        },
        {
            "id": 5,
            "name": "Login link missing",
            "category": "Components & Elements",
            "severity": "High",
            "description": "Login link component is missing in development",
        },
        {
            "id": 6,
            "name": "Payment component not visible",
            "category": "Components & Elements",
            "severity": "High",
            "description": "Payment component is hidden or not rendered",
        },
        {
            "id": 7,
            "name": "Button size, height, color, no elevation/shadow",
            "category": "Buttons & Interactive",
            "severity": "High",
            "description": "Button styling differs",
        },
        {
            "id": 8,
            "name": "Payment methods design missing",
            "category": "Components & Elements",
            "severity": "High",
            "description": "Payment methods section is missing",
        },
        {
            "id": 9,
            "name": "Contact info & step number missing, font bold",
            "category": "Typography",
            "severity": "High",
            "description": "Contact info styling and visibility changed",
        },
        {
            "id": 10,
            "name": "Icons missing",
            "category": "Components & Elements",
            "severity": "High",
            "description": "Various icons are not displayed",
        },
        {
            "id": 11,
            "name": "Padding (left, right) differs",
            "category": "Spacing & Sizing",
            "severity": "Medium",
            "description": "Horizontal padding is different",
        },
        {
            "id": 12,
            "name": "Image size different",
            "category": "Components & Elements",
            "severity": "Medium",
            "description": "Product images have different dimensions",
        },
        {
            "id": 13,
            "name": "Spacing between components differs",
            "category": "Spacing & Sizing",
            "severity": "Medium",
            "description": "Gap between components is different",
        },
    ]

    # Groups of related terms. Two names match when a single group has at
    # least one term in the annotated name and at least one in the detected
    # name (substring test on the lower-cased names).
    _KEYWORD_GROUPS = (
        ("header", "height"),
        ("container", "width"),
        ("checkout", "placement"),
        ("font", "typography"),
        ("login", "link"),
        ("payment",),
        ("button", "size", "color", "shadow", "elevation"),
        ("contact", "info"),
        ("icon",),
        ("padding",),
        ("image", "size"),
        ("spacing", "gap", "component"),
    )

    # Function words that say nothing about the difference itself; they are
    # ignored when descriptions are compared.
    _STOPWORDS = frozenset({
        "a", "an", "and", "are", "as", "at", "be", "between", "by", "for",
        "from", "in", "is", "it", "not", "of", "on", "or", "the", "to",
        "with",
    })

    # Characters stripped from both ends of a description word.
    _PUNCTUATION = ".,;:!?()[]{}\"'&/"

    def __init__(self):
        """Initialize verifier"""
        self.detected_differences: List[VisualDifference] = []
        self.verification_results: Dict[str, Any] = {}

    @staticmethod
    def _as_dict(diff: Any) -> Dict[str, Any]:
        """Return *diff* as a dict so it can be queried with ``.get``."""
        # Check for dicts first: instances of dict subclasses also carry a
        # (usually empty) ``__dict__`` that must not replace their items.
        if isinstance(diff, dict):
            return diff
        if hasattr(diff, '__dict__'):
            return diff.__dict__
        # Anything else (e.g. a plain string) is kept as a bare name so it
        # still shows up in the report instead of raising.
        return {"name": str(diff)}

    @classmethod
    def _content_words(cls, text: Any) -> set:
        """Return the lower-cased words of *text* without stopwords."""
        stripped = (
            word.strip(cls._PUNCTUATION)
            for word in str(text or "").lower().split()
        )
        return {word for word in stripped if word and word not in cls._STOPWORDS}

    def verify_detection(self, detected_differences: List[Any]) -> Dict[str, Any]:
        """
        Verify that detected differences match user-annotated ones

        Args:
            detected_differences: List of detected differences. Items may be
                dicts, objects exposing ``__dict__``, or other values (kept
                by their string form). ``None`` is treated as an empty list.

        Returns:
            Verification report with the keys ``total_annotated``,
            ``total_detected``, ``matches``, ``missing``, ``extra`` and
            ``detection_rate`` (percentage of annotated items matched).
        """
        if detected_differences is None:
            detected_differences = []
        self.detected_differences = detected_differences

        # Convert to list of dicts for easier comparison
        detected_list = [self._as_dict(diff) for diff in detected_differences]
        annotated_list = self.USER_ANNOTATED_DIFFERENCES

        results = {
            "total_annotated": len(annotated_list),
            "total_detected": len(detected_list),
            "matches": [],
            "missing": [],
            "extra": [],
        }

        # Each annotated difference is paired with the first detection that
        # matches it, or reported as missing.
        for annotated in annotated_list:
            hit = next(
                (d for d in detected_list if self._is_match(annotated, d)),
                None,
            )
            if hit is not None:
                results["matches"].append({
                    "annotated_id": annotated["id"],
                    "annotated_name": annotated["name"],
                    "detected_name": hit.get("name", ""),
                    "category": annotated["category"],
                    "severity": annotated["severity"],
                })
            else:
                results["missing"].append({
                    "id": annotated["id"],
                    "name": annotated["name"],
                    "category": annotated["category"],
                    "severity": annotated["severity"],
                })

        # A detection is "extra" when it matches no annotated difference.
        for detected in detected_list:
            if not any(self._is_match(a, detected) for a in annotated_list):
                results["extra"].append({
                    "name": detected.get("name", ""),
                    "category": detected.get("category", ""),
                    "severity": detected.get("severity", ""),
                })

        # Guard the division so an empty annotated list yields 0% rather
        # than ZeroDivisionError.
        total = len(annotated_list)
        results["detection_rate"] = (
            (len(results["matches"]) / total) * 100 if total else 0.0
        )

        self.verification_results = results
        return results

    def _is_match(self, annotated: Dict, detected: Dict) -> bool:
        """
        Check if annotated and detected differences match

        Args:
            annotated: User-annotated difference
            detected: Detected difference

        Returns:
            True if they match; always False when *detected* is not a dict.
        """
        if not isinstance(detected, dict):
            return False

        annotated_name = annotated["name"].lower()
        # ``or ""`` covers a "name" key that is present but set to None.
        detected_name = str(detected.get("name") or "").lower()

        # Keyword match: both names mention a term from the same group.
        for terms in self._KEYWORD_GROUPS:
            if (any(term in annotated_name for term in terms)
                    and any(term in detected_name for term in terms)):
                return True

        # Direct name match
        if annotated_name == detected_name:
            return True

        # Same category and severity, plus at least one shared meaningful
        # word in the descriptions. Whole words are compared so that e.g.
        # "is" does not match inside "this".
        if (annotated.get("category") == detected.get("category")
                and annotated.get("severity") == detected.get("severity")):
            annotated_words = self._content_words(annotated.get("description"))
            detected_words = self._content_words(detected.get("description"))
            if annotated_words & detected_words:
                return True

        return False

    def print_verification_report(self):
        """Print the last verification report to stdout."""
        if not self.verification_results:
            print("No verification results available")
            return

        results = self.verification_results
        print("\n" + "="*70)
        print("๐ VERIFICATION REPORT: USER-ANNOTATED DIFFERENCES")
        print("="*70)
        print("\n๐ Summary:")
        print(f" Total Annotated Differences: {results['total_annotated']}")
        print(f" Total Detected Differences: {results['total_detected']}")
        print(f" Matched: {len(results['matches'])}")
        print(f" Missing: {len(results['missing'])}")
        print(f" Extra: {len(results['extra'])}")
        print(f" Detection Rate: {results['detection_rate']:.1f}%")

        if results['matches']:
            print(f"\nโ Matched Differences ({len(results['matches'])}):")
            for match in results['matches']:
                print(f" #{match['annotated_id']}: {match['annotated_name']}")
                print(f" Category: {match['category']}")
                print(f" Severity: {match['severity']}")

        if results['missing']:
            print(f"\nโ Missing Differences ({len(results['missing'])}):")
            for missing in results['missing']:
                print(f" #{missing['id']}: {missing['name']}")
                print(f" Category: {missing['category']}")
                print(f" Severity: {missing['severity']}")

        if results['extra']:
            print(f"\nโ ๏ธ Extra Detections ({len(results['extra'])}):")
            for extra in results['extra']:
                print(f" โข {extra['name']}")
                print(f" Category: {extra['category']}")

        print("\n" + "="*70)

    def get_verification_markdown(self) -> str:
        """Return the last verification report rendered as markdown."""
        if not self.verification_results:
            return "No verification results available"

        results = self.verification_results
        lines = [
            "# ๐ Verification Report: User-Annotated Differences\n",
            "## Summary\n",
            f"- **Total Annotated**: {results['total_annotated']}",
            f"- **Total Detected**: {results['total_detected']}",
            f"- **Matched**: {len(results['matches'])}",
            f"- **Missing**: {len(results['missing'])}",
            f"- **Extra**: {len(results['extra'])}",
            f"- **Detection Rate**: {results['detection_rate']:.1f}%\n",
        ]

        if results['matches']:
            lines.append(f"## โ Matched Differences ({len(results['matches'])})\n")
            for match in results['matches']:
                lines.append(f"### #{match['annotated_id']}: {match['annotated_name']}")
                lines.append(f"- **Category**: {match['category']}")
                lines.append(f"- **Severity**: {match['severity']}\n")

        if results['missing']:
            lines.append(f"## โ Missing Differences ({len(results['missing'])})\n")
            for missing in results['missing']:
                lines.append(f"### #{missing['id']}: {missing['name']}")
                lines.append(f"- **Category**: {missing['category']}")
                lines.append(f"- **Severity**: {missing['severity']}\n")

        if results['extra']:
            lines.append(f"## โ ๏ธ Extra Detections ({len(results['extra'])})\n")
            for extra in results['extra']:
                lines.append(f"- **{extra['name']}** ({extra['category']})\n")

        return "\n".join(lines)
def verify_system(detected_differences: List[Any]) -> Dict[str, Any]:
    """
    Run a one-shot verification and print its report.

    Creates a fresh TestVerification, checks the given detections against
    the user-annotated differences, prints the report and hands the result
    dictionary back to the caller.

    Args:
        detected_differences: List of detected differences

    Returns:
        Verification results
    """
    checker = TestVerification()
    report = checker.verify_detection(detected_differences)
    checker.print_verification_report()
    return report