""" Test Verification Script Verifies that all 13 user-annotated differences are detected """ import os import sys from typing import Dict, List, Any sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) from state_schema import VisualDifference class TestVerification: """Verify detection of user-annotated differences""" # User's 13 manually annotated differences USER_ANNOTATED_DIFFERENCES = [ { "id": 1, "name": "Header height difference", "category": "Layout & Structure", "severity": "High", "description": "Header height differs between design and development" }, { "id": 2, "name": "Container width differs", "category": "Layout & Structure", "severity": "High", "description": "Main container width is different" }, { "id": 3, "name": "Checkout placement difference", "category": "Components & Elements", "severity": "High", "description": "Checkout heading is positioned differently" }, { "id": 4, "name": "Font family, size, letter spacing differs", "category": "Typography", "severity": "High", "description": "Typography properties changed" }, { "id": 5, "name": "Login link missing", "category": "Components & Elements", "severity": "High", "description": "Login link component is missing in development" }, { "id": 6, "name": "Payment component not visible", "category": "Components & Elements", "severity": "High", "description": "Payment component is hidden or not rendered" }, { "id": 7, "name": "Button size, height, color, no elevation/shadow", "category": "Buttons & Interactive", "severity": "High", "description": "Button styling differs" }, { "id": 8, "name": "Payment methods design missing", "category": "Components & Elements", "severity": "High", "description": "Payment methods section is missing" }, { "id": 9, "name": "Contact info & step number missing, font bold", "category": "Typography", "severity": "High", "description": "Contact info styling and visibility changed" }, { "id": 10, "name": "Icons missing", "category": "Components & Elements", "severity": "High", "description": "Various icons are not displayed" }, { "id": 11, "name": "Padding (left, right) differs", "category": "Spacing & Sizing", "severity": "Medium", "description": "Horizontal padding is different" }, { "id": 12, "name": "Image size different", "category": "Components & Elements", "severity": "Medium", "description": "Product images have different dimensions" }, { "id": 13, "name": "Spacing between components differs", "category": "Spacing & Sizing", "severity": "Medium", "description": "Gap between components is different" } ] def __init__(self): """Initialize verifier""" self.detected_differences: List[VisualDifference] = [] self.verification_results: Dict[str, Any] = {} def verify_detection(self, detected_differences: List[Any]) -> Dict[str, Any]: """ Verify that detected differences match user-annotated ones Args: detected_differences: List of detected differences Returns: Verification report """ self.detected_differences = detected_differences # Convert to list of dicts for easier comparison detected_list = [] for diff in detected_differences: if hasattr(diff, '__dict__'): detected_list.append(diff.__dict__) else: detected_list.append(diff) # Check each user-annotated difference results = { "total_annotated": len(self.USER_ANNOTATED_DIFFERENCES), "total_detected": len(detected_list), "matches": [], "missing": [], "extra": [] } # Find matches for annotated in self.USER_ANNOTATED_DIFFERENCES: found = False for detected in detected_list: if self._is_match(annotated, detected): results["matches"].append({ "annotated_id": annotated["id"], "annotated_name": annotated["name"], "detected_name": detected.get("name", ""), "category": annotated["category"], "severity": annotated["severity"] }) found = True break if not found: results["missing"].append({ "id": annotated["id"], "name": annotated["name"], "category": annotated["category"], "severity": annotated["severity"] }) # Find extra detections for detected in detected_list: found = False for annotated in self.USER_ANNOTATED_DIFFERENCES: if self._is_match(annotated, detected): found = True break if not found: results["extra"].append({ "name": detected.get("name", ""), "category": detected.get("category", ""), "severity": detected.get("severity", "") }) # Calculate detection rate results["detection_rate"] = (len(results["matches"]) / len(self.USER_ANNOTATED_DIFFERENCES)) * 100 self.verification_results = results return results def _is_match(self, annotated: Dict, detected: Dict) -> bool: """ Check if annotated and detected differences match Args: annotated: User-annotated difference detected: Detected difference Returns: True if they match """ # Match by name similarity annotated_name = annotated["name"].lower() detected_name = detected.get("name", "").lower() if isinstance(detected, dict) else "" # Check for keyword matches keywords = { "header": ["header", "height"], "container": ["container", "width"], "checkout": ["checkout", "placement"], "font": ["font", "typography"], "login": ["login", "link"], "payment": ["payment"], "button": ["button", "size", "color", "shadow", "elevation"], "contact": ["contact", "info"], "icon": ["icon"], "padding": ["padding"], "image": ["image", "size"], "spacing": ["spacing", "gap", "component"] } for key, terms in keywords.items(): if any(term in annotated_name for term in terms): if any(term in detected_name for term in terms): return True # Direct name match if annotated_name == detected_name: return True # Category and severity match if (annotated.get("category") == detected.get("category") and annotated.get("severity") == detected.get("severity")): # Check if description contains similar keywords annotated_desc = annotated.get("description", "").lower() detected_desc = detected.get("description", "").lower() if isinstance(detected, dict) else "" if any(word in detected_desc for word in annotated_desc.split()): return True return False def print_verification_report(self): """Print verification report""" if not self.verification_results: print("No verification results available") return results = self.verification_results print("\n" + "="*70) print("šŸ” VERIFICATION REPORT: USER-ANNOTATED DIFFERENCES") print("="*70) print(f"\nšŸ“Š Summary:") print(f" Total Annotated Differences: {results['total_annotated']}") print(f" Total Detected Differences: {results['total_detected']}") print(f" Matched: {len(results['matches'])}") print(f" Missing: {len(results['missing'])}") print(f" Extra: {len(results['extra'])}") print(f" Detection Rate: {results['detection_rate']:.1f}%") if results['matches']: print(f"\nāœ… Matched Differences ({len(results['matches'])}):") for match in results['matches']: print(f" #{match['annotated_id']}: {match['annotated_name']}") print(f" Category: {match['category']}") print(f" Severity: {match['severity']}") if results['missing']: print(f"\nāŒ Missing Differences ({len(results['missing'])}):") for missing in results['missing']: print(f" #{missing['id']}: {missing['name']}") print(f" Category: {missing['category']}") print(f" Severity: {missing['severity']}") if results['extra']: print(f"\nāš ļø Extra Detections ({len(results['extra'])}):") for extra in results['extra']: print(f" • {extra['name']}") print(f" Category: {extra['category']}") print("\n" + "="*70) def get_verification_markdown(self) -> str: """Get verification report as markdown""" if not self.verification_results: return "No verification results available" results = self.verification_results lines = [] lines.append("# šŸ” Verification Report: User-Annotated Differences\n") lines.append("## Summary\n") lines.append(f"- **Total Annotated**: {results['total_annotated']}") lines.append(f"- **Total Detected**: {results['total_detected']}") lines.append(f"- **Matched**: {len(results['matches'])}") lines.append(f"- **Missing**: {len(results['missing'])}") lines.append(f"- **Extra**: {len(results['extra'])}") lines.append(f"- **Detection Rate**: {results['detection_rate']:.1f}%\n") if results['matches']: lines.append(f"## āœ… Matched Differences ({len(results['matches'])})\n") for match in results['matches']: lines.append(f"### #{match['annotated_id']}: {match['annotated_name']}") lines.append(f"- **Category**: {match['category']}") lines.append(f"- **Severity**: {match['severity']}\n") if results['missing']: lines.append(f"## āŒ Missing Differences ({len(results['missing'])})\n") for missing in results['missing']: lines.append(f"### #{missing['id']}: {missing['name']}") lines.append(f"- **Category**: {missing['category']}") lines.append(f"- **Severity**: {missing['severity']}\n") if results['extra']: lines.append(f"## āš ļø Extra Detections ({len(results['extra'])})\n") for extra in results['extra']: lines.append(f"- **{extra['name']}** ({extra['category']})\n") return "\n".join(lines) def verify_system(detected_differences: List[Any]) -> Dict[str, Any]: """ Convenience function to verify system detection Args: detected_differences: List of detected differences Returns: Verification results """ verifier = TestVerification() results = verifier.verify_detection(detected_differences) verifier.print_verification_report() return results