import json
from pathlib import Path
from typing import List, Dict

from .mapping_engine import MappingEngine

# Location of the generated Markdown report, relative to the current working
# directory. Kept as a plain string so the console message prints it verbatim.
_REPORT_PATH = "reports/MAPPING_REPORT.md"


def _percent(count: int, total: int):
    """Return ``count`` as a percentage of ``total``, or 0 when ``total`` is 0."""
    return (count / total) * 100 if total > 0 else 0


class MappingValidator:
    """Score ``MappingEngine`` against a gold dataset and write a report.

    The gold dataset is a JSON file read by :meth:`run`; each entry must
    provide the keys ``"anilist"`` and ``"danbooru"`` and may provide
    ``"franchise"``.
    """

    def __init__(self, gold_dataset_path: str):
        """Store the dataset path and build the engine under test.

        Args:
            gold_dataset_path: Path to the gold-standard JSON file.
        """
        self.gold_dataset_path = gold_dataset_path
        # NOTE(review): presumably maps display names to canonical target
        # tags; confirm against MappingEngine's ``aliases`` contract.
        self.engine = MappingEngine(
            aliases={
                "Artoria Pendragon": "saber",
                "Altria Pendragon": "saber",
                "Rin Tohsaka": "tohsaka_rin",
                "Reimu Hakurei": "hakurei_reimu",
                "Marisa Kirisame": "kirisame_marisa",
            }
        )

    def run(self):
        """Evaluate every gold entry and emit the performance report.

        For each entry the engine's best match is compared with the expected
        tag. An entry counts as a match when the ids are equal, as a false
        link when the engine returned a different id, and as unmatched when
        the engine returned nothing. All three rates are percentages of the
        total number of entries.

        Raises:
            OSError: If the gold dataset cannot be opened.
            json.JSONDecodeError: If the gold dataset is not valid JSON.
            KeyError: If an entry lacks ``"anilist"`` or ``"danbooru"``.
        """
        with open(self.gold_dataset_path, "r", encoding="utf-8") as f:
            # assumes the file holds a list of dicts -- TODO confirm schema
            gold_data = json.load(f)

        results: List[Dict] = []
        total = len(gold_data)
        matches = 0
        false_links = 0
        unmatched = 0

        for entry in gold_data:
            anilist_name = entry["anilist"]
            expected_tag = entry["danbooru"]
            franchise = entry.get("franchise")

            best_match = self.engine.get_best_match(anilist_name, franchise)
            if not best_match:
                # Unmatched entries are counted but not listed in ``results``.
                unmatched += 1
                continue

            is_correct = best_match.target_id == expected_tag
            results.append({
                "name": anilist_name,
                "expected": expected_tag,
                "actual": best_match.target_id,
                "correct": is_correct,
                "strategy": best_match.strategy,
            })
            if is_correct:
                matches += 1
            else:
                false_links += 1

        self.generate_report(
            _percent(matches, total),
            _percent(false_links, total),
            _percent(unmatched, total),
            results,
        )

    def generate_report(self, accuracy, false_link_rate, unmatched_rate,
                        results: List[Dict]):
        """Write the Markdown report and print a one-line summary.

        Args:
            accuracy: Top-1 accuracy as a percentage.
            false_link_rate: Percentage of entries linked to a wrong target.
            unmatched_rate: Percentage of entries with no match at all.
            results: Per-entry result dicts with the keys ``"name"``,
                ``"expected"``, ``"actual"``, ``"correct"`` and
                ``"strategy"``.
        """
        status = "PASSED" if accuracy > 90 else "NEEDS IMPROVEMENT"
        header = (
            "# Mapping Engine Performance Report\n"
            "\n"
            "## Metrics\n"
            f"- **Top-1 Accuracy**: {accuracy:.2f}%\n"
            f"- **False Link Rate**: {false_link_rate:.2f}%\n"
            f"- **Unmatched Rate**: {unmatched_rate:.2f}%\n"
            "\n"
            "## Analysis\n"
            "- Target Accuracy: > 90%\n"
            f"- Current Status: {status}\n"
            "\n"
            "## Top Failures (Sample)\n"
        )

        # Only the first ten incorrect links are listed to keep the report short.
        failures = [r for r in results if not r["correct"]][:10]
        failure_lines = "".join(
            f"- {failure['name']}: Expected '{failure['expected']}', "
            f"got '{failure['actual']}' ({failure['strategy']})\n"
            for failure in failures
        )

        report_file = Path(_REPORT_PATH)
        # The output directory may not exist yet (e.g. on a fresh checkout);
        # without this, write_text raises FileNotFoundError.
        report_file.parent.mkdir(parents=True, exist_ok=True)
        report_file.write_text(header + failure_lines, encoding="utf-8")
        print(f"Report generated: {_REPORT_PATH} (Accuracy: {accuracy:.2f}%)")


if __name__ == "__main__":
    validator = MappingValidator("data/validation/mapping_gold.json")
    validator.run()