import difflib
from typing import List, Optional, Dict, Tuple


class MappingCandidate:
    """A scored pairing of one source entity with one target entity.

    Attributes are plain values supplied by the caller; no validation is
    performed here.
    """

    def __init__(self, source_id: str, target_id: str, score: float, matched_strategies: List[str]):
        self.source_id = source_id
        self.target_id = target_id
        self.score = score
        self.matched_strategies = matched_strategies

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(source_id={self.source_id!r}, "
            f"target_id={self.target_id!r}, score={self.score!r}, "
            f"matched_strategies={self.matched_strategies!r})"
        )


class MappingEngine:
    """
    Resolves Source Entities to Target Entities using similarity scoring.

    Score = NAME_WEIGHT * name_similarity
          + ALIAS_WEIGHT * best_alias_similarity
          + FRANCHISE_WEIGHT * best_franchise_similarity
          + EXACT_MATCH_BOOST (only when the names match exactly after
            lower-casing and replacing spaces in the source name with "_"),
    capped at 1.0.
    """

    # Contribution of each component to the total score.
    NAME_WEIGHT = 0.4
    ALIAS_WEIGHT = 0.3
    FRANCHISE_WEIGHT = 0.3
    # Bonus added when the normalized names are identical.
    EXACT_MATCH_BOOST = 0.2
    # A component's raw similarity must exceed this to be reported as a
    # matched strategy.
    STRATEGY_THRESHOLD = 0.8
    # Candidates must score strictly above this to be returned by resolve().
    DEFAULT_MIN_SCORE = 0.4

    def __init__(self):
        pass

    def string_similarity(self, a: Optional[str], b: Optional[str]) -> float:
        """Return the case-insensitive difflib ratio of *a* and *b*.

        Returns 0.0 when either argument is empty or ``None``.
        """
        if not a or not b:
            return 0.0
        return difflib.SequenceMatcher(None, a.lower(), b.lower()).ratio()

    def calculate_score(
        self,
        source_name: Optional[str],
        source_aliases: Optional[List[str]],
        source_franchise: Optional[str],
        target_name: Optional[str],
        target_copyrights: Optional[List[str]]
    ) -> Tuple[float, List[str]]:
        """Score how well one source entity matches one target entity.

        Args:
            source_name: Name of the source entity.
            source_aliases: Alternative source names; ``None`` is treated
                as an empty list.
            source_franchise: Franchise of the source entity.
            target_name: Name of the target entity.
            target_copyrights: Copyright tags of the target; ``None`` is
                treated as an empty list.

        Returns:
            ``(score, strategies)`` where ``score`` is in ``[0.0, 1.0]`` and
            ``strategies`` lists, in evaluation order, which of
            ``"name_match"``, ``"alias_match"``, ``"franchise_match"`` and
            ``"exact_match"`` fired.
        """
        strategies: List[str] = []
        aliases = source_aliases or []
        copyrights = target_copyrights or []

        # 1. Name Similarity (Max NAME_WEIGHT)
        name_sim = self.string_similarity(source_name, target_name)
        if name_sim > self.STRATEGY_THRESHOLD:
            strategies.append("name_match")

        # 2. Alias Similarity (Max ALIAS_WEIGHT) -- best alias wins.
        best_alias_sim = max(
            (self.string_similarity(alias, target_name) for alias in aliases),
            default=0.0,
        )
        if best_alias_sim > self.STRATEGY_THRESHOLD:
            strategies.append("alias_match")

        # 3. Franchise/Copyright Match (Max FRANCHISE_WEIGHT) -- best tag wins.
        # Argument order is kept (franchise first) because SequenceMatcher's
        # ratio is not guaranteed to be symmetric.
        best_franchise_sim = max(
            (self.string_similarity(source_franchise, tag) for tag in copyrights),
            default=0.0,
        )
        if best_franchise_sim > self.STRATEGY_THRESHOLD:
            strategies.append("franchise_match")

        total_score = (
            name_sim * self.NAME_WEIGHT
            + best_alias_sim * self.ALIAS_WEIGHT
            + best_franchise_sim * self.FRANCHISE_WEIGHT
        )

        # Boost exact matches. Both names must be non-empty: two empty (or
        # missing) names are not evidence of a match and must not be boosted.
        if (
            source_name
            and target_name
            and source_name.lower().replace(" ", "_") == target_name.lower()
        ):
            total_score += self.EXACT_MATCH_BOOST
            strategies.append("exact_match")

        return min(total_score, 1.0), strategies

    def resolve(
        self,
        source_id: str,
        source_name: str,
        source_aliases: List[str],
        source_franchise: str,
        target_db: List[Dict],
        min_score: float = DEFAULT_MIN_SCORE
    ) -> List[MappingCandidate]:
        """Return candidates from *target_db* for one source entity.

        Args:
            source_id: Identifier copied onto every returned candidate.
            source_name: Name of the source entity.
            source_aliases: Alternative names of the source entity.
            source_franchise: Franchise of the source entity.
            target_db: Dicts that must contain ``"id"`` and ``"name"``;
                ``"copyrights"`` is optional and defaults to no tags.
            min_score: Only candidates scoring strictly above this value are
                kept. Defaults to 0.4.

        Returns:
            Candidates sorted by score, highest first. Ties keep their
            ``target_db`` order because the sort is stable.

        Raises:
            KeyError: If a target lacks ``"id"`` or ``"name"``.
        """
        candidates: List[MappingCandidate] = []
        for target in target_db:
            score, strategies = self.calculate_score(
                source_name=source_name,
                source_aliases=source_aliases,
                source_franchise=source_franchise,
                target_name=target["name"],
                target_copyrights=target.get("copyrights"),
            )
            if score > min_score:
                candidates.append(MappingCandidate(
                    source_id=source_id,
                    target_id=target["id"],
                    score=score,
                    matched_strategies=strategies,
                ))

        # Sort by score descending
        candidates.sort(key=lambda candidate: candidate.score, reverse=True)
        return candidates