Spaces:
Sleeping
Sleeping
| import difflib | |
| from typing import List, Optional, Dict, Tuple | |
class MappingCandidate:
    """
    One proposed mapping from a source entity to a target entity.

    Attributes are stored exactly as passed to the constructor:
    source_id / target_id identify the two entities, score is the
    similarity score assigned to the pair, and matched_strategies lists
    the names of the strategies that fired for it.
    """

    def __init__(self, source_id: str, target_id: str, score: float, matched_strategies: List[str]):
        self.source_id = source_id
        self.target_id = target_id
        self.score = score
        # The list is stored by reference, not copied.
        self.matched_strategies = matched_strategies

    def __repr__(self) -> str:
        # Readable representation so candidate lists are inspectable in
        # logs and debuggers instead of showing bare object addresses.
        return (
            f"{type(self).__name__}("
            f"source_id={self.source_id!r}, "
            f"target_id={self.target_id!r}, "
            f"score={self.score!r}, "
            f"matched_strategies={self.matched_strategies!r})"
        )
| class MappingEngine: | |
| """ | |
| Resolves Source Entities to Target Entities using similarity scoring. | |
| Score = name_similarity + alias_similarity + copyright_match + franchise_match. | |
| """ | |
| def __init__(self): | |
| pass | |
| def string_similarity(self, a: str, b: str) -> float: | |
| if not a or not b: | |
| return 0.0 | |
| return difflib.SequenceMatcher(None, a.lower(), b.lower()).ratio() | |
| def calculate_score( | |
| self, | |
| source_name: str, | |
| source_aliases: List[str], | |
| source_franchise: str, | |
| target_name: str, | |
| target_copyrights: List[str] | |
| ) -> Tuple[float, List[str]]: | |
| strategies = [] | |
| # 1. Name Similarity (Max 0.4) | |
| name_sim = self.string_similarity(source_name, target_name) | |
| score_name = name_sim * 0.4 | |
| if name_sim > 0.8: | |
| strategies.append("name_match") | |
| # 2. Alias Similarity (Max 0.3) | |
| best_alias_sim = 0.0 | |
| for alias in source_aliases: | |
| sim = self.string_similarity(alias, target_name) | |
| if sim > best_alias_sim: | |
| best_alias_sim = sim | |
| score_alias = best_alias_sim * 0.3 | |
| if best_alias_sim > 0.8: | |
| strategies.append("alias_match") | |
| # 3. Franchise/Copyright Match (Max 0.3) | |
| best_franchise_sim = 0.0 | |
| for copy_tag in target_copyrights: | |
| sim = self.string_similarity(source_franchise, copy_tag) | |
| if sim > best_franchise_sim: | |
| best_franchise_sim = sim | |
| score_franchise = best_franchise_sim * 0.3 | |
| if best_franchise_sim > 0.8: | |
| strategies.append("franchise_match") | |
| total_score = score_name + score_alias + score_franchise | |
| # Boost exact matches | |
| if source_name.lower().replace(" ", "_") == target_name.lower(): | |
| total_score += 0.2 | |
| strategies.append("exact_match") | |
| return min(total_score, 1.0), strategies | |
| def resolve( | |
| self, | |
| source_id: str, | |
| source_name: str, | |
| source_aliases: List[str], | |
| source_franchise: str, | |
| target_db: List[Dict] | |
| ) -> List[MappingCandidate]: | |
| candidates = [] | |
| for target in target_db: | |
| score, strategies = self.calculate_score( | |
| source_name=source_name, | |
| source_aliases=source_aliases, | |
| source_franchise=source_franchise, | |
| target_name=target["name"], | |
| target_copyrights=target["copyrights"] | |
| ) | |
| if score > 0.4: | |
| candidates.append(MappingCandidate( | |
| source_id=source_id, | |
| target_id=target["id"], | |
| score=score, | |
| matched_strategies=strategies | |
| )) | |
| # Sort by score descending | |
| return sorted(candidates, key=lambda x: x.score, reverse=True) | |