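# prompt-compiler-api/src/knowledge/mapping_engine.py
#
# NOTE: the lines below are residue from the Hugging Face file-viewer page
# this source was copied from; they are not part of the module itself.
#   - avatar alt text: "JairoDanielMT's picture"
#   - commit message:  "Upload folder using huggingface_hub"
#   - commit:          4ef6c2b (verified)
#   - file size:       3.43 kB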
import difflib
from typing import List, Optional, Dict, Tuple
class MappingCandidate:
    """A scored pairing of one source entity with one target entity.

    Instances are plain attribute holders; equality and hashing remain
    identity-based (the default ``object`` behaviour).
    """

    def __init__(self, source_id: str, target_id: str, score: float, matched_strategies: List[str]):
        """Store the candidate's fields as public attributes.

        Args:
            source_id: Identifier of the source entity being resolved.
            target_id: Identifier of the target entity it was compared to.
            score: Similarity score assigned to this pairing.
            matched_strategies: Labels of the scoring strategies that fired
                for this pairing. The list is stored by reference, not copied.
        """
        self.source_id = source_id
        self.target_id = target_id
        self.score = score
        self.matched_strategies = matched_strategies

    def __repr__(self) -> str:
        """Return a constructor-style representation for logs and debugging."""
        return (
            f"{type(self).__name__}("
            f"source_id={self.source_id!r}, "
            f"target_id={self.target_id!r}, "
            f"score={self.score!r}, "
            f"matched_strategies={self.matched_strategies!r})"
        )
class MappingEngine:
    """
    Resolves Source Entities to Target Entities using similarity scoring.

    Score = NAME_WEIGHT      * similarity(source name, target name)
          + ALIAS_WEIGHT     * best similarity(source alias, target name)
          + FRANCHISE_WEIGHT * best similarity(source franchise, target copyright)
          + EXACT_MATCH_BONUS when the normalised names are identical,
    capped at 1.0. Each similarity is a case-insensitive
    ``difflib.SequenceMatcher`` ratio in the range [0, 1].
    """

    # Contribution of each component to the total score.
    NAME_WEIGHT = 0.4
    ALIAS_WEIGHT = 0.3
    FRANCHISE_WEIGHT = 0.3
    # Extra score added when the source name (spaces -> underscores) equals the target name.
    EXACT_MATCH_BONUS = 0.2
    # A component similarity must exceed this to be reported as a matched strategy.
    STRATEGY_THRESHOLD = 0.8
    # A target's total score must exceed this to be returned by ``resolve``.
    min_score = 0.4

    def __init__(self, min_score: Optional[float] = None):
        """Create an engine.

        Args:
            min_score: Optional override for the candidate cut-off used by
                ``resolve``. When omitted the class default (0.4) applies.
        """
        if min_score is not None:
            self.min_score = min_score

    def string_similarity(self, a: str, b: str) -> float:
        """Return the case-insensitive similarity ratio of ``a`` and ``b``.

        Returns 0.0 when either argument is empty or ``None``.
        """
        if not a or not b:
            return 0.0
        return difflib.SequenceMatcher(None, a.lower(), b.lower()).ratio()

    def calculate_score(
        self,
        source_name: str,
        source_aliases: List[str],
        source_franchise: str,
        target_name: str,
        target_copyrights: List[str]
    ) -> Tuple[float, List[str]]:
        """Score how well one source entity matches one target entity.

        Args:
            source_name: Name of the source entity.
            source_aliases: Alternative names of the source entity; ``None``
                is treated as no aliases.
            source_franchise: Franchise string of the source entity.
            target_name: Name of the target entity.
            target_copyrights: Copyright strings of the target entity;
                ``None`` is treated as no copyrights.

        Returns:
            ``(score, strategies)`` where ``score`` is capped at 1.0 and
            ``strategies`` lists which of ``"name_match"``, ``"alias_match"``,
            ``"franchise_match"`` and ``"exact_match"`` fired, in that order.
        """
        strategies: List[str] = []

        # 1. Name similarity.
        name_sim = self.string_similarity(source_name, target_name)
        if name_sim > self.STRATEGY_THRESHOLD:
            strategies.append("name_match")

        # 2. Best alias similarity against the target name.
        best_alias_sim = max(
            (self.string_similarity(alias, target_name) for alias in source_aliases or ()),
            default=0.0,
        )
        if best_alias_sim > self.STRATEGY_THRESHOLD:
            strategies.append("alias_match")

        # 3. Best franchise similarity against any of the target's copyrights.
        best_franchise_sim = max(
            (self.string_similarity(source_franchise, tag) for tag in target_copyrights or ()),
            default=0.0,
        )
        if best_franchise_sim > self.STRATEGY_THRESHOLD:
            strategies.append("franchise_match")

        total_score = (
            name_sim * self.NAME_WEIGHT
            + best_alias_sim * self.ALIAS_WEIGHT
            + best_franchise_sim * self.FRANCHISE_WEIGHT
        )

        # Boost exact matches. Both names must be non-empty: two missing names
        # are not evidence of a match, and None has no .lower().
        if (
            source_name
            and target_name
            and source_name.lower().replace(" ", "_") == target_name.lower()
        ):
            total_score += self.EXACT_MATCH_BONUS
            strategies.append("exact_match")

        return min(total_score, 1.0), strategies

    def resolve(
        self,
        source_id: str,
        source_name: str,
        source_aliases: List[str],
        source_franchise: str,
        target_db: List[Dict]
    ) -> "List[MappingCandidate]":
        """Rank the targets in ``target_db`` that plausibly match a source entity.

        Args:
            source_id: Identifier copied onto every returned candidate.
            source_name: Name of the source entity.
            source_aliases: Alternative names of the source entity.
            source_franchise: Franchise string of the source entity.
            target_db: Target records. Each must have a ``"name"`` key, and an
                ``"id"`` key if it qualifies as a candidate; ``"copyrights"``
                is optional and treated as empty when missing.

        Returns:
            Candidates whose score exceeds ``self.min_score``, sorted by score
            descending (ties keep their ``target_db`` order).

        Raises:
            KeyError: If a record lacks ``"name"``, or a qualifying record
                lacks ``"id"``.
        """
        candidates = []
        for target in target_db:
            score, strategies = self.calculate_score(
                source_name=source_name,
                source_aliases=source_aliases,
                source_franchise=source_franchise,
                target_name=target["name"],
                # A record without copyright data should not abort resolution.
                target_copyrights=target.get("copyrights"),
            )
            if score > self.min_score:
                candidates.append(MappingCandidate(
                    source_id=source_id,
                    target_id=target["id"],
                    score=score,
                    matched_strategies=strategies,
                ))

        # Sort by score descending; list.sort is stable, like sorted().
        candidates.sort(key=lambda candidate: candidate.score, reverse=True)
        return candidates