# pawmap/core/matcher.py
# Repository page residue: "Sarolanda's picture" · commit "add new ui" (1b27dd6)
"""
matcher.py — Cosine similarity para identificar se dois avistamentos são do mesmo animal.
Threshold padrão: 0.80 (ajustável via MATCH_THRESHOLD env var).
"""
import os
from typing import Optional, Tuple
import numpy as np
# Minimum cosine similarity for two sightings to count as the same animal.
# Read once at import time; the MATCH_THRESHOLD env var overrides the 0.80 default.
THRESHOLD = float(os.getenv("MATCH_THRESHOLD", "0.80"))
class AnimalMatcher:
    """Match a new sighting embedding against known animals by cosine similarity."""

    def find_match(
        self,
        new_embedding: list,
        candidates: list[dict],
        *,
        threshold: Optional[float] = None,
    ) -> Optional[Tuple[int, float]]:
        """
        Compare new_embedding with the embedding of every candidate.

        Args:
            new_embedding: embedding of the new sighting (list of floats or
                a NumPy array).
            candidates: list of dicts with keys 'id' and 'embedding'.
                Candidates whose embedding is missing, empty, of a different
                dimension, or yields a NaN score are ignored.
            threshold: minimum score for a match. Defaults to the
                module-level THRESHOLD when omitted.

        Returns:
            (animal_id, score) of the best-scoring candidate when its score is
            >= threshold, otherwise None. On ties the earliest candidate wins.
        """
        # max() keeps the first maximal element, matching a strict ">" scan.
        best = max(
            self._score_candidates(new_embedding, candidates),
            key=lambda pair: pair[1],
            default=None,
        )
        if best is None:
            # Nothing could be scored: never report a match with id None.
            return None
        if threshold is None:
            threshold = THRESHOLD
        if best[1] < threshold:
            return None
        return best

    def find_top_matches(
        self,
        new_embedding: list,
        candidates: list[dict],
        top_n: int = 3,
    ) -> list[dict]:
        """
        Return the top_n most similar animals (no minimum threshold),
        ordered by descending score.

        Each item is {'id': ..., 'score': float}. Candidates that cannot be
        scored (missing, empty or wrong-dimension embedding, NaN score) are
        left out. A non-positive top_n yields an empty list.
        """
        if top_n <= 0:
            # A negative slice bound would silently drop items from the end.
            return []
        ranked = sorted(
            self._score_candidates(new_embedding, candidates),
            key=lambda pair: pair[1],
            reverse=True,  # stable: equal scores keep candidate order
        )
        return [
            {"id": animal_id, "score": score}
            for animal_id, score in ranked[:top_n]
        ]

    def _score_candidates(
        self,
        new_embedding,
        candidates,
    ) -> list[Tuple[int, float]]:
        """Return (id, score) for every candidate that can be scored, in input order."""
        if not candidates:
            return []
        query = self._to_vector(new_embedding)
        if query is None:
            return []
        scored = []
        for animal in candidates:
            vec = self._to_vector(animal.get("embedding"))
            # A single malformed stored embedding must not abort the search.
            if vec is None or vec.shape != query.shape:
                continue
            score = self._cosine(query, vec)
            if np.isnan(score):
                # NaN compares false with everything and would corrupt ranking.
                continue
            scored.append((animal["id"], score))
        return scored

    @staticmethod
    def _to_vector(embedding) -> Optional[np.ndarray]:
        """Return embedding as a flat float32 vector, or None if missing/empty."""
        if embedding is None:
            return None
        # Explicit size check: truth-testing a multi-element ndarray raises.
        vec = np.asarray(embedding, dtype=np.float32).ravel()
        return vec if vec.size else None

    @staticmethod
    def _cosine(a: np.ndarray, b: np.ndarray) -> float:
        """Return the cosine similarity of a and b; 0.0 if either has zero norm."""
        norm_a = np.linalg.norm(a)
        norm_b = np.linalg.norm(b)
        if norm_a == 0.0 or norm_b == 0.0:
            # Avoid dividing by zero for all-zero vectors.
            return 0.0
        return float(np.dot(a, b) / (norm_a * norm_b))