Spaces:
Running
Running
"""
matcher.py — Cosine similarity used to decide whether two sightings are of the same animal.
Default threshold: 0.80 (adjustable via the MATCH_THRESHOLD env var).
"""
| import os | |
| from typing import Optional, Tuple | |
| import numpy as np | |
# Minimum cosine similarity for two embeddings to be reported as a match.
# Read once at import time; an unset or blank MATCH_THRESHOLD falls back to 0.80.
_raw_threshold = os.environ.get("MATCH_THRESHOLD", "").strip() or "0.80"
try:
    THRESHOLD = float(_raw_threshold)
except ValueError:
    # Name the variable in the error so a bad deployment setting is easy to trace.
    raise ValueError(
        f"MATCH_THRESHOLD must be a number, got {_raw_threshold!r}"
    ) from None
class AnimalMatcher:
    """Compare a sighting embedding against known animals using cosine similarity."""

    def find_match(
        self,
        new_embedding: list,
        candidates: list[dict],
    ) -> Optional[Tuple[int, float]]:
        """Return the best candidate whose similarity reaches the threshold.

        Args:
            new_embedding: Embedding of the new sighting.
            candidates: Dicts with keys 'id' and 'embedding' (list[float]).
                Candidates with a missing or empty embedding are skipped.

        Returns:
            ``(animal_id, score)`` for the highest-scoring candidate when that
            score is at least the module-level ``THRESHOLD``; ``None`` when
            the inputs are empty or no candidate qualifies.
        """
        if not candidates or not new_embedding:
            return None

        new_vec = np.array(new_embedding, dtype=np.float32)
        best_id: Optional[int] = None
        best_score: float = 0.0  # only strictly positive similarities can win
        for animal in candidates:
            emb = animal.get("embedding")
            if not emb:
                continue
            score = self._cosine(new_vec, np.array(emb, dtype=np.float32))
            if score > best_score:
                best_score = score
                best_id = animal["id"]

        # Without the best_id check, a threshold <= 0 would yield (None, 0.0)
        # when no candidate was ever selected.
        if best_id is None or best_score < THRESHOLD:
            return None
        return best_id, best_score

    def find_top_matches(
        self,
        new_embedding: list,
        candidates: list[dict],
        top_n: int = 3,
    ) -> list[dict]:
        """Return the ``top_n`` most similar animals, with no minimum threshold.

        Args:
            new_embedding: Embedding of the new sighting.
            candidates: Dicts with keys 'id' and 'embedding' (list[float]).
                Candidates with a missing or empty embedding are skipped.
            top_n: Maximum number of results; values <= 0 yield an empty list.

        Returns:
            A list of ``{'id': ..., 'score': float}`` dicts sorted by score,
            highest first.
        """
        # A negative top_n would otherwise slice "all but the last k" results.
        if not candidates or not new_embedding or top_n <= 0:
            return []

        new_vec = np.array(new_embedding, dtype=np.float32)
        scores = [
            {
                "id": animal["id"],
                "score": self._cosine(new_vec, np.array(emb, dtype=np.float32)),
            }
            for animal in candidates
            if (emb := animal.get("embedding"))
        ]
        scores.sort(key=lambda item: item["score"], reverse=True)
        return scores[:top_n]

    @staticmethod
    def _cosine(a: np.ndarray, b: np.ndarray) -> float:
        """Return the cosine similarity of ``a`` and ``b``.

        Returns 0.0 when either vector has zero norm, avoiding a division by
        zero.
        """
        norm_a = np.linalg.norm(a)
        norm_b = np.linalg.norm(b)
        if norm_a == 0.0 or norm_b == 0.0:
            return 0.0
        return float(np.dot(a, b) / (norm_a * norm_b))