Spaces:

build-small-hackathon
/

pawmap

Running

App Files Files Community

pawmap / core /matcher.py

Sarolanda

add new ui

1b27dd6 6 days ago

raw

history blame contribute delete

2.45 kB

	"""
	matcher.py — Cosine similarity para identificar se dois avistamentos são do mesmo animal.

	Threshold padrão: 0.80 (ajustável via MATCH_THRESHOLD env var).
	"""
	import os
	from typing import Optional, Tuple

	import numpy as np

	# Minimum cosine similarity for AnimalMatcher.find_match to accept a match.
	# Read once at import time from the MATCH_THRESHOLD environment variable;
	# falls back to 0.80 when the variable is not set.
	THRESHOLD = float(os.getenv("MATCH_THRESHOLD", "0.80"))


	class AnimalMatcher:
	    """Compare sighting embeddings by cosine similarity to spot the same animal."""

	    def find_match(
	        self,
	        new_embedding: list,
	        candidates: list[dict],
	        threshold: Optional[float] = None,
	    ) -> Optional[Tuple[int, float]]:
	        """Return the most similar candidate if it reaches the threshold.

	        Args:
	            new_embedding: Embedding of the new sighting (list of floats or
	                a NumPy array).
	            candidates: Dicts with keys 'id' and 'embedding'. Candidates
	                whose embedding is missing or empty are skipped.
	            threshold: Minimum cosine similarity required for a match. When
	                None, the module-level THRESHOLD is used.

	        Returns:
	            ``(animal_id, score)`` for the highest-scoring candidate when its
	            score is >= the threshold, otherwise None. On equal scores the
	            earliest candidate wins.

	        Raises:
	            ValueError: If a candidate embedding and ``new_embedding`` have
	                different lengths (raised by ``np.dot``).
	        """
	        limit = THRESHOLD if threshold is None else threshold

	        best_animal: Optional[dict] = None
	        # Start below any real score so that a threshold <= 0 still yields
	        # an actual candidate, never a (None, 0.0) placeholder.
	        best_score = float("-inf")
	        for animal, score in self._score_candidates(new_embedding, candidates):
	            if score > best_score:
	                best_animal, best_score = animal, score

	        if best_animal is None or best_score < limit:
	            return None
	        return best_animal["id"], best_score

	    def find_top_matches(
	        self,
	        new_embedding: list,
	        candidates: list[dict],
	        top_n: int = 3,
	    ) -> list[dict]:
	        """Return the ``top_n`` most similar candidates, best first.

	        No minimum similarity is applied. Candidates whose embedding is
	        missing or empty are skipped.

	        Args:
	            new_embedding: Embedding of the new sighting (list of floats or
	                a NumPy array).
	            candidates: Dicts with keys 'id' and 'embedding'.
	            top_n: Maximum number of results. Zero or a negative value
	                yields an empty list.

	        Returns:
	            A list of ``{'id': ..., 'score': float}`` dicts sorted by score
	            in descending order; candidates with equal scores keep their
	            input order.
	        """
	        # A negative slice bound would drop items from the end instead of
	        # limiting the result, so treat every non-positive limit as "none".
	        # (None is left alone: slicing with None returns everything.)
	        if top_n is not None and top_n <= 0:
	            return []

	        scores = [
	            {"id": animal["id"], "score": score}
	            for animal, score in self._score_candidates(new_embedding, candidates)
	        ]
	        scores.sort(key=lambda item: item["score"], reverse=True)
	        return scores[:top_n]

	    def _score_candidates(self, new_embedding, candidates):
	        """Yield ``(candidate, score)`` for each candidate with a usable embedding."""
	        if not candidates:
	            return
	        query = self._as_vector(new_embedding)
	        if query is None:
	            return
	        for animal in candidates:
	            vec = self._as_vector(animal.get("embedding"))
	            if vec is None:
	                continue
	            yield animal, self._cosine(query, vec)

	    @staticmethod
	    def _as_vector(embedding) -> Optional[np.ndarray]:
	        """Convert an embedding to a float32 array, or None if it is missing/empty."""
	        if embedding is None:
	            return None
	        # Truthiness of a multi-element ndarray raises ValueError, so only
	        # apply the plain "empty" test to non-array values.
	        if not isinstance(embedding, np.ndarray) and not embedding:
	            return None
	        vec = np.array(embedding, dtype=np.float32)
	        return vec if vec.size else None

	    @staticmethod
	    def _cosine(a: np.ndarray, b: np.ndarray) -> float:
	        """Return the cosine similarity of ``a`` and ``b`` (0.0 if either has zero norm)."""
	        norm_a = np.linalg.norm(a)
	        norm_b = np.linalg.norm(b)
	        # Guard against division by zero for all-zero vectors.
	        if norm_a == 0.0 or norm_b == 0.0:
	            return 0.0
	        return float(np.dot(a, b) / (norm_a * norm_b))