Spaces:

build-small-hackathon
/

pawmap

Running

sara.mesquita

feat: improved breed prompt + dynamic breed dropdown by species

45b7139 3 days ago

8.59 kB

	"""
	ai.py — Vision analysis via the HuggingFace InferenceClient or NVIDIA NIM.

	Environment variables:
	HF_TOKEN — HuggingFace token (uses HF Serverless Inference)
	NVIDIA_API_KEY — NVIDIA NIM key (free tier at build.nvidia.com); takes precedence when both keys are set
	NVIDIA_MODEL — optional override for the model id

	HF model: meta-llama/Llama-3.2-11B-Vision-Instruct (available on HF Serverless)
	NIM model: nvidia/nemotron-3-nano-omni-30b-a3b-reasoning (Nemotron Omni via NVIDIA)
	"""
	import base64
	import io
	import json
	import logging
	import os
	import re

	import numpy as np

	# Module-level logger, named after this module.
	log = logging.getLogger(__name__)

	# Default model ids for the HuggingFace and NVIDIA NIM backends.
	_HF_MODEL = "meta-llama/Llama-3.2-11B-Vision-Instruct"
	_NIM_MODEL = "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning"  # Nemotron Omni VLM, NVIDIA NIM

	# Instruction text sent alongside every image. It asks the model for either
	# {"is_animal": false} or a single JSON object describing the dog/cat.
	PROMPT = "".join((
	    # Gate question and output rules.
	    'Examine this image carefully.\n',
	    'FIRST: Is there a dog or cat clearly visible?\n',
	    'If NO dog or cat is present, respond with exactly: {"is_animal": false}\n\n',
	    'If YES, respond with ONLY valid JSON (no markdown, no explanation).\n\n',
	    'For breed_estimate, use visual cues (coat type, ear shape, body build, snout, tail) to pick the SINGLE best match.\n\n',
	    # Closed list of dog breeds.
	    'DOG breeds to choose from (use exact spelling):\n',
	    'SRD, Labrador Retriever, Golden Retriever, Pitbull, Poodle, Shih Tzu, Rottweiler, ',
	    'German Shepherd, Bulldog, Dachshund, Chihuahua, Siberian Husky, Border Collie, ',
	    'Beagle, Boxer, Maltese, Chow Chow, Akita, Dalmatian, Doberman\n\n',
	    # Closed list of cat breeds.
	    'CAT breeds to choose from (use exact spelling):\n',
	    'Domestic Shorthair, Domestic Longhair, Siamese, Persian, Maine Coon, Bengal, ',
	    'British Shorthair, Ragdoll, Scottish Fold, Turkish Angora, Sphynx, Abyssinian\n\n',
	    # Expected JSON shape.
	    'JSON format:\n',
	    '{"is_animal": true,',
	    ' "species": "dog or cat",',
	    ' "breed_estimate": "exact name from the list above — SRD/Domestic Shorthair only if truly unidentifiable",',
	    ' "size": "small, medium or large",',
	    ' "primary_color": "main color: caramel, black, white, gray, brown, golden, orange, tabby, mixed",',
	    ' "secondary_colors": ["other visible colors, or empty list"],',
	    ' "distinctive_marks": ["notable features: e.g. white chest patch, red collar, scar, missing ear — or empty list"],',
	    ' "condition": "healthy, thin or injured",',
	    ' "description_text": "one concise English sentence describing this specific animal for identity matching"}',
	))


	class AnimalAI:
	    """Describe a dog/cat photo with a vision LLM and embed the description.

	    Backend selection happens once, in ``__init__``, from environment
	    variables:

	    * ``NVIDIA_API_KEY`` — use NVIDIA NIM through the ``openai`` client
	      (model overridable with ``NVIDIA_MODEL``).
	    * ``HF_TOKEN`` — use the HuggingFace ``InferenceClient`` (model
	      overridable with ``HF_MODEL``). Also used as a fallback when the NIM
	      backend cannot be initialised.

	    When no backend is available ``analyze_image`` returns a generic
	    fallback description flagged with ``_ai_success=False``.
	    """

	    # Width of the random stand-in vectors; sized to match the embeddings
	    # produced by the all-MiniLM-L6-v2 sentence-transformers model.
	    _EMBED_DIM = 384

	    def __init__(self):
	        """Pick a vision backend from the environment and load the text embedder."""
	        self.mode = None  # "hf" | "nim"
	        self.model = None
	        self.client = None  # OpenAI client (NIM) or InferenceClient (HF)
	        self.embedder = None

	        hf_token = os.environ.get("HF_TOKEN", "")
	        nvidia_key = os.environ.get("NVIDIA_API_KEY", "")

	        # NIM has priority; HF is tried when NIM is not configured or its
	        # client library is missing, so one missing package does not
	        # disable the AI while a usable token is available.
	        if nvidia_key:
	            self._init_nim(nvidia_key)
	        if self.client is None and hf_token:
	            self._init_hf(hf_token)
	        if self.client is None and not (nvidia_key or hf_token):
	            log.warning("Sem chave de API — IA desabilitada. Configure HF_TOKEN nos Secrets.")

	        # Best-effort: any failure (missing package, model download error)
	        # leaves ``self.embedder`` as None and get_embedding() degrades to
	        # random vectors.
	        try:
	            from sentence_transformers import SentenceTransformer
	            self.embedder = SentenceTransformer("all-MiniLM-L6-v2")
	            log.info("sentence-transformers: all-MiniLM-L6-v2")
	        except Exception as e:
	            log.warning("sentence-transformers nao carregou: %s", e)

	    def _init_nim(self, api_key):
	        """Configure the NVIDIA NIM backend; leaves ``self.client`` as None if ``openai`` is missing."""
	        try:
	            from openai import OpenAI
	        except ImportError:
	            log.warning("openai nao instalado")
	            return
	        self.mode = "nim"
	        self.model = os.environ.get("NVIDIA_MODEL") or _NIM_MODEL
	        self.client = OpenAI(
	            base_url="https://integrate.api.nvidia.com/v1",
	            api_key=api_key,
	        )
	        log.info("AI: Nemotron Omni via NVIDIA NIM (%s)", self.model)

	    def _init_hf(self, token):
	        """Configure the HuggingFace backend; leaves ``self.client`` as None if ``huggingface_hub`` is missing."""
	        try:
	            from huggingface_hub import InferenceClient
	        except ImportError:
	            log.warning("huggingface_hub nao instalado")
	            return
	        self.mode = "hf"
	        # The HF backend has its own override variable; reusing NVIDIA_MODEL
	        # here would send a NIM model id to HuggingFace.
	        self.model = os.environ.get("HF_MODEL") or _HF_MODEL
	        self.client = InferenceClient(model=self.model, token=token)
	        log.info("AI: %s via HF InferenceClient", self.model)

	    def analyze_image(self, image) -> dict:
	        """Analyse an image and return a description dict.

	        Args:
	            image: image object accepted by ``_to_b64`` (presumably a
	                ``PIL.Image.Image`` — confirm against callers).

	        Returns:
	            * ``{"is_animal": False, "_ai_success": True}`` when the model
	              explicitly reports no dog or cat;
	            * the model's JSON object with ``is_animal`` and ``_ai_success``
	              forced to ``True`` on success;
	            * the ``_fallback()`` dict (``_ai_success=False``) when no
	              backend is configured, the request fails, or the reply
	              contains no parseable JSON object.
	        """
	        if self.client is None:
	            return self._fallback()

	        try:
	            raw = self._query_model(self._to_b64(image))
	            log.info("AI resposta: %s", raw[:200])
	            result = self._parse(raw)
	        except Exception as e:
	            log.error("Erro na API de IA: %s", e)
	            return self._fallback()

	        # _parse() hands back the fallback dict when the reply had no usable
	        # JSON. Keep its _ai_success=False so get_embedding() does not embed
	        # the same generic text for every unparseable reply.
	        if result.get("_ai_success") is False:
	            return result

	        # Explicit rejection: the model saw no animal.
	        if result.get("is_animal") is False:
	            log.info("IA: nenhum animal detectado na imagem.")
	            return {"is_animal": False, "_ai_success": True}

	        result["is_animal"] = True
	        result["_ai_success"] = True
	        return result

	    def _query_model(self, img_b64: str) -> str:
	        """Send the prompt plus the base64 JPEG to the active backend and return the raw reply text."""
	        messages = [{
	            "role": "user",
	            "content": [
	                {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64," + img_b64}},
	                {"type": "text", "text": PROMPT},
	            ],
	        }]

	        if self.mode == "hf":
	            # HuggingFace InferenceClient — native multimodal chat support.
	            resp = self.client.chat_completion(
	                messages=messages,
	                max_tokens=400,
	                temperature=0.1,
	            )
	            return resp.choices[0].message.content or ""

	        # NVIDIA NIM — Nemotron Omni (reasoning model).
	        resp = self.client.chat.completions.create(
	            model=self.model,
	            messages=messages,
	            max_tokens=1024,
	            temperature=0.6,
	            top_p=0.95,
	            extra_body={
	                "chat_template_kwargs": {"enable_thinking": True},
	                "reasoning_budget": 512,
	            },
	        )
	        msg = resp.choices[0].message
	        # The answer may arrive in ``content`` or, if that is empty, in
	        # ``reasoning_content``.
	        return (msg.content or "") or (getattr(msg, "reasoning_content", "") or "")

	    def get_embedding(self, description: dict) -> list:
	        """Return a unit-length embedding (list of floats) for a description.

	        A random unit vector is returned when the description is flagged
	        ``_ai_success=False`` (to avoid false matches between fallback
	        descriptions) or when the sentence embedder failed to load.
	        Otherwise ``description_text`` is embedded, falling back to a text
	        assembled by ``_desc_text``.
	        """
	        if not description.get("_ai_success", True):
	            log.info("Fallback IA — embedding aleatorio")
	            return self._random_embedding()

	        if self.embedder is None:
	            return self._random_embedding()

	        text = description.get("description_text") or self._desc_text(description)
	        if not isinstance(text, str):
	            # The model may return a non-string value here; the encoder
	            # is given a single string.
	            text = str(text)
	        return self.embedder.encode(text, normalize_embeddings=True).tolist()

	    @staticmethod
	    def _random_embedding() -> list:
	        """Return a random unit vector of ``_EMBED_DIM`` floats."""
	        v = np.random.randn(AnimalAI._EMBED_DIM).astype(np.float32)
	        v /= np.linalg.norm(v)
	        return v.tolist()

	    @staticmethod
	    def _to_b64(image) -> str:
	        """Downscale a copy of ``image`` to fit 800x800 and return it as base64 JPEG text.

	        ``image`` is expected to expose the PIL ``copy`` / ``thumbnail`` /
	        ``convert`` / ``save`` API. The caller's image is not modified.
	        """
	        img = image.copy()
	        img.thumbnail((800, 800))
	        # JPEG cannot store alpha or palette modes (e.g. RGBA / P from PNG
	        # uploads); convert those to RGB so saving does not raise.
	        if img.mode not in ("RGB", "L"):
	            img = img.convert("RGB")
	        buf = io.BytesIO()
	        img.save(buf, format="JPEG", quality=80)
	        return base64.b64encode(buf.getvalue()).decode("ascii")

	    @staticmethod
	    def _parse(raw: str) -> dict:
	        """Extract the first JSON object found in the model reply.

	        Every ``{`` is tried as a possible start, so surrounding prose,
	        markdown fences or stray braces do not prevent finding the object.
	        Returns ``_fallback()`` (``_ai_success=False``) if none decodes.
	        """
	        text = raw or ""
	        decoder = json.JSONDecoder()
	        start = text.find("{")
	        while start != -1:
	            try:
	                candidate, _ = decoder.raw_decode(text, start)
	            except json.JSONDecodeError:
	                candidate = None
	            if isinstance(candidate, dict):
	                return candidate
	            start = text.find("{", start + 1)
	        log.warning("JSON nao parseado — fallback")
	        return AnimalAI._fallback()

	    @staticmethod
	    def _desc_text(d: dict) -> str:
	        """Build a short description from size, colour, species, breed and marks.

	        Missing or empty fields are skipped. ``distinctive_marks`` may be a
	        list or a single value; non-string entries are stringified.
	        """
	        fields = (
	            d.get("size", ""),
	            d.get("primary_color", ""),
	            d.get("species", ""),
	            d.get("breed_estimate", ""),
	        )
	        words = [str(value) for value in fields if value]

	        marks = d.get("distinctive_marks") or []
	        if not isinstance(marks, (list, tuple)):
	            # A bare string would otherwise be joined character by character.
	            marks = [marks]
	        marks = [str(mark) for mark in marks if mark]
	        if marks:
	            words.append("with " + ", ".join(marks))
	        return " ".join(words)

	    @staticmethod
	    def _fallback() -> dict:
	        """Return the generic description used when the AI is unavailable or its reply is unusable."""
	        # NOTE(review): size/primary_color/condition are Portuguese here while
	        # PROMPT asks the model for English values; left unchanged because
	        # callers may depend on these exact strings — confirm.
	        return {
	            "is_animal": True,
	            "_ai_success": False,
	            "species": "dog",
	            "breed_estimate": "SRD",
	            "size": "médio",
	            "primary_color": "caramelo",
	            "secondary_colors": [],
	            "distinctive_marks": [],
	            "condition": "saudável",
	            "description_text": "stray dog of unknown breed",
	        }