""" ai.py — Visao via HuggingFace InferenceClient ou NVIDIA NIM. Variaveis de ambiente: HF_TOKEN — token HuggingFace (usa HF Serverless Inference) NVIDIA_API_KEY — chave NVIDIA NIM (free tier em build.nvidia.com) Modelo HF: meta-llama/Llama-3.2-11B-Vision-Instruct (disponivel no HF Serverless) Modelo NIM: nvidia/nemotron-3-nano-omni-30b-a3b (Nemotron Omni via NVIDIA) """ import base64 import io import json import logging import os import re import numpy as np log = logging.getLogger(__name__) _HF_MODEL = "meta-llama/Llama-3.2-11B-Vision-Instruct" _NIM_MODEL = "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning" # Nemotron Omni VLM, NVIDIA NIM PROMPT = ( "Examine this image carefully.\n" "FIRST: Is there a dog or cat clearly visible?\n" "If NO dog or cat is present, respond with exactly: {\"is_animal\": false}\n\n" "If YES, respond with ONLY valid JSON (no markdown, no explanation).\n\n" "For breed_estimate, use visual cues (coat type, ear shape, body build, snout, tail) to pick the SINGLE best match.\n\n" "DOG breeds to choose from (use exact spelling):\n" "SRD, Labrador Retriever, Golden Retriever, Pitbull, Poodle, Shih Tzu, Rottweiler, " "German Shepherd, Bulldog, Dachshund, Chihuahua, Siberian Husky, Border Collie, " "Beagle, Boxer, Maltese, Chow Chow, Akita, Dalmatian, Doberman\n\n" "CAT breeds to choose from (use exact spelling):\n" "Domestic Shorthair, Domestic Longhair, Siamese, Persian, Maine Coon, Bengal, " "British Shorthair, Ragdoll, Scottish Fold, Turkish Angora, Sphynx, Abyssinian\n\n" "JSON format:\n" "{\"is_animal\": true," " \"species\": \"dog or cat\"," " \"breed_estimate\": \"exact name from the list above — SRD/Domestic Shorthair only if truly unidentifiable\"," " \"size\": \"small, medium or large\"," " \"primary_color\": \"main color: caramel, black, white, gray, brown, golden, orange, tabby, mixed\"," " \"secondary_colors\": [\"other visible colors, or empty list\"]," " \"distinctive_marks\": [\"notable features: e.g. white chest patch, red collar, scar, missing ear — or empty list\"]," " \"condition\": \"healthy, thin or injured\"," " \"description_text\": \"one concise English sentence describing this specific animal for identity matching\"}" ) class AnimalAI: def __init__(self): self.mode = None # "hf" | "nim" self.model = None self.client = None # OpenAI (NIM) ou InferenceClient (HF) hf_token = os.environ.get("HF_TOKEN", "") nvidia_key = os.environ.get("NVIDIA_API_KEY", "") if nvidia_key: try: from openai import OpenAI self.mode = "nim" self.model = os.environ.get("NVIDIA_MODEL", _NIM_MODEL) self.client = OpenAI( base_url="https://integrate.api.nvidia.com/v1", api_key=nvidia_key, ) log.info("AI: Nemotron Omni via NVIDIA NIM (%s)", self.model) except ImportError: log.warning("openai nao instalado") elif hf_token: try: from huggingface_hub import InferenceClient self.mode = "hf" self.model = os.environ.get("NVIDIA_MODEL", _HF_MODEL) self.client = InferenceClient(model=self.model, token=hf_token) log.info("AI: %s via HF InferenceClient", self.model) except ImportError: log.warning("huggingface_hub nao instalado") else: log.warning("Sem chave de API — IA desabilitada. Configure HF_TOKEN nos Secrets.") self.embedder = None try: from sentence_transformers import SentenceTransformer self.embedder = SentenceTransformer("all-MiniLM-L6-v2") log.info("sentence-transformers: all-MiniLM-L6-v2") except Exception as e: log.warning("sentence-transformers nao carregou: %s", e) def analyze_image(self, image) -> dict: """Analisa imagem. _ai_success=False indica que a IA nao foi usada.""" if self.client is None: return self._fallback() try: img_b64 = self._to_b64(image) if self.mode == "hf": # HuggingFace InferenceClient — suporte nativo a multimodal resp = self.client.chat_completion( messages=[{ "role": "user", "content": [ {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64," + img_b64}}, {"type": "text", "text": PROMPT}, ], }], max_tokens=400, temperature=0.1, ) raw = resp.choices[0].message.content or "" else: # NVIDIA NIM — Nemotron Omni (reasoning model) resp = self.client.chat.completions.create( model=self.model, messages=[{ "role": "user", "content": [ {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64," + img_b64}}, {"type": "text", "text": PROMPT}, ], }], max_tokens=1024, temperature=0.6, top_p=0.95, extra_body={ "chat_template_kwargs": {"enable_thinking": True}, "reasoning_budget": 512, }, ) msg = resp.choices[0].message # Nemotron Omni: resposta pode estar em content ou reasoning_content raw = (msg.content or "") or (getattr(msg, "reasoning_content", "") or "") log.info("AI resposta: %s", raw[:200]) result = self._parse(raw) # Rejeição explícita: a IA não viu nenhum animal if result.get("is_animal") is False: log.info("IA: nenhum animal detectado na imagem.") return {"is_animal": False, "_ai_success": True} result["is_animal"] = True result["_ai_success"] = True return result except Exception as e: log.error("Erro na API de IA: %s", e) return self._fallback() def get_embedding(self, description: dict) -> list: """Embedding da descricao. Aleatorio se IA falhou (evita falsos matches).""" if not description.get("_ai_success", True): log.info("Fallback IA — embedding aleatorio") v = np.random.randn(384).astype(np.float32) v /= np.linalg.norm(v) return v.tolist() if self.embedder is None: v = np.random.randn(384).astype(np.float32) v /= np.linalg.norm(v) return v.tolist() text = description.get("description_text") or self._desc_text(description) return self.embedder.encode(text, normalize_embeddings=True).tolist() @staticmethod def _to_b64(image) -> str: buf = io.BytesIO() img = image.copy() img.thumbnail((800, 800)) img.save(buf, format="JPEG", quality=80) return base64.b64encode(buf.getvalue()).decode() @staticmethod def _parse(raw: str) -> dict: m = re.search(r"\{.*\}", raw, re.DOTALL) if m: try: return json.loads(m.group()) except json.JSONDecodeError: pass log.warning("JSON nao parseado — fallback") return AnimalAI._fallback() @staticmethod def _desc_text(d: dict) -> str: parts = [d.get("size",""), d.get("primary_color",""), d.get("species",""), d.get("breed_estimate","")] marks = d.get("distinctive_marks", []) if marks: parts.append("with " + ", ".join(marks)) return " ".join(filter(None, parts)) @staticmethod def _fallback() -> dict: return { "is_animal": True, "_ai_success": False, "species": "dog", "breed_estimate": "SRD", "size": "médio", "primary_color": "caramelo", "secondary_colors": [], "distinctive_marks": [], "condition": "saudável", "description_text": "stray dog of unknown breed", }