Spaces:
Sleeping
Sleeping
File size: 1,786 Bytes
90084cd f703339 9adcc24 90084cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# app/models/animal_vision.py
import faiss
import torch
import open_clip
import numpy as np
from PIL import Image
from app.models.llm import explain_species
from app.utils.config import (
DEVICE,
BIOCLIP_MODEL_ID,
BIOCLIP_INDEX_PATH,
ANIMAL_SPECIES_LIST,
TOP_K_ANIMALS
)
# Module-level initialization: runs once at import time.
# Load the BioCLIP vision model and its matching image preprocessing
# transform from the Hugging Face hub.
model, _, preprocess = open_clip.create_model_and_transforms(
    f"hf-hub:{BIOCLIP_MODEL_ID}"
)
model = model.to(DEVICE)
model.eval()  # inference mode: disable dropout/batch-norm updates
# Pre-built FAISS index of species embeddings.
# NOTE(review): index vectors are presumably L2-normalized BioCLIP text/image
# embeddings so that inner-product search equals cosine similarity — confirm
# against the index-building script.
index = faiss.read_index(str(BIOCLIP_INDEX_PATH))
# Species labels, one per line, aligned with the FAISS index row order.
with open(ANIMAL_SPECIES_LIST, "r", encoding="utf-8") as f:
    SPECIES = [line.strip() for line in f]
@torch.no_grad()
def predict_animal(image: Image.Image) -> dict:
    """
    Identify the animal species in *image* with BioCLIP + FAISS search.

    The image is embedded with the BioCLIP vision encoder, L2-normalized,
    and matched against the pre-built species index; the best match is then
    passed to the LLM for a human-readable explanation.

    Args:
        image: Input image (any mode; converted to RGB internally).

    Returns:
        {
            "species": str,
            "common_name": str | None,
            "confidence": float,
            "top_k": list,
            "description": str
        }
    """
    # Encode the image into a unit-norm embedding (float32 for FAISS).
    image_tensor = preprocess(image.convert("RGB")).unsqueeze(0).to(DEVICE)
    image_features = model.encode_image(image_tensor)
    image_features = image_features / image_features.norm(dim=-1, keepdim=True)
    image_np = image_features.cpu().numpy().astype("float32")

    # Nearest-neighbor search; single query, so take row 0 of both outputs.
    scores, indices = index.search(image_np, TOP_K_ANIMALS)
    results = [
        {"species": SPECIES[idx], "similarity": float(score)}
        for idx, score in zip(indices[0], scores[0])
    ]

    best = results[0]
    llm_result = explain_species(
        species=best["species"],
        confidence=best["similarity"],
        domain="animal",
        top_k=results,
    )
    return {
        "species": best["species"],
        "common_name": llm_result["common_name"],
        "confidence": best["similarity"],
        # Restored: the docstring promises these two keys, but they had been
        # commented out, breaking the documented contract for callers.
        "top_k": results,
        "description": llm_result["description"],
    }
|