Spaces:

froidhj
/

trashnet-server

Sleeping

App Files Files Community

froidhj committed on Oct 25, 2025

Commit

65f698e

verified ·

1 Parent(s): 9fd9d16

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -106

app.py CHANGED Viewed

@@ -1,89 +1,52 @@
 # app.py
 from fastapi import FastAPI, Request, Response
 from PIL import Image, ImageOps
-import io, os, torch, json
 import torch.nn.functional as F
 from transformers import AutoImageProcessor, AutoModelForImageClassification
 # ========= CONFIG =========
 MODEL_ID = "prithivMLmods/Trash-Net"
-# PT-BR map (somente 4 classes)
 MAP_PT = {
     "glass": "vidro",
     "metal": "metal",
     "paper": "papel",
     "plastic": "plastico",
 }
 TARGETS_EN = list(MAP_PT.keys())  # ["glass","metal","paper","plastic"]
-# ========= OTIMIZAÇÕES (CPU do Space) =========
 torch.set_grad_enabled(False)
 torch.set_num_threads(1)
 torch.set_num_interop_threads(1)
-DEVICE = "cpu"
-# ========= CARREGAMENTO =========
-# use_fast=True evita o aviso e tende a ser mais eficiente
-processor = AutoImageProcessor.from_pretrained(MODEL_ID, use_fast=True)
 model = AutoModelForImageClassification.from_pretrained(MODEL_ID)
 model.eval()
-# ========= MAPAS DE RÓTULO (robustos) =========
-cfg = model.config
-# Tenta id2label direto (id->str)
-_id2label = {}
-if getattr(cfg, "id2label", None):
-    # pode vir com chaves str ou int; normalizamos:
-    try:
-        _id2label = {int(k): str(v) for k, v in cfg.id2label.items()}
-    except Exception:
-        # alguns modelos já trazem chaves int
-        _id2label = {int(i): str(lbl) for i, lbl in cfg.id2label.items()}
-# Tenta label2id direto (str->id)
-_label2id = {}
-if getattr(cfg, "label2id", None):
-    try:
-        _label2id = {str(k).strip().lower(): int(v) for k, v in cfg.label2id.items()}
-    except Exception:
-        # fallback: se o modelo tiver salvo ao contrário (id->label), inverta
-        _label2id = {}
-# Se label2id não veio, derive de id2label
-if not _label2id and _id2label:
-    _label2id = {str(v).strip().lower(): int(k) for k, v in _id2label.items()}
-# Se id2label não veio, derive de label2id
-if not _id2label and _label2id:
-    _id2label = {int(v): str(k) for k, v in _label2id.items()}
-# Logs de diagnóstico (aparecem no console do Space)
-print("config.id2label:", _id2label)
-print("config.label2id:", _label2id)
-# ========= DESCOBERTA DOS 4 ÍNDICES‐ALVO =========
 target_indices = []
-target_indices_en = []  # rótulo EN correspondente na mesma ordem
-# 1) tentamos correspondência exata (case-insensitive)
 for en in TARGETS_EN:
-    key = en.lower()
-    if key in _label2id:
-        idx = _label2id[key]
-        if idx not in target_indices:
-            target_indices.append(idx)
-            target_indices_en.append(en)
-# 2) se faltar algum, tentamos "contém" no id2label (ex.: "cardboard" ~ paper)
 if len(target_indices) < 4:
     for en in TARGETS_EN:
         if en in target_indices_en:
             continue
         found = None
         en_low = en.lower()
-        for i, lab in _id2label.items():
             if en_low in lab.lower():
                 found = i
                 break
@@ -91,107 +54,89 @@ if len(target_indices) < 4:
             target_indices.append(found)
             target_indices_en.append(en)
-# (continua mesmo que haja <4; sempre escolheremos dentre os disponíveis)
 app = FastAPI()
-# ========= PRÉ‐PROCESS =========
 def _prepare_image(img_bytes: bytes) -> Image.Image:
     img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
-    # Corrige rotação (EXIF)
     img = ImageOps.exif_transpose(img)
-    # Center-crop quadrado para reduzir distorções periféricas
     w, h = img.size
     side = min(w, h)
     left = (w - side) // 2
     top = (h - side) // 2
     img = img.crop((left, top, left + side, top + side))
-    # O processor cuida do resize/padding normalizado do modelo
     return img
-# ========= PREDICT =========
 def predict_image_bytes(img_bytes: bytes):
     """
     Retorna (label_pt, confidence_float_0_1)
-    Sempre uma das 4 classes: vidro/papel/plastico/metal
     """
     img = _prepare_image(img_bytes)
     inputs = processor(images=img, return_tensors="pt")
     logits = model(**inputs).logits  # [1, num_labels]
-    probs = F.softmax(logits, dim=-1)[0]  # [num_labels]
     if target_indices:
-        subset = probs[target_indices]           # [<=4]
-        j = int(torch.argmax(subset).item())     # posição dentro do subset
-        best_idx_global = target_indices[j]      # índice real no espaço do modelo
-        best_model_label = _id2label.get(best_idx_global, "").lower()
         conf = float(subset[j].item())
-        # Mapeia para PT (prioriza rótulo exato; senão usa a intenção TARGETS_EN[j])
-        if best_model_label in MAP_PT:
-            label_pt = MAP_PT[best_model_label]
         else:
             label_pt = MAP_PT[target_indices_en[j]]
         return label_pt, conf
-    # Fallback global (se não achamos índice nenhum)
-    i = int(torch.argmax(probs).item())
-    best_en = _id2label.get(i, "").lower()
-    conf = float(probs[i].item())
-    if "glass" in best_en:
-        label_pt = "vidro"
-    elif ("metal" in best_en) or ("steel" in best_en) or ("alum" in best_en):
-        label_pt = "metal"
-    elif ("paper" in best_en) or ("cardboard" in best_en):
-        label_pt = "papel"
     else:
-        label_pt = "plastico"
-    return label_pt, conf
-# ========= ENDPOINTS =========
 @app.get("/health")
 def health():
-    return {
-        "ok": True,
-        "model": MODEL_ID,
-        "targets_en": TARGETS_EN,
-        "targets_pt": list(MAP_PT.values()),
-        "mapped_indices": target_indices,
-    }
 @app.post("/predict")
 async def predict(request: Request):
     """
     Entrada:
-      - bytes JPEG/PNG (Content-Type: application/octet-stream | image/jpeg | image/png)
-      - ou JSON {"image_b64": "..."}  (útil para teste manual)
     Saída:
       - texto puro: 'vidro' | 'papel' | 'plastico' | 'metal'
       - header X-Confidence com a confiança 0..1
     """
     try:
         ctype = (request.headers.get("content-type") or "").lower()
-        img_bytes = b""
         if "application/octet-stream" in ctype or "image/jpeg" in ctype or "image/png" in ctype:
-            img_bytes = await request.body()
         else:
             data = await request.json()
             import base64
             b64 = (data.get("image_b64") or "").split(",")[-1]
-            if b64:
-                img_bytes = base64.b64decode(b64)
         if not img_bytes:
-            # Sem imagem: retorna uma classe válida com confiança 0
-            return Response("plastico", media_type="text/plain",
-                            headers={"X-Confidence": "0.0000"})
         label_pt, conf = predict_image_bytes(img_bytes)
-        return Response(label_pt, media_type="text/plain",
-                        headers={"X-Confidence": f"{conf:.4f}"})
     except Exception as e:
-        # Loga erro e ainda assim devolve uma das 4 (mantém o pipeline vivo)
-        print("predict error:", repr(e))
-        return Response("plastico", media_type="text/plain",
-                        headers={"X-Confidence": "0.0000"})

 # app.py
 from fastapi import FastAPI, Request, Response
 from PIL import Image, ImageOps
+import io, os, torch
 import torch.nn.functional as F
 from transformers import AutoImageProcessor, AutoModelForImageClassification
 # ========= CONFIG =========
 MODEL_ID = "prithivMLmods/Trash-Net"
+# English -> Brazilian Portuguese label map (only the 4 main classes)
 MAP_PT = {
     "glass": "vidro",
     "metal": "metal",
     "paper": "papel",
     "plastic": "plastico",
 }
 TARGETS_EN = list(MAP_PT.keys())  # ["glass","metal","paper","plastic"]
+# ========= OPTIMIZATIONS (for the Space's CPU) =========
+# Inference only: disable autograd globally and limit torch to one intra-op
+# and one inter-op thread.
 torch.set_grad_enabled(False)
 torch.set_num_threads(1)
 torch.set_num_interop_threads(1)
+# ========= MODEL LOADING =========
+processor = AutoImageProcessor.from_pretrained(MODEL_ID)
 model = AutoModelForImageClassification.from_pretrained(MODEL_ID)
 model.eval()
+# Build helper label maps from the model config.
+# config.id2label is {id: label}; config.label2id is {label: id}. In label2id
+# the label is the KEY and the integer id is the VALUE, so the label must be
+# lower-cased and the value cast to int (calling .lower() on the id raises
+# AttributeError and aborts the import).
+id2label = {int(k): v for k, v in model.config.id2label.items()}
+label2id = {str(k).strip().lower(): int(v) for k, v in model.config.label2id.items()}
+# Find the model indices of the main target classes (exact, case-insensitive match).
 target_indices = []
+target_indices_en = []  # English label of each entry in target_indices, same order
 for en in TARGETS_EN:
+    if en in label2id:
+        target_indices.append(label2id[en])
+        target_indices_en.append(en)
 if len(target_indices) < 4:
     for en in TARGETS_EN:
         if en in target_indices_en:
             continue
         found = None
         en_low = en.lower()
+        for i, lab in id2label.items():
             if en_low in lab.lower():
                 found = i
                 break
             target_indices.append(found)
             target_indices_en.append(en)
+# ========= FASTAPI APP =========
 app = FastAPI()
+# ========= FUNÇÕES =========
 def _prepare_image(img_bytes: bytes) -> Image.Image:
+    """Decode raw image bytes and return a square RGB image.
+
+    Opens the bytes with PIL, converts to RGB, applies the EXIF orientation
+    via ``ImageOps.exif_transpose``, then center-crops to a square whose side
+    is the shorter image dimension. No resizing is done here.
+    """
     img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
     img = ImageOps.exif_transpose(img)
     w, h = img.size
     side = min(w, h)
+    # Offsets that center the square crop on both axes.
     left = (w - side) // 2
     top = (h - side) // 2
     img = img.crop((left, top, left + side, top + side))
     return img
 def predict_image_bytes(img_bytes: bytes):
     """
     Return (label_pt, confidence_float_0_1).
+
+    label_pt is one of the Portuguese values of MAP_PT; the confidence is the
+    softmax probability (computed over all model labels) of the chosen class.
     """
     img = _prepare_image(img_bytes)
     inputs = processor(images=img, return_tensors="pt")
     logits = model(**inputs).logits  # [1, num_labels]
     if target_indices:
+        # Pick the most probable class among the target indices only. The
+        # probabilities are NOT renormalized over that subset.
+        probs = F.softmax(logits, dim=-1)[0]
+        subset = probs[target_indices]
+        j = int(torch.argmax(subset).item())
+        best_idx_global = target_indices[j]
+        best_en = id2label[best_idx_global].lower()
         conf = float(subset[j].item())
+        # Prefer the model's own label when it is a MAP_PT key; otherwise use
+        # the target name that this index was matched to.
+        if best_en in MAP_PT:
+            label_pt = MAP_PT[best_en]
         else:
             label_pt = MAP_PT[target_indices_en[j]]
         return label_pt, conf
     else:
+        # No target index is known: take the global argmax and map its label
+        # by keyword; anything unmatched falls back to "plastico".
+        probs = F.softmax(logits, dim=-1)[0]
+        i = int(torch.argmax(probs).item())
+        best_en = id2label[i].lower()
+        conf = float(probs[i].item())
+        if "glass" in best_en:
+            label_pt = "vidro"
+        elif "metal" in best_en or "steel" in best_en or "aluminum" in best_en:
+            label_pt = "metal"
+        elif "paper" in best_en or "cardboard" in best_en:
+            label_pt = "papel"
+        else:
+            label_pt = "plastico"
+        return label_pt, conf
+# ========= ROTAS =========
 @app.get("/health")
 def health():
+    """Check that the server is up; returns the model id and the Portuguese target labels."""
+    return {"ok": True, "model": MODEL_ID, "targets": list(MAP_PT.values())}
 @app.post("/predict")
 async def predict(request: Request):
     """
     Input:
+      - raw image bytes (Content-Type: application/octet-stream, image/jpeg or image/png)
+      - or JSON {"image_b64": "..."} (for manual testing only)
     Output:
       - plain text: 'vidro' | 'papel' | 'plastico' | 'metal'
       - X-Confidence header carrying the confidence 0..1
+
+    An empty/missing image or any exception yields 'plastico' with
+    X-Confidence "0.00".
     """
     try:
         ctype = (request.headers.get("content-type") or "").lower()
         if "application/octet-stream" in ctype or "image/jpeg" in ctype or "image/png" in ctype:
+            img_bytes = await request.body()  # <-- this is the correct way to read the raw body
         else:
             data = await request.json()
             import base64
+            # Keep only the text after the last comma (e.g. drops a data-URL prefix).
             b64 = (data.get("image_b64") or "").split(",")[-1]
+            img_bytes = base64.b64decode(b64) if b64 else b""
         if not img_bytes:
+            return Response("plastico", media_type="text/plain", headers={"X-Confidence": "0.00"})
         label_pt, conf = predict_image_bytes(img_bytes)
+        return Response(label_pt, media_type="text/plain", headers={"X-Confidence": f"{conf:.4f}"})
     except Exception as e:
+        # Best-effort: log the error and still answer with a valid class.
+        print("predict error:", e)
+        return Response("plastico", media_type="text/plain", headers={"X-Confidence": "0.00"})