Update app.py
app.py CHANGED
@@ -8,19 +8,30 @@ import gradio as gr


 # ---------- CONFIG ----------
-CKPT_PATH = "best_effnet_twohead.pt"
-LABELS_PATH = "labels.json"
+CKPT_PATH = "best_effnet_twohead.pt"  # training script saves best.pt / last.pt
+LABELS_PATH = "labels.json"  # optional; fallback to taxa.txt / states.txt
+TAXA_PATH = "taxa.txt"  # fallback
+STATES_PATH = "states.txt"  # fallback
 IMG_SIZE = 224
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 # ----------------------------


-
-
-
+def load_lines(path: str) -> list[str]:
+    p = path
+    with open(p, "r", encoding="utf-8") as f:
+        return [ln.strip() for ln in f if ln.strip()]

-
-
+
+# load labels (labels.json preferred; fallback to taxa.txt/states.txt)
+try:
+    with open(LABELS_PATH, "r", encoding="utf-8") as f:
+        labels = json.load(f)
+    SPECIES = labels["species"]
+    STATE = labels["state"]
+except FileNotFoundError:
+    SPECIES = load_lines(TAXA_PATH)
+    STATE = load_lines(STATES_PATH)


 # model (must match training)
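Note (not part of the commit): the `try` branch above assumes `labels.json` holds two parallel lists keyed `"species"` and `"state"`, index-aligned with the model heads. A minimal sketch of writing such a file; the species subset here is illustrative:

```python
import json

# Hypothetical labels.json matching what the loader above reads.
labels = {
    "species": ["Anthidium", "Heriades", "Osmia bicornis"],  # illustrative subset
    "state": ["DauLv", "DeadLv", "Hatched", "Lv", "OldFood"],
}
with open("labels.json", "w", encoding="utf-8") as f:
    json.dump(labels, f, ensure_ascii=False, indent=2)
```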
@@ -29,34 +40,69 @@ class EffNetTwoHead(nn.Module):
         super().__init__()
         base = efficientnet_b0(weights=None)
         self.features = base.features
-        self.
+        self.pool = base.avgpool
         c = base.classifier[1].in_features
+
+        # training script uses dropout before heads
+        self.drop = nn.Dropout(0.3)
         self.head_species = nn.Linear(c, num_species)
         self.head_state = nn.Linear(c, num_states)

     def forward(self, x):
         x = self.features(x)
-        x = self.
+        x = self.pool(x)
         x = torch.flatten(x, 1)
+        x = self.drop(x)
         return self.head_species(x), self.head_state(x)


 # load model
 ckpt = torch.load(CKPT_PATH, map_location="cpu")
-
-
+num_species = int(ckpt.get("num_species", len(SPECIES)))
+num_states = int(ckpt.get("num_states", len(STATE)))
+
+# if checkpoint defines class counts, trust it; labels must match lengths
+if len(SPECIES) != num_species:
+    raise RuntimeError(
+        f"Label mismatch: len(SPECIES)={len(SPECIES)} but ckpt num_species={num_species}. "
+        f"Fix labels.json or taxa.txt."
+    )
+if len(STATE) != num_states:
+    raise RuntimeError(
+        f"Label mismatch: len(STATE)={len(STATE)} but ckpt num_states={num_states}. "
+        f"Fix labels.json or states.txt."
+    )
+
+model = EffNetTwoHead(num_species, num_states)
+model.load_state_dict(ckpt["model"], strict=True)
 model.to(DEVICE).eval()


-# preprocessing (
-
-
-
-
-
-
-
+# preprocessing (align with training: letterbox to 224x224 without cropping)
+# This implementation NEVER crops the image: it resizes to fit within 224x224,
+# then pads the remaining area (black) to reach exactly 224x224.
+normalize = transforms.Normalize(
+    mean=[0.485, 0.456, 0.406],
+    std=[0.229, 0.224, 0.225],
+)
+
+
+def letterbox_pil(im: Image.Image, size: int = 224, fill: int = 0) -> Image.Image:
+    w, h = im.size
+    if w == 0 or h == 0:
+        return Image.new("RGB", (size, size), (fill, fill, fill))
+
+    scale = min(size / w, size / h)  # fit entirely inside size x size
+    new_w = max(1, int(round(w * scale)))
+    new_h = max(1, int(round(h * scale)))
+
+    im_resized = im.resize((new_w, new_h), resample=Image.BILINEAR)
+
+    canvas = Image.new("RGB", (size, size), (fill, fill, fill))
+    left = (size - new_w) // 2
+    top = (size - new_h) // 2
+    canvas.paste(im_resized, (left, top))
+    return canvas


 @torch.no_grad()
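Note (not part of the commit): `ckpt.get("num_species", ...)` and `ckpt["model"]` assume the training script saved a dict with the weights under `"model"` plus optional class counts. A standalone sketch of that assumed layout, using a stand-in module so it runs on its own; the counts mirror the 20 taxa / 5 states listed further down:

```python
import torch
import torch.nn as nn

# Stand-in module just to produce a state_dict for the sketch;
# the real checkpoint would hold EffNetTwoHead weights.
net = nn.Linear(4, 2)
ckpt = {
    "model": net.state_dict(),
    "num_species": 20,  # optional; the loader falls back to len(SPECIES)
    "num_states": 5,    # optional; the loader falls back to len(STATE)
}
torch.save(ckpt, "example_ckpt.pt")  # the app itself expects best_effnet_twohead.pt
```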
@@ -65,7 +111,12 @@ def predict(image: Image.Image):
         return "No image", "No image"

     image = image.convert("RGB")
-
+
+    # letterbox to 224x224 (no cropping)
+    image = letterbox_pil(image, size=IMG_SIZE, fill=0)
+
+    x = transforms.ToTensor()(image)
+    x = normalize(x).unsqueeze(0).to(DEVICE)

     log_sp, log_st = model(x)

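A quick illustrative check (assumes `letterbox_pil` from the hunk above is in scope): a 16:9 input is scaled to fit and padded, never cropped.

```python
from PIL import Image

im = Image.new("RGB", (640, 360), (120, 90, 40))  # 16:9 test image
out = letterbox_pil(im, size=224, fill=0)
print(out.size)  # (224, 224): content scales to 224x126, rest is black padding
```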
@@ -78,9 +129,6 @@ def predict(image: Image.Image):
     sp_conf = float(prob_sp[sp_id].item())
     st_conf = float(prob_st[st_id].item())

-    #sp_text = f"{SPECIES[sp_id]} (id={sp_id}, score={sp_conf:.3f})"
-    #st_text = f"{STATE[st_id]} (id={st_id}, score={st_conf:.3f})"
-
     sp_text = f"{SPECIES[sp_id]} (score={sp_conf:.3f})"
     st_text = f"{STATE[st_id]} (score={st_conf:.3f})"

@@ -88,14 +136,16 @@ def predict(image: Image.Image):


 EXAMPLE_TEXT = """
-**Taxa (
+**Taxa (20):** Anthidium, Cacoxnus indagator, Chelostoma campanularum, Chelostoma florisomne, Chelostoma rapunculi, Coeliopencyrtus, Eumenidae, Heriades, Hylaeus, Ichneumonidae, Isodontia mexicana, Megachile, Osmia bicornis, Osmia brevicornis, Osmia cornuta, Passaloecus, Pemphredon, Psenulus, Trichodes, Trypoxylon

-**States (
+**States (5):** DauLv, DeadLv, Hatched, Lv, OldFood
 """

+
 STATUS_TABLE = [
     ["DauLv", "Visible alive prepupa that stopped feeding"],
     ["DeadLv", "Dead visible larva"],
+    ["Hatched", "Brood cell with hatched bee or wasp traces (cocoon or exuvia)"],
     ["Lv", "Visible alive larva"],
     ["OldFood", "Brood cell with only unconsumed food, no larva will develop"],
 ]
@@ -140,10 +190,5 @@ with gr.Blocks(theme=theme, title="LTN EfficientNet Two-Head Classifier") as demo:
         inputs=img,
     )

-    #gr.Markdown(
-    #    "<small>Note: The status legend is a human-readable mapping. "
-    #    "Your model output labels come from `labels.json`.</small>"
-    #)
-
 if __name__ == "__main__":
-demo.launch()
+    demo.launch()
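A hypothetical local smoke test before pushing: importing `app` loads the checkpoint at import time, so `best_effnet_twohead.pt` and the label files must be present in the working directory.

```python
from PIL import Image
from app import predict  # demo.launch() stays behind the __main__ guard

# Gray dummy image; real use would pass a brood-cell photo.
sp_text, st_text = predict(Image.new("RGB", (640, 480), (128, 128, 128)))
print(sp_text)  # e.g. "Osmia bicornis (score=0.512)"
print(st_text)  # e.g. "Lv (score=0.734)"
```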