TangYiJay committed on
Commit
1530376
·
verified ·
1 Parent(s): 30d9fb0
Files changed (1) hide show
  1. app.py +76 -75
app.py CHANGED
@@ -1,94 +1,95 @@
1
  import gradio as gr
2
- from PIL import Image
3
  import numpy as np
4
- import torch
5
- from transformers import (
6
- AutoModelForImageSegmentation,
7
- AutoProcessor,
8
- AutoFeatureExtractor,
9
- AutoModelForImageClassification,
10
- )
11
 
12
- # === Load SAM model for segmentation ===
13
- sam_model_id = "facebook/sam-vit-base"
14
- processor_sam = AutoProcessor.from_pretrained(sam_model_id)
15
- model_sam = AutoModelForImageSegmentation.from_pretrained(sam_model_id)
 
 
16
 
17
- # === ② Load garbage classification model ===
18
- cls_model_id = "yangy50/garbage-classification"
19
- extractor = AutoFeatureExtractor.from_pretrained(cls_model_id)
20
- cls_model = AutoModelForImageClassification.from_pretrained(cls_model_id)
21
 
22
- base_img = None # Global memory for base image
 
23
 
24
- # === Step 1: Set base ===
25
  def set_base(image):
26
- global base_img
27
- if image is None:
28
- return "Please upload an empty bin image."
29
- base_img = image.convert("RGB")
30
- return "✅ Base image saved successfully."
31
 
32
- # === Step 2: Detect and classify trash ===
33
- def detect_trash(image):
34
- global base_img
35
- if base_img is None:
36
- return "Please set a base image first."
37
-
38
- current_img = image.convert("RGB")
39
 
40
  # Convert to numpy
41
- base_np = np.array(base_img).astype(np.float32)
42
- current_np = np.array(current_img).astype(np.float32)
43
-
44
- # Difference mask
45
- diff = np.abs(current_np - base_np).mean(axis=2)
46
- mask = (diff > 40).astype(np.uint8) * 255 # threshold
47
- mask_img = Image.fromarray(mask).convert("RGB")
48
-
49
- # Use SAM to refine the mask
50
- inputs = processor_sam(images=current_img, segmentation_maps=mask_img, return_tensors="pt")
51
- with torch.no_grad():
52
- outputs = model_sam(**inputs)
53
- seg = outputs.pred_masks[0].cpu().numpy()
54
-
55
- # Crop bounding box of detected trash
56
- ys, xs = np.where(seg > 0.5)
57
- if len(xs) == 0 or len(ys) == 0:
58
- return "No significant object detected."
59
-
60
- x1, x2, y1, y2 = xs.min(), xs.max(), ys.min(), ys.max()
61
- cropped = current_img.crop((x1, y1, x2, y2))
62
-
63
- # Classify the cropped object
64
- cls_inputs = extractor(images=cropped, return_tensors="pt")
65
- with torch.no_grad():
66
- cls_out = cls_model(**cls_inputs)
67
- probs = torch.nn.functional.softmax(cls_out.logits, dim=-1)
68
- pred_idx = torch.argmax(probs, dim=-1).item()
69
- pred_class = cls_model.config.id2label[pred_idx]
70
-
71
- return f"🧩 Detected Material: {pred_class}"
72
-
73
- # === Build UI ===
 
 
 
 
 
 
 
 
 
 
74
  set_base_ui = gr.Interface(
75
  fn=set_base,
76
- inputs=gr.Image(type="pil", label="Upload Empty Bin (Base)"),
77
- outputs=gr.Textbox(label="Status"),
78
- title="🧩 Set Base",
 
79
  )
80
 
81
  detect_trash_ui = gr.Interface(
82
  fn=detect_trash,
83
  inputs=gr.Image(type="pil", label="Upload Trash Image"),
84
- outputs=gr.Textbox(label="Detection Result"),
85
- title="♻️ Detect & Classify Trash",
86
- )
87
-
88
- demo = gr.TabbedInterface(
89
- [set_base_ui, detect_trash_ui],
90
- ["Set Base", "Detect Trash"]
91
  )
92
 
93
- if __name__ == "__main__":
94
- demo.launch()
 
1
import gradio as gr
import numpy as np
from PIL import Image
from segment_anything import sam_model_registry, SamPredictor
from transformers import BlipProcessor, BlipForQuestionAnswering

# ===== 1️⃣ Load models =====
# Both models are loaded once at import time and shared by every request.
# SAM: promptable segmentation, used to refine the coarse difference mask.
sam_checkpoint = "sam_vit_b_01ec64.pth"  # checkpoint file uploaded to the Space
sam_model_type = "vit_b"
sam_model = sam_model_registry[sam_model_type](checkpoint=sam_checkpoint)
sam_predictor = SamPredictor(sam_model)

# BLIP: visual question answering, used to name the material of the crop.
blip_model_name = "Salesforce/blip-vqa-base"
blip_processor = BlipProcessor.from_pretrained(blip_model_name)
blip_model = BlipForQuestionAnswering.from_pretrained(blip_model_name)

# ===== 2️⃣ Global base image =====
# Reference (empty-bin) photo set via set_base() and read by detect_trash().
# NOTE(review): module-level mutable state — shared across all users of the
# Space session; confirm single-user usage is acceptable.
base_image = None
21
 
22
+ # ===== 3️⃣ Set base =====
23
def set_base(image):
    """Store *image* as the reference (empty-bin) photo for detect_trash().

    Args:
        image: PIL.Image uploaded through Gradio, or None when the user
            submitted without uploading anything.

    Returns:
        A status string shown in the UI.
    """
    global base_image
    # Reject empty uploads: the previous behavior stored None and still
    # reported success, leaving detect_trash() with no usable base.
    if image is None:
        return "Please upload a base image."
    base_image = image
    return "Base image saved successfully."
 
 
27
 
28
+ # ===== 4️⃣ Detect trash =====
29
def detect_trash(trash_image):
    """Find the object added on top of the stored base image and name its material.

    Pipeline: per-pixel difference vs. the base image -> bounding box ->
    SAM mask refinement -> crop -> BLIP visual question answering,
    constrained to a fixed set of material classes.

    Args:
        trash_image: PIL.Image of the bin containing the new item.

    Returns:
        A result string for the UI: a capitalized material class, or an
        error/status message.
    """
    global base_image
    if base_image is None:
        return "Please upload a base image first."

    # Normalize both images to 3-channel RGB; uploads may be RGBA or
    # grayscale, and a channel mismatch breaks the subtraction below.
    trash_rgb = trash_image.convert("RGB")
    base_np = np.array(base_image.convert("RGB").resize(trash_rgb.size))
    trash_np = np.array(trash_rgb)

    # Coarse change mask: absolute difference summed over channels.
    # int16 avoids uint8 wrap-around on subtraction.
    diff = np.abs(trash_np.astype(np.int16) - base_np.astype(np.int16))
    mask = (diff.sum(axis=2) > 50).astype(np.uint8)  # binary mask

    # Bounding box of the changed region.
    coords = np.argwhere(mask)
    if coords.size == 0:
        return "No difference detected."
    y0, x0 = coords.min(axis=0)
    y1, x1 = coords.max(axis=0)
    # SamPredictor.predict expects a single XYXY box as a length-4 array.
    # (The previous `boxes=` keyword does not exist and raised TypeError.)
    box = np.array([x0, y0, x1, y1])

    # Refine with SAM and keep the highest-scoring candidate mask
    # (previously masks[0] was taken despite the "largest mask" intent).
    sam_predictor.set_image(trash_np)
    masks, scores, logits = sam_predictor.predict(box=box)
    mask_refined = masks[int(np.argmax(scores))]

    # Crop the refined region; +1 makes the max row/col inclusive
    # (fixes an off-by-one that dropped the last row and column).
    ys, xs = np.where(mask_refined)
    if ys.size == 0:
        return "SAM did not find any object."
    cropped = trash_np[ys.min():ys.max() + 1, xs.min():xs.max() + 1]

    # Convert to PIL for BLIP.
    cropped_img = Image.fromarray(cropped)

    # Ask BLIP a constrained VQA question about the material.
    question = "What material is this? Choose from plastic, metal, paper, cardboard, glass, trash."
    inputs = blip_processor(cropped_img, question, return_tensors="pt")
    out = blip_model.generate(**inputs)
    answer = blip_processor.decode(out[0], skip_special_tokens=True)

    # Map the free-form answer onto the allowed classes; default to "trash".
    valid_classes = ["plastic", "metal", "paper", "cardboard", "glass", "trash"]
    result = next((c for c in valid_classes if c in answer.lower()), "trash")

    return result.capitalize()
76
+
77
+ # ===== 5️⃣ Gradio UI =====
78
  set_base_ui = gr.Interface(
79
  fn=set_base,
80
+ inputs=gr.Image(type="pil", label="Upload Base Image"),
81
+ outputs=gr.Textbox(label="Result"),
82
+ title="Set Base Image",
83
+ api_name="/set_base"
84
  )
85
 
86
  detect_trash_ui = gr.Interface(
87
  fn=detect_trash,
88
  inputs=gr.Image(type="pil", label="Upload Trash Image"),
89
+ outputs=gr.Textbox(label="Detected Material"),
90
+ title="Detect Trash Material",
91
+ api_name="/detect_trash"
 
 
 
 
92
  )
93
 
94
+ demo = gr.TabbedInterface([set_base_ui, detect_trash_ui], ["Set Base", "Detect Trash"])
95
+ demo.launch()