Spaces:

TangYiJay
/

imagelanguage

Sleeping

File size: 3,584 Bytes

0fe4fae
 
7d26df7
04ec2fd
bcd3fcc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d26df7
bcd3fcc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
de0a685
bcd3fcc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366963e
0fe4fae
bcd3fcc
 
 
 
366963e
bcd3fcc
 
 
 
366963e
bcd3fcc
 
 
366963e
bcd3fcc
 
366963e
bcd3fcc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d26df7
de0a685

import cv2
import numpy as np
from PIL import Image
import torch
import gradio as gr
from transformers import AutoImageProcessor, AutoModelForImageClassification

# ---- Step 1: Model loading ----
# Hugging Face Hub identifier of the checkpoint; passed to both from_pretrained calls below.
MODEL_ID = "prithivMLmods/Trash-Net"
# Image preprocessor for the checkpoint; created once at import time and used by classify_trash.
processor = AutoImageProcessor.from_pretrained(MODEL_ID)
# Image-classification model; created once at import time and used by classify_trash.
model = AutoModelForImageClassification.from_pretrained(MODEL_ID)

# ---- Step 2: Professional cropping (OpenCV-based structured difference detection) ----
def smart_crop(
    base_img: Image.Image,
    new_img: Image.Image,
    *,
    target_size=(224, 224),
    diff_threshold=40,
    margin=10,
):
    """Crop the region of ``new_img`` that differs most from ``base_img``.

    Both images are converted to RGB and resized to ``target_size`` so they
    can be compared pixel by pixel. The grayscale, blurred frames are
    differenced, thresholded and morphologically cleaned; the bounding box of
    the largest resulting contour (grown by ``margin``) is cut out of the
    *resized* new image.

    Args:
        base_img: Reference PIL image (scene without the object), or None.
        new_img: PIL image of the same scene with the object, or None.
        target_size: ``(width, height)`` both images are resized to before
            comparison. The returned crop is in this coordinate space.
        diff_threshold: Minimum absolute grayscale difference (0-255) for a
            pixel to count as changed.
        margin: Non-negative number of pixels added on every side of the
            detected bounding box, clipped to the image borders.

    Returns:
        A ``(cropped, error)`` tuple. On success ``cropped`` is a PIL image
        and ``error`` is None; on failure ``cropped`` is None and ``error``
        is a human-readable message.
    """
    if base_img is None or new_img is None:
        return None, "Missing image input."

    # Bring both frames to a common RGB size so absdiff can compare them.
    size = tuple(target_size)
    base = cv2.resize(np.array(base_img.convert("RGB")), size)
    new = cv2.resize(np.array(new_img.convert("RGB")), size)

    # Grayscale + Gaussian blur reduces sensor noise before differencing.
    base_gray = cv2.GaussianBlur(cv2.cvtColor(base, cv2.COLOR_RGB2GRAY), (5, 5), 0)
    new_gray = cv2.GaussianBlur(cv2.cvtColor(new, cv2.COLOR_RGB2GRAY), (5, 5), 0)

    # Binary mask of pixels whose brightness changed by more than the threshold.
    diff = cv2.absdiff(base_gray, new_gray)
    _, thresh = cv2.threshold(diff, diff_threshold, 255, cv2.THRESH_BINARY)

    # Morphology: close small holes, then dilate to merge nearby changed regions.
    kernel = np.ones((5, 5), np.uint8)
    thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    thresh = cv2.dilate(thresh, kernel, iterations=2)

    # findContours returns (contours, hierarchy) in OpenCV 4.x but
    # (image, contours, hierarchy) in 3.x; [-2] selects contours in both.
    contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if not contours:
        return None, "No significant object difference detected."

    # Keep only the largest changed region.
    x, y, w, h = cv2.boundingRect(max(contours, key=cv2.contourArea))

    # Grow the box slightly and clip it to the image bounds.
    height, width = new.shape[:2]
    x1 = max(0, x - margin)
    y1 = max(0, y - margin)
    x2 = min(width, x + w + margin)
    y2 = min(height, y + h + margin)

    return Image.fromarray(new[y1:y2, x1:x2]), None

# ---- Step 3: TrashNet classification ----
def classify_trash(image: Image.Image):
    """Classify ``image`` with the module-level TrashNet model.

    Args:
        image: PIL image to classify, or None.

    Returns:
        A ``(label, confidence)`` tuple: the ``id2label`` entry for the
        highest-probability class and that softmax probability as a float.
        When ``image`` is None, returns ``("No image to classify.", 0.0)``.
    """
    if image is None:
        return "No image to classify.", 0.0

    batch = processor(images=image, return_tensors="pt")
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = model(**batch).logits
        probabilities = torch.softmax(logits, dim=-1)

    best_index = probabilities.argmax().item()
    return model.config.id2label[best_index], probabilities.max().item()

# ---- Step 4: Main logic ----
def detect_and_classify(base_img, new_img):
    """Locate the object added between two images and classify it.

    Args:
        base_img: Reference image, or None when nothing was uploaded.
        new_img: Image containing the new object, or None.

    Returns:
        A ``(message, cropped, label)`` tuple. ``cropped`` and ``label`` are
        None whenever an input is missing or ``smart_crop`` reports an error;
        ``message`` then carries the warning text.
    """
    if any(picture is None for picture in (base_img, new_img)):
        return "⚠️ Please upload both base and new images.", None, None

    region, failure = smart_crop(base_img, new_img)
    if failure:
        return f"⚠️ {failure}", None, None

    category, score = classify_trash(region)
    summary = f"✅ Object classified as: {category} ({score*100:.2f}% confidence)"
    return summary, region, category

# ---- Step 5: Gradio interface ----
# Build the UI declaratively; components appear in the order they are created.
with gr.Blocks(title="Smart Trash Detector") as demo:
    gr.Markdown("# ♻️ Smart Trash Detection (with professional background subtraction)\nUpload a **base image**, then a **new image**.")

    # Inputs: type="pil" so the callback receives PIL images, as smart_crop expects.
    with gr.Row():
        base_img = gr.Image(label="Base Image", type="pil")
        new_img = gr.Image(label="New Image", type="pil")

    run_btn = gr.Button("Analyze and Classify")

    # Outputs: status message, cropped object, and predicted class label.
    with gr.Row():
        result_text = gr.Textbox(label="Result")
        result_crop = gr.Image(label="Detected Object (Cropped)")
        result_label = gr.Textbox(label="Class")

    # The output list order matches the 3-tuple returned by detect_and_classify.
    run_btn.click(detect_and_classify, [base_img, new_img], [result_text, result_crop, result_label])

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()