Spaces:
Running on Zero
Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,6 +6,7 @@ from PIL import Image
|
|
| 6 |
import gc
|
| 7 |
import os
|
| 8 |
import spaces
|
|
|
|
| 9 |
from transformers import Sam3Model, Sam3Processor
|
| 10 |
|
| 11 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
|
@@ -52,38 +53,42 @@ def overlay_masks(image, masks, alpha=0.6):
|
|
| 52 |
|
| 53 |
@spaces.GPU
def process_text_detection(image, text_query, threshold):
    """Text-prompted detection: segment every instance matching *text_query*.

    Args:
        image: input PIL image (falsy -> rejected with a user-facing message).
        text_query: natural-language description of the objects to find.
        threshold: confidence threshold passed to instance post-processing.

    Returns:
        Tuple ``(result_image, status_message)``. On error the original image
        is returned together with the error text; on missing input, ``None``.
    """
    if not image or not text_query:
        return None, "请输入图像和描述词"
    try:
        model, processor = get_model()
        inputs = processor(images=image, text=text_query, return_tensors="pt").to(device)
        # Inference only — no gradient bookkeeping needed.
        with torch.no_grad():
            outputs = model(**inputs)
        results = processor.post_process_instance_segmentation(
            outputs,
            threshold=threshold,
            mask_threshold=0.5,
            target_sizes=inputs.get("original_sizes").tolist(),
        )[0]
        masks = results["masks"]
        scores = results["scores"].cpu().numpy() if "scores" in results else []
        result_img = overlay_masks(image, masks)
        # FIX: the original line `if len(masks) > 0` was missing its colon,
        # which is a SyntaxError and would prevent the module from loading.
        if len(masks) > 0:
            status = f"✅ 检测到 {len(masks)} 个目标。"
            if len(scores) > 0:
                status += f" 置信度范围: {np.min(scores):.2f}-{np.max(scores):.2f}"
        else:
            status = "❓ 未找到匹配目标,请尝试调低阈值或修改提示词。"
        return result_img, status
    except Exception as e:
        # Surface the failure in the UI instead of crashing the Space.
        return image, f"❌ 错误: {str(e)}"
|
| 73 |
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
import gc
|
| 7 |
import os
|
| 8 |
import spaces
|
| 9 |
+
import cv2 # 新增:用于图像样本的坐标定位
|
| 10 |
from transformers import Sam3Model, Sam3Processor
|
| 11 |
|
| 12 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
|
|
|
| 53 |
|
| 54 |
@spaces.GPU
def process_text_detection(image, text_query, threshold):
    """Text-prompt detection mode: overlay SAM3 instance masks on *image*.

    Returns ``(annotated_image, status_message)``; on failure the untouched
    input image is returned with the error text, and ``None`` when either
    input is missing.
    """
    if not image or not text_query:
        return None, "请输入图像和描述词"
    try:
        model, processor = get_model()
        inputs = processor(images=image, text=text_query, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model(**inputs)
        # Post-process raw logits into per-instance masks at the original size.
        results = processor.post_process_instance_segmentation(
            outputs,
            threshold=threshold,
            mask_threshold=0.5,
            target_sizes=inputs.get("original_sizes").tolist(),
        )[0]
        masks = results["masks"]
        result_img = overlay_masks(image, masks)
        if len(masks) > 0:
            status = f"✅ 文本检测完成!找到 {len(masks)} 个目标。"
        else:
            status = "❓ 未找到目标,请调低阈值。"
        return result_img, status
    except Exception as e:
        return image, f"❌ 错误: {str(e)}"
|
| 69 |
|
| 70 |
+
@spaces.GPU
|
| 71 |
+
def process_sample_detection(main_image, sample_image):
|
| 72 |
+
"""样本截图检测模式 (OpenCV 定位 + SAM3 分割)"""
|
| 73 |
+
if not main_image or not sample_image: return None, "请上传主图和样本截图"
|
| 74 |
+
try:
|
| 75 |
+
model, processor = get_model()
|
| 76 |
+
|
| 77 |
+
# 1. 使用 OpenCV 进行模板匹配找到截图在主图中的坐标
|
| 78 |
+
main_cv = cv2.cvtColor(np.array(main_image), cv2.COLOR_RGB2BGR)
|
| 79 |
+
sample_cv = cv2.cvtColor(np.array(sample_image), cv2.COLOR_RGB2BGR)
|
| 80 |
+
|
| 81 |
+
# 检查样本是否比主图大
|
| 82 |
+
if sample_cv.shape[0] > main_cv.shape[0] or sample_cv.shape[1] > main_cv.shape[1]:
|
| 83 |
+
return main_image, "❌ 错误:样本截图不能比主图还大!"
|
| 84 |
+
|
| 85 |
+
result = cv2.matchTemplate(main_cv, sample_cv, cv2.TM_CCOEFF_NORMED)
|
| 86 |
+
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
|
| 87 |
+
|
| 88 |
+
# 如果相似度太低,说明没找到
|
| 89 |
+
if max_val < 0.4:
|
| 90 |
+
return main_image, f"❓ 未在主图中找到该样本 (最高匹配度: {max_val:.2f})。请确保截图来自该原图。"
|
| 91 |
+
|
| 92 |
+
# 计算 Bounding Box [x_min, y_min, x_max, y_max]
|
| 93 |
+
h, w = sample_cv.shape[:2]
|
| 94 |
+
box =
|