Spaces:

smartfalcon-ai
/

Industrial-Defect-Detection

Running

asamasach Claude Sonnet 4.5 committed on Jan 4

Commit

3972d40

1 Parent(s): 0ee3a29

Improve zero-shot detection: simplified CLIP logic and better visualization

CLIP Changes:
- Changed from 4-class to simple binary comparison (normal vs defect)
- Lowered default threshold from 0.5 to 0.25 (more sensitive)
- Now logs both normal and defect probabilities
- Returns defect probability directly instead of summing classes
- Added normal_score to detection metadata

Visualization Improvements:
- CLIP: Always shows anomaly score on image (even if no detection)
- CLIP: Shows defect vs normal scores in label
- CLIP: Green text when no anomaly detected
- OWL-ViT: Shows detection count
- OWL-ViT: Numbered detections (#1, #2, etc)
- Both: Show threshold used when no detection
- Thicker bounding boxes (3px instead of 2px)

This makes it much clearer what the models are seeing and why they did or didn't detect anything.

Test file added: test_zeroshot.py (creates synthetic defect images for testing)

🤖 Generated with Claude Code

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

Files changed (3) hide show

__pycache__/app.cpython-313.pyc +0 -0
app.py +46 -26
test_zeroshot.py +116 -0

__pycache__/app.cpython-313.pyc ADDED Viewed

Binary file (30.2 kB). View file

app.py CHANGED Viewed

@@ -96,12 +96,12 @@ def extract_bboxes_from_heatmap(heatmap_path: str, orig_w: int, orig_h: int, thr
         return []
-def run_clip_anomaly_inference(image_bytes: bytes, confidence: float = 0.5):
     """
     Run zero-shot anomaly detection using CLIP similarity scoring.
-    This uses CLIP to compare image patches against "normal" vs "defect" descriptions.
-    Simple but effective for general anomaly detection.
     """
     try:
         from transformers import CLIPProcessor, CLIPModel
@@ -123,12 +123,10 @@ def run_clip_anomaly_inference(image_bytes: bytes, confidence: float = 0.5):
         processor = run_clip_anomaly_inference.processor
         model = run_clip_anomaly_inference.model
-        # Text descriptions for anomaly detection
         text_descriptions = [
-            "a photo of a normal product without defects",
-            "a photo of a defective product with anomalies",
-            "a photo with cracks or scratches",
-            "a photo with damage or imperfections"
         ]
         # Process inputs
@@ -145,31 +143,34 @@ def run_clip_anomaly_inference(image_bytes: bytes, confidence: float = 0.5):
             logits_per_image = outputs.logits_per_image
             probs = logits_per_image.softmax(dim=1)
-        # Get anomaly probability (sum of defect-related classes)
-        anomaly_prob = float(probs[0][1:].sum())  # Skip "normal" class
         detections = []
-        # If anomaly detected, create detection box
-        if anomaly_prob >= confidence:
-            # Create a detection for the whole image
-            # In a real scenario, you'd segment the anomalous region
             detections.append({
                 "bbox": [0, 0, orig_w, orig_h],
-                "confidence": anomaly_prob,
                 "class_id": 0,
                 "class_name": "anomaly",
                 "x1": 0,
                 "y1": 0,
                 "x2": orig_w,
                 "y2": orig_h,
-                "anomaly_score": anomaly_prob,
                 "model_type": "clip",
-                "description": "CLIP-based anomaly detection"
             })
-        logger.info(f"CLIP anomaly score: {anomaly_prob:.3f}, detections: {len(detections)}")
-        return detections, anomaly_prob
     except Exception as e:
         logger.error(f"CLIP inference error: {e}")
@@ -465,16 +466,26 @@ def gradio_inference(image, model_display_name, conf_threshold):
         detections, anomaly_score = run_clip_anomaly_inference(image_bytes, confidence=conf_threshold)
         for det in detections:
             x1 = int(det["x1"])
             y1 = int(det["y1"])
             x2 = int(det["x2"])
             y2 = int(det["y2"])
             score = det["confidence"]
-            label = f"anomaly:{score:.2f}"
-            cv2.rectangle(img_bgr, (x1, y1), (x2, y2), (0, 0, 255), 2)  # Red for anomalies
-            cv2.putText(img_bgr, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
         return cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
@@ -485,7 +496,12 @@ def gradio_inference(image, model_display_name, conf_threshold):
         detections = run_owlvit_inference(image_bytes, confidence=conf_threshold)
-        for det in detections:
             x1 = int(det["x1"])
             y1 = int(det["y1"])
             x2 = int(det["x2"])
@@ -493,9 +509,13 @@ def gradio_inference(image, model_display_name, conf_threshold):
             score = det["confidence"]
             class_name = det.get("class_name", "object")
-            label = f"{class_name}:{score:.2f}"
-            cv2.rectangle(img_bgr, (x1, y1), (x2, y2), (255, 0, 0), 2)  # Blue for OWL-ViT
-            cv2.putText(img_bgr, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
         return cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

         return []
+def run_clip_anomaly_inference(image_bytes: bytes, confidence: float = 0.25):
     """
     Run zero-shot anomaly detection using CLIP similarity scoring.
+    This uses CLIP to compare the image against "normal" vs "defect" descriptions.
+    Returns detection if the image is more similar to defect descriptions than normal.
     """
     try:
         from transformers import CLIPProcessor, CLIPModel
         processor = run_clip_anomaly_inference.processor
         model = run_clip_anomaly_inference.model
+        # Simpler binary comparison: normal vs defect
         text_descriptions = [
+            "a high quality product without any defects or anomalies",
+            "a defective product with visible defects, cracks, scratches, or damage"
         ]
         # Process inputs
             logits_per_image = outputs.logits_per_image
             probs = logits_per_image.softmax(dim=1)
+        # Get probabilities
+        normal_prob = float(probs[0][0])
+        defect_prob = float(probs[0][1])
+        logger.info(f"CLIP probabilities - Normal: {normal_prob:.3f}, Defect: {defect_prob:.3f}")
         detections = []
+        # If defect probability is higher than threshold, create detection
+        # This means the image looks more like a defect than normal
+        if defect_prob >= confidence:
             detections.append({
                 "bbox": [0, 0, orig_w, orig_h],
+                "confidence": defect_prob,
                 "class_id": 0,
                 "class_name": "anomaly",
                 "x1": 0,
                 "y1": 0,
                 "x2": orig_w,
                 "y2": orig_h,
+                "anomaly_score": defect_prob,
+                "normal_score": normal_prob,
                 "model_type": "clip",
+                "description": f"CLIP anomaly detection (defect:{defect_prob:.2f} vs normal:{normal_prob:.2f})"
             })
+        logger.info(f"CLIP result - Defect score: {defect_prob:.3f}, Detections: {len(detections)}")
+        return detections, defect_prob
     except Exception as e:
         logger.error(f"CLIP inference error: {e}")
         detections, anomaly_score = run_clip_anomaly_inference(image_bytes, confidence=conf_threshold)
+        # Add text showing anomaly score even if no detection
+        status_text = f"Anomaly Score: {anomaly_score:.3f}"
+        cv2.putText(img_bgr, status_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
+        cv2.putText(img_bgr, status_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 1)
         for det in detections:
             x1 = int(det["x1"])
             y1 = int(det["y1"])
             x2 = int(det["x2"])
             y2 = int(det["y2"])
             score = det["confidence"]
+            normal_score = det.get("normal_score", 0)
+            label = f"DEFECT:{score:.2f} (vs normal:{normal_score:.2f})"
+            cv2.rectangle(img_bgr, (x1, y1), (x2, y2), (0, 0, 255), 3)  # Red for anomalies
+            cv2.putText(img_bgr, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
+        if not detections:
+            no_detect_text = f"No anomaly detected (threshold: {conf_threshold:.2f})"
+            cv2.putText(img_bgr, no_detect_text, (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
         return cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
         detections = run_owlvit_inference(image_bytes, confidence=conf_threshold)
+        # Add detection count
+        status_text = f"OWL-ViT Detections: {len(detections)}"
+        cv2.putText(img_bgr, status_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
+        cv2.putText(img_bgr, status_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 1)
+        for i, det in enumerate(detections):
             x1 = int(det["x1"])
             y1 = int(det["y1"])
             x2 = int(det["x2"])
             score = det["confidence"]
             class_name = det.get("class_name", "object")
+            label = f"#{i+1} {class_name}:{score:.2f}"
+            cv2.rectangle(img_bgr, (x1, y1), (x2, y2), (255, 0, 0), 3)  # Blue for OWL-ViT
+            cv2.putText(img_bgr, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
+        if not detections:
+            no_detect_text = f"No objects detected (threshold: {conf_threshold:.2f})"
+            cv2.putText(img_bgr, no_detect_text, (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
         return cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

test_zeroshot.py ADDED Viewed

	@@ -0,0 +1,116 @@

+"""
+Test script to verify zero-shot models are working properly.
+"""
+import cv2
+import numpy as np
+import sys
+import os
+# Add this script's own directory to the import path so that `app` can be imported
+sys.path.insert(0, os.path.dirname(__file__))
+from app import run_clip_anomaly_inference, run_owlvit_inference
+def create_test_image_with_defect():
+    """Create a simple test image with a visible defect."""
+    # Create white background
+    img = np.ones((640, 640, 3), dtype=np.uint8) * 255
+    # Draw a normal grid pattern
+    for i in range(0, 640, 80):
+        cv2.line(img, (i, 0), (i, 640), (200, 200, 200), 2)
+        cv2.line(img, (0, i), (640, i), (200, 200, 200), 2)
+    # Draw a "defect" - irregular shapes
+    cv2.circle(img, (320, 320), 50, (0, 0, 0), -1)  # Black circle (defect)
+    cv2.rectangle(img, (100, 100), (150, 180), (50, 50, 50), -1)  # Dark rectangle (scratch)
+    # Save the test image
+    cv2.imwrite("test_defect_image.jpg", img)
+    # Convert to bytes
+    _, img_encoded = cv2.imencode('.jpg', img)
+    return img_encoded.tobytes()
+def create_normal_test_image():
+    """Create a simple test image without defects."""
+    # Create white background
+    img = np.ones((640, 640, 3), dtype=np.uint8) * 255
+    # Draw a normal grid pattern only
+    for i in range(0, 640, 80):
+        cv2.line(img, (i, 0), (i, 640), (200, 200, 200), 2)
+        cv2.line(img, (0, i), (640, i), (200, 200, 200), 2)
+    # Save the test image
+    cv2.imwrite("test_normal_image.jpg", img)
+    # Convert to bytes
+    _, img_encoded = cv2.imencode('.jpg', img)
+    return img_encoded.tobytes()
+def test_clip():
+    """Test CLIP anomaly detection."""
+    print("\n" + "="*60)
+    print("Testing CLIP Anomaly Detection")
+    print("="*60)
+    # Test with defect image
+    print("\n1. Testing with DEFECT image (should detect anomaly)...")
+    defect_image = create_test_image_with_defect()
+    detections, score = run_clip_anomaly_inference(defect_image, confidence=0.3)
+    print(f"   Anomaly Score: {score:.4f}")
+    print(f"   Detections: {len(detections)}")
+    if detections:
+        for i, det in enumerate(detections):
+            print(f"   Detection {i+1}: {det}")
+    else:
+        print("   ⚠️ NO DETECTIONS (this is the problem!)")
+    # Test with normal image
+    print("\n2. Testing with NORMAL image (should NOT detect anomaly)...")
+    normal_image = create_normal_test_image()
+    detections, score = run_clip_anomaly_inference(normal_image, confidence=0.3)
+    print(f"   Anomaly Score: {score:.4f}")
+    print(f"   Detections: {len(detections)}")
+    if detections:
+        print("   ⚠️ False positive detected!")
+    else:
+        print("   ✓ Correctly identified as normal")
+def test_owlvit():
+    """Test OWL-ViT object detection."""
+    print("\n" + "="*60)
+    print("Testing OWL-ViT Object Detection")
+    print("="*60)
+    # Test with defect image
+    print("\n1. Testing with DEFECT image...")
+    defect_image = create_test_image_with_defect()
+    detections = run_owlvit_inference(defect_image, confidence=0.05)
+    print(f"   Detections: {len(detections)}")
+    if detections:
+        for i, det in enumerate(detections):
+            print(f"   Detection {i+1}: bbox={det['bbox']}, conf={det['confidence']:.4f}, class={det['class_name']}")
+    else:
+        print("   ⚠️ NO DETECTIONS (this is the problem!)")
+if __name__ == "__main__":
+    print("Testing Zero-Shot Models")
+    print("This will create test images and run inference")
+    try:
+        test_clip()
+        test_owlvit()
+        print("\n" + "="*60)
+        print("Test Complete!")
+        print("="*60)
+        print("\nTest images saved:")
+        print("  - test_defect_image.jpg (has defects)")
+        print("  - test_normal_image.jpg (normal)")
+    except Exception as e:
+        print(f"\n❌ ERROR: {e}")
+        import traceback
+        traceback.print_exc()