# Hugging Face Space: eho69/scanning (status: Sleeping) - file size: 20,923 bytes

# import gradio as gr
# import cv2
# import numpy as np
# from PIL import Image
# import torch
# from transformers import DetrImageProcessor, DetrForObjectDetection
# import os

# try:
#     import spaces
# except ImportError:
#     # Mocking spaces for local development
#     class spaces:
#         @staticmethod
#         def GPU(func):
#             def wrapper(*args, **kwargs):
#                 return func(*args, **kwargs)
#             return wrapper

# # Load DETR model for object detection
# try:
#     processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
#     model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
#     # Warm up the model
#     dummy_input = processor(images=np.zeros((100, 100, 3)), return_tensors="pt")
#     with torch.no_grad():
#         _ = model(**dummy_input)
# except Exception as e:
#     print(f"Error loading model: {e}")
#     processor = None
#     model = None

# @spaces.GPU
# def detect_objects(image):
    
#     if model is None or processor is None:
#         return image, "Model failed to load. Please check logs."

#     # Convert to PIL if it's already a numpy array (to avoid rescaling warnings in processor)
#     if isinstance(image, np.ndarray):
#         image_pil = Image.fromarray(image)
#     else:
#         image_pil = image
    
#     # Ensure it's RGB
#     if image_pil.mode != "RGB":
#         image_pil = image_pil.convert("RGB")
    
#     # Prepare image for the model
#     inputs = processor(images=image_pil, return_tensors="pt")
    
#     with torch.no_grad():
#         outputs = model(**inputs)
    
#     # Post-process outputs
#     target_sizes = torch.tensor([image_pil.size[::-1]]) # (height, width)
#     results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.7)[0]
    
#     # Draw bounding boxes on the image (using numpy for cv2)
#     annotated_image = np.array(image_pil)
    
#     detection_info = []
    
#     for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
#         box = [round(i, 2) for i in box.tolist()]
#         label_name = model.config.id2label[label.item()]
#         confidence = round(score.item(), 3)
        
#         # Draw rectangle
#         x1, y1, x2, y2 = map(int, box)
#         cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        
#         # Add label
#         label_text = f"{label_name}: {confidence}"
#         cv2.putText(annotated_image, label_text, (x1, y1 - 10), 
#                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        
#         detection_info.append(f"• {label_name} (confidence: {confidence})")
    
#     # Create summary text
#     summary = f"**Detected {len(detection_info)} object(s):**\n\n" + "\n".join(detection_info) if detection_info else "No objects detected with confidence > 0.7"
    
#     return annotated_image, summary

# def scan_edges(image):
#     """
#     Simple edge detection using OpenCV
#     """
#     # Convert PIL image to numpy array
#     if isinstance(image, Image.Image):
#         image = np.array(image)
    
#     # Convert to grayscale
#     gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    
#     clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    
#     # Apply Gaussian blur
#     blurred = cv2.GaussianBlur(gray, (5, 5), 0)

#     # enhanced = clahe.apply(gray_image)
    
#     # Edge detection using Canny
#     edges = cv2.Canny(blurred, 50, 150)
    
#     # Convert back to RGB for display
#     edges_rgb = cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB)
    
#     return edges_rgb

# # def scan_edges(image):

# #     # --- 1. Convert PIL image to numpy array if needed ---
# #     if isinstance(image, Image.Image):
# #         image = np.array(image)

# #     # --- 2. Convert to grayscale ---
# #     gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

# #     # --- 3. Gaussian blur BEFORE CLAHE to reduce high-freq noise ---
# #     #        that CLAHE would otherwise amplify
# #     blurred = cv2.GaussianBlur(gray, (5, 5), 0)

# #     # --- 4. Apply CLAHE on the blurred image ---
# #     #        clipLimit=2.0  → controls noise amplification in flat regions
# #     #        tileGridSize   → 8x8 tiles work well for engine block scale features;
# #     #                         increase (e.g. 16x16) if bearing saddles are small
# #     #                         relative to full image resolution
# #     clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
# #     enhanced = clahe.apply(blurred)

# #     # --- 5. Canny edge detection on CLAHE-enhanced image ---
# #     #        Lower threshold (30) helps recover weak edges in shadow regions
# #     #        Upper threshold (120) keeps strong structural edges
# #     #        Tune these if you get too much noise or missing arcs
# #     edges = cv2.Canny(enhanced, 30, 120)

# #     # --- 6. Convert single-channel edge map back to RGB for display ---
# #     edges_rgb = cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB)

# #     return edges_rgb

# def process_image(image, mode):
#     """
#     Process image based on selected mode
#     """
#     if image is None:
#         return None, "Please upload an image."
        
#     if mode == "Object Detection":
#         return detect_objects(image)
#     else:  # Edge Detection
#         edges = scan_edges(image)
#         return edges, "Edge detection completed"

# # Create Gradio interface
# with gr.Blocks(title="Object Scanner") as demo:
#     gr.Markdown("Detect objects or scan edges using your camera or uploaded images")
    
#     with gr.Tabs():
#         with gr.TabItem(" Image Scanner"):
#             with gr.Row():
#                 with gr.Column():
#                     input_image = gr.Image(
#                         sources=["upload", "webcam"], 
#                         type="pil", 
#                         label="Upload or Capture Image"
#                     )
#                     mode = gr.Radio(
#                         choices=["Object Detection", "Edge Detection"],
#                         value="Object Detection",
#                         label="Scanning Mode"
#                     )
#                     scan_btn = gr.Button(" Process Image", variant="primary")
                
#                 with gr.Column():
#                     output_image = gr.Image(type="numpy", label="Processed Result")
#                     output_text = gr.Markdown(label="Detection Results")
            
#             # Examples
#             if os.path.exists("examples"):
#                 gr.Examples(
#                     examples=[
#                         ["examples/sample1.jpg", "Object Detection"],
#                         ["examples/sample2.jpg", "Edge Detection"],
#                     ],
#                     inputs=[input_image, mode],
#                     outputs=[output_image, output_text],
#                     fn=process_image,
#                     cache_examples=False,
#                 )

#         with gr.TabItem("🎥 Live Edge Scan"):
#             gr.Markdown("### Real-time Edge Detection")
#             with gr.Row():
#                 with gr.Column():
#                     camera_input = gr.Image(
#                         sources=["webcam"], 
#                         streaming=True, 
#                         type="numpy",
#                         label="Live Feed"
#                     )
#                 with gr.Column():
#                     camera_output = gr.Image(
#                         label="Edge Stream"
#                     )
            
#             # Live stream logic for edges
#             camera_input.stream(
#                 fn=scan_edges,
#                 inputs=camera_input,
#                 outputs=camera_output
#             )
    
#     # Static scan logic
#     scan_btn.click(
#         fn=process_image,
#         inputs=[input_image, mode],
#         outputs=[output_image, output_text]
#     )

# if __name__ == "__main__":
#     demo.launch(
#         server_name="0.0.0.0", 
#         server_port=7860, 
#         theme=gr.themes.Soft(),
#         ssr_mode=False
#     )

import gradio as gr
import cv2
import numpy as np
from PIL import Image
import torch
from transformers import DetrImageProcessor, DetrForObjectDetection
import os

try:
    import spaces
except ImportError:
    # The `spaces` package is not importable here (e.g. local development),
    # so provide a stand-in whose GPU decorator does nothing.
    class spaces:
        """Local stand-in exposing a no-op ``GPU`` decorator."""

        @staticmethod
        def GPU(func=None, **_options):
            """Return the decorated function unchanged.

            Supports both the bare form ``@spaces.GPU`` and the parameterised
            form ``@spaces.GPU(duration=...)``; any options are ignored.
            Returning the function itself (rather than a wrapper) keeps its
            name, docstring and signature intact.
            """
            if func is None:
                # Called with options only: hand back a pass-through decorator.
                return lambda fn: fn
            return func

# Load the DETR processor and model once at import time. If anything in this
# section fails, both globals are set to None so that detect_objects() can
# report the problem to the user instead of the whole app crashing on startup.
try:
    processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
    model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
    # Warm up the model with a blank uint8 frame. A float array of zeros looks
    # like an image that was already rescaled to [0, 1], which makes the
    # processor emit its rescaling warning; uint8 matches real image input.
    dummy_input = processor(
        images=np.zeros((100, 100, 3), dtype=np.uint8),
        return_tensors="pt",
    )
    with torch.no_grad():
        _ = model(**dummy_input)
except Exception as e:
    # Top-level boundary: log the failure and degrade gracefully.
    print(f"Error loading model: {e}")
    processor = None
    model = None

@spaces.GPU
def detect_objects(image, threshold=0.7):
    """Detect objects with DETR and draw labelled bounding boxes.

    Args:
        image: A PIL image or a numpy array. Arrays are converted with
            ``Image.fromarray``; non-RGB images are converted to RGB.
        threshold: Minimum confidence score a detection needs to be kept.
            Defaults to 0.7.

    Returns:
        A tuple ``(annotated_image, summary)`` where ``annotated_image`` is a
        numpy array with boxes and captions drawn on it and ``summary`` is a
        Markdown string listing the detections. If the model failed to load,
        the input image is returned unchanged together with an error message.
    """
    if model is None or processor is None:
        return image, "Model failed to load. Please check logs."

    # Hand the processor a PIL image (avoids rescaling warnings for arrays).
    image_pil = Image.fromarray(image) if isinstance(image, np.ndarray) else image

    # Ensure it's RGB
    if image_pil.mode != "RGB":
        image_pil = image_pil.convert("RGB")

    # Prepare image for the model and run inference without gradients.
    inputs = processor(images=image_pil, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # PIL reports (width, height); post-processing expects (height, width).
    target_sizes = torch.tensor([image_pil.size[::-1]])
    results = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=threshold
    )[0]

    # Draw on a numpy copy of the image (cv2 works on arrays).
    annotated_image = np.array(image_pil)
    detection_info = []

    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        label_name = model.config.id2label[label.item()]
        confidence = round(score.item(), 3)

        # Box corners as integer pixel coordinates.
        x1, y1, x2, y2 = (int(round(coord, 2)) for coord in box.tolist())
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Caption sits just above the box; clamp it so that it stays inside
        # the frame when the box touches the top or left edge.
        label_text = f"{label_name}: {confidence}"
        text_origin = (max(x1, 0), max(y1 - 10, 15))
        cv2.putText(annotated_image, label_text, text_origin,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        detection_info.append(f"• {label_name} (confidence: {confidence})")

    # Create summary text
    if detection_info:
        summary = (
            f"**Detected {len(detection_info)} object(s):**\n\n"
            + "\n".join(detection_info)
        )
    else:
        summary = f"No objects detected with confidence > {threshold}"

    return annotated_image, summary


def scan_edges(image, *, clip_limit=9.9, tile_grid_size=(8, 8),
               canny_low=30, canny_high=120, closing_kernel_size=3):
    """
    Edge detection with CLAHE preprocessing to recover edges lost in
    shadowed regions (e.g. bearing saddle arcs on engine blocks).
    Pipeline:
        RGB → Grayscale → Gaussian Blur → CLAHE → Canny → Closing

    Args:
        image: PIL image or numpy array, either RGB or already single-channel.
        clip_limit: CLAHE contrast clip limit. Higher values boost local
            contrast more strongly but also amplify noise in uniform areas.
        tile_grid_size: CLAHE tile grid; 8x8 suits engine-block scale
            features, use (16, 16) if bearing saddles are small in frame.
        canny_low: Lower Canny hysteresis threshold.
        canny_high: Upper Canny hysteresis threshold.
        closing_kernel_size: Side length of the square closing kernel;
            3 is suitable for small gaps, use 5 for larger gaps.

    Returns:
        The closed edge map as a 3-channel numpy array for Gradio display.
    """
    # 1. Convert PIL image to numpy array if needed
    if isinstance(image, Image.Image):
        image = np.array(image)

    # 2. Convert to grayscale (a 2-D array is already single-channel)
    if image.ndim == 2:
        gray = image
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # 3. Gaussian blur BEFORE CLAHE — prevents CLAHE from amplifying
    #    surface noise on metallic/reflective parts
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # 4. CLAHE — boosts local contrast in dark/shadowed regions
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
    enhanced = clahe.apply(blurred)

    # 5. Canny on the CLAHE-enhanced image. The default thresholds (30, 120)
    #    are lower than the earlier (50, 150) so that weak edges in shadowed
    #    saddle arcs are no longer missed.
    edges = cv2.Canny(enhanced, canny_low, canny_high)

    # 6. Morphological closing fills gaps in broken edges:
    #    dilation first grows white pixels so nearby fragments connect, then
    #    erosion shrinks them back towards the original thickness.
    kernel = np.ones((closing_kernel_size, closing_kernel_size), np.uint8)
    closed_edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel, iterations=1)

    # 7. Convert back to RGB for Gradio display
    return cv2.cvtColor(closed_edges, cv2.COLOR_GRAY2RGB)


def extract_green_channel(image):
    """
    Isolate the green plane of an RGB image and return it as a 3-channel image.

    Accepts a PIL image or a numpy array. The result repeats the green plane
    in every channel, so it displays as a greyscale picture. The green channel
    often gives good contrast for vegetation and certain materials.
    """
    # PIL input is turned into an array; arrays are used as they are.
    pixels = np.array(image) if isinstance(image, Image.Image) else image

    # Index 1 on the last axis is green for RGB-ordered data.
    green_plane = pixels[:, :, 1]

    # Replicate the single plane across three channels for display.
    return cv2.cvtColor(green_plane, cv2.COLOR_GRAY2RGB)


def green_bilateral_edges(image):
    """
    Find edges in the green channel after bilateral smoothing.

    Pipeline:
        RGB → Green Channel → Bilateral Filter → Canny Edge Detection → Closing

    The bilateral filter reduces noise in flat regions while keeping sharp
    edges, which suits noisy or textured surfaces. Accepts a PIL image or a
    numpy array and returns a 3-channel edge map for Gradio display.
    """
    # PIL input is turned into an array; arrays are used as they are.
    frame = np.array(image) if isinstance(image, Image.Image) else image

    # Smooth the green plane (index 1 in RGB).
    #   d=9           : diameter of the pixel neighbourhood
    #   sigmaColor=75 : larger values mix more dissimilar intensities
    #   sigmaSpace=75 : larger values let farther pixels influence each other
    smoothed = cv2.bilateralFilter(frame[:, :, 1], d=9, sigmaColor=75, sigmaSpace=75)

    # Canny with moderate thresholds for balanced edge detection.
    raw_edges = cv2.Canny(smoothed, 50, 150)

    # Closing (dilate, then erode) bridges small breaks in the edge map.
    # A 3x3 element handles small gaps; 5x5 would bridge larger ones.
    structuring_element = np.ones((3, 3), np.uint8)
    sealed = cv2.morphologyEx(
        raw_edges, cv2.MORPH_CLOSE, structuring_element, iterations=1
    )

    # Back to three channels for display.
    return cv2.cvtColor(sealed, cv2.COLOR_GRAY2RGB)


def process_image(image, mode):
    """
    Run the processing step selected by ``mode`` on a single image.

    Returns a ``(result_image, message)`` pair. A missing image yields
    ``(None, "Please upload an image.")``; an unrecognised mode returns the
    input image unchanged with an explanatory message.
    """
    if image is None:
        return None, "Please upload an image."

    # Object detection already produces its own (image, summary) pair.
    if mode == "Object Detection":
        return detect_objects(image)

    # Remaining modes map to (deferred processing call, status message).
    # The lambdas keep the work lazy: only the selected one is executed.
    handlers = {
        "Edge Detection": (
            lambda: scan_edges(image),
            "Edge detection completed (CLAHE + Canny + Closing)",
        ),
        "Green Channel": (
            lambda: extract_green_channel(image),
            "Green channel extracted",
        ),
        "Green + Bilateral Edges": (
            lambda: green_bilateral_edges(image),
            "Edge detection completed (Green Channel + Bilateral Filter + Canny + Closing)",
        ),
    }

    selected = handlers.get(mode)
    if selected is None:
        return image, "Unknown mode selected"

    run, status = selected
    return run(), status


def process_live_stream(image, mode):
    """
    Apply the selected filter to one frame of the live stream.

    Returns ``None`` when no frame is available. The two green-channel modes
    are dispatched explicitly; "Edge Detection" and any other value use the
    CLAHE + Canny edge pipeline.
    """
    if image is None:
        return None

    if mode == "Green Channel":
        return extract_green_channel(image)

    if mode == "Green + Bilateral Edges":
        return green_bilateral_edges(image)

    # "Edge Detection" is also the default for unrecognised modes.
    return scan_edges(image)


# Create Gradio interface.
# Layout: two tabs (single-image scanner and live webcam processing) followed
# by a collapsible help section describing every mode.
with gr.Blocks(title="Object Scanner") as demo:
    gr.Markdown("# 🔍 Object Scanner\nDetect objects, scan edges, or extract green channel using your camera or uploaded images")

    with gr.Tabs():
        # --- Tab 1: process one uploaded or captured image on demand ---
        with gr.TabItem("📷 Image Scanner"):
            with gr.Row():
                with gr.Column():
                    input_image = gr.Image(
                        sources=["upload", "webcam"],
                        type="pil",
                        label="Upload or Capture Image"
                    )
                    mode = gr.Radio(
                        choices=[
                            "Object Detection",
                            "Edge Detection",
                            "Green Channel",
                            "Green + Bilateral Edges"
                        ],
                        value="Object Detection",
                        label="Scanning Mode"
                    )
                    scan_btn = gr.Button("🔍 Process Image", variant="primary")

                with gr.Column():
                    output_image = gr.Image(type="numpy", label="Processed Result")
                    output_text = gr.Markdown(label="Detection Results")

            # Examples are only registered when an "examples" directory exists
            # next to the app.
            if os.path.exists("examples"):
                gr.Examples(
                    examples=[
                        ["examples/sample1.jpg", "Object Detection"],
                        ["examples/sample2.jpg", "Edge Detection"],
                        ["examples/sample1.jpg", "Green Channel"],
                        ["examples/sample2.jpg", "Green + Bilateral Edges"],
                    ],
                    inputs=[input_image, mode],
                    outputs=[output_image, output_text],
                    fn=process_image,
                    cache_examples=False,
                )

        # --- Tab 2: filter webcam frames as they arrive ---
        # Object detection is not offered here; only the image filters are.
        with gr.TabItem("🎥 Live Processing"):
            gr.Markdown("### Real-time Image Processing")
            with gr.Row():
                with gr.Column():
                    camera_input = gr.Image(
                        sources=["webcam"],
                        streaming=True,
                        type="numpy",
                        label="Live Feed"
                    )
                    live_mode = gr.Radio(
                        choices=[
                            "Edge Detection",
                            "Green Channel",
                            "Green + Bilateral Edges"
                        ],
                        value="Edge Detection",
                        label="Processing Mode"
                    )
                with gr.Column():
                    camera_output = gr.Image(
                        label="Processed Stream"
                    )

            # Live stream logic: the handler takes (frame, mode) directly, so
            # no wrapper function is needed.
            camera_input.stream(
                fn=process_live_stream,
                inputs=[camera_input, live_mode],
                outputs=camera_output
            )

    # Static scan logic: run the selected mode when the button is pressed.
    scan_btn.click(
        fn=process_image,
        inputs=[input_image, mode],
        outputs=[output_image, output_text]
    )

    # Info section (collapsed by default)
    with gr.Accordion("ℹ️ Mode Information", open=False):
        gr.Markdown("""
        ### Available Modes:
        
        **Object Detection** - Uses DETR model to detect and label objects with bounding boxes
        
        **Edge Detection** - CLAHE-enhanced Canny edge detection with morphological closing to fill gaps
        - Pipeline: Grayscale → Gaussian Blur → CLAHE → Canny → Closing
        - Closing operation connects broken edges (useful for interrupted arcs and curves)
        
        **Green Channel** - Extracts the green channel, useful for vegetation and certain materials
        
        **Green + Bilateral Edges** - Combines green channel extraction with bilateral filtering and closing
        - Pipeline: Green Channel → Bilateral Filter → Canny → Closing
        - Bilateral filtering preserves edges while reducing noise
        - Closing fills gaps in broken edges, ideal for textured surfaces
        
        ### Morphological Closing:
        - **Step 1 (Dilation)**: Expands white pixels to connect nearby edges
        - **Step 2 (Erosion)**: Shrinks pixels back to original size while keeping gaps filled
        - **Result**: Broken arcs and interrupted lines become continuous edges
        """)

if __name__ == "__main__":
    # Start the app only when this file is executed as a script.
    # Listens on all network interfaces, port 7860, with the Soft theme and
    # server-side rendering switched off.
    launch_options = dict(
        server_name="0.0.0.0",
        server_port=7860,
        theme=gr.themes.Soft(),
        ssr_mode=False,
    )
    demo.launch(**launch_options)