Spaces:

sagar007
/

SegmentVision

Sleeping

App Files Community

sagar007 committed on Nov 29, 2024

Commit

3d6a9c7

verified ·

1 Parent(s): eefe5b4

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -23

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ import supervision as sv
 import os
 # Load CLIP model
-model = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 # Initialize FastSAM model
@@ -24,30 +24,44 @@ def process_image_clip(image, text_input):
     if not text_input:
         return "Please enter some text to check in the image."
-    # Process image for CLIP
-    inputs = processor(
-        images=image,
-        text=[text_input],
-        return_tensors="pt",
-        padding=True
-    )
-    # Get model predictions
-    outputs = model(**inputs)
-    logits_per_image = outputs.logits_per_image
-    probs = logits_per_image.softmax(dim=1)
-    confidence = float(probs[0][0])
-    return f"Confidence that the image contains '{text_input}': {confidence:.2%}"
 def process_image_fastsam(image):
     if image is None:
         return None
-    # Convert PIL image to numpy array
-    image_np = np.array(image)
     try:
         # Run FastSAM inference
         results = fast_sam(image_np, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)
@@ -79,7 +93,7 @@ with gr.Blocks(css="footer {visibility: hidden}") as demo:
     with gr.Tab("CLIP Zero-Shot Classification"):
         with gr.Row():
-            image_input = gr.Image(type="pil", label="Input Image")
             text_input = gr.Textbox(
                 label="What do you want to check in the image?",
                 placeholder="e.g., 'a dog', 'sunset', 'people playing'",
@@ -88,13 +102,29 @@ with gr.Blocks(css="footer {visibility: hidden}") as demo:
         output_text = gr.Textbox(label="Result")
         classify_btn = gr.Button("Classify")
         classify_btn.click(fn=process_image_clip, inputs=[image_input, text_input], outputs=output_text)
     with gr.Tab("FastSAM Segmentation"):
         with gr.Row():
-            image_input_sam = gr.Image(type="pil", label="Input Image")
-            image_output = gr.Image(type="pil", label="Segmentation Result")
         segment_btn = gr.Button("Segment")
         segment_btn.click(fn=process_image_fastsam, inputs=[image_input_sam], outputs=image_output)
     gr.Markdown("""
     ### How to use:
@@ -106,4 +136,4 @@ with gr.Blocks(css="footer {visibility: hidden}") as demo:
     - For best results, use clear images with good lighting
     """)
-demo.launch()

 import os
 # Load CLIP model
+model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
 processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 # Initialize FastSAM model
     if not text_input:
         return "Please enter some text to check in the image."
+    try:
+        # Convert numpy array to PIL Image if needed
+        if isinstance(image, np.ndarray):
+            image = Image.fromarray(image)
+        # Create a list of candidate labels
+        candidate_labels = [text_input, f"not {text_input}"]
+        # Process image and text
+        inputs = processor(
+            images=image,
+            text=candidate_labels,
+            return_tensors="pt",
+            padding=True
+        )
+        # Get model predictions
+        outputs = model(**{k: v for k, v in inputs.items()})
+        logits_per_image = outputs.logits_per_image
+        probs = logits_per_image.softmax(dim=1)
+        # Get confidence for the positive label
+        confidence = float(probs[0][0])
+        return f"Confidence that the image contains '{text_input}': {confidence:.2%}"
+    except Exception as e:
+        return f"Error processing image: {str(e)}"
 def process_image_fastsam(image):
     if image is None:
         return None
     try:
+        # Convert PIL image to numpy array if needed
+        if isinstance(image, Image.Image):
+            image_np = np.array(image)
+        else:
+            image_np = image
         # Run FastSAM inference
         results = fast_sam(image_np, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)
     with gr.Tab("CLIP Zero-Shot Classification"):
         with gr.Row():
+            image_input = gr.Image(label="Input Image")
             text_input = gr.Textbox(
                 label="What do you want to check in the image?",
                 placeholder="e.g., 'a dog', 'sunset', 'people playing'",
         output_text = gr.Textbox(label="Result")
         classify_btn = gr.Button("Classify")
         classify_btn.click(fn=process_image_clip, inputs=[image_input, text_input], outputs=output_text)
+        gr.Examples(
+            examples=[
+                ["https://raw.githubusercontent.com/gradio-app/gradio/main/demo/kitchen/kitchen.png", "kitchen"],
+                ["https://raw.githubusercontent.com/gradio-app/gradio/main/demo/calculator/calculator.jpg", "calculator"],
+            ],
+            inputs=[image_input, text_input],
+        )
     with gr.Tab("FastSAM Segmentation"):
         with gr.Row():
+            image_input_sam = gr.Image(label="Input Image")
+            image_output = gr.Image(label="Segmentation Result")
         segment_btn = gr.Button("Segment")
         segment_btn.click(fn=process_image_fastsam, inputs=[image_input_sam], outputs=image_output)
+        gr.Examples(
+            examples=[
+                ["https://raw.githubusercontent.com/gradio-app/gradio/main/demo/kitchen/kitchen.png"],
+                ["https://raw.githubusercontent.com/gradio-app/gradio/main/demo/calculator/calculator.jpg"],
+            ],
+            inputs=[image_input_sam],
+        )
     gr.Markdown("""
     ### How to use:
     - For best results, use clear images with good lighting
     """)
+demo.launch(share=True)