Update app.py
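Summary of the change: the #main-app-area wrapper is widened from a 600px max-width (700px mobile breakpoint) to 900px (950px breakpoint) and its CSS moves to the top of the custom_css block; the imports and model loading now follow the CSS definition instead of preceding it; two stale comments are dropped; and the gr.HTML markers that open and close the content region gain explanatory comments.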
app.py CHANGED
|
@@ -1,21 +1,18 @@
|
|
| 1 |
-
from transformers import BlipProcessor, BlipForConditionalGeneration
|
| 2 |
-
from ultralytics import YOLO
|
| 3 |
-
import torch
|
| 4 |
-
import gradio as gr
|
| 5 |
-
from PIL import Image
|
| 6 |
-
from collections import deque
|
| 7 |
-
import numpy as np
|
| 8 |
-
|
| 9 |
-
# Load BLIP and YOLOv5 models
|
| 10 |
-
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
|
| 11 |
-
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
|
| 12 |
-
detect_model = YOLO('yolov5s.pt')
|
| 13 |
-
|
| 14 |
-
MEMORY_SIZE = 10 # Now only 10 in history
|
| 15 |
-
last_images = deque([], maxlen=MEMORY_SIZE)
|
| 16 |
-
last_captions = deque([], maxlen=MEMORY_SIZE)
|
| 17 |
-
|
| 18 |
custom_css = """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
#app-title {
|
| 20 |
text-align: center;
|
| 21 |
font-size: 38px;
|
|
@@ -28,19 +25,6 @@ custom_css = """
|
|
| 28 |
font-size: 19px;
|
| 29 |
margin: 14px 0 22px 0;
|
| 30 |
}
|
| 31 |
-
/* Responsive + locked max-width */
|
| 32 |
-
#main-app-area {
|
| 33 |
-
max-width: 600px;
|
| 34 |
-
margin-left: auto;
|
| 35 |
-
margin-right: auto;
|
| 36 |
-
padding: 0 8px;
|
| 37 |
-
}
|
| 38 |
-
@media (max-width: 700px) {
|
| 39 |
-
#main-app-area {
|
| 40 |
-
max-width: 98vw;
|
| 41 |
-
padding: 0 2vw;
|
| 42 |
-
}
|
| 43 |
-
}
|
| 44 |
#generate-btn {
|
| 45 |
background: linear-gradient(90deg, #31b2fd 0%, #98f972 100%);
|
| 46 |
color: white;
|
|
@@ -58,6 +42,22 @@ custom_css = """
|
|
| 58 |
}
|
| 59 |
"""
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
def preprocess_image(image):
|
| 62 |
if image.mode != "RGB":
|
| 63 |
image = image.convert("RGB")
|
|
@@ -80,19 +80,15 @@ def generate_caption(image):
|
|
| 80 |
out = model.generate(**inputs, max_length=30, num_beams=5, early_stopping=True)
|
| 81 |
caption = processor.decode(out[0], skip_special_tokens=True)
|
| 82 |
detected_objs = detect_objects(image)
|
| 83 |
-
|
| 84 |
-
# Update session memory
|
| 85 |
last_images.append(image)
|
| 86 |
last_captions.append(caption)
|
| 87 |
-
|
| 88 |
tags = ", ".join(detected_objs) if detected_objs else "None"
|
| 89 |
gallery = [(img, f"Detected objects: {tags}\nCaption: {caption}") for img, caption in zip(list(last_images), list(last_captions))]
|
| 90 |
-
|
| 91 |
result_text = f"Detected objects: {tags}\nCaption: {caption}"
|
| 92 |
return result_text, gallery
|
| 93 |
|
| 94 |
with gr.Blocks(css=custom_css) as iface:
|
| 95 |
-
gr.HTML('<div id="main-app-area">')
|
| 96 |
gr.HTML('<div id="app-title">๐ผ๏ธ Image Captioning with Object Detection</div>')
|
| 97 |
gr.HTML(
|
| 98 |
'<div id="instructions">'
|
|
@@ -103,23 +99,20 @@ with gr.Blocks(css=custom_css) as iface:
|
|
| 103 |
'๐ <i>Last 10 results are stored for you.</i>'
|
| 104 |
'</div>'
|
| 105 |
)
|
| 106 |
-
|
| 107 |
image_input = gr.Image(type="pil", label="Upload Image")
|
| 108 |
generate_btn = gr.Button("โญ Generate Caption", elem_id="generate-btn")
|
| 109 |
caption_output = gr.Textbox(label="๐ Caption and Detected Objects", lines=5, interactive=True)
|
| 110 |
gallery = gr.Gallery(label="Last 10 Images and Captions", scale=3)
|
| 111 |
-
|
| 112 |
def on_generate(image):
|
| 113 |
if image is None:
|
| 114 |
return "Please upload an image.", []
|
| 115 |
return generate_caption(image)
|
| 116 |
-
|
| 117 |
generate_btn.click(
|
| 118 |
fn=on_generate,
|
| 119 |
inputs=image_input,
|
| 120 |
outputs=[caption_output, gallery]
|
| 121 |
)
|
| 122 |
-
gr.HTML('</div>') #
|
| 123 |
|
| 124 |
if __name__ == "__main__":
|
| 125 |
iface.launch()
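The diff calls detect_objects(image), but its definition sits in the unchanged region outside these hunks. Below is a hypothetical reconstruction only: detect_model and the expected return value (detected class names, joined into tags above) come from the code shown, while the confidence cutoff and set-based de-duplication are assumptions.

# Hypothetical sketch of the unchanged detect_objects helper; not part of this diff.
def detect_objects(image):
    results = detect_model(image)  # ultralytics YOLO returns a list of Results
    names = set()
    for result in results:
        for box in result.boxes:
            if float(box.conf) >= 0.5:  # assumed confidence cutoff
                names.add(result.names[int(box.cls)])
    return sorted(names)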
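Similarly, the head of generate_caption, where inputs is built, falls outside the hunks. Under the standard transformers BLIP API it would plausibly read as follows; the reuse of preprocess_image is an assumption, and only the last two lines are taken from the diff.

# Hypothetical sketch of the unshown start of generate_caption.
def generate_caption(image):
    image = preprocess_image(image)  # assumption: reuses the helper above
    inputs = processor(images=image, return_tensors="pt")  # standard BlipProcessor usage
    out = model.generate(**inputs, max_length=30, num_beams=5, early_stopping=True)  # from the diff
    caption = processor.decode(out[0], skip_special_tokens=True)  # from the diff
    ...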