Spaces:

scmlewis
/

image_captioning

Sleeping

App Files Files Community

scmlewis committed on Oct 20, 2025

Commit

bfd5ab7

verified ·

1 Parent(s): 1c27e36

Update app.py

Browse files

Files changed (1) hide show

app.py +103 -65

app.py CHANGED Viewed

@@ -6,17 +6,72 @@ from PIL import Image
 from collections import deque
 import numpy as np
-# Load main BLIP model for English captioning
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
-# Load YOLOv5 small model for object detection using ultralytics package
 detect_model = YOLO('yolov5s.pt')
-# Session memory for last 15 images and captions
 MEMORY_SIZE = 15
-last_images = deque([], maxlen=MEMORY_SIZE)
-last_captions = deque([], maxlen=MEMORY_SIZE)
 def preprocess_image(image):
     if image.mode != "RGB":
@@ -40,78 +95,61 @@ def generate_caption(image):
     out = model.generate(**inputs, max_length=30, num_beams=5, early_stopping=True)
     caption = processor.decode(out[0], skip_special_tokens=True)
     detected_objs = detect_objects(image)
-    # Update session memory
-    last_images.append(image)
-    last_captions.append(caption)
     tags = ", ".join(detected_objs) if detected_objs else "None"
-    gallery = [(img, cap) for img, cap in zip(list(last_images), list(last_captions))]
-    result_text = f"Detected objects: {tags}\nCaption: {caption}"
-    return result_text, gallery
-# Custom CSS styles
-custom_css = """
-#app-title {
-    text-align: center;
-    font-size: 36px;
-    color: #4DB8FF; /* Light blue header */
-    font-weight: bold;
-}
-#instructions {
-    text-align: center;
-    font-size: 18px;
-    /* Removed custom color for better theme contrast */
-}
-#generate-btn {
-    background: linear-gradient(90deg, #1E90FF, #32CD32); /* lake blue → light green */
-    color: white;
-    font-weight: bold;
-    border: none;
-    border-radius: 10px;
-    transition: 0.3s ease;
-}
-#generate-btn:hover {
-    box-shadow: 0 0 10px rgba(50,205,50,0.4);
-    transform: scale(1.05);
-}
-"""
 with gr.Blocks(css=custom_css) as iface:
-    # Centered header and description
-    gr.HTML('<h1 id="app-title">🖼️ Image Captioning with Object Detection</h1>')
     gr.HTML(
-        '<p id="instructions">👋 Welcome! This app detects objects in your image and generates a descriptive caption.<br>'
-        '🪄 <b>How to use:</b><br>'
-        '1️⃣ Upload an image below<br>'
-        '2️⃣ Click <b>⭐ Generate Caption</b> to start analysis<br>'
-        '3️⃣ View caption and detected objects instantly below.<br>'
-        '💡 The last <b>15 results</b> will be saved for your review!</p>'
     )
-    # Upload image and Generate button directly below
-    image_input = gr.Image(type="pil", label="Upload Image")
-    generate_btn = gr.Button("⭐ Generate Caption", elem_id="generate-btn")
-    # Result output
-    caption_output = gr.Textbox(label="📝 Caption and Detected Objects", lines=3, interactive=False)
-    # History gallery
-    gallery = gr.Gallery(label="Last 15 Images and Captions", scale=3)
     def on_generate(image):
         if image is None:
-            return "Please upload an image.", []
-        return generate_caption(image)
     generate_btn.click(
         fn=on_generate,
         inputs=image_input,
-        outputs=[caption_output, gallery]
     )
 if __name__ == "__main__":
     iface.launch()

 from collections import deque
 import numpy as np
+# Load BLIP model for English captioning
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+# Load the YOLOv5 small weights used for object detection
 detect_model = YOLO('yolov5s.pt')
+# Maximum number of past results kept in the history
 MEMORY_SIZE = 15
+# Most recent combined "detected objects + caption" texts; the deque drops the
+# oldest entry once MEMORY_SIZE is exceeded.
+# NOTE(review): this is module-level state, so it is shared by everything served
+# from this process rather than kept per user -- confirm that is intended.
+last_texts = deque([], maxlen=MEMORY_SIZE)
+# Stylesheet handed to gr.Blocks(css=...). "#" selectors target ids set through
+# elem_id or written directly in the gr.HTML markup; "." selectors target
+# classes assigned through elem_classes.
+custom_css = """
+#app-title {
+    text-align: center;
+    font-size: 38px;
+    color: #53c9fc;
+    font-weight: bold;
+    padding-top: 12px;
+}
+#instructions {
+    text-align: center;
+    font-size: 19px;
+    margin: 14px 0 22px 0;
+}
+#main-card {
+    max-width: 600px;
+    margin: auto;
+    background: #252933;
+    border-radius: 16px;
+    box-shadow: 0 5px 24px #0002;
+    padding: 28px 35px;
+}
+#generate-btn {
+    background: linear-gradient(90deg, #31b2fd 0%, #98f972 100%);
+    color: white;
+    font-size: 18px;
+    font-weight: bold;
+    border: none;
+    border-radius: 11px;
+    margin-top: 8px;
+    margin-bottom: 14px;
+    transition: 0.2s;
+}
+#generate-btn:hover {
+    filter: brightness(1.08);
+    box-shadow: 0 2px 16px #9efbc344;
+}
+.label-copyable {
+    font-size: 18px;
+    font-weight: bold;
+    color: #53c9fc;
+    margin-bottom: 4px;
+}
+.gr-table { /* helps tables stand out on dark bg */
+    background: #23262e !important;
+    border-radius: 10px !important;
+}
+.copy-btn-table {
+    background: #252c37;
+    color: #75e39e;
+    border: none;
+    border-radius: 7px;
+    padding: 5px 15px;
+    font-size: 15px;
+    font-weight: bold;
+    margin-left: 10px;
+    transition: background 0.2s;
+}
+"""
 def preprocess_image(image):
     if image.mode != "RGB":
     out = model.generate(**inputs, max_length=30, num_beams=5, early_stopping=True)
     caption = processor.decode(out[0], skip_special_tokens=True)
     detected_objs = detect_objects(image)
     tags = ", ".join(detected_objs) if detected_objs else "None"
+    combined_text = f"Detected objects: {tags}\nCaption: {caption}"
+    last_texts.append(combined_text)
+    return combined_text
+def build_history_table():
+    """Return the history table contents as ``(headers, rows)``.
+
+    ``rows`` holds one ``[text, "Copy"]`` pair per entry of ``last_texts``,
+    newest first, so the most recent result is shown at the top of the table.
+    """
+    headers = ["Past Outputs", "Action"]
+    # The action cell is a plain string label: both table columns are declared
+    # with datatype "str", and a gr.Button created here would never be rendered
+    # (it would only be instantiated and then thrown away on every call).
+    rows = [[text, "Copy"] for text in reversed(last_texts)]  # latest on top
+    return headers, rows
 with gr.Blocks(css=custom_css) as iface:
+    gr.HTML('<div id="app-title">🖼️ Image Captioning with Object Detection</div>')
     gr.HTML(
+        '<div id="instructions">'
+        '🙌 <b>Welcome!</b> Instantly analyze images using AI.<br>'
+        '1️⃣ <b>Upload</b> your image.<br>'
+        '2️⃣ Click <b>⭐ Generate Caption</b>.<br>'
+        '3️⃣ Copy and reuse your results below.<br>'
+        '📜 <i>Last 15 results are stored for you.</i>'
+        '</div>'
     )
+    # gr.Group rather than gr.Box: gr.Box was removed in Gradio 4, while
+    # gr.Group is available in both 3.x and later releases.
+    with gr.Group(elem_id="main-card"):
+        image_input = gr.Image(type="pil", label="Upload Image")
+        generate_btn = gr.Button("⭐ Generate Caption", elem_id="generate-btn")
+        caption_output = gr.Textbox(label="📝 Caption and Detected Objects", lines=5, interactive=True, elem_classes="label-copyable")
+        history_table = gr.Dataframe(
+            headers=["Past Outputs", "Action"],
+            datatype=["str", "str"],
+            interactive=True,
+            # Second element must be "fixed" or "dynamic"; the history length
+            # is already capped at MEMORY_SIZE by the last_texts deque.
+            row_count=(0, "dynamic"),
+            col_count=2,
+            wrap=True
+        )
     def on_generate(image):
+        """Caption ``image`` and return ``(result_text, history_rows)``.
+
+        ``history_rows`` is a list of ``[text, "Copy"]`` rows. The column
+        headers are already configured on ``history_table``, so only the rows
+        are sent back (a ``(headers, rows)`` tuple is not a value format the
+        Dataframe component accepts).
+        """
         if image is None:
+            return "Please upload an image.", []
+        combined = generate_caption(image)
+        _, data = build_history_table()
+        return combined, [[row[0], "Copy"] for row in data]
+    def copy_output(text):
+        """Return an update that places ``text`` into an editable textbox."""
+        # gr.update works across Gradio versions; the per-component
+        # gr.Textbox.update classmethod was removed in Gradio 4.
+        return gr.update(value=text, interactive=True)
     generate_btn.click(
         fn=on_generate,
         inputs=image_input,
+        outputs=[caption_output, history_table]
     )
+    # The table cells are interactive; for a real "copy" button, use browser clipboard JS if needed
+# Launch the Gradio app only when this file is executed as a script.
 if __name__ == "__main__":
     iface.launch()