scmlewis committed on
Commit
7223770
·
verified ·
1 Parent(s): 9517d28

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -62
app.py CHANGED
@@ -13,7 +13,6 @@ model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-capt
13
  # Load YOLOv5 small model for detection
14
  detect_model = YOLO('yolov5s.pt')
15
 
16
- # MarianMT translation models cache
17
  translation_models = {
18
  "English": None,
19
  "French": ("Helsinki-NLP/opus-mt-en-fr", "Helsinki-NLP/opus-mt-fr-en"),
@@ -45,6 +44,7 @@ def translate_caption(caption, target_lang):
45
  MEMORY_SIZE = 15
46
  last_images = deque([], maxlen=MEMORY_SIZE)
47
  last_captions = deque([], maxlen=MEMORY_SIZE)
 
48
  last_languages = deque([], maxlen=MEMORY_SIZE)
49
 
50
  def preprocess_image(image):
@@ -74,40 +74,50 @@ def generate_caption(image, language):
74
  # Update session memory
75
  last_images.append(image)
76
  last_captions.append(caption_translated)
 
77
  last_languages.append(language)
78
 
79
  tags = ", ".join(detected_objs) if detected_objs else "None"
80
- result_text = f"Detected objects: {tags}\nCaption ({language}): {caption_translated}"
81
-
82
- # Prepare table data for last 15 images with copyable captions and copy buttons
83
- history_rows = []
84
- for img, cap, lang in zip(last_images, last_captions, last_languages):
85
- history_rows.append([img, cap])
86
-
87
- return result_text, history_rows
88
-
89
- def gallery_to_table(history_rows):
90
- # history_rows is list of [PIL image, caption text]
91
- headers = ["Image", "Caption (click to copy)"]
92
- data = []
93
- for img, cap in history_rows:
94
- data.append([
95
- img,
96
- gr.Textbox.update(value=cap, interactive=True)
97
- ])
98
- return headers, data
99
 
100
- with gr.Blocks() as iface:
101
- gr.Markdown("# Image Captioning with Object Detection & Multilingual Support")
 
 
 
 
 
 
 
 
102
 
103
- gr.Markdown("""
 
104
 
105
- This app generates descriptive captions for your uploaded images, detects objects within them,
106
- and supports multilingual captions. Upload an image, then click 'Generate Caption' to see results.
107
 
108
- Your last 15 images and captions are saved below for easy reference and copying.
109
 
110
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  language = gr.Dropdown(
113
  label="Select Caption Language",
@@ -120,52 +130,29 @@ with gr.Blocks() as iface:
120
  image_input = gr.Image(type="pil", label="Upload Image")
121
  generate_btn = gr.Button("Generate Caption")
122
  with gr.Column(scale=3):
123
- caption_output = gr.Textbox(
124
- label="Caption & Detected Objects",
125
- lines=4,
126
- interactive=True
127
- )
128
  copy_btn = gr.Button("Copy Caption Text")
129
 
130
- # History table with thumbnails and copyable captions
131
- history_table = gr.Dataframe(
132
- headers=["Image", "Caption"],
133
- row_count=(MEMORY_SIZE, MEMORY_SIZE),
134
- col_count=2,
135
- datatype=["image", "str"],
136
- interactive=False,
137
- wrap=True,
138
- label="Last 15 Images and Captions"
139
- )
140
-
141
- def copy_text(caption_text):
142
- return gr.update(value=caption_text)
143
-
144
- def update_history(history_rows):
145
- # Convert to format compatible with gr.Dataframe
146
- data = []
147
- for img, cap in history_rows:
148
- data.append([img, cap])
149
- return data
150
 
151
  def on_generate(image, language):
152
  if image is None:
153
- return "Please upload an image.", []
154
- result_text, history_rows = generate_caption(image, language)
155
- history_data = update_history(history_rows)
156
- return result_text, history_data
 
 
 
157
 
158
  generate_btn.click(
159
  fn=on_generate,
160
  inputs=[image_input, language],
161
- outputs=[caption_output, history_table]
162
  )
163
 
164
- copy_btn.click(
165
- fn=lambda text: text,
166
- inputs=[caption_output],
167
- outputs=[caption_output]
168
- )
169
 
170
  if __name__ == "__main__":
171
  iface.launch()
 
13
  # Load YOLOv5 small model for detection
14
  detect_model = YOLO('yolov5s.pt')
15
 
 
16
  translation_models = {
17
  "English": None,
18
  "French": ("Helsinki-NLP/opus-mt-en-fr", "Helsinki-NLP/opus-mt-fr-en"),
 
44
  MEMORY_SIZE = 15
45
  last_images = deque([], maxlen=MEMORY_SIZE)
46
  last_captions = deque([], maxlen=MEMORY_SIZE)
47
+ last_objects = deque([], maxlen=MEMORY_SIZE)
48
  last_languages = deque([], maxlen=MEMORY_SIZE)
49
 
50
  def preprocess_image(image):
 
74
  # Update session memory
75
  last_images.append(image)
76
  last_captions.append(caption_translated)
77
+ last_objects.append(detected_objs)
78
  last_languages.append(language)
79
 
80
  tags = ", ".join(detected_objs) if detected_objs else "None"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
+ return caption_translated, tags
83
+
84
def build_history_ui():
    """Build one display row per remembered image.

    Each row pairs a stored image with its caption (shown in an editable
    textbox so the text can be selected), a "Copy Caption" button and the
    list of detected objects. Entries are read from the module-level
    ``last_images``, ``last_captions``, ``last_objects`` and
    ``last_languages`` deques, which are appended to together and are
    therefore walked in lockstep.

    Returns:
        list: the ``gr.Row`` containers, in deque order (oldest entry first).
    """

    def echo_caption(caption):
        # The callback runs server-side and has no clipboard access; it simply
        # hands the text back so the textbox content stays selectable.
        return caption

    history = zip(last_images, last_captions, last_objects, last_languages)
    rows = []
    for index, (img, cap, objs, lang) in enumerate(history):
        # Gradio layout containers are context managers: components created
        # inside the ``with`` body become their children. They do not accept
        # a list of children as a positional argument.
        with gr.Row() as row:
            gr.Image(
                value=img,
                interactive=False,
                label=f"Image {index + 1}",
                elem_id=f"img_{index}",
            )
            with gr.Column():
                gr.Markdown(f"**Caption ({lang}):**")
                cap_box = gr.Textbox(value=cap, lines=2, interactive=True)
                copy_btn = gr.Button("Copy Caption")
                gr.Markdown(
                    f"**Detected Objects:** {', '.join(objs) if objs else 'None'}"
                )

        # NOTE(review): Gradio only allows event listeners to be registered
        # while a Blocks context is active, and a gr.Column output cannot
        # receive new child components from a callback. Confirm how this
        # function is invoked; a fixed set of pre-built rows (or gr.render)
        # may be required for the history to actually display.
        copy_btn.click(fn=echo_caption, inputs=cap_box, outputs=cap_box)
        rows.append(row)
    return rows
111
+
112
+ with gr.Blocks() as iface:
113
+ gr.Markdown("# Image Captioning with Object Detection & Multilingual Support")
114
+ gr.Markdown(
115
+ """
116
+ Upload an image, select the language for captions, then click 'Generate Caption'.
117
+ The app generates a descriptive caption along with detected object tags.
118
+ Your last 15 images and captions are displayed below for easy reference and copying.
119
+ """
120
+ )
121
 
122
  language = gr.Dropdown(
123
  label="Select Caption Language",
 
130
  image_input = gr.Image(type="pil", label="Upload Image")
131
  generate_btn = gr.Button("Generate Caption")
132
  with gr.Column(scale=3):
133
+ caption_output = gr.Textbox(label="Caption", lines=3, interactive=True)
134
+ object_output = gr.Textbox(label="Detected Objects", lines=2, interactive=False)
 
 
 
135
  copy_btn = gr.Button("Copy Caption Text")
136
 
137
+ history_container = gr.Column(label="Last 15 Images & Captions", elem_classes="history-container")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
  def on_generate(image, language):
140
  if image is None:
141
+ return "Please upload an image.", "", []
142
+ caption, objects = generate_caption(image, language)
143
+ # Rebuild history display on every generation
144
+ return caption, objects, build_history_ui()
145
+
146
def copy_text(text):
    """Return an update that re-sets the caption textbox to ``text``.

    The textbox is kept interactive so the user can select and copy the
    caption manually.
    """
    # gr.update is the component-agnostic update helper; the per-component
    # gr.Textbox.update classmethod was removed in Gradio 4.
    return gr.update(value=text, interactive=True)
148
 
149
  generate_btn.click(
150
  fn=on_generate,
151
  inputs=[image_input, language],
152
+ outputs=[caption_output, object_output, history_container]
153
  )
154
 
155
+ copy_btn.click(fn=copy_text, inputs=caption_output, outputs=caption_output)
 
 
 
 
156
 
157
  if __name__ == "__main__":
158
  iface.launch()