scmlewis committed · verified
Commit 3fdf4eb · Parent: 1f6a8b2

Update app.py

Files changed (1): app.py (+10, -7)
app.py CHANGED
@@ -6,13 +6,14 @@ from PIL import Image
 from collections import deque
 import numpy as np
 
-# Load BLIP model
+# Load BLIP model for English captioning
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
 
-# Load YOLOv5 model
+# Load YOLOv5 small model for detection
 detect_model = YOLO('yolov5s.pt')
 
+# Setup MarianMT translation models cache for multilingual captions
 translation_models = {
     "English": None,
     "French": ("Helsinki-NLP/opus-mt-en-fr", "Helsinki-NLP/opus-mt-fr-en"),
@@ -71,6 +72,7 @@ def generate_caption(image, language):
     caption_translated = translate_caption(caption_en, language)
     detected_objs = detect_objects(image)
 
+    # Update session memory
     last_images.append(image)
     last_captions.append(caption_translated)
     last_objects.append(detected_objs)
@@ -81,7 +83,8 @@ def generate_caption(image, language):
     return caption_translated, tags
 
 def build_history_ui():
-    components = []
+    # Build list of Gradio Rows containing image, caption textbox and copy button
+    rows = []
     for i in range(len(last_images)):
         img = last_images[i]
         cap = last_captions[i]
@@ -95,10 +98,11 @@ def build_history_ui():
         def copy_fn(caption):
             return caption
 
+        # Bind copy button inside lambda to close over correct caption_box
         copy_btn.click(fn=copy_fn, inputs=cap_box, outputs=cap_box)
 
         row = gr.Row([
-            gr.Image(value=img, interactive=False, show_label=False),
+            gr.Image(value=img, interactive=False, show_label=False, elem_id=f"history_img_{i}"),
             gr.Column([
                 gr.Markdown(f"**Caption ({lang}):**"),
                 cap_box,
@@ -106,9 +110,8 @@ def build_history_ui():
                 gr.Markdown(f"**Detected Objects:** {', '.join(obj) if obj else 'None'}")
             ])
         ])
-        components.append(row)
-
-    return components
+        rows.append(row)
+    return rows
 
 with gr.Blocks() as iface:
     gr.Markdown("# Image Captioning with Object Detection & Multilingual Support")
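
The new comment describes `translation_models` as a cache of MarianMT checkpoint pairs (en-to-xx, xx-to-en), with `None` for English since no translation is needed. The diff does not show `translate_caption` itself; below is a minimal sketch of how such a cache is typically consumed, lazily loading each Marian pair on first use. The `_loaded` dict and `_get_pair` helper are hypothetical names, not code from app.py.

from transformers import MarianMTModel, MarianTokenizer

_loaded = {}  # hypothetical cache: checkpoint name -> (tokenizer, model)

def _get_pair(name):
    # Load a Marian checkpoint once; reuse it on every later call.
    if name not in _loaded:
        _loaded[name] = (MarianTokenizer.from_pretrained(name),
                         MarianMTModel.from_pretrained(name))
    return _loaded[name]

def translate_caption(caption_en, language):
    # English maps to None in translation_models: return the caption unchanged.
    pair = translation_models.get(language)
    if pair is None:
        return caption_en
    en_to_xx, _xx_to_en = pair  # only the en->xx direction is needed here
    tok, mdl = _get_pair(en_to_xx)
    batch = tok([caption_en], return_tensors="pt", padding=True)
    generated = mdl.generate(**batch)
    return tok.decode(generated[0], skip_special_tokens=True)

Lazy loading keeps startup fast: only the language pairs a user actually selects are downloaded and held in memory.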
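
The "Update session memory" hunk appends in lockstep to `last_images`, `last_captions`, and `last_objects`; the `from collections import deque` import suggests these are bounded deques. A `deque` with `maxlen` evicts the oldest entry on overflow, so the history never grows unbounded. A minimal sketch; the history size of 5 is an assumption, not shown in this diff:

from collections import deque

HISTORY_SIZE = 5  # assumption: the real cap is defined elsewhere in app.py

last_images = deque(maxlen=HISTORY_SIZE)
last_captions = deque(maxlen=HISTORY_SIZE)
last_objects = deque(maxlen=HISTORY_SIZE)

# Appending past maxlen silently drops the oldest element, so the three
# histories stay aligned as long as they are always appended together.
for i in range(7):
    last_images.append(f"image_{i}")
    last_captions.append(f"caption_{i}")
    last_objects.append([f"object_{i}"])

print(list(last_captions))
# ['caption_2', 'caption_3', 'caption_4', 'caption_5', 'caption_6']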
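
The comment added above `copy_btn.click` ("Bind copy button inside lambda to close over correct caption_box") guards against Python's late-binding closures: a callback defined in a loop captures the loop variable itself, not its value at that iteration. The diff avoids the trap by routing `cap_box` through Gradio's `inputs=` rather than closing over it, but the underlying pitfall and the usual default-argument fix look like this (plain-Python sketch, no Gradio):

captions = ["a cat", "a dog", "a bird"]

# Bug: every callback closes over the same variable, so after the loop
# finishes they all return the final caption.
broken = [lambda: cap for cap in captions]
print([f() for f in broken])  # ['a bird', 'a bird', 'a bird']

# Fix: bind the current value as a default argument at definition time.
fixed = [lambda cap=cap: cap for cap in captions]
print([f() for f in fixed])   # ['a cat', 'a dog', 'a bird']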