scmlewis committed on
Commit
1f6a8b2
·
verified ·
1 Parent(s): 7223770

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -15
app.py CHANGED
@@ -6,11 +6,11 @@ from PIL import Image
6
  from collections import deque
7
  import numpy as np
8
 
9
- # Load BLIP model for English captioning
10
  processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
11
  model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
12
 
13
- # Load YOLOv5 small model for detection
14
  detect_model = YOLO('yolov5s.pt')
15
 
16
  translation_models = {
@@ -71,7 +71,6 @@ def generate_caption(image, language):
71
  caption_translated = translate_caption(caption_en, language)
72
  detected_objs = detect_objects(image)
73
 
74
- # Update session memory
75
  last_images.append(image)
76
  last_captions.append(caption_translated)
77
  last_objects.append(detected_objs)
@@ -82,7 +81,6 @@ def generate_caption(image, language):
82
  return caption_translated, tags
83
 
84
  def build_history_ui():
85
- # Create a list of columns with image, caption textbox and copy button
86
  components = []
87
  for i in range(len(last_images)):
88
  img = last_images[i]
@@ -90,32 +88,35 @@ def build_history_ui():
90
  obj = last_objects[i]
91
  lang = last_languages[i]
92
 
 
 
93
  copy_btn = gr.Button("Copy Caption")
94
- cap_box = gr.Textbox(value=cap, lines=2, interactive=True)
95
 
96
  def copy_fn(caption):
97
- return caption # No direct clipboard access, but textbox selectable
98
 
99
  copy_btn.click(fn=copy_fn, inputs=cap_box, outputs=cap_box)
100
 
101
- components.append(gr.Row([
102
- gr.Image(value=img, interactive=False, label=f"Image {i+1}", elem_id=f"img_{i}"),
103
  gr.Column([
104
  gr.Markdown(f"**Caption ({lang}):**"),
105
  cap_box,
106
  copy_btn,
107
  gr.Markdown(f"**Detected Objects:** {', '.join(obj) if obj else 'None'}")
108
  ])
109
- ]))
 
 
110
  return components
111
 
112
  with gr.Blocks() as iface:
113
  gr.Markdown("# Image Captioning with Object Detection & Multilingual Support")
114
  gr.Markdown(
115
  """
116
- Upload an image, select the language for captions, then click 'Generate Caption'.
117
- The app generates a descriptive caption along with detected object tags.
118
- Your last 15 images and captions are displayed below for easy reference and copying.
119
  """
120
  )
121
 
@@ -134,14 +135,14 @@ with gr.Blocks() as iface:
134
  object_output = gr.Textbox(label="Detected Objects", lines=2, interactive=False)
135
  copy_btn = gr.Button("Copy Caption Text")
136
 
137
- history_container = gr.Column(label="Last 15 Images & Captions", elem_classes="history-container")
138
 
139
  def on_generate(image, language):
140
  if image is None:
141
  return "Please upload an image.", "", []
142
  caption, objects = generate_caption(image, language)
143
- # Rebuild history display on every generation
144
- return caption, objects, build_history_ui()
145
 
146
  def copy_text(text):
147
  return gr.Textbox.update(value=text, interactive=True)
 
6
  from collections import deque
7
  import numpy as np
8
 
9
+ # Load BLIP model
10
  processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
11
  model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
12
 
13
+ # Load YOLOv5 model
14
  detect_model = YOLO('yolov5s.pt')
15
 
16
  translation_models = {
 
71
  caption_translated = translate_caption(caption_en, language)
72
  detected_objs = detect_objects(image)
73
 
 
74
  last_images.append(image)
75
  last_captions.append(caption_translated)
76
  last_objects.append(detected_objs)
 
81
  return caption_translated, tags
82
 
83
  def build_history_ui():
 
84
  components = []
85
  for i in range(len(last_images)):
86
  img = last_images[i]
 
88
  obj = last_objects[i]
89
  lang = last_languages[i]
90
 
91
+ cap_box = gr.Textbox(value=cap, lines=2, interactive=True, show_label=False)
92
+
93
  copy_btn = gr.Button("Copy Caption")
 
94
 
95
  def copy_fn(caption):
96
+ return caption
97
 
98
  copy_btn.click(fn=copy_fn, inputs=cap_box, outputs=cap_box)
99
 
100
+ row = gr.Row([
101
+ gr.Image(value=img, interactive=False, show_label=False),
102
  gr.Column([
103
  gr.Markdown(f"**Caption ({lang}):**"),
104
  cap_box,
105
  copy_btn,
106
  gr.Markdown(f"**Detected Objects:** {', '.join(obj) if obj else 'None'}")
107
  ])
108
+ ])
109
+ components.append(row)
110
+
111
  return components
112
 
113
  with gr.Blocks() as iface:
114
  gr.Markdown("# Image Captioning with Object Detection & Multilingual Support")
115
  gr.Markdown(
116
  """
117
+ Upload an image, select the caption language, then click 'Generate Caption'.
118
+ The app generates a caption and detected object tags.
119
+ Your last 15 images and captions are displayed below for easy copying and reference.
120
  """
121
  )
122
 
 
135
  object_output = gr.Textbox(label="Detected Objects", lines=2, interactive=False)
136
  copy_btn = gr.Button("Copy Caption Text")
137
 
138
+ history_container = gr.Column()
139
 
140
  def on_generate(image, language):
141
  if image is None:
142
  return "Please upload an image.", "", []
143
  caption, objects = generate_caption(image, language)
144
+ history = build_history_ui()
145
+ return caption, objects, history
146
 
147
  def copy_text(text):
148
  return gr.Textbox.update(value=text, interactive=True)