scmlewis committed on
Commit
8f847e7
·
verified ·
1 Parent(s): f026d0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -39
app.py CHANGED
@@ -1,21 +1,18 @@
1
- from transformers import BlipProcessor, BlipForConditionalGeneration
2
- from ultralytics import YOLO
3
- import torch
4
- import gradio as gr
5
- from PIL import Image
6
- from collections import deque
7
- import numpy as np
8
-
9
- # Load BLIP and YOLOv5 models
10
- processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
11
- model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
12
- detect_model = YOLO('yolov5s.pt')
13
-
14
- MEMORY_SIZE = 10 # Now only 10 in history
15
- last_images = deque([], maxlen=MEMORY_SIZE)
16
- last_captions = deque([], maxlen=MEMORY_SIZE)
17
-
18
  custom_css = """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  #app-title {
20
  text-align: center;
21
  font-size: 38px;
@@ -28,19 +25,6 @@ custom_css = """
28
  font-size: 19px;
29
  margin: 14px 0 22px 0;
30
  }
31
- /* Responsive + locked max-width */
32
- #main-app-area {
33
- max-width: 600px;
34
- margin-left: auto;
35
- margin-right: auto;
36
- padding: 0 8px;
37
- }
38
- @media (max-width: 700px) {
39
- #main-app-area {
40
- max-width: 98vw;
41
- padding: 0 2vw;
42
- }
43
- }
44
  #generate-btn {
45
  background: linear-gradient(90deg, #31b2fd 0%, #98f972 100%);
46
  color: white;
@@ -58,6 +42,22 @@ custom_css = """
58
  }
59
  """
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  def preprocess_image(image):
62
  if image.mode != "RGB":
63
  image = image.convert("RGB")
@@ -80,19 +80,15 @@ def generate_caption(image):
80
  out = model.generate(**inputs, max_length=30, num_beams=5, early_stopping=True)
81
  caption = processor.decode(out[0], skip_special_tokens=True)
82
  detected_objs = detect_objects(image)
83
-
84
- # Update session memory
85
  last_images.append(image)
86
  last_captions.append(caption)
87
-
88
  tags = ", ".join(detected_objs) if detected_objs else "None"
89
  gallery = [(img, f"Detected objects: {tags}\nCaption: {caption}") for img, caption in zip(list(last_images), list(last_captions))]
90
-
91
  result_text = f"Detected objects: {tags}\nCaption: {caption}"
92
  return result_text, gallery
93
 
94
  with gr.Blocks(css=custom_css) as iface:
95
- gr.HTML('<div id="main-app-area">')
96
  gr.HTML('<div id="app-title">🖼️ Image Captioning with Object Detection</div>')
97
  gr.HTML(
98
  '<div id="instructions">'
@@ -103,23 +99,20 @@ with gr.Blocks(css=custom_css) as iface:
103
  '📜 <i>Last 10 results are stored for you.</i>'
104
  '</div>'
105
  )
106
-
107
  image_input = gr.Image(type="pil", label="Upload Image")
108
  generate_btn = gr.Button("⭐ Generate Caption", elem_id="generate-btn")
109
  caption_output = gr.Textbox(label="📝 Caption and Detected Objects", lines=5, interactive=True)
110
  gallery = gr.Gallery(label="Last 10 Images and Captions", scale=3)
111
-
112
  def on_generate(image):
113
  if image is None:
114
  return "Please upload an image.", []
115
  return generate_caption(image)
116
-
117
  generate_btn.click(
118
  fn=on_generate,
119
  inputs=image_input,
120
  outputs=[caption_output, gallery]
121
  )
122
- gr.HTML('</div>') # end main-app-area
123
 
124
  if __name__ == "__main__":
125
  iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  custom_css = """
2
+ /* Center main content and lock max width to 900px, with responsive shrink */
3
+ #main-app-area {
4
+ max-width: 900px;
5
+ margin-left: auto;
6
+ margin-right: auto;
7
+ padding: 0 16px;
8
+ }
9
+ /* Responsive for mobile (<950px) */
10
+ @media (max-width: 950px) {
11
+ #main-app-area {
12
+ max-width: 99vw;
13
+ padding: 0 2vw;
14
+ }
15
+ }
16
  #app-title {
17
  text-align: center;
18
  font-size: 38px;
 
25
  font-size: 19px;
26
  margin: 14px 0 22px 0;
27
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  #generate-btn {
29
  background: linear-gradient(90deg, #31b2fd 0%, #98f972 100%);
30
  color: white;
 
42
  }
43
  """
44
 
45
+ from transformers import BlipProcessor, BlipForConditionalGeneration
46
+ from ultralytics import YOLO
47
+ import torch
48
+ import gradio as gr
49
+ from PIL import Image
50
+ from collections import deque
51
+ import numpy as np
52
+
53
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
54
+ model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
55
+ detect_model = YOLO('yolov5s.pt')
56
+
57
+ MEMORY_SIZE = 10
58
+ last_images = deque([], maxlen=MEMORY_SIZE)
59
+ last_captions = deque([], maxlen=MEMORY_SIZE)
60
+
61
  def preprocess_image(image):
62
  if image.mode != "RGB":
63
  image = image.convert("RGB")
 
80
  out = model.generate(**inputs, max_length=30, num_beams=5, early_stopping=True)
81
  caption = processor.decode(out[0], skip_special_tokens=True)
82
  detected_objs = detect_objects(image)
 
 
83
  last_images.append(image)
84
  last_captions.append(caption)
 
85
  tags = ", ".join(detected_objs) if detected_objs else "None"
86
  gallery = [(img, f"Detected objects: {tags}\nCaption: {caption}") for img, caption in zip(list(last_images), list(last_captions))]
 
87
  result_text = f"Detected objects: {tags}\nCaption: {caption}"
88
  return result_text, gallery
89
 
90
  with gr.Blocks(css=custom_css) as iface:
91
+ gr.HTML('<div id="main-app-area">') # Start content region
92
  gr.HTML('<div id="app-title">🖼️ Image Captioning with Object Detection</div>')
93
  gr.HTML(
94
  '<div id="instructions">'
 
99
  '📜 <i>Last 10 results are stored for you.</i>'
100
  '</div>'
101
  )
 
102
  image_input = gr.Image(type="pil", label="Upload Image")
103
  generate_btn = gr.Button("⭐ Generate Caption", elem_id="generate-btn")
104
  caption_output = gr.Textbox(label="📝 Caption and Detected Objects", lines=5, interactive=True)
105
  gallery = gr.Gallery(label="Last 10 Images and Captions", scale=3)
 
106
  def on_generate(image):
107
  if image is None:
108
  return "Please upload an image.", []
109
  return generate_caption(image)
 
110
  generate_btn.click(
111
  fn=on_generate,
112
  inputs=image_input,
113
  outputs=[caption_output, gallery]
114
  )
115
+ gr.HTML('</div>') # End content region
116
 
117
  if __name__ == "__main__":
118
  iface.launch()