scmlewis committed on
Commit
bfd5ab7
·
verified ·
1 Parent(s): 1c27e36

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -65
app.py CHANGED
@@ -6,17 +6,72 @@ from PIL import Image
6
  from collections import deque
7
  import numpy as np
8
 
9
- # Load main BLIP model for English captioning
10
  processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
11
  model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
12
-
13
- # Load YOLOv5 small model for object detection using ultralytics package
14
  detect_model = YOLO('yolov5s.pt')
15
 
16
- # Session memory for last 15 images and captions
17
  MEMORY_SIZE = 15
18
- last_images = deque([], maxlen=MEMORY_SIZE)
19
- last_captions = deque([], maxlen=MEMORY_SIZE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  def preprocess_image(image):
22
  if image.mode != "RGB":
@@ -40,78 +95,61 @@ def generate_caption(image):
40
  out = model.generate(**inputs, max_length=30, num_beams=5, early_stopping=True)
41
  caption = processor.decode(out[0], skip_special_tokens=True)
42
  detected_objs = detect_objects(image)
43
-
44
- # Update session memory
45
- last_images.append(image)
46
- last_captions.append(caption)
47
-
48
  tags = ", ".join(detected_objs) if detected_objs else "None"
49
- gallery = [(img, cap) for img, cap in zip(list(last_images), list(last_captions))]
50
-
51
- result_text = f"Detected objects: {tags}\nCaption: {caption}"
52
- return result_text, gallery
53
-
54
- # Custom CSS styles
55
- custom_css = """
56
- #app-title {
57
- text-align: center;
58
- font-size: 36px;
59
- color: #4DB8FF; /* Light blue header */
60
- font-weight: bold;
61
- }
62
-
63
- #instructions {
64
- text-align: center;
65
- font-size: 18px;
66
- /* Removed custom color for better theme contrast */
67
- }
68
-
69
- #generate-btn {
70
- background: linear-gradient(90deg, #1E90FF, #32CD32); /* lake blue → light green */
71
- color: white;
72
- font-weight: bold;
73
- border: none;
74
- border-radius: 10px;
75
- transition: 0.3s ease;
76
- }
77
- #generate-btn:hover {
78
- box-shadow: 0 0 10px rgba(50,205,50,0.4);
79
- transform: scale(1.05);
80
- }
81
- """
82
 
83
  with gr.Blocks(css=custom_css) as iface:
84
- # Centered header and description
85
- gr.HTML('<h1 id="app-title">🖼️ Image Captioning with Object Detection</h1>')
86
  gr.HTML(
87
- '<p id="instructions">👋 Welcome! This app detects objects in your image and generates a descriptive caption.<br>'
88
- '🪄 <b>How to use:</b><br>'
89
- '1️⃣ Upload an image below<br>'
90
- '2️⃣ Click <b>⭐ Generate Caption</b> to start analysis<br>'
91
- '3️⃣ View caption and detected objects instantly below.<br>'
92
- '💡 The last <b>15 results</b> will be saved for your review!</p>'
 
93
  )
94
-
95
- # Upload image and Generate button directly below
96
- image_input = gr.Image(type="pil", label="Upload Image")
97
- generate_btn = gr.Button(" Generate Caption", elem_id="generate-btn")
98
-
99
- # Result output
100
- caption_output = gr.Textbox(label="📝 Caption and Detected Objects", lines=3, interactive=False)
101
-
102
- # History gallery
103
- gallery = gr.Gallery(label="Last 15 Images and Captions", scale=3)
 
 
104
 
105
  def on_generate(image):
106
  if image is None:
107
- return "Please upload an image.", []
108
- return generate_caption(image)
 
 
 
 
 
109
 
110
  generate_btn.click(
111
  fn=on_generate,
112
  inputs=image_input,
113
- outputs=[caption_output, gallery]
114
  )
115
 
 
 
116
  if __name__ == "__main__":
117
  iface.launch()
 
6
  from collections import deque
7
  import numpy as np
8
 
9
+ # Load BLIP model for English captioning
10
  processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
11
  model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
 
 
12
  detect_model = YOLO('yolov5s.pt')
13
 
 
14
  MEMORY_SIZE = 15
15
+ last_texts = deque([], maxlen=MEMORY_SIZE)
16
+
17
+ custom_css = """
18
+ #app-title {
19
+ text-align: center;
20
+ font-size: 38px;
21
+ color: #53c9fc;
22
+ font-weight: bold;
23
+ padding-top: 12px;
24
+ }
25
+ #instructions {
26
+ text-align: center;
27
+ font-size: 19px;
28
+ margin: 14px 0 22px 0;
29
+ }
30
+ #main-card {
31
+ max-width: 600px;
32
+ margin: auto;
33
+ background: #252933;
34
+ border-radius: 16px;
35
+ box-shadow: 0 5px 24px #0002;
36
+ padding: 28px 35px;
37
+ }
38
+ #generate-btn {
39
+ background: linear-gradient(90deg, #31b2fd 0%, #98f972 100%);
40
+ color: white;
41
+ font-size: 18px;
42
+ font-weight: bold;
43
+ border: none;
44
+ border-radius: 11px;
45
+ margin-top: 8px;
46
+ margin-bottom: 14px;
47
+ transition: 0.2s;
48
+ }
49
+ #generate-btn:hover {
50
+ filter: brightness(1.08);
51
+ box-shadow: 0 2px 16px #9efbc344;
52
+ }
53
+ .label-copyable {
54
+ font-size: 18px;
55
+ font-weight: bold;
56
+ color: #53c9fc;
57
+ margin-bottom: 4px;
58
+ }
59
+ .gr-table { /* helps tables stand out on dark bg */
60
+ background: #23262e !important;
61
+ border-radius: 10px !important;
62
+ }
63
+ .copy-btn-table {
64
+ background: #252c37;
65
+ color: #75e39e;
66
+ border: none;
67
+ border-radius: 7px;
68
+ padding: 5px 15px;
69
+ font-size: 15px;
70
+ font-weight: bold;
71
+ margin-left: 10px;
72
+ transition: background 0.2s;
73
+ }
74
+ """
75
 
76
  def preprocess_image(image):
77
  if image.mode != "RGB":
 
95
  out = model.generate(**inputs, max_length=30, num_beams=5, early_stopping=True)
96
  caption = processor.decode(out[0], skip_special_tokens=True)
97
  detected_objs = detect_objects(image)
 
 
 
 
 
98
  tags = ", ".join(detected_objs) if detected_objs else "None"
99
+ combined_text = f"Detected objects: {tags}\nCaption: {caption}"
100
+ last_texts.append(combined_text)
101
+ return combined_text
102
+
103
def build_history_table():
    """Build the history-table payload from session memory.

    Returns:
        (headers, rows): ``headers`` is the two column labels; ``rows`` is a
        list of ``[text, "Copy"]`` rows, newest entry first.

    Why the change: the previous version appended ``gr.Button(...)`` objects
    into the data rows. ``gr.Dataframe`` cells must be plain values matching
    ``datatype=["str", "str"]`` — component objects cannot be serialized into
    a table, and constructing components inside an event handler (outside any
    Blocks layout context) is invalid. The "Copy" cell is therefore a text
    placeholder; a real per-row copy needs a browser-side clipboard handler.
    """
    headers = ["Past Outputs", "Action"]
    # reversed() so the most recent caption appears at the top of the table.
    rows = [[text, "Copy"] for text in reversed(last_texts)]
    return headers, rows
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
with gr.Blocks(css=custom_css) as iface:
    # Header and usage instructions (styled via custom_css selectors).
    gr.HTML('<div id="app-title">🖼️ Image Captioning with Object Detection</div>')
    gr.HTML(
        '<div id="instructions">'
        '🙌 <b>Welcome!</b> Instantly analyze images using AI.<br>'
        '1️⃣ <b>Upload</b> your image.<br>'
        '2️⃣ Click <b>⭐ Generate Caption</b>.<br>'
        '3️⃣ Copy and reuse your results below.<br>'
        '📜 <i>Last 15 results are stored for you.</i>'
        '</div>'
    )
    # gr.Box was removed in Gradio 4.x; gr.Group is the supported container
    # and accepts the same elem_id for CSS targeting.
    with gr.Group(elem_id="main-card"):
        image_input = gr.Image(type="pil", label="Upload Image")
        generate_btn = gr.Button("⭐ Generate Caption", elem_id="generate-btn")
        # interactive=True so the user can select/edit the text for copying.
        caption_output = gr.Textbox(
            label="📝 Caption and Detected Objects",
            lines=5,
            interactive=True,
            elem_classes="label-copyable",
        )
        history_table = gr.Dataframe(
            headers=["Past Outputs", "Action"],
            datatype=["str", "str"],
            interactive=True,
            row_count=(0, MEMORY_SIZE),
            col_count=2,
            wrap=True,
        )

    def on_generate(image):
        """Click handler: caption the image and refresh the history table.

        Args:
            image: PIL image from ``image_input``, or None if none uploaded.

        Returns:
            (result_text, history_rows). ``history_rows`` is a list of rows —
            the payload shape ``gr.Dataframe`` actually accepts. The previous
            version returned a ``(headers, rows)`` tuple, which Dataframe
            cannot consume as an update value.
        """
        if image is None:
            return "Please upload an image.", []
        combined = generate_caption(image)
        _, rows = build_history_table()
        return combined, rows

    def copy_output(text):
        # Unwired helper kept for a future per-row copy action.
        # gr.Textbox.update() was removed in Gradio 4.x; an event handler
        # updates a Textbox by returning the raw value (or gr.update(...)).
        return text

    generate_btn.click(
        fn=on_generate,
        inputs=image_input,
        outputs=[caption_output, history_table],
    )

    # NOTE(review): a genuine per-row "Copy" button requires a browser-side
    # clipboard handler (custom JS); plain table text is not clickable.

if __name__ == "__main__":
    iface.launch()