Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -59,6 +59,29 @@ MODEL_REPO = "meettilavat/imagecaptioning"
|
|
| 59 |
SUBFOLDER_PREFIX = "outputs/blip2_full_ft_stage2"
|
| 60 |
LOCAL_DIR = Path(os.environ["HF_HOME"]) / "models" / "imagecaptioning"
|
| 61 |
DEFAULT_PROMPT = "Describe the image in detail."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
|
| 64 |
def _allow_patterns() -> Iterable[str]:
|
|
@@ -224,11 +247,20 @@ def update_beam_visibility(choice: str):
|
|
| 224 |
)
|
| 225 |
|
| 226 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
with gr.Blocks(title="BLIP-2 Image Captioning") as demo:
|
| 228 |
gr.Markdown("# BLIP-2 Image Captioning (H200 fine-tuned)")
|
| 229 |
gr.Markdown(
|
| 230 |
"Upload an image, tweak decoding settings, and optionally compare beam widths side by side."
|
| 231 |
)
|
|
|
|
| 232 |
|
| 233 |
with gr.Row():
|
| 234 |
with gr.Column(scale=6, min_width=320):
|
|
@@ -278,6 +310,14 @@ with gr.Blocks(title="BLIP-2 Image Captioning") as demo:
|
|
| 278 |
f"Running inference on {device.type.upper()} with dtype {dtype}. "
|
| 279 |
"Compare beams to balance diversity vs. precision."
|
| 280 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
|
| 282 |
beam_mode_input.change(
|
| 283 |
fn=update_beam_visibility,
|
|
@@ -285,7 +325,12 @@ with gr.Blocks(title="BLIP-2 Image Captioning") as demo:
|
|
| 285 |
outputs=[single_beam_slider, compare_beams_group],
|
| 286 |
)
|
| 287 |
|
| 288 |
-
run_button.click(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
fn=run_inference,
|
| 290 |
inputs=[
|
| 291 |
image_input,
|
|
@@ -298,6 +343,11 @@ with gr.Blocks(title="BLIP-2 Image Captioning") as demo:
|
|
| 298 |
outputs=caption_output,
|
| 299 |
api_name="generate",
|
| 300 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
|
| 302 |
|
| 303 |
if __name__ == "__main__":
|
|
|
|
| 59 |
SUBFOLDER_PREFIX = "outputs/blip2_full_ft_stage2"
|
| 60 |
LOCAL_DIR = Path(os.environ["HF_HOME"]) / "models" / "imagecaptioning"
|
| 61 |
DEFAULT_PROMPT = "Describe the image in detail."
|
| 62 |
+
SPINNER_CSS = """
|
| 63 |
+
<style>
|
| 64 |
+
#caption-spinner {
|
| 65 |
+
display: flex;
|
| 66 |
+
align-items: center;
|
| 67 |
+
gap: 0.5rem;
|
| 68 |
+
font-size: 0.95rem;
|
| 69 |
+
}
|
| 70 |
+
#caption-spinner .caption-spinner__loader {
|
| 71 |
+
width: 20px;
|
| 72 |
+
height: 20px;
|
| 73 |
+
border: 3px solid var(--neutral-400, rgba(0, 0, 0, 0.25));
|
| 74 |
+
border-top-color: var(--body-text-color, rgba(0, 0, 0, 0.75));
|
| 75 |
+
border-radius: 50%;
|
| 76 |
+
animation: caption-spin 0.75s linear infinite;
|
| 77 |
+
}
|
| 78 |
+
@keyframes caption-spin {
|
| 79 |
+
to {
|
| 80 |
+
transform: rotate(360deg);
|
| 81 |
+
}
|
| 82 |
+
}
|
| 83 |
+
</style>
|
| 84 |
+
"""
|
| 85 |
|
| 86 |
|
| 87 |
def _allow_patterns() -> Iterable[str]:
|
|
|
|
| 247 |
)
|
| 248 |
|
| 249 |
|
| 250 |
+
def show_spinner():
    """Make the caption-progress spinner visible.

    Used as the first step of the run-button click chain, with the spinner
    HTML component as the event's only output.

    Returns:
        A Gradio update payload that sets ``visible=True`` on the output
        component.
    """
    # gr.update() is the component-agnostic update helper. The per-component
    # ``gr.HTML.update`` classmethod no longer exists in Gradio 4+, so using
    # the generic helper keeps this working on both 3.x and newer releases.
    return gr.update(visible=True)
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
def hide_spinner():
    """Hide the caption-progress spinner.

    Used as the final step of the run-button click chain, with the spinner
    HTML component as the event's only output.

    Returns:
        A Gradio update payload that sets ``visible=False`` on the output
        component.
    """
    # gr.update() is the component-agnostic update helper. The per-component
    # ``gr.HTML.update`` classmethod no longer exists in Gradio 4+, so using
    # the generic helper keeps this working on both 3.x and newer releases.
    return gr.update(visible=False)
|
| 256 |
+
|
| 257 |
+
|
| 258 |
with gr.Blocks(title="BLIP-2 Image Captioning") as demo:
|
| 259 |
gr.Markdown("# BLIP-2 Image Captioning (H200 fine-tuned)")
|
| 260 |
gr.Markdown(
|
| 261 |
"Upload an image, tweak decoding settings, and optionally compare beam widths side by side."
|
| 262 |
)
|
| 263 |
+
gr.HTML(SPINNER_CSS)
|
| 264 |
|
| 265 |
with gr.Row():
|
| 266 |
with gr.Column(scale=6, min_width=320):
|
|
|
|
| 310 |
f"Running inference on {device.type.upper()} with dtype {dtype}. "
|
| 311 |
"Compare beams to balance diversity vs. precision."
|
| 312 |
)
|
| 313 |
+
spinner_display = gr.HTML(
|
| 314 |
+
value=(
|
| 315 |
+
'<div class="caption-spinner__loader" aria-hidden="true"></div>'
|
| 316 |
+
"<span role=\"status\">Generating caption...</span>"
|
| 317 |
+
),
|
| 318 |
+
visible=False,
|
| 319 |
+
elem_id="caption-spinner",
|
| 320 |
+
)
|
| 321 |
|
| 322 |
beam_mode_input.change(
|
| 323 |
fn=update_beam_visibility,
|
|
|
|
| 325 |
outputs=[single_beam_slider, compare_beams_group],
|
| 326 |
)
|
| 327 |
|
| 328 |
+
run_event = run_button.click(
|
| 329 |
+
fn=show_spinner,
|
| 330 |
+
outputs=spinner_display,
|
| 331 |
+
show_progress=False,
|
| 332 |
+
)
|
| 333 |
+
run_event = run_event.then(
|
| 334 |
fn=run_inference,
|
| 335 |
inputs=[
|
| 336 |
image_input,
|
|
|
|
| 343 |
outputs=caption_output,
|
| 344 |
api_name="generate",
|
| 345 |
)
|
| 346 |
+
run_event.then(
|
| 347 |
+
fn=hide_spinner,
|
| 348 |
+
outputs=spinner_display,
|
| 349 |
+
show_progress=False,
|
| 350 |
+
)
|
| 351 |
|
| 352 |
|
| 353 |
if __name__ == "__main__":
|