chats-bug committed
Commit · 7295a68
1 Parent(s): fbee9c4
More testing
app.py CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
 from transformers import AutoProcessor, AutoTokenizer, AutoImageProcessor, AutoModelForCausalLM, BlipForConditionalGeneration, VisionEncoderDecoderModel, BitsAndBytesConfig
 import torch
 import open_clip
+from PIL import Image
+import requests
 
 from huggingface_hub import hf_hub_download
 
@@ -60,13 +62,13 @@ def generate_caption(
         The generated caption.
     """
     inputs = preprocessor(image, return_tensors="pt").to(device)
+    pixel_values = preprocessor(images=image, return_tensors="pt").pixel_values
 
     if use_float_16:
         inputs = inputs.to(torch.float16)
 
     generated_ids = model.generate(
-        pixel_values=
-        # attention_mask=inputs.attention_mask,
+        pixel_values=pixel_values,
         max_length=64,
     )
 
@@ -113,7 +115,8 @@ def generate_captions_clip(
 def generate_captions(
     image,
     max_length,
-    temperature
+    temperature,
+    use_sample_image,
 ):
     """
     Generate captions for the given image.
@@ -133,6 +136,10 @@ def generate_captions(
     caption_git_large_coco = ""
     caption_oc_coca = ""
 
+    if use_sample_image:
+        url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+        image = Image.open(requests.get(url, stream=True).raw)
+
     # Generate captions for the image using the Blip base model
     try:
         caption_blip_base = generate_caption(preprocessor_blip_base, model_blip_base, image).strip()
@@ -168,6 +175,7 @@ iface = gr.Interface(
         gr.inputs.Image(label="Image"),
         gr.inputs.Slider(minimum=16, maximum=64, step=2, default=32, label="Max Length"),
         gr.inputs.Slider(minimum=0.5, maximum=1.5, step=0.1, default=1.0, label="Temperature"),
+        gr.inputs.Checkbox(default=False, type="bool", label="Use example image")
     ],
     # Define the outputs
     outputs=[
@@ -182,4 +190,4 @@ iface = gr.Interface(
 )
 
 # Launch the interface
-iface.launch()
+iface.launch(debug=True)
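For context, a minimal, self-contained sketch of the caption path this commit repairs. The checkpoint name Salesforce/blip-image-captioning-base is an assumption for illustration; the diff only shows that a BLIP base processor/model pair (preprocessor_blip_base, model_blip_base) exists in app.py, not which checkpoint it loads.

# Minimal sketch of the repaired generate_caption flow.
# Assumption: the Space's BLIP base pair is Salesforce/blip-image-captioning-base;
# the actual checkpoint is not visible in this diff.
import requests
import torch
from PIL import Image
from transformers import AutoProcessor, BlipForConditionalGeneration

device = "cuda" if torch.cuda.is_available() else "cpu"
processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
).to(device)

# The same COCO sample image the commit wires up behind the new checkbox.
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# The fix: build pixel_values with the processor and pass it to generate(),
# replacing the incomplete `pixel_values=` keyword removed by this commit.
pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
generated_ids = model.generate(pixel_values=pixel_values, max_length=64)
caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
print(caption)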