Spaces:
Runtime error
Runtime error
File size: 880 Bytes
3ff5fb9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
from transformers import VisionEncoderDecoderModel
from transformers import RobertaTokenizerFast
from transformers import ViTFeatureExtractor, ViTModel
from PIL import Image
import gradio as gr
MAX_LEN = 128
tokenizer = RobertaTokenizerFast.from_pretrained('Byte_tokenizer', max_len=MAX_LEN)
feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
t = VisionEncoderDecoderModel.from_pretrained('Model')
title="Image to Caption Generator"
def predict(image):
image=image.convert("RGB")
caption=tokenizer.decode(t.generate(feature_extractor(image, return_tensors="pt").pixel_values)[0])
caption=caption.replace("<s>", "").replace("</s>", "")
return caption
demo = gr.Interface(
fn=predict,
inputs=gr.inputs.Image(type="pil"),
outputs=gr.outputs.Textbox(label="Generated Caption"),
title=title,
)
demo.launch()
|