# omniparser / app.py
# lljz66's picture
# Update app.py
# 6e55ec2 verified
# raw
# history blame contribute delete
# 931 Bytes
import torch
import gradio as gr
from transformers import AutoProcessor, AutoModelForCausalLM
# Checkpoint served by this app.
model_id = "proteus-computer-use/omniparser-finetuned"

# Use half precision only when a GPU is available. Loading float16 weights
# and then falling back to CPU leaves the model in a dtype that many CPU
# kernels do not support (or run very slowly), so use float32 there.
_device = "cuda" if torch.cuda.is_available() else "cpu"
_dtype = torch.float16 if _device == "cuda" else torch.float32

# The processor is loaded from the base Florence-2 repository rather than
# from ``model_id``.
processor = AutoProcessor.from_pretrained(
    "microsoft/Florence-2-base",
    trust_remote_code=True,
)

# NOTE(review): trust_remote_code=True executes Python code shipped in the
# model repositories; confirm both repositories are trusted.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=_dtype,
    trust_remote_code=True,
).to(_device)
def caption(image):
    """Generate a short caption for *image* using the module-level model.

    Args:
        image: Image to caption (a PIL image when called from the Gradio
            interface), or ``None`` when no image was provided.

    Returns:
        The decoded caption string, or an empty string when *image* is
        ``None``.
    """
    if image is None:
        # An empty image input arrives as None; there is nothing to caption,
        # so return early instead of failing inside the processor.
        return ""

    # Move the inputs to the model's device AND cast floating-point tensors
    # (pixel_values) to the model's dtype. Without the dtype cast, float32
    # pixel values are fed to a float16 model and generation fails with a
    # dtype mismatch. Integer tensors such as input_ids are not affected by
    # the dtype argument.
    inputs = processor(
        images=image,
        text="<CAPTION>",
        return_tensors="pt",
    ).to(model.device, model.dtype)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_new_tokens=20,
        )

    # A single image was submitted, so take the first decoded sequence.
    return processor.batch_decode(outputs, skip_special_tokens=True)[0]
# Web UI: one PIL image in, the caption text out.
_image_input = gr.Image(type="pil")

demo = gr.Interface(
    caption,
    _image_input,
    "text",
    title="OmniParser Icon Caption Model",
)

# Start the Gradio server as soon as the script is executed.
demo.launch()