import torch import gradio as gr from transformers import AutoProcessor, AutoModelForCausalLM model_id = "proteus-computer-use/omniparser-finetuned" processor = AutoProcessor.from_pretrained( "microsoft/Florence-2-base", trust_remote_code=True ) model = AutoModelForCausalLM.from_pretrained( model_id, torch_dtype=torch.float16, trust_remote_code=True ).to("cuda" if torch.cuda.is_available() else "cpu") def caption(image): inputs = processor(images=image, text="