import gradio as gr
import torch
from transformers import AutoProcessor, AutoModelForImageTextToText
# -------------------------------------------------------------
# Model setup — forced onto CPU
# -------------------------------------------------------------
model_id = "microsoft/GUI-Actor-Verifier-2B"

# Processor handles chat templating + image preprocessing.
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# float32 because half precision is not CPU-friendly; low_cpu_mem_usage
# keeps peak RSS down while weights stream in.
model = AutoModelForImageTextToText.from_pretrained(
    model_id,
    trust_remote_code=True,
    torch_dtype=torch.float32,
    device_map="cpu",
    low_cpu_mem_usage=True,
)
model.eval()  # inference mode: disables dropout etc.
# =========================
# Inference
# =========================
def run_model(image, prompt):
    """Run one image + text turn through the model and return the reply.

    Parameters
    ----------
    image : PIL.Image.Image | None
        Image uploaded via the Gradio UI; ``None`` when nothing was uploaded.
    prompt : str | None
        User question; falls back to a generic caption prompt when empty.

    Returns
    -------
    str
        The model's decoded reply, or a "❌ ..." message on bad input/error.
    """
    try:
        if image is None:
            return "❌ Please upload an image."
        if not prompt or not prompt.strip():
            prompt = "Describe this image."

        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": image},
                    {"type": "text", "text": prompt},
                ],
            }
        ]

        inputs = processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        )
        # Move only tensor values: apply_chat_template may also return
        # non-tensor entries, and calling .to() on those would raise.
        inputs = {
            k: v.to("cpu") if torch.is_tensor(v) else v
            for k, v in inputs.items()
        }

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=50,  # keep small: CPU generation is slow
                do_sample=False,    # greedy decoding for reproducibility
            )

        # Strip the prompt tokens so only newly generated text remains.
        prompt_len = inputs["input_ids"].shape[-1]
        return processor.decode(
            outputs[0][prompt_len:],
            skip_special_tokens=True,
        )
    except Exception as e:  # surface errors in the UI instead of crashing
        return f"❌ Error: {str(e)}"
# -------------------------------------------------------------
# Gradio front-end
# -------------------------------------------------------------
image_input = gr.Image(type="pil", label="Upload Image")
question_input = gr.Textbox(label="Your Question")
output_box = gr.Textbox(label="Model Output")

demo = gr.Interface(
    fn=run_model,
    inputs=[image_input, question_input],
    outputs=output_box,
    title="GUI Actor Verifier (CPU Mode)",
    description="⚠️ Running on CPU — responses may be slow.",
)

demo.launch()