# Page residue from the file viewer this source was copied from (not program text):
#   blackmistcode's picture
#   Update app.py
#   c8f026d verified
#   Raw
#   History Blame Contribute Delete
#   1.93 kB
import spaces
import torch
import gradio as gr
from transformers import AutoProcessor, AutoModelForImageTextToText
# Checkpoint identifier handed to both ``from_pretrained`` calls below.
MODEL_ID = "google/medgemma-1.5-4b-it"

# The processor and model are loaded once at import time; the model weights
# are requested in bfloat16 and moved to the "cuda" device right away.
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForImageTextToText.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
).to("cuda")

# Token ids consumed elsewhere in this file: ``extract_response`` looks for
# UNUSED95_ID in the output, and ``analyze`` passes EOT_ID to ``generate`` as
# the end-of-sequence id.
UNUSED95_ID, EOT_ID = (
    processor.tokenizer.convert_tokens_to_ids(token)
    for token in ("<unused95>", "<end_of_turn>")
)
def extract_response(output_ids, input_length):
    """Decode the reply text from one generated token sequence.

    Only the tokens at position ``input_length`` and later are considered, so
    nothing from the prompt can leak into the reply. If ``UNUSED95_ID`` occurs
    in that generated part, everything up to and including its first
    occurrence is dropped (presumably this token closes a reasoning segment
    that should not be shown -- confirm against the model card); otherwise the
    whole generated part is decoded.

    Args:
        output_ids: Sequence of token ids exposing ``.tolist()``; the caller
            in this file passes the first row of ``model.generate`` output.
        input_length: Number of leading tokens that belong to the prompt.

    Returns:
        The decoded text with special tokens skipped and surrounding
        whitespace stripped.
    """
    # Slice the prompt off *before* searching: a marker token that happens to
    # be present in the prompt must not decide where the reply starts.
    generated_ids = output_ids.tolist()[input_length:]
    try:
        marker_pos = generated_ids.index(UNUSED95_ID)
    except ValueError:
        # No marker was generated: the whole continuation is the reply.
        response_ids = generated_ids
    else:
        response_ids = generated_ids[marker_pos + 1:]
    return processor.decode(response_ids, skip_special_tokens=True).strip()
@spaces.GPU(duration=90)
def analyze(image1, image2, image3, image4, text_prompt):
    """Send up to four images and a text prompt to the model; return its reply.

    Image arguments that are ``None`` are skipped. The remaining images are
    placed, in argument order, ahead of the prompt text inside a single user
    message, which is rendered through the processor's chat template and
    passed to ``model.generate``.

    Args:
        image1, image2, image3, image4: Images or ``None`` (the interface in
            this file supplies PIL images).
        text_prompt: Text appended after the images in the user message.

    Returns:
        The string produced by ``extract_response`` for the first generated
        sequence.
    """
    # NOTE(review): the decorator presumably reserves a GPU for up to 90 s per
    # call -- confirm against the `spaces` package documentation.
    supplied = (image1, image2, image3, image4)
    content = [
        {"type": "image", "image": picture}
        for picture in supplied
        if picture is not None
    ]
    content.append({"type": "text", "text": text_prompt})
    conversation = [{"role": "user", "content": content}]

    batch = processor.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    )
    batch = batch.to(model.device, dtype=torch.bfloat16)
    # Prompt length in tokens, forwarded to extract_response as input_length.
    prompt_length = batch["input_ids"].shape[1]

    with torch.inference_mode():
        generated = model.generate(
            **batch,
            max_new_tokens=2048,
            eos_token_id=EOT_ID,
        )

    return extract_response(generated[0], prompt_length)
# Web UI: four optional image inputs followed by a prompt box, wired to
# ``analyze``; the reply is shown in a single text box.
demo = gr.Interface(
    fn=analyze,
    inputs=[
        # One image widget per image argument of ``analyze`` (slots 1 to 4).
        *(
            gr.Image(type="pil", label=f"Image {slot} (optional)")
            for slot in range(1, 5)
        ),
        gr.Textbox(label="Prompt"),
    ],
    outputs=gr.Textbox(label="Response"),
    title="MedGemma 1.5 4B",
)

# Start serving the interface as soon as the module is executed.
demo.launch()