# Spaces: Running on Zero
| import spaces | |
| import torch | |
| import gradio as gr | |
| from transformers import AutoProcessor, AutoModelForImageTextToText | |
# Hugging Face Hub checkpoint used for both the processor and the model weights.
MODEL_ID = "google/medgemma-1.5-4b-it"

# Loaded once at import time so every request reuses the same objects.
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForImageTextToText.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
)
model = model.to("cuda")

# Resolve the special-token ids up front instead of on every request.
_token_to_id = processor.tokenizer.convert_tokens_to_ids
UNUSED95_ID = _token_to_id("<unused95>")
EOT_ID = _token_to_id("<end_of_turn>")
def extract_response(output_ids, input_length):
    """Decode the answer portion of one generated token sequence.

    Only the tokens after the first ``input_length`` positions (the prompt)
    are considered.  If ``UNUSED95_ID`` occurs among them, everything up to
    and including its first occurrence is dropped as well.
    NOTE(review): ``<unused95>`` presumably closes the model's "thinking"
    section -- confirm against the MedGemma model card.

    Args:
        output_ids: 1-D sequence of token ids exposing ``.tolist()``
            (the caller passes one row of ``model.generate`` output).
        input_length: Number of leading prompt tokens in ``output_ids``.

    Returns:
        The decoded text with special tokens skipped and surrounding
        whitespace stripped.
    """
    # Restrict the search to newly generated tokens: a marker that happens to
    # appear in the prompt must not cause prompt text to leak into the reply.
    generated_ids = output_ids.tolist()[input_length:]
    try:
        answer_start = generated_ids.index(UNUSED95_ID) + 1
    except ValueError:
        # No marker generated: the whole continuation is the answer.
        answer_start = 0
    return processor.decode(
        generated_ids[answer_start:], skip_special_tokens=True
    ).strip()
# ZeroGPU Spaces only attach a GPU while a function decorated with
# ``spaces.GPU`` is running, so the inference entry point must be decorated.
@spaces.GPU
def analyze(image1, image2, image3, image4, text_prompt):
    """Run the model on up to four optional images plus a text prompt.

    Args:
        image1, image2, image3, image4: Images from the Gradio inputs, or
            ``None`` for any slot the user left empty.  Empty slots are
            skipped; the remaining images keep their order.
        text_prompt: The user's text, appended after the images.

    Returns:
        The decoded model response as produced by ``extract_response``.
    """
    # Single user turn: all provided images first, then the text.
    content = [
        {"type": "image", "image": img}
        for img in (image1, image2, image3, image4)
        if img is not None
    ]
    content.append({"type": "text", "text": text_prompt})
    messages = [{"role": "user", "content": content}]

    # Move inputs to the model's device and to the dtype the model was
    # loaded with (bfloat16).
    inputs = processor.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device, dtype=torch.bfloat16)

    # Prompt length lets extract_response separate the prompt from the reply.
    prompt_length = inputs["input_ids"].shape[1]

    with torch.inference_mode():
        output = model.generate(
            **inputs, max_new_tokens=2048, eos_token_id=EOT_ID
        )
    return extract_response(output[0], prompt_length)
# Four optional image slots; each is delivered to ``analyze`` as a PIL image
# (or None when left empty).
image_inputs = [
    gr.Image(type="pil", label="Image 1 (optional)"),
    gr.Image(type="pil", label="Image 2 (optional)"),
    gr.Image(type="pil", label="Image 3 (optional)"),
    gr.Image(type="pil", label="Image 4 (optional)"),
]
prompt_box = gr.Textbox(label="Prompt")
response_box = gr.Textbox(label="Response")

# Input order must match the positional parameters of ``analyze``.
demo = gr.Interface(
    title="MedGemma 1.5 4B",
    fn=analyze,
    inputs=[*image_inputs, prompt_box],
    outputs=response_box,
)

demo.launch()