MultimodalCXray / app.py
eduardofarina's picture
Update app.py
e343cd1 verified
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
import torch
from PIL import Image
import gradio as gr
# Load the model together with its image processor and tokenizer; all three
# come from the same Hub repository.
_MODEL_REPO = "eduardofarina/MultimodalXray"
model = VisionEncoderDecoderModel.from_pretrained(_MODEL_REPO)
feature_extractor = ViTImageProcessor.from_pretrained(_MODEL_REPO)
tokenizer = AutoTokenizer.from_pretrained(_MODEL_REPO)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
model.to(device)
# Prediction function
def predict(image):
    """Generate a preliminary radiology report for a chest X-ray image.

    Args:
        image: The uploaded image (a PIL image when called through the Gradio
            interface defined in this file), or ``None`` when nothing was
            uploaded.

    Returns:
        The decoded report text with surrounding whitespace stripped, or the
        message ``"No image provided."`` when ``image`` is ``None``.
    """
    if image is None:
        return "No image provided."

    # X-ray files are often grayscale (or carry an alpha channel). Normalise
    # PIL inputs to RGB before preprocessing so the image processor always
    # sees three channels; non-PIL inputs are passed through untouched.
    if isinstance(image, Image.Image) and image.mode != "RGB":
        image = image.convert("RGB")

    # Preprocess: turn the image into a batch of pixel values on the model's device.
    encoded = feature_extractor(images=image, return_tensors="pt")
    pixel_values = encoded.pixel_values.to(device)

    # Generate the report tokens, capped at 500 newly generated tokens.
    output_ids = model.generate(pixel_values, max_new_tokens=500)

    # Only one image was submitted, so decode the first (only) sequence.
    preds = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return preds.strip()
# Gradio interface: one image input, one text output, wired to predict().
_EXAMPLE_IMAGES = ["example_1.jpeg", "example_2.jpeg"]

input_image = gr.Image(type="pil", label="Upload any Chest Xray")
output_text = gr.Textbox(label="Preliminary Radiology Report")

interface = gr.Interface(
    fn=predict,
    inputs=input_image,
    outputs=output_text,
    title="X-Ray Report Generation",
    description="The examples are cases from Radiopaedia",
    examples=_EXAMPLE_IMAGES,
)

# Start the web app as soon as the script runs.
interface.launch(debug=True)