"""Gradio demo that generates a preliminary radiology report from a chest X-ray."""

from typing import Optional

import gradio as gr
import torch
from PIL import Image
from transformers import AutoTokenizer, ViTImageProcessor, VisionEncoderDecoderModel

# Hugging Face Hub checkpoint shared by the model, image processor and tokenizer.
MODEL_ID = "eduardofarina/MultimodalXray"
# Upper bound on the number of tokens generated for a single report.
MAX_NEW_TOKENS = 500

# Load the model and its companion components.
model = VisionEncoderDecoderModel.from_pretrained(MODEL_ID)
feature_extractor = ViTImageProcessor.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


def predict(image: Optional[Image.Image]) -> str:
    """Generate a report for one chest X-ray image.

    Args:
        image: The uploaded image as a PIL image (the input component is
            configured with ``type='pil'``), or ``None`` when nothing was
            uploaded.

    Returns:
        The decoded text with special tokens removed and surrounding
        whitespace stripped, or ``"No image provided."`` when ``image`` is
        ``None``.
    """
    if image is None:
        return "No image provided."

    # X-rays are frequently stored as grayscale or RGBA; normalise to RGB so
    # the image processor always receives three channels.
    # NOTE(review): Gradio is expected to deliver RGB already by default, in
    # which case this is a no-op — it protects direct callers of predict().
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Preprocess the image into a tensor on the same device as the model.
    encoded = feature_extractor(images=image, return_tensors="pt")
    pixel_values = encoded.pixel_values.to(device)

    # Generate token ids and decode the first (only) sequence into text.
    output_ids = model.generate(pixel_values, max_new_tokens=MAX_NEW_TOKENS)
    preds = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return preds.strip()


# Gradio interface.
input_image = gr.Image(label="Upload any Chest Xray", type='pil')
output_text = gr.Textbox(label="Preliminary Radiology Report")

interface = gr.Interface(
    fn=predict,
    inputs=input_image,
    outputs=output_text,
    title="X-Ray Report Generation",
    description="The examples are cases from Radiopaedia",
    examples=["example_1.jpeg", "example_2.jpeg"],
)

interface.launch(debug=True)