Spaces:

hackergeek
/

RADIOCAP200

Runtime error

App Files Files Community

hackergeek committed on Feb 1

Commit

c6845ce

verified ·

1 Parent(s): 8bbc742

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -61

app.py CHANGED Viewed

@@ -3,82 +3,68 @@ from torch import nn
 from torchvision import transforms
 from PIL import Image
 import gradio as gr
-from transformers import AutoTokenizer
-# ===========================
-# Device and dtype settings: CUDA with float16 when available, otherwise CPU with float32
-# ===========================
-DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
-# ===========================
-# Model checkpoint path and tokenizer name
-# ===========================
-CHECKPOINT_PATH = "checkpoints/epoch_04/model.pt"  # path of the checkpoint downloaded into the Space
-TOKENIZER_NAME = "bert-base-uncased"  # or whichever tokenizer model suits your model
-tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
-# ===========================
-# Model definition (simple example)
-# ===========================
-# Note: put your own real model here
-class DummyCaptionModel(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.dummy = nn.Linear(10, 10)  # placeholder layer; forward() never calls it
-    def forward(self, x, question=None):
-        # Placeholder output: the input x is ignored and a fixed string is returned
-        if question:
-            return "Answer to question: " + question
-        return "Generated caption for the image"
-model = DummyCaptionModel()
-if torch.cuda.is_available():  # NOTE(review): the checkpoint is loaded only when CUDA is available; on CPU the model keeps its freshly constructed weights
-    model.load_state_dict(torch.load(CHECKPOINT_PATH, map_location=DEVICE))
 model.to(DEVICE)
 model.eval()
-# ===========================
-# Image transform: resize to 224x224 and convert to a tensor (normalization is commented out)
-# ===========================
 transform = transforms.Compose([
     transforms.Resize((224, 224)),
     transforms.ToTensor(),
-    # transforms.Normalize(mean=[0.485, 0.456, 0.406],
-    #                      std=[0.229, 0.224, 0.225])
 ])
-# ===========================
-# Image loading function
-# ===========================
-def load_image(img: Image.Image):
-    """تبدیل PIL image به Tensor"""  # docstring (Persian): "Convert a PIL image to a Tensor"
-    return transform(img).unsqueeze(0).to(DEVICE, dtype=DTYPE)  # apply transform, add a leading dimension, move to DEVICE as DTYPE
-# ===========================
-# Main prediction function
-# ===========================
-def predict(img: Image.Image, question: str = ""):
     img_tensor = load_image(img)
-    # If the question is empty, generate a caption; otherwise do VQA
-    output_text = model(img_tensor, question.strip() or None)  # a blank or whitespace-only question is passed as None
-    return output_text
-# ===========================
-# Gradio interface
-# ===========================
 iface = gr.Interface(
     fn=predict,
-    inputs=[
-        gr.Image(type="pil", label="Upload Radiology Image"),  # first input to predict: the uploaded image, requested as type "pil"
-        gr.Textbox(label="Optional Question (for VQA)", placeholder="Ask a question or leave empty for caption")
-    ],
-    outputs=gr.Textbox(label="Output"),
-    title="RADIOCAP200: Radiology Caption + VQA",
-    description="Upload a radiology image and optionally ask a question. If the question is empty, model generates a caption. Otherwise, it answers the question."
 )
 if __name__ == "__main__":
-    iface.launch(server_name="0.0.0.0", server_port=7860, share=True)

 from torchvision import transforms
 from PIL import Image
 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+# =======================
+# Settings
+# =======================
+MODEL_NAME = "erfanasghariyan/RADIOCAP200"
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DTYPE = torch.float32  # or torch.bfloat16 if the model is bf16
+# =======================
+# Load the model and tokenizer
+# =======================
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)  # NOTE(review): loaded through the seq2seq LM auto-class, yet predict() feeds it an image tensor; confirm this matches the checkpoint's architecture
 model.to(DEVICE)
 model.eval()
+# =======================
+# Image transform: resize to 224x224, convert to a tensor, then normalize per channel
+# =======================
 transform = transforms.Compose([
     transforms.Resize((224, 224)),
     transforms.ToTensor(),
+    transforms.Normalize([0.485, 0.456, 0.406],  # per-channel mean
+                         [0.229, 0.224, 0.225])  # per-channel std
 ])
+# =======================
+# Image preprocessing function: accepts a file path or a PIL image and returns the transformed tensor
+# =======================
+def load_image(img):
+    # If the input is a file path, open it with PIL
+    if isinstance(img, str):
+        img = Image.open(img).convert("RGB")
+    elif isinstance(img, Image.Image):
+        img = img.convert("RGB")
+    else:
+        raise TypeError(f"Unexpected type {type(img)}")  # anything other than a path string or a PIL image is rejected
+    img_tensor = transform(img).unsqueeze(0).to(DEVICE, dtype=DTYPE)  # apply transform, add a leading dimension, move to DEVICE as DTYPE
+    return img_tensor
+# =======================
+# Prediction function: takes an image and returns the decoded caption text
+# =======================
+def predict(img):
     img_tensor = load_image(img)
+    with torch.no_grad():  # inference only; no gradients are tracked
+        output_ids = model.generate(img_tensor, max_length=128)  # NOTE(review): a float image tensor is passed as generate()'s positional input; confirm the model accepts pixel data this way (the Space page reports "Runtime error")
+    caption = tokenizer.decode(output_ids[0], skip_special_tokens=True)  # decode the first generated sequence, dropping special tokens
+    return caption
+# =======================
+# Gradio interface
+# =======================
 iface = gr.Interface(
     fn=predict,
+    inputs=gr.Image(type="filepath"),  # the file path is passed to the function
+    outputs="text",
+    title="RADIOCAP200 - Radiology Captioning",
+    description="Upload a radiology image and get a generated report/caption."
 )
 if __name__ == "__main__":
+    iface.launch()