alvikhan
/

medico2025

Model card Files Files and versions

xet

Community

alvikhan committed on Sep 25, 2025

Commit

5605f0a

1 Parent(s): 62305fe

import error corrected

Browse files

Files changed (2) hide show

model.py +6 -6
submission_task1.py +115 -68

model.py CHANGED Viewed

@@ -1,12 +1,12 @@
 import torch
 import torch.nn as nn
 import os
-from .qtype import QuestionTypeClassifier
-from .functions import build_vocabs, build_answer_vocab, collate_fn, preprocess_example, normalize_answer, preprocess_image
-from .models import disease_model, device, generate_descriptive_answer, router_tokenizer, gen_model
-from .tpred import TaskPredictor
-from .model_functions import compute_loss, compute_meteor, compute_rouge, extract_count, forward_batch
-from .fussionmodel import BertModel, CoAttentionFusion, ViTModel, F
 class VQAModel(nn.Module):

 import torch
 import torch.nn as nn
 import os
+from qtype import QuestionTypeClassifier
+from functions import build_vocabs, build_answer_vocab, collate_fn, preprocess_example, normalize_answer, preprocess_image
+from models import disease_model, device, generate_descriptive_answer, router_tokenizer, gen_model
+from tpred import TaskPredictor
+from model_functions import compute_loss, compute_meteor, compute_rouge, extract_count, forward_batch
+from fussionmodel import BertModel, CoAttentionFusion, ViTModel, F
 class VQAModel(nn.Module):

submission_task1.py CHANGED Viewed

@@ -1,107 +1,154 @@
-import torch
-import torch.nn as nn
 from datasets import load_dataset, Image as HfImage
-from transformers import AutoProcessor, AutoTokenizer
-import json, time, platform, sys, subprocess
 from tqdm import tqdm
 from evaluate import load
-# ================== METRICS ================== #
 bleu = load("bleu")
 rouge = load("rouge")
 meteor = load("meteor")
-# ================== DATASET ================== #
 ds = load_dataset("SimulaMet/Kvasir-VQA-x1")["test"]
-ds_shuffled = ds.shuffle(seed=42)
-val_dataset = ds_shuffled.select(range(1500))
 val_dataset = val_dataset.cast_column("image", HfImage())
-predictions = []
 device = "cuda" if torch.cuda.is_available() else "cpu"
-def get_mem():
-    return torch.cuda.memory_allocated(device)/(1024**2) if torch.cuda.is_available() else 0
 initial_mem = get_mem()
-# ================== SUBMISSION INFO ================== #
 SUBMISSION_INFO = {
-    "Participant_Names": "Your Name",
-    "Affiliations": "Your Institute",
-    "Contact_emails": ["your_email@example.com"],
-    "Team_Name": "YourTeam",
     "Country": "Pakistan",
     "Notes_to_organizers": "Custom pipeline with disease classifier + co-attention fusion."
 }
-# ================== IMPORT YOUR MODEL ================== #
-from modeling_vqa import DiseaseClassifier, CoAttentionFusion, AnswerGenerator
-# load pretrained disease classifier (you must save this separately or integrate HF repo)
-disease_model = DiseaseClassifier().to(device)
-disease_model.load_state_dict(torch.load("disease_classifier.pt", map_location=device))
-disease_model.eval()
-# co-attention fusion
-fusion_model = CoAttentionFusion(img_dim=2048, ques_dim=768, disease_dim=23, hidden_dim=512).to(device)
-# answer generator (choose LM decoder)
-answer_generator = AnswerGenerator(num_classes=23).to(device)
-tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
-# ================== VALIDATION LOOP ================== #
 start_time, post_model_mem = time.time(), get_mem()
 for idx, ex in enumerate(tqdm(val_dataset, desc="Validating")):
     question = ex["question"]
-    image = ex["image"].convert("RGB")
-    # --- Step 1: Extract disease vector ---
-    with torch.no_grad():
-        dis_vec = disease_model(image).to(device)  # [23]
-    # --- Step 2: Encode question ---
-    inputs = tokenizer(question, return_tensors="pt", truncation=True, padding=True).to(device)
-    ques_feat = inputs["input_ids"]
-    # --- Step 3: Get image features (CNN backbone placeholder) ---
-    img_feat = torch.randn(1, 49, 2048).to(device)  # replace with real extractor (ResNet/ViT)
-    # --- Step 4: Fusion ---
-    fused = fusion_model(img_feat, ques_feat.mean(dim=1), dis_vec.unsqueeze(0))
-    # --- Step 5: Generate answer ---
-    answer = answer_generator(fused)
-    assert isinstance(answer, str), f"Generated answer at index {idx} is not a string"
-    predictions.append({"index": idx, "img_id": ex["img_id"], "question": question, "answer": answer})
-# ================== METRICS ================== #
 references = [[e] for e in val_dataset['answer']]
 preds = [pred['answer'] for pred in predictions]
-bleu_score = round(bleu.compute(predictions=preds, references=references)['bleu'], 4)
-rouge_res = rouge.compute(predictions=preds, references=references)
-meteor_score = round(meteor.compute(predictions=preds, references=references)['meteor'], 4)
 public_scores = {
     'bleu': bleu_score,
-    'rouge1': round(float(rouge_res['rouge1']), 4),
-    'rouge2': round(float(rouge_res['rouge2']), 4),
-    'rougeL': round(float(rouge_res['rougeL']), 4),
     'meteor': meteor_score
 }
-print("✨ Public scores: ", public_scores)
-# ================== SAVE OUTPUT ================== #
-output_data = {
-    "submission_info": SUBMISSION_INFO,
-    "public_scores": public_scores,
-    "predictions": predictions,
-    "gpu_name": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu",
-}
 with open("predictions_1.json", "w") as f:
     json.dump(output_data, f, indent=4)
-print("✅ Done. Results saved to predictions_1.json")

+from transformers import AutoModelForCausalLM
 from datasets import load_dataset, Image as HfImage
+from transformers import AutoProcessor
+import torch
+import json
+import time
 from tqdm import tqdm
+import subprocess
+import platform
+import sys
 from evaluate import load
 bleu = load("bleu")
 rouge = load("rouge")
 meteor = load("meteor")
 ds = load_dataset("SimulaMet/Kvasir-VQA-x1")["test"]
+ds_shuffled = ds.shuffle(seed=42) # Shuffle with fixed seed for reproducibility
+val_dataset = ds_shuffled.select(range(1500)) # Select first 1500 after shuffle
 val_dataset = val_dataset.cast_column("image", HfImage())
+predictions = []  # List to store predictions
+gpu_name = torch.cuda.get_device_name(
+    0) if torch.cuda.is_available() else "cpu"
 device = "cuda" if torch.cuda.is_available() else "cpu"
+def get_mem(): return torch.cuda.memory_allocated(device) / \
+    (1024 ** 2) if torch.cuda.is_available() else 0
 initial_mem = get_mem()
+# ✏️✏️--------EDIT SECTION 1: SUBMISSION DETAILS and MODEL LOADING --------✏️✏️#
 SUBMISSION_INFO = {
+    # 🔹 TODO: PARTICIPANTS MUST ADD PROPER SUBMISSION INFO FOR THE SUBMISSION 🔹
+    # This will be visible to the organizers
+    # Don't change the keys; only add your info
+    "Participant_Names": "Zeshan Khan",
+    "Affiliations": "National University of Computer and Emerging Sciences",
+    "Contact_emails": ["zeshankhanalvi@gmail.com"],
+    # Note: only the first email will be used for correspondence
+    "Team_Name": "FAST-NU-DS",
     "Country": "Pakistan",
     "Notes_to_organizers": "Custom pipeline with disease classifier + co-attention fusion."
 }
+# 🔹 TODO: PARTICIPANTS MUST LOAD THEIR MODEL HERE, EDIT AS NECESSARY FOR YOUR MODEL 🔹
+# can add necessary library imports here
+from model import VQAModel
+import torch
+import torch.nn as nn
+from datasets import load_dataset
+from torch.utils.data import DataLoader
+from functions import preprocess_example, collate_fn
+model = VQAModel(img_dim=768, ques_dim=768, disease_dim=23, hidden_dim=512).to(device)
+model.load("vqa.pt")
+val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, collate_fn=collate_fn)
+res=model.eval(val_loader)
+#model_hf.eval()  # Ensure model is in evaluation mode
+# 🏁----------------END  SUBMISSION DETAILS and MODEL LOADING -----------------🏁#
 start_time, post_model_mem = time.time(), get_mem()
+total_time, final_mem = round(
+    time.time() - start_time, 4), round(get_mem() - post_model_mem, 2)
+model_mem_used = round(post_model_mem - initial_mem, 2)
 for idx, ex in enumerate(tqdm(val_dataset, desc="Validating")):
     question = ex["question"]
+    image = ex["image"].convert(
+        "RGB") if ex["image"].mode != "RGB" else ex["image"]
+    # you have access to 'question' and 'image' variables for each example
+# ✏️✏️___________EDIT SECTION 2: ANSWER GENERATION___________✏️✏️#
+    # 🔹 TODO: PARTICIPANTS CAN MODIFY THIS TOKENIZATION STEP IF NEEDED 🔹
+    answer=model.predict(image=image,question=question)
+    #inputs = processor(text=[question], images=[image],
+    #                   return_tensors="pt", padding=True)
+    #inputs = {k: v.to(device) for k, v in inputs.items()
+     #         if k not in ['labels', 'attention_mask']}
+    # 🔹 TODO: PARTICIPANTS CAN MODIFY THE GENERATION AND DECODING METHOD HERE 🔹
+    #with torch.no_grad():
+    #    output = model_hf.generate(**inputs)
+    #answer = processor.tokenizer.decode(output[0], skip_special_tokens=True)
+    # make sure 'answer' variable will hold answer (sentence/word) as str
+# 🏁________________ END ANSWER GENERATION ________________🏁#
+# ⛔ DO NOT EDIT any lines below this point; you may edit only up to the decoding step above, as required. ⛔
+    # Ensures answer is a string
+    assert isinstance(
+        answer, str), f"Generated answer at index {idx} is not a string"
+    # Appends prediction
+    predictions.append(
+        {"index": idx, "img_id": ex["img_id"], "question": ex["question"], "answer": answer})
+# Ensure all predictions match dataset length
+assert len(predictions) == len(
+    val_dataset), "Mismatch between predictions and dataset length"
+total_time, final_mem = round(
+    time.time() - start_time, 4), round(get_mem() - post_model_mem, 2)
+model_mem_used = round(post_model_mem - initial_mem, 2)
+# Calculates metrics
 references = [[e] for e in val_dataset['answer']]
 preds = [pred['answer'] for pred in predictions]
+bleu_result = bleu.compute(predictions=preds, references=references)
+rouge_result = rouge.compute(predictions=preds, references=references)
+meteor_result = meteor.compute(predictions=preds, references=references)
+bleu_score = round(bleu_result['bleu'], 4)
+rouge1_score = round(float(rouge_result['rouge1']), 4)
+rouge2_score = round(float(rouge_result['rouge2']), 4)
+rougeL_score = round(float(rouge_result['rougeL']), 4)
+meteor_score = round(float(meteor_result['meteor']), 4)
 public_scores = {
     'bleu': bleu_score,
+    'rouge1': rouge1_score,
+    'rouge2': rouge2_score,
+    'rougeL': rougeL_score,
     'meteor': meteor_score
 }
+print("✨Public scores: ", public_scores)
+# Saves predictions to a JSON file
+output_data = {"submission_info": SUBMISSION_INFO, "public_scores": public_scores,
+               "predictions": predictions, "total_time": total_time, "time_per_item": total_time / len(val_dataset),
+               "memory_used_mb": final_mem, "model_memory_mb": model_mem_used, "gpu_name": gpu_name,
+               "debug": {
+                   "packages": json.loads(subprocess.check_output([sys.executable, "-m", "pip", "list", "--format=json"])),
+                   "system": {
+                       "python": platform.python_version(),
+                       "os": platform.system(),
+                       "platform": platform.platform(),
+                       "arch": platform.machine()
+                   }}}
 with open("predictions_1.json", "w") as f:
     json.dump(output_data, f, indent=4)
+print(f"Time: {total_time}s | Mem: {final_mem}MB | Model Load Mem: {model_mem_used}MB | GPU: {gpu_name}")
+print("✅ Scripts Looks Good! Generation process completed successfully. Results saved to 'predictions_1.json'.")
+print("Next Step:\n 1) Upload this submission_task1.py script file to HuggingFace model repository.")
+print('''\n 2) Make a submission to the competition:\n Run:: medvqa validate_and_submit --competition=medico-2025 --task=1 --repo_id=...''')