import error corrected
Browse files- model.py +6 -6
- submission_task1.py +115 -68
model.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
| 1 |
import torch
|
| 2 |
import torch.nn as nn
|
| 3 |
import os
|
| 4 |
-
from
|
| 5 |
-
from
|
| 6 |
-
from
|
| 7 |
-
from
|
| 8 |
-
from
|
| 9 |
-
from
|
| 10 |
|
| 11 |
|
| 12 |
class VQAModel(nn.Module):
|
|
|
|
| 1 |
import torch
|
| 2 |
import torch.nn as nn
|
| 3 |
import os
|
| 4 |
+
from qtype import QuestionTypeClassifier
|
| 5 |
+
from functions import build_vocabs, build_answer_vocab, collate_fn, preprocess_example, normalize_answer, preprocess_image
|
| 6 |
+
from models import disease_model, device, generate_descriptive_answer, router_tokenizer, gen_model
|
| 7 |
+
from tpred import TaskPredictor
|
| 8 |
+
from model_functions import compute_loss, compute_meteor, compute_rouge, extract_count, forward_batch
|
| 9 |
+
from fussionmodel import BertModel, CoAttentionFusion, ViTModel, F
|
| 10 |
|
| 11 |
|
| 12 |
class VQAModel(nn.Module):
|
submission_task1.py
CHANGED
|
@@ -1,107 +1,154 @@
|
|
| 1 |
-
import
|
| 2 |
-
import torch.nn as nn
|
| 3 |
from datasets import load_dataset, Image as HfImage
|
| 4 |
-
from transformers import AutoProcessor
|
| 5 |
-
import
|
|
|
|
|
|
|
| 6 |
from tqdm import tqdm
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
from evaluate import load
|
| 8 |
|
| 9 |
-
# ================== METRICS ================== #
|
| 10 |
bleu = load("bleu")
|
| 11 |
rouge = load("rouge")
|
| 12 |
meteor = load("meteor")
|
| 13 |
|
| 14 |
-
|
| 15 |
ds = load_dataset("SimulaMet/Kvasir-VQA-x1")["test"]
|
| 16 |
-
ds_shuffled = ds.shuffle(seed=42)
|
| 17 |
-
val_dataset = ds_shuffled.select(range(1500))
|
| 18 |
val_dataset = val_dataset.cast_column("image", HfImage())
|
|
|
|
| 19 |
|
| 20 |
-
|
|
|
|
| 21 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
| 25 |
|
| 26 |
initial_mem = get_mem()
|
| 27 |
|
| 28 |
-
#
|
|
|
|
| 29 |
SUBMISSION_INFO = {
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
"Country": "Pakistan",
|
| 35 |
"Notes_to_organizers": "Custom pipeline with disease classifier + co-attention fusion."
|
| 36 |
}
|
|
|
|
|
|
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
disease_model.eval()
|
| 45 |
|
| 46 |
-
# co-attention fusion
|
| 47 |
-
fusion_model = CoAttentionFusion(img_dim=2048, ques_dim=768, disease_dim=23, hidden_dim=512).to(device)
|
| 48 |
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
| 51 |
|
| 52 |
-
|
|
|
|
| 53 |
|
| 54 |
-
# ================== VALIDATION LOOP ================== #
|
| 55 |
start_time, post_model_mem = time.time(), get_mem()
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
for idx, ex in enumerate(tqdm(val_dataset, desc="Validating")):
|
| 58 |
question = ex["question"]
|
| 59 |
-
image = ex["image"].convert(
|
| 60 |
-
|
| 61 |
-
#
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
inputs =
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
#
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
answer
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
references = [[e] for e in val_dataset['answer']]
|
| 83 |
preds = [pred['answer'] for pred in predictions]
|
| 84 |
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
public_scores = {
|
| 90 |
'bleu': bleu_score,
|
| 91 |
-
'rouge1':
|
| 92 |
-
'rouge2':
|
| 93 |
-
'rougeL':
|
| 94 |
'meteor': meteor_score
|
| 95 |
}
|
| 96 |
-
print("✨
|
| 97 |
-
|
| 98 |
-
#
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
with open("predictions_1.json", "w") as f:
|
| 106 |
json.dump(output_data, f, indent=4)
|
| 107 |
-
print("
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import AutoModelForCausalLM
|
|
|
|
| 2 |
from datasets import load_dataset, Image as HfImage
|
| 3 |
+
from transformers import AutoProcessor
|
| 4 |
+
import torch
|
| 5 |
+
import json
|
| 6 |
+
import time
|
| 7 |
from tqdm import tqdm
|
| 8 |
+
import subprocess
|
| 9 |
+
import platform
|
| 10 |
+
import sys
|
| 11 |
+
|
| 12 |
from evaluate import load
|
| 13 |
|
|
|
|
| 14 |
bleu = load("bleu")
|
| 15 |
rouge = load("rouge")
|
| 16 |
meteor = load("meteor")
|
| 17 |
|
| 18 |
+
|
| 19 |
ds = load_dataset("SimulaMet/Kvasir-VQA-x1")["test"]
|
| 20 |
+
ds_shuffled = ds.shuffle(seed=42) # Shuffle with fixed seed for reproducibility
|
| 21 |
+
val_dataset = ds_shuffled.select(range(1500)) # Select first 1500 after shuffle
|
| 22 |
val_dataset = val_dataset.cast_column("image", HfImage())
|
| 23 |
+
predictions = [] # List to store predictions
|
| 24 |
|
| 25 |
+
gpu_name = torch.cuda.get_device_name(
|
| 26 |
+
0) if torch.cuda.is_available() else "cpu"
|
| 27 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 28 |
|
| 29 |
+
|
| 30 |
+
def get_mem(): return torch.cuda.memory_allocated(device) / \
|
| 31 |
+
(1024 ** 2) if torch.cuda.is_available() else 0
|
| 32 |
+
|
| 33 |
|
| 34 |
initial_mem = get_mem()
|
| 35 |
|
| 36 |
+
# ✏️✏️--------EDIT SECTION 1: SUBMISSION DETAILS and MODEL LOADING --------✏️✏️#
|
| 37 |
+
|
| 38 |
SUBMISSION_INFO = {
|
| 39 |
+
# 🔹 TODO: PARTICIPANTS MUST ADD PROPER SUBMISSION INFO FOR THE SUBMISSION 🔹
|
| 40 |
+
# This will be visible to the organizers
|
| 41 |
+
# DON'T change the keys; only add your info
|
| 42 |
+
"Participant_Names": "Zeshan Khan",
|
| 43 |
+
"Affiliations": "National University of Computer and Emerging Sciences",
|
| 44 |
+
"Contact_emails": ["zeshankhanalvi@gmail.com"],
|
| 45 |
+
# Note: only the first email will be used for correspondence
|
| 46 |
+
"Team_Name": "FAST-NU-DS",
|
| 47 |
"Country": "Pakistan",
|
| 48 |
"Notes_to_organizers": "Custom pipeline with disease classifier + co-attention fusion."
|
| 49 |
}
|
| 50 |
+
# 🔹 TODO: PARTICIPANTS MUST LOAD THEIR MODEL HERE, EDIT AS NECESSARY FOR YOUR MODEL 🔹
|
| 51 |
+
# can add necessary library imports here
|
| 52 |
|
| 53 |
+
from model import VQAModel
|
| 54 |
+
import torch
|
| 55 |
+
import torch.nn as nn
|
| 56 |
+
from datasets import load_dataset
|
| 57 |
+
from torch.utils.data import DataLoader
|
| 58 |
+
from functions import preprocess_example, collate_fn
|
|
|
|
| 59 |
|
|
|
|
|
|
|
| 60 |
|
| 61 |
+
model = VQAModel(img_dim=768, ques_dim=768, disease_dim=23, hidden_dim=512).to(device)
|
| 62 |
+
model.load("vqa.pt")
|
| 63 |
+
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, collate_fn=collate_fn)
|
| 64 |
+
res=model.eval(val_loader)
|
| 65 |
|
| 66 |
+
#model_hf.eval() # Ensure model is in evaluation mode
|
| 67 |
+
# 🔚----------------END SUBMISSION DETAILS and MODEL LOADING -----------------🔚#
|
| 68 |
|
|
|
|
| 69 |
start_time, post_model_mem = time.time(), get_mem()
|
| 70 |
+
total_time, final_mem = round(
|
| 71 |
+
time.time() - start_time, 4), round(get_mem() - post_model_mem, 2)
|
| 72 |
+
model_mem_used = round(post_model_mem - initial_mem, 2)
|
| 73 |
|
| 74 |
for idx, ex in enumerate(tqdm(val_dataset, desc="Validating")):
|
| 75 |
question = ex["question"]
|
| 76 |
+
image = ex["image"].convert(
|
| 77 |
+
"RGB") if ex["image"].mode != "RGB" else ex["image"]
|
| 78 |
+
# you have access to 'question' and 'image' variables for each example
|
| 79 |
+
|
| 80 |
+
# ✏️✏️___________EDIT SECTION 2: ANSWER GENERATION___________✏️✏️#
|
| 81 |
+
# 🔹 TODO: PARTICIPANTS CAN MODIFY THIS TOKENIZATION STEP IF NEEDED 🔹
|
| 82 |
+
answer=model.predict(image=image,question=question)
|
| 83 |
+
#inputs = processor(text=[question], images=[image],
|
| 84 |
+
# return_tensors="pt", padding=True)
|
| 85 |
+
#inputs = {k: v.to(device) for k, v in inputs.items()
|
| 86 |
+
# if k not in ['labels', 'attention_mask']}
|
| 87 |
+
|
| 88 |
+
# 🔹 TODO: PARTICIPANTS CAN MODIFY THE GENERATION AND DECODING METHOD HERE 🔹
|
| 89 |
+
#with torch.no_grad():
|
| 90 |
+
# output = model_hf.generate(**inputs)
|
| 91 |
+
#answer = processor.tokenizer.decode(output[0], skip_special_tokens=True)
|
| 92 |
+
|
| 93 |
+
# make sure 'answer' variable will hold answer (sentence/word) as str
|
| 94 |
+
# 🔚________________ END ANSWER GENERATION ________________🔚#
|
| 95 |
+
|
| 96 |
+
# ⛔ DO NOT EDIT any lines below from here; you can edit only up to the decoding step above, as required. ⛔
|
| 97 |
+
# Ensures answer is a string
|
| 98 |
+
assert isinstance(
|
| 99 |
+
answer, str), f"Generated answer at index {idx} is not a string"
|
| 100 |
+
# Appends prediction
|
| 101 |
+
predictions.append(
|
| 102 |
+
{"index": idx, "img_id": ex["img_id"], "question": ex["question"], "answer": answer})
|
| 103 |
+
|
| 104 |
+
# Ensure all predictions match dataset length
|
| 105 |
+
assert len(predictions) == len(
|
| 106 |
+
val_dataset), "Mismatch between predictions and dataset length"
|
| 107 |
+
|
| 108 |
+
total_time, final_mem = round(
|
| 109 |
+
time.time() - start_time, 4), round(get_mem() - post_model_mem, 2)
|
| 110 |
+
model_mem_used = round(post_model_mem - initial_mem, 2)
|
| 111 |
+
|
| 112 |
+
# calculates metrics
|
| 113 |
references = [[e] for e in val_dataset['answer']]
|
| 114 |
preds = [pred['answer'] for pred in predictions]
|
| 115 |
|
| 116 |
+
bleu_result = bleu.compute(predictions=preds, references=references)
|
| 117 |
+
rouge_result = rouge.compute(predictions=preds, references=references)
|
| 118 |
+
meteor_result = meteor.compute(predictions=preds, references=references)
|
| 119 |
+
bleu_score = round(bleu_result['bleu'], 4)
|
| 120 |
+
rouge1_score = round(float(rouge_result['rouge1']), 4)
|
| 121 |
+
rouge2_score = round(float(rouge_result['rouge2']), 4)
|
| 122 |
+
rougeL_score = round(float(rouge_result['rougeL']), 4)
|
| 123 |
+
meteor_score = round(float(meteor_result['meteor']), 4)
|
| 124 |
|
| 125 |
public_scores = {
|
| 126 |
'bleu': bleu_score,
|
| 127 |
+
'rouge1': rouge1_score,
|
| 128 |
+
'rouge2': rouge2_score,
|
| 129 |
+
'rougeL': rougeL_score,
|
| 130 |
'meteor': meteor_score
|
| 131 |
}
|
| 132 |
+
print("✨Public scores: ", public_scores)
|
| 133 |
+
|
| 134 |
+
# Saves predictions to a JSON file
|
| 135 |
+
|
| 136 |
+
output_data = {"submission_info": SUBMISSION_INFO, "public_scores": public_scores,
|
| 137 |
+
"predictions": predictions, "total_time": total_time, "time_per_item": total_time / len(val_dataset),
|
| 138 |
+
"memory_used_mb": final_mem, "model_memory_mb": model_mem_used, "gpu_name": gpu_name,
|
| 139 |
+
"debug": {
|
| 140 |
+
"packages": json.loads(subprocess.check_output([sys.executable, "-m", "pip", "list", "--format=json"])),
|
| 141 |
+
"system": {
|
| 142 |
+
"python": platform.python_version(),
|
| 143 |
+
"os": platform.system(),
|
| 144 |
+
"platform": platform.platform(),
|
| 145 |
+
"arch": platform.machine()
|
| 146 |
+
}}}
|
| 147 |
+
|
| 148 |
+
|
| 149 |
with open("predictions_1.json", "w") as f:
|
| 150 |
json.dump(output_data, f, indent=4)
|
| 151 |
+
print(f"Time: {total_time}s | Mem: {final_mem}MB | Model Load Mem: {model_mem_used}MB | GPU: {gpu_name}")
|
| 152 |
+
print("✅ Scripts Looks Good! Generation process completed successfully. Results saved to 'predictions_1.json'.")
|
| 153 |
+
print("Next Step:\n 1) Upload this submission_task1.py script file to HuggingFace model repository.")
|
| 154 |
+
print('''\n 2) Make a submission to the competition:\n Run:: medvqa validate_and_submit --competition=medico-2025 --task=1 --repo_id=...''')
|