alvikhan commited on
Commit
5605f0a
·
1 Parent(s): 62305fe

import error corrected

Browse files
Files changed (2) hide show
  1. model.py +6 -6
  2. submission_task1.py +115 -68
model.py CHANGED
@@ -1,12 +1,12 @@
1
  import torch
2
  import torch.nn as nn
3
  import os
4
- from .qtype import QuestionTypeClassifier
5
- from .functions import build_vocabs, build_answer_vocab, collate_fn, preprocess_example, normalize_answer, preprocess_image
6
- from .models import disease_model, device, generate_descriptive_answer, router_tokenizer, gen_model
7
- from .tpred import TaskPredictor
8
- from .model_functions import compute_loss, compute_meteor, compute_rouge, extract_count, forward_batch
9
- from .fussionmodel import BertModel, CoAttentionFusion, ViTModel, F
10
 
11
 
12
  class VQAModel(nn.Module):
 
1
  import torch
2
  import torch.nn as nn
3
  import os
4
+ from qtype import QuestionTypeClassifier
5
+ from functions import build_vocabs, build_answer_vocab, collate_fn, preprocess_example, normalize_answer, preprocess_image
6
+ from models import disease_model, device, generate_descriptive_answer, router_tokenizer, gen_model
7
+ from tpred import TaskPredictor
8
+ from model_functions import compute_loss, compute_meteor, compute_rouge, extract_count, forward_batch
9
+ from fussionmodel import BertModel, CoAttentionFusion, ViTModel, F
10
 
11
 
12
  class VQAModel(nn.Module):
submission_task1.py CHANGED
@@ -1,107 +1,154 @@
1
- import torch
2
- import torch.nn as nn
3
  from datasets import load_dataset, Image as HfImage
4
- from transformers import AutoProcessor, AutoTokenizer
5
- import json, time, platform, sys, subprocess
 
 
6
  from tqdm import tqdm
 
 
 
 
7
  from evaluate import load
8
 
9
- # ================== METRICS ================== #
10
  bleu = load("bleu")
11
  rouge = load("rouge")
12
  meteor = load("meteor")
13
 
14
- # ================== DATASET ================== #
15
  ds = load_dataset("SimulaMet/Kvasir-VQA-x1")["test"]
16
- ds_shuffled = ds.shuffle(seed=42)
17
- val_dataset = ds_shuffled.select(range(1500))
18
  val_dataset = val_dataset.cast_column("image", HfImage())
 
19
 
20
- predictions = []
 
21
  device = "cuda" if torch.cuda.is_available() else "cpu"
22
 
23
- def get_mem():
24
- return torch.cuda.memory_allocated(device)/(1024**2) if torch.cuda.is_available() else 0
 
 
25
 
26
  initial_mem = get_mem()
27
 
28
- # ================== SUBMISSION INFO ================== #
 
29
  SUBMISSION_INFO = {
30
- "Participant_Names": "Your Name",
31
- "Affiliations": "Your Institute",
32
- "Contact_emails": ["your_email@example.com"],
33
- "Team_Name": "YourTeam",
 
 
 
 
34
  "Country": "Pakistan",
35
  "Notes_to_organizers": "Custom pipeline with disease classifier + co-attention fusion."
36
  }
 
 
37
 
38
- # ================== IMPORT YOUR MODEL ================== #
39
- from modeling_vqa import DiseaseClassifier, CoAttentionFusion, AnswerGenerator
40
-
41
- # load pretrained disease classifier (you must save this separately or integrate HF repo)
42
- disease_model = DiseaseClassifier().to(device)
43
- disease_model.load_state_dict(torch.load("disease_classifier.pt", map_location=device))
44
- disease_model.eval()
45
 
46
- # co-attention fusion
47
- fusion_model = CoAttentionFusion(img_dim=2048, ques_dim=768, disease_dim=23, hidden_dim=512).to(device)
48
 
49
- # answer generator (choose LM decoder)
50
- answer_generator = AnswerGenerator(num_classes=23).to(device)
 
 
51
 
52
- tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
 
53
 
54
- # ================== VALIDATION LOOP ================== #
55
  start_time, post_model_mem = time.time(), get_mem()
 
 
 
56
 
57
  for idx, ex in enumerate(tqdm(val_dataset, desc="Validating")):
58
  question = ex["question"]
59
- image = ex["image"].convert("RGB")
60
-
61
- # --- Step 1: Extract disease vector ---
62
- with torch.no_grad():
63
- dis_vec = disease_model(image).to(device) # [23]
64
-
65
- # --- Step 2: Encode question ---
66
- inputs = tokenizer(question, return_tensors="pt", truncation=True, padding=True).to(device)
67
- ques_feat = inputs["input_ids"]
68
-
69
- # --- Step 3: Get image features (CNN backbone placeholder) ---
70
- img_feat = torch.randn(1, 49, 2048).to(device) # replace with real extractor (ResNet/ViT)
71
-
72
- # --- Step 4: Fusion ---
73
- fused = fusion_model(img_feat, ques_feat.mean(dim=1), dis_vec.unsqueeze(0))
74
-
75
- # --- Step 5: Generate answer ---
76
- answer = answer_generator(fused)
77
-
78
- assert isinstance(answer, str), f"Generated answer at index {idx} is not a string"
79
- predictions.append({"index": idx, "img_id": ex["img_id"], "question": question, "answer": answer})
80
-
81
- # ================== METRICS ================== #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  references = [[e] for e in val_dataset['answer']]
83
  preds = [pred['answer'] for pred in predictions]
84
 
85
- bleu_score = round(bleu.compute(predictions=preds, references=references)['bleu'], 4)
86
- rouge_res = rouge.compute(predictions=preds, references=references)
87
- meteor_score = round(meteor.compute(predictions=preds, references=references)['meteor'], 4)
 
 
 
 
 
88
 
89
  public_scores = {
90
  'bleu': bleu_score,
91
- 'rouge1': round(float(rouge_res['rouge1']), 4),
92
- 'rouge2': round(float(rouge_res['rouge2']), 4),
93
- 'rougeL': round(float(rouge_res['rougeL']), 4),
94
  'meteor': meteor_score
95
  }
96
- print("โœจ Public scores: ", public_scores)
97
-
98
- # ================== SAVE OUTPUT ================== #
99
- output_data = {
100
- "submission_info": SUBMISSION_INFO,
101
- "public_scores": public_scores,
102
- "predictions": predictions,
103
- "gpu_name": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu",
104
- }
 
 
 
 
 
 
 
 
105
  with open("predictions_1.json", "w") as f:
106
  json.dump(output_data, f, indent=4)
107
- print("โœ… Done. Results saved to predictions_1.json")
 
 
 
 
1
+ from transformers import AutoModelForCausalLM
 
2
  from datasets import load_dataset, Image as HfImage
3
+ from transformers import AutoProcessor
4
+ import torch
5
+ import json
6
+ import time
7
  from tqdm import tqdm
8
+ import subprocess
9
+ import platform
10
+ import sys
11
+
12
  from evaluate import load
13
 
 
14
  bleu = load("bleu")
15
  rouge = load("rouge")
16
  meteor = load("meteor")
17
 
18
+
19
  ds = load_dataset("SimulaMet/Kvasir-VQA-x1")["test"]
20
+ ds_shuffled = ds.shuffle(seed=42) # Shuffle with fixed seed for reproducibility
21
+ val_dataset = ds_shuffled.select(range(1500)) # Select first 1500 after shuffle
22
  val_dataset = val_dataset.cast_column("image", HfImage())
23
+ predictions = [] # List to store predictions
24
 
25
+ gpu_name = torch.cuda.get_device_name(
26
+ 0) if torch.cuda.is_available() else "cpu"
27
  device = "cuda" if torch.cuda.is_available() else "cpu"
28
 
29
+
30
+ def get_mem(): return torch.cuda.memory_allocated(device) / \
31
+ (1024 ** 2) if torch.cuda.is_available() else 0
32
+
33
 
34
  initial_mem = get_mem()
35
 
36
+ # โœ๏ธโœ๏ธ--------EDIT SECTION 1: SUBMISISON DETAILS and MODEL LOADING --------โœ๏ธโœ๏ธ#
37
+
38
  SUBMISSION_INFO = {
39
+ # 🔹 TODO: PARTICIPANTS MUST ADD PROPER SUBMISSION INFO FOR THE SUBMISSION 🔹
40
+ # This will be visible to the organizers
41
+ # DONT change the keys, only add your info
42
+ "Participant_Names": "Zeshan Khan",
43
+ "Affiliations": "National University of Computer and Emerging Sciences",
44
+ "Contact_emails": ["zeshankhanalvi@gmail.com"],
45
+ # But, only the first email will be used for correspondence
46
+ "Team_Name": "FAST-NU-DS",
47
  "Country": "Pakistan",
48
  "Notes_to_organizers": "Custom pipeline with disease classifier + co-attention fusion."
49
  }
50
+ # 🔹 TODO: PARTICIPANTS MUST LOAD THEIR MODEL HERE, EDIT AS NECESSARY FOR YOUR MODEL 🔹
51
+ # can add necessary library imports here
52
 
53
+ from model import VQAModel
54
+ import torch
55
+ import torch.nn as nn
56
+ from datasets import load_dataset
57
+ from torch.utils.data import DataLoader
58
+ from functions import preprocess_example, collate_fn
 
59
 
 
 
60
 
61
+ model = VQAModel(img_dim=768, ques_dim=768, disease_dim=23, hidden_dim=512).to(device)
62
+ model.load("vqa.pt")
63
+ val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, collate_fn=collate_fn)
64
+ res=model.eval(val_loader)
65
 
66
+ #model_hf.eval() # Ensure model is in evaluation mode
67
+ # ๐Ÿ----------------END SUBMISISON DETAILS and MODEL LOADING -----------------๐Ÿ#
68
 
 
69
  start_time, post_model_mem = time.time(), get_mem()
70
+ total_time, final_mem = round(
71
+ time.time() - start_time, 4), round(get_mem() - post_model_mem, 2)
72
+ model_mem_used = round(post_model_mem - initial_mem, 2)
73
 
74
  for idx, ex in enumerate(tqdm(val_dataset, desc="Validating")):
75
  question = ex["question"]
76
+ image = ex["image"].convert(
77
+ "RGB") if ex["image"].mode != "RGB" else ex["image"]
78
+ # you have access to 'question' and 'image' variables for each example
79
+
80
+ # โœ๏ธโœ๏ธ___________EDIT SECTION 2: ANSWER GENERATION___________โœ๏ธโœ๏ธ#
81
+ # 🔹 TODO: PARTICIPANTS CAN MODIFY THIS TOKENIZATION STEP IF NEEDED 🔹
82
+ answer=model.predict(image=image,question=question)
83
+ #inputs = processor(text=[question], images=[image],
84
+ # return_tensors="pt", padding=True)
85
+ #inputs = {k: v.to(device) for k, v in inputs.items()
86
+ # if k not in ['labels', 'attention_mask']}
87
+
88
+ # 🔹 TODO: PARTICIPANTS CAN MODIFY THE GENERATION AND DECODING METHOD HERE 🔹
89
+ #with torch.no_grad():
90
+ # output = model_hf.generate(**inputs)
91
+ #answer = processor.tokenizer.decode(output[0], skip_special_tokens=True)
92
+
93
+ # make sure 'answer' variable will hold answer (sentence/word) as str
94
+ # ๐Ÿ________________ END ANSWER GENERATION ________________๐Ÿ#
95
+
96
+ # ⛔ DO NOT EDIT any lines below from here, can edit only up to decoding step above as required. ⛔
97
+ # Ensures answer is a string
98
+ assert isinstance(
99
+ answer, str), f"Generated answer at index {idx} is not a string"
100
+ # Appends prediction
101
+ predictions.append(
102
+ {"index": idx, "img_id": ex["img_id"], "question": ex["question"], "answer": answer})
103
+
104
+ # Ensure all predictions match dataset length
105
+ assert len(predictions) == len(
106
+ val_dataset), "Mismatch between predictions and dataset length"
107
+
108
+ total_time, final_mem = round(
109
+ time.time() - start_time, 4), round(get_mem() - post_model_mem, 2)
110
+ model_mem_used = round(post_model_mem - initial_mem, 2)
111
+
112
+ # calculates metrics
113
  references = [[e] for e in val_dataset['answer']]
114
  preds = [pred['answer'] for pred in predictions]
115
 
116
+ bleu_result = bleu.compute(predictions=preds, references=references)
117
+ rouge_result = rouge.compute(predictions=preds, references=references)
118
+ meteor_result = meteor.compute(predictions=preds, references=references)
119
+ bleu_score = round(bleu_result['bleu'], 4)
120
+ rouge1_score = round(float(rouge_result['rouge1']), 4)
121
+ rouge2_score = round(float(rouge_result['rouge2']), 4)
122
+ rougeL_score = round(float(rouge_result['rougeL']), 4)
123
+ meteor_score = round(float(meteor_result['meteor']), 4)
124
 
125
  public_scores = {
126
  'bleu': bleu_score,
127
+ 'rouge1': rouge1_score,
128
+ 'rouge2': rouge2_score,
129
+ 'rougeL': rougeL_score,
130
  'meteor': meteor_score
131
  }
132
+ print("โœจPublic scores: ", public_scores)
133
+
134
+ # Saves predictions to a JSON file
135
+
136
+ output_data = {"submission_info": SUBMISSION_INFO, "public_scores": public_scores,
137
+ "predictions": predictions, "total_time": total_time, "time_per_item": total_time / len(val_dataset),
138
+ "memory_used_mb": final_mem, "model_memory_mb": model_mem_used, "gpu_name": gpu_name,
139
+ "debug": {
140
+ "packages": json.loads(subprocess.check_output([sys.executable, "-m", "pip", "list", "--format=json"])),
141
+ "system": {
142
+ "python": platform.python_version(),
143
+ "os": platform.system(),
144
+ "platform": platform.platform(),
145
+ "arch": platform.machine()
146
+ }}}
147
+
148
+
149
  with open("predictions_1.json", "w") as f:
150
  json.dump(output_data, f, indent=4)
151
+ print(f"Time: {total_time}s | Mem: {final_mem}MB | Model Load Mem: {model_mem_used}MB | GPU: {gpu_name}")
152
+ print("โœ… Scripts Looks Good! Generation process completed successfully. Results saved to 'predictions_1.json'.")
153
+ print("Next Step:\n 1) Upload this submission_task1.py script file to HuggingFace model repository.")
154
+ print('''\n 2) Make a submission to the competition:\n Run:: medvqa validate_and_submit --competition=medico-2025 --task=1 --repo_id=...''')