| from transformers import AutoModelForCausalLM |
| from datasets import load_dataset, Image as HfImage |
| from transformers import AutoProcessor |
| import torch |
| import json |
| import time |
| from tqdm import tqdm |
| import subprocess |
| import platform |
| import sys |
|
|
| from evaluate import load |
|
|
# Corpus-level text-similarity metrics used for the public leaderboard scores.
bleu, rouge, meteor = (load(metric) for metric in ("bleu", "rouge", "meteor"))
|
|
|
|
# Build a deterministic 1500-example validation subset of the Kvasir-VQA-x1
# test split (seeded shuffle keeps the sample reproducible across runs).
ds = load_dataset("SimulaMet/Kvasir-VQA-x1")["test"]
ds_shuffled = ds.shuffle(seed=42)
val_dataset = ds_shuffled.select(range(1500)).cast_column("image", HfImage())

# Accumulates one {"index", "img_id", "question", "answer"} record per example.
predictions = []
|
|
# Probe CUDA once and derive both the reporting name and the compute device.
_has_cuda = torch.cuda.is_available()
gpu_name = torch.cuda.get_device_name(0) if _has_cuda else "cpu"
device = "cuda" if _has_cuda else "cpu"
|
|
|
|
def get_mem():
    """Return the CUDA memory currently allocated on `device`, in MiB.

    Falls back to 0 when no GPU is available, so the memory bookkeeping
    below degrades gracefully on CPU-only machines.
    """
    if not torch.cuda.is_available():
        return 0
    return torch.cuda.memory_allocated(device) / (1024 ** 2)
|
|
|
|
# Baseline GPU memory (MiB) before the model is loaded; subtracted later to
# report the model's load footprint.
initial_mem = get_mem()
|
|
| |
|
|
# Organizer-required metadata, embedded verbatim in the submission JSON.
# NOTE: dict insertion order is preserved by json.dump, so the key order
# here is the order that appears in predictions_1.json.
SUBMISSION_INFO = {
    "Participant_Names": "Zeshan Khan",
    "Affiliations": "National University of Computer and Emerging Sciences",
    "Contact_emails": ["zeshankhanalvi@gmail.com"],
    "Team_Name": "FAST-NU-DS",
    "Country": "Pakistan",
    "Notes_to_organizers": "Custom pipeline with disease classifier + co-attention fusion."
}
| |
| |
|
|
# NOTE(review): mid-file imports; `torch` is already imported at the top of
# the file and `load_dataset` is re-imported here — redundant but harmless.
# `torch.nn` and `preprocess_example` appear unused in this file; confirm
# before removing (this may be one chunk of a larger file).
# NOTE(review): `from .model import ...` is a *relative* import — it only
# works when this file runs as part of a package (python -m pkg.script),
# not as a plain script; confirm the intended invocation.
from .model import VQAModel
import torch
import torch.nn as nn
from datasets import load_dataset
from torch.utils.data import DataLoader
from functions import preprocess_example, collate_fn
|
|
|
|
# Co-attention VQA model: 768-d image features, 768-d question features,
# 23 disease classes, 512-d fusion hidden size.
model = VQAModel(img_dim=768, ques_dim=768, disease_dim=23, hidden_dim=512).to(device)
# Loads pretrained weights from the working directory — TODO confirm "vqa.pt" path.
model.load("vqa.pt")
# NOTE(review): this DataLoader is never iterated below — the inference loop
# walks `val_dataset` item by item. Presumably leftover from a batched
# pipeline; confirm before removing.
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, collate_fn=collate_fn)
| |
|
|
| |
| |
|
|
# Mark the start of inference: wall-clock time and post-model-load memory.
# (The original also computed total_time/final_mem here, immediately after
# start_time — dead code that was overwritten verbatim after the loop.)
start_time, post_model_mem = time.time(), get_mem()
# Memory attributable to loading the model itself (MiB).
model_mem_used = round(post_model_mem - initial_mem, 2)
|
|
# Run the model over every validation example, collecting one prediction
# record per item in dataset order.
for idx, ex in enumerate(tqdm(val_dataset, desc="Validating")):
    question = ex["question"]
    img = ex["image"]
    # Ensure a 3-channel RGB image for the model; skip the copy when the
    # image is already RGB.
    image = img if img.mode == "RGB" else img.convert("RGB")

    answer = model.predict(image=image, question=question)

    # Fail fast on a malformed model output. An explicit raise (rather than
    # `assert`) still fires when Python runs with -O.
    if not isinstance(answer, str):
        raise TypeError(f"Generated answer at index {idx} is not a string")

    predictions.append(
        {"index": idx, "img_id": ex["img_id"], "question": question, "answer": answer})
|
|
| |
# Sanity-check completeness before scoring. Explicit raise instead of
# `assert` so the check survives `python -O`.
if len(predictions) != len(val_dataset):
    raise RuntimeError("Mismatch between predictions and dataset length")

# Final timing/memory figures for the report (MiB, seconds).
total_time = round(time.time() - start_time, 4)
final_mem = round(get_mem() - post_model_mem, 2)
model_mem_used = round(post_model_mem - initial_mem, 2)
|
|
| |
# Each prediction is scored against exactly one gold answer, so every
# reference list is a singleton.
references = [[gold] for gold in val_dataset["answer"]]
preds = [entry["answer"] for entry in predictions]

# Corpus-level metric runs.
bleu_result = bleu.compute(predictions=preds, references=references)
rouge_result = rouge.compute(predictions=preds, references=references)
meteor_result = meteor.compute(predictions=preds, references=references)

# Round every score to 4 decimal places for the leaderboard payload.
bleu_score = round(bleu_result["bleu"], 4)
rouge1_score = round(float(rouge_result["rouge1"]), 4)
rouge2_score = round(float(rouge_result["rouge2"]), 4)
rougeL_score = round(float(rouge_result["rougeL"]), 4)
meteor_score = round(float(meteor_result["meteor"]), 4)
|
|
# Score dictionary published in the submission file and echoed to stdout.
public_scores = dict(
    bleu=bleu_score,
    rouge1=rouge1_score,
    rouge2=rouge2_score,
    rougeL=rougeL_score,
    meteor=meteor_score,
)
print("β¨Public scores: ", public_scores)
|
|
| |
|
|
# Full submission payload: organizer metadata, public scores, per-item
# predictions, runtime/memory figures, and an environment snapshot for
# reproducibility (installed packages via `pip list --format=json` plus
# interpreter/OS details).
output_data = {"submission_info": SUBMISSION_INFO, "public_scores": public_scores,
               "predictions": predictions, "total_time": total_time, "time_per_item": total_time / len(val_dataset),
               "memory_used_mb": final_mem, "model_memory_mb": model_mem_used, "gpu_name": gpu_name,
               "debug": {
                   "packages": json.loads(subprocess.check_output([sys.executable, "-m", "pip", "list", "--format=json"])),
                   "system": {
                       "python": platform.python_version(),
                       "os": platform.system(),
                       "platform": platform.platform(),
                       "arch": platform.machine()
                   }}}
|
|
|
|
# Persist the submission payload and print a short run report.
with open("predictions_1.json", "w") as f:
    json.dump(output_data, f, indent=4)
print(f"Time: {total_time}s | Mem: {final_mem}MB | Model Load Mem: {model_mem_used}MB | GPU: {gpu_name}")
# Fixed: the original success-message literal was mojibake-garbled and split
# across two physical lines (invalid syntax); reconstructed as one string.
print("✅ Scripts Looks Good! Generation process completed successfully. Results saved to 'predictions_1.json'.")
print("Next Step:\n 1) Upload this submission_task1.py script file to HuggingFace model repository.")
print('''\n 2) Make a submission to the competition:\n Run:: medvqa validate_and_submit --competition=medico-2025 --task=1 --repo_id=...''')