copilot-swe-agent[bot] SushantGautam commited on
Commit
e7a84e7
Β·
1 Parent(s): 5278ffc

Add Medico 2026 competition structure and update README

Browse files

Co-authored-by: SushantGautam <16721983+SushantGautam@users.noreply.github.com>

README.md CHANGED
@@ -6,7 +6,7 @@ app_file: gradio_launcher.py
6
  # MedVQA
7
 
8
  A CLI tool used for multiple MedVQA competitions:
9
- [ImageCLEFmed-MEDVQA-GI-2025] (https://github.com/simula/ImageCLEFmed-MEDVQA-GI-2025) and [MediaEval-Medico-2025](https://github.com/simula/MediaEval-Medico-2025).
10
 
11
  ## Installation
12
 
@@ -28,5 +28,7 @@ Check respective competition repo for detailed submission instructions. For exam
28
 
29
  ```bash
30
  medvqa validate_and_submit --competition=medico-2025 --task=1 --repo_id=...
 
 
31
  ```
32
  where repo_id is your HuggingFace Model repo id (like SushantGautam/XXModelCheckpoint) with the submission script as required by the competition organizers, for eg, submission_task1.py file for task 1 and submission_task2.py for task 2.
 
6
  # MedVQA
7
 
8
  A CLI tool used for multiple MedVQA competitions:
9
+ [ImageCLEFmed-MEDVQA-GI-2025](https://github.com/simula/ImageCLEFmed-MEDVQA-GI-2025), [MediaEval-Medico-2025](https://github.com/simula/MediaEval-Medico-2025), and [MediaEval-Medico-2026](https://multimediaeval.github.io/editions/2026/tasks/medico/).
10
 
11
  ## Installation
12
 
 
28
 
29
  ```bash
30
  medvqa validate_and_submit --competition=medico-2025 --task=1 --repo_id=...
31
+ # or for Medico 2026:
32
+ medvqa validate_and_submit --competition=medico-2026 --task=1 --repo_id=...
33
  ```
34
  where repo_id is your HuggingFace Model repo id (like SushantGautam/XXModelCheckpoint) with the submission script as required by the competition organizers, for eg, submission_task1.py file for task 1 and submission_task2.py for task 2.
medvqa/competitions/medico-2026/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file can be empty or contain package initialization code
medvqa/competitions/medico-2026/task_1.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Medico 2026 Subtask 1 runner.

Downloads a participant's HuggingFace submission repo, installs its
requirements, optionally patches the submission script for challenge /
full evaluation, runs it, and (when the submit flag is set) uploads the
resulting predictions to the competition space and gates the model repo
for the organizers.
"""
from gradio_client import Client, handle_file
from huggingface_hub import (HfApi, grant_access, login, snapshot_download,
                             whoami)
import argparse
import json
import os
import re
import shutil
import subprocess as sp
import sys  # needed for sys.exit in the patch-failure paths below
import time
from datetime import datetime, timezone

# Organizer accounts that are granted gated access to submitted model repos.
HF_GATE_ACCESSLIST = ["SushantGautam", "stevenah", "vlbthambawita"]

# Set by the medvqa CLI wrapper when the run should end with a real submission.
MEDVQA_SUBMIT = os.environ.get('_MEDVQA_SUBMIT_FLAG_', 'FALSE') == 'TRUE'

parser = argparse.ArgumentParser(
    description='Run Medico 2026 Task 1 (Medical Image Question Answering)')
parser.add_argument('--repo_id', type=str, required=True,
                    help='Path to the HF submission repository')
args, _ = parser.parse_known_args()

os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
submission_file = "submission_task1.py"
file_from_validation = "predictions_1.json"

# Minimum packages the participant's submission script can rely on.
min_library = ["datasets>=3.4.1", "transformers", "evaluate",
               "rouge_score", 'tqdm', "gradio_client>=1.8.0"]

print("🌟 MediaEval Medico 2026: Medical VQA for GI Imaging with Interpretability and Safety 🌟",
      "https://multimediaeval.github.io/editions/2026/tasks/medico/")
print("🔍 Subtask 1: Medical Image Question Answering in GI Endoscopy")
print(f"👀 Analyzing submission repository: {args.repo_id} 👀")

try:
    print(f"Logged in to HuggingFace as: {whoami()['name']}")
except Exception:
    print("⚠️⚠️ Not logged in to HuggingFace! Please get your login token from https://huggingface.co/settings/tokens 🌐")
    login()  # interactive token prompt

client = Client("SimulaMet/Medico-2026")
print("💓 Communicating with the Submission Server: Ping!")
result = client.predict(
    api_name="/refresh_page"
)
print(result)


hf_username = whoami()['name']
assert len(hf_username) > 0, "🚫 HuggingFace login failed for some reason"
current_timestamp = int(time.time())

# Only fetch the submission script and its requirements, not model weights.
snap_dir = snapshot_download(
    repo_id=args.repo_id, allow_patterns=[submission_file, "requirements.txt"])

if not os.path.isfile(os.path.join(snap_dir, submission_file)):
    raise FileNotFoundError(
        f"Submission file '{submission_file}' not found in the repository!")

# Drop stale predictions from an earlier run so we never submit old output.
if os.path.isfile(os.path.join(snap_dir, file_from_validation)):
    os.remove(os.path.join(snap_dir, file_from_validation))

print("📦 Making sure of the minimum requirements to run the script 📦")
sp.run(["python", "-m", "pip", "install", "-q"] + min_library, check=True)

if os.path.isfile(os.path.join(snap_dir, "requirements.txt")):
    print(
        f"📦 Installing requirements from the submission repo: {args.repo_id}/requirements.txt")
    sp.run(["python", "-m", "pip", "install", "-q", "-r",
            f"{snap_dir}/requirements.txt"], cwd=snap_dir, check=True)


if os.environ.get("_MEDVQA_CHALLENGE_EVALUATE_FLAG_", "FALSE") == "TRUE":
    # Rewrite the submission so it evaluates on the private challenge split
    # instead of the shuffled 1500-sample public validation subset.
    challenge_file = submission_file.replace(".py", "_challenge.py")
    submission_path = os.path.join(snap_dir, submission_file)
    with open(submission_path, encoding="utf-8") as f:
        code = f.read()

    patches = [
        (r'ds\s*=\s*load_dataset\(\s*["\']SimulaMet/Kvasir-VQA-x1["\']\)\["test"\]',
         'val_dataset = load_dataset("SimulaMet/Kvasir-VQA-x1-private")["test"]'),
        (r'.*ds\.shuffle\s*\(\s*seed\s*=\s*42\s*\).*', ''),
        (r'.*ds_shuffled\.select\s*\(\s*range\s*\(\s*1500\s*\)\s*\)\s*.*', '')
    ]

    failed = False
    for i, (pat, repl) in enumerate(patches, 1):
        code, n = re.subn(pat, repl, code)
        if n:
            print(f"✅ Patch {i} applied ({n} change).")
        else:
            print(f"❌ Patch {i} not found.")
            failed = True

    if failed:
        print("⚠️ Challenge patch not applied: expected line not found.")
        # was a NameError: sys was never imported in the original
        sys.exit("Please check submission file for compatibility.")

    with open(os.path.join(snap_dir, challenge_file), "w", encoding="utf-8") as f:
        f.write(code)

    submission_file = challenge_file
    print(f"🎉 Challenge file created at: {os.path.join(snap_dir, challenge_file)}")

if os.environ.get("_MEDVQA_FULL_EVALUATE_FLAG_", "FALSE") == "TRUE":
    # Patch submission file so it scores the full set, not the 1500 subset.
    challenge_file = submission_file.replace(".py", "_full_evaluate.py")
    submission_path = os.path.join(snap_dir, submission_file)
    challenge_path = os.path.join(snap_dir, challenge_file)
    with open(submission_path, "r", encoding="utf-8") as f:
        code = f.read()
    # Use regex to match the line, ignoring whitespace
    pattern = r'val_dataset\s*=\s*ds_shuffled\.select\(\s*range\(\s*1500\s*\)\s*\)'
    new_line = 'val_dataset = ds_shuffled'
    if re.search(pattern, code):
        code = re.sub(pattern, new_line, code)
        with open(challenge_path, "w", encoding="utf-8") as f:
            f.write(code)
        submission_file = challenge_file
        print(f"🔄 Full evaluation file created at: {challenge_path}")
    else:
        print("⚠️ Full evaluation patch not applied: expected line not found in submission file.")
        # was os.exit(...), which raises AttributeError: os has no 'exit'
        sys.exit(
            "Please check the submission file for compatibility with full evaluation.")

sp.run(["python", f"{snap_dir}/{submission_file}"],
       cwd=snap_dir, check=True)
print(
    f"🎉 The submission script ran successfully, the intermediate files are at {snap_dir}")

if not MEDVQA_SUBMIT:
    print("\n You can now run medvqa validate_and_submit .... command to submit the task.")
else:
    print("🚀 Preparing for submission 🚀")
    file_path_to_upload = os.path.join(
        snap_dir, f"{hf_username}-_-_-{current_timestamp}-_-_-task1.json")
    shutil.copy(os.path.join(snap_dir, file_from_validation),
                file_path_to_upload)
    # Record which repo produced these predictions inside the uploaded file.
    with open(file_path_to_upload, 'r', encoding='utf-8') as f:
        data = json.load(f)
    data['repo_id'] = args.repo_id
    with open(file_path_to_upload, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False)
    api = HfApi()
    api.update_repo_visibility(args.repo_id, private=False)  # Make public
    api.update_repo_settings(
        args.repo_id, gated='manual')  # Enable gated access
    for user in HF_GATE_ACCESSLIST:
        try:
            grant_access(args.repo_id, user)  # Grant access; best-effort
        except Exception as e:
            print(user, ":", e)
    print(
        f'''✅ {args.repo_id} model is now made public, but gated, and is shared with organizers.
You should not make the model private or remove/update it until the competition results are announced.
Feel free to re-submit the task if you change the model on the repository.
We will notify you if there are any issues with the submission.
''')

    result = client.predict(
        file=handle_file(file_path_to_upload),
        api_name="/add_submission"
    )
    print({"User": hf_username, "Task": "task1",
           "Submitted_time": str(datetime.fromtimestamp(int(current_timestamp), tz=timezone.utc)) + " UTC"
           })
    print(result)
    print("Visit this URL to see the entry: 👇")
    Client("SimulaMet/Medico-2026")


if os.environ.get("_MEDVQA_CHALLENGE_EVALUATE_FLAG_", "FALSE") == "TRUE":
    # === Post-processing predictions_1.json for organizer bookkeeping ===
    src_json = os.path.join(snap_dir, "predictions_1.json")
    if os.path.isfile(src_json):
        with open(src_json, "r", encoding="utf-8") as f:
            data = json.load(f)
        # Remove 'debug' key if present
        data.pop("debug", None)
        # Rename 'public_scores' to 'challenge_scores' if present
        if "public_scores" in data:
            data["challenge_scores"] = data.pop("public_scores")
        # Get Team_Name from submission_info; sanitize it for the filename
        team_name = data.get("submission_info", {}).get(
            "Team_Name", "unknown_team")
        team_name_safe = re.sub(r'[^a-zA-Z0-9_\-]', '_', team_name)
        out_json = os.path.join(os.getcwd(), f"task1_{team_name_safe}.json")
        with open(out_json, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        print(f"✅ Copied and processed predictions to: {out_json}")
    else:
        print("❌ predictions_1.json not found in snapshot directory!")
    # === End: Post-processing predictions_1.json ===
medvqa/competitions/medico-2026/task_2.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Medico 2026 Subtask 2 runner.

Downloads a participant's HuggingFace submission repo, validates the
`submission_task2.jsonl` predictions file (exactly 1500 JSON objects,
each carrying a `val_id`), reads `SUBMISSION_INFO` from the submission
script, and (when the submit flag is set) uploads the bundle to the
competition space and gates the model repo for the organizers.
"""
from gradio_client import Client, handle_file
from huggingface_hub import (HfApi, grant_access, login, snapshot_download,
                             whoami)
import argparse
import importlib.util
import json
import os
import re
import shutil
import subprocess as sp
import time
from datetime import datetime, timezone

# Organizer accounts that are granted gated access to submitted model repos.
HF_GATE_ACCESSLIST = ["SushantGautam", "stevenah", "vlbthambawita"]

# Set by the medvqa CLI wrapper when the run should end with a real submission.
MEDVQA_SUBMIT = os.environ.get('_MEDVQA_SUBMIT_FLAG_', 'FALSE') == 'TRUE'

parser = argparse.ArgumentParser(description='Run Medico 2026 Task 2 (Explainable and Safe Multimodal Reasoning)')
parser.add_argument('--repo_id', type=str, required=True, help='Path to the HF submission repository')
args, _ = parser.parse_known_args()

os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
submission_file = "submission_task2.py"
file_from_validation = "submission_task2.jsonl"  # one object per val_id (1500 lines)

print("🌟 MediaEval Medico 2026: Medical VQA for GI Imaging with Interpretability and Safety 🌟",
      "https://multimediaeval.github.io/editions/2026/tasks/medico/")
print("💬 Subtask 2: Explainable and Safe Multimodal Reasoning for GI VQA")

print(f"👀 Analyzing submission repository: {args.repo_id} 👀")

try:
    print(f"Logged in to HuggingFace as: {whoami()['name']}")
except Exception:
    print("⚠️⚠️ Not logged in to HuggingFace! Please get your login token from https://huggingface.co/settings/tokens 🌐")
    login()  # interactive token prompt

client = Client("SimulaMet/Medico-2026")
print("💓 Communicating with the Submission Server: Ping!")
result = client.predict(api_name="/refresh_page")
print(result)

hf_username = whoami()['name']
assert len(hf_username) > 0, "🚫 HuggingFace login failed for some reason"
current_timestamp = int(time.time())

# Download only what we need
snap_dir = snapshot_download(
    repo_id=args.repo_id,
    allow_patterns=[submission_file, file_from_validation]
)

subm_path = os.path.join(snap_dir, submission_file)
jsonl_path = os.path.join(snap_dir, file_from_validation)

if not os.path.isfile(subm_path):
    raise FileNotFoundError(f"Submission file '{submission_file}' not found in the repository!")

if not os.path.isfile(jsonl_path):
    raise FileNotFoundError(f"Required predictions file '{file_from_validation}' not found in the repository!")

# === Validation of submission_task2.jsonl ===
print(f"🧪 Validating '{file_from_validation}' formatting…")
results = []
with open(jsonl_path, "r", encoding="utf-8") as f:
    for line_num, line in enumerate(f, start=1):
        stripped = line.strip()
        if not stripped:
            # blank lines are tolerated and do not count toward the 1500
            continue
        try:
            obj = json.loads(stripped)
        except json.JSONDecodeError as e:
            # chain the original decode error so the bad offset is visible
            raise ValueError(f"Line {line_num} is not valid JSON: {e}") from e
        if "val_id" not in obj:
            raise ValueError(f"Line {line_num} missing required key 'val_id'.")
        results.append(obj)

if len(results) != 1500:
    raise ValueError(f"❌ '{file_from_validation}' must contain exactly 1500 valid JSON objects. Found: {len(results)}")
print(f"✅ JSONL formatting OK (exactly {len(results)} lines).")

# === Load SUBMISSION_INFO dict from submission_task2.py ===
print("📑 Loading SUBMISSION_INFO from submission_task2.py …")
# NOTE(security): exec_module runs arbitrary code from the participant's
# repo in this process — acceptable here because the same script is executed
# anyway during evaluation, but do not reuse this loader elsewhere.
spec = importlib.util.spec_from_file_location("subm2", subm_path)
subm_mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(subm_mod)
if not hasattr(subm_mod, "SUBMISSION_INFO") or not isinstance(subm_mod.SUBMISSION_INFO, dict):
    raise ValueError("submission_task2.py must contain a dict variable named SUBMISSION_INFO")

# Bundle everything the server needs into a single upload payload.
submission_data = {
    "submission_info": subm_mod.SUBMISSION_INFO,
    "public_scores": {"note": "will be rated by experts later"},
    "predictions": results,
    "repo_id": args.repo_id,
}


print(f"🎉 Validation checks complete. Snapshot dir: {snap_dir}")

if not MEDVQA_SUBMIT:
    print("\nYou can now run `medvqa validate_and_submit ...` to submit Subtask 2.")
else:
    print("🚀 Preparing for submission 🚀")
    file_path_to_upload = os.path.join(
        snap_dir, f"{hf_username}-_-_-{current_timestamp}-_-_-task2.json"
    )
    with open(file_path_to_upload, "w", encoding="utf-8") as f:
        json.dump(submission_data, f, ensure_ascii=False, indent=2)

    # Make the repo public (but gated) and grant access to organizers
    api = HfApi()
    api.update_repo_visibility(args.repo_id, private=False)
    api.update_repo_settings(args.repo_id, gated='manual')
    for user in HF_GATE_ACCESSLIST:
        try:
            grant_access(args.repo_id, user)  # best-effort per organizer
        except Exception as e:
            print(user, ":", e)

    print(
        f'''✅ {args.repo_id} model is now made public, but gated, and is shared with organizers.
You should not make the model private or remove/update it until the competition results are announced.
Feel free to re-submit Subtask 2 if you update the repository file(s).
We will notify you if there are any issues with the submission.
''')

    result = client.predict(
        file=handle_file(file_path_to_upload),
        api_name="/add_submission"
    )
    print({
        "User": hf_username,
        "Task": "task2",
        "Submitted_time": str(datetime.fromtimestamp(int(current_timestamp), tz=timezone.utc)) + " UTC"
    })
    print(result)
    print("Visit this URL to see the entry: 👇")
    Client("SimulaMet/Medico-2026")


# Optional challenge-evaluate hook intentionally omitted for Subtask 2 (no public scores).