copilot-swe-agent[bot] SushantGautam commited on
Commit
e7a84e7
Β·
1 Parent(s): 5278ffc

Add Medico 2026 competition structure and update README

Browse files

Co-authored-by: SushantGautam <16721983+SushantGautam@users.noreply.github.com>

README.md CHANGED
@@ -6,7 +6,7 @@ app_file: gradio_launcher.py
6
  # MedVQA
7
 
8
  A CLI tool used for multiple MedVQA competitions:
9
- [ImageCLEFmed-MEDVQA-GI-2025] (https://github.com/simula/ImageCLEFmed-MEDVQA-GI-2025) and [MediaEval-Medico-2025](https://github.com/simula/MediaEval-Medico-2025).
10
 
11
  ## Installation
12
 
@@ -28,5 +28,7 @@ Check respective competition repo for detailed submission instructions. For exam
28
 
29
  ```bash
30
  medvqa validate_and_submit --competition=medico-2025 --task=1 --repo_id=...
 
 
31
  ```
32
  where repo_id is your HuggingFace Model repo id (like SushantGautam/XXModelCheckpoint) with the submission script as required by the competition organizers, for eg, submission_task1.py file for task 1 and submission_task2.py for task 2.
 
6
  # MedVQA
7
 
8
  A CLI tool used for multiple MedVQA competitions:
9
+ [ImageCLEFmed-MEDVQA-GI-2025](https://github.com/simula/ImageCLEFmed-MEDVQA-GI-2025), [MediaEval-Medico-2025](https://github.com/simula/MediaEval-Medico-2025), and [MediaEval-Medico-2026](https://multimediaeval.github.io/editions/2026/tasks/medico/).
10
 
11
  ## Installation
12
 
 
28
 
29
  ```bash
30
  medvqa validate_and_submit --competition=medico-2025 --task=1 --repo_id=...
31
+ # or for Medico 2026:
32
+ medvqa validate_and_submit --competition=medico-2026 --task=1 --repo_id=...
33
  ```
34
  where repo_id is your HuggingFace Model repo id (like SushantGautam/XXModelCheckpoint) with the submission script as required by the competition organizers, for eg, submission_task1.py file for task 1 and submission_task2.py for task 2.
medvqa/competitions/medico-2026/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file can be empty or contain package initialization code
medvqa/competitions/medico-2026/task_1.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Medico 2026 Subtask 1 runner.

Downloads a participant's HuggingFace submission repo, installs its
requirements, optionally patches the submission script for challenge /
full evaluation, runs it, and (when the submit flag is set) uploads the
resulting predictions to the competition space and gates the model repo
for the organizers.
"""
from gradio_client import Client, handle_file
from huggingface_hub import (HfApi, grant_access, login, snapshot_download,
                             whoami)
import argparse
import json
import os
import re
import shutil
import subprocess as sp
import sys  # needed for sys.exit in the patch-failure paths below
import time
from datetime import datetime, timezone

# Organizer accounts that are granted gated access to submitted model repos.
HF_GATE_ACCESSLIST = ["SushantGautam", "stevenah", "vlbthambawita"]

# Set by the medvqa CLI wrapper when the run should end with a real submission.
MEDVQA_SUBMIT = os.environ.get('_MEDVQA_SUBMIT_FLAG_', 'FALSE') == 'TRUE'

parser = argparse.ArgumentParser(
    description='Run Medico 2026 Task 1 (Medical Image Question Answering)')
parser.add_argument('--repo_id', type=str, required=True,
                    help='Path to the HF submission repository')
args, _ = parser.parse_known_args()

os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
submission_file = "submission_task1.py"
file_from_validation = "predictions_1.json"

# Minimum packages the participant's submission script can rely on.
min_library = ["datasets>=3.4.1", "transformers", "evaluate",
               "rouge_score", 'tqdm', "gradio_client>=1.8.0"]

print("🌟 MediaEval Medico 2026: Medical VQA for GI Imaging with Interpretability and Safety 🌟",
      "https://multimediaeval.github.io/editions/2026/tasks/medico/")
print("🔍 Subtask 1: Medical Image Question Answering in GI Endoscopy")
print(f"👀 Analyzing submission repository: {args.repo_id} 👀")

try:
    print(f"Logged in to HuggingFace as: {whoami()['name']}")
except Exception:
    print("⚠️⚠️ Not logged in to HuggingFace! Please get your login token from https://huggingface.co/settings/tokens 🌐")
    login()  # interactive token prompt

client = Client("SimulaMet/Medico-2026")
print("💓 Communicating with the Submission Server: Ping!")
result = client.predict(
    api_name="/refresh_page"
)
print(result)


hf_username = whoami()['name']
assert len(hf_username) > 0, "🚫 HuggingFace login failed for some reason"
current_timestamp = int(time.time())

# Only fetch the submission script and its requirements, not model weights.
snap_dir = snapshot_download(
    repo_id=args.repo_id, allow_patterns=[submission_file, "requirements.txt"])

if not os.path.isfile(os.path.join(snap_dir, submission_file)):
    raise FileNotFoundError(
        f"Submission file '{submission_file}' not found in the repository!")

# Drop stale predictions from an earlier run so we never submit old output.
if os.path.isfile(os.path.join(snap_dir, file_from_validation)):
    os.remove(os.path.join(snap_dir, file_from_validation))

print("📦 Making sure of the minimum requirements to run the script 📦")
sp.run(["python", "-m", "pip", "install", "-q"] + min_library, check=True)

if os.path.isfile(os.path.join(snap_dir, "requirements.txt")):
    print(
        f"📦 Installing requirements from the submission repo: {args.repo_id}/requirements.txt")
    sp.run(["python", "-m", "pip", "install", "-q", "-r",
            f"{snap_dir}/requirements.txt"], cwd=snap_dir, check=True)


if os.environ.get("_MEDVQA_CHALLENGE_EVALUATE_FLAG_", "FALSE") == "TRUE":
    # Rewrite the submission so it evaluates on the private challenge split
    # instead of the shuffled 1500-sample public validation subset.
    challenge_file = submission_file.replace(".py", "_challenge.py")
    submission_path = os.path.join(snap_dir, submission_file)
    with open(submission_path, encoding="utf-8") as f:
        code = f.read()

    patches = [
        (r'ds\s*=\s*load_dataset\(\s*["\']SimulaMet/Kvasir-VQA-x1["\']\)\["test"\]',
         'val_dataset = load_dataset("SimulaMet/Kvasir-VQA-x1-private")["test"]'),
        (r'.*ds\.shuffle\s*\(\s*seed\s*=\s*42\s*\).*', ''),
        (r'.*ds_shuffled\.select\s*\(\s*range\s*\(\s*1500\s*\)\s*\)\s*.*', '')
    ]

    failed = False
    for i, (pat, repl) in enumerate(patches, 1):
        code, n = re.subn(pat, repl, code)
        if n:
            print(f"✅ Patch {i} applied ({n} change).")
        else:
            print(f"❌ Patch {i} not found.")
            failed = True

    if failed:
        print("⚠️ Challenge patch not applied: expected line not found.")
        # was a NameError: sys was never imported in the original
        sys.exit("Please check submission file for compatibility.")

    with open(os.path.join(snap_dir, challenge_file), "w", encoding="utf-8") as f:
        f.write(code)

    submission_file = challenge_file
    print(f"🎉 Challenge file created at: {os.path.join(snap_dir, challenge_file)}")

if os.environ.get("_MEDVQA_FULL_EVALUATE_FLAG_", "FALSE") == "TRUE":
    # Patch submission file so it scores the full set, not the 1500 subset.
    challenge_file = submission_file.replace(".py", "_full_evaluate.py")
    submission_path = os.path.join(snap_dir, submission_file)
    challenge_path = os.path.join(snap_dir, challenge_file)
    with open(submission_path, "r", encoding="utf-8") as f:
        code = f.read()
    # Use regex to match the line, ignoring whitespace
    pattern = r'val_dataset\s*=\s*ds_shuffled\.select\(\s*range\(\s*1500\s*\)\s*\)'
    new_line = 'val_dataset = ds_shuffled'
    if re.search(pattern, code):
        code = re.sub(pattern, new_line, code)
        with open(challenge_path, "w", encoding="utf-8") as f:
            f.write(code)
        submission_file = challenge_file
        print(f"🔄 Full evaluation file created at: {challenge_path}")
    else:
        print("⚠️ Full evaluation patch not applied: expected line not found in submission file.")
        # was os.exit(...), which raises AttributeError: os has no 'exit'
        sys.exit(
            "Please check the submission file for compatibility with full evaluation.")

sp.run(["python", f"{snap_dir}/{submission_file}"],
       cwd=snap_dir, check=True)
print(
    f"🎉 The submission script ran successfully, the intermediate files are at {snap_dir}")

if not MEDVQA_SUBMIT:
    print("\n You can now run medvqa validate_and_submit .... command to submit the task.")
else:
    print("🚀 Preparing for submission 🚀")
    file_path_to_upload = os.path.join(
        snap_dir, f"{hf_username}-_-_-{current_timestamp}-_-_-task1.json")
    shutil.copy(os.path.join(snap_dir, file_from_validation),
                file_path_to_upload)
    # Record which repo produced these predictions inside the uploaded file.
    with open(file_path_to_upload, 'r', encoding='utf-8') as f:
        data = json.load(f)
    data['repo_id'] = args.repo_id
    with open(file_path_to_upload, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False)
    api = HfApi()
    api.update_repo_visibility(args.repo_id, private=False)  # Make public
    api.update_repo_settings(
        args.repo_id, gated='manual')  # Enable gated access
    for user in HF_GATE_ACCESSLIST:
        try:
            grant_access(args.repo_id, user)  # Grant access; best-effort
        except Exception as e:
            print(user, ":", e)
    print(
        f'''✅ {args.repo_id} model is now made public, but gated, and is shared with organizers.
You should not make the model private or remove/update it until the competition results are announced.
Feel free to re-submit the task if you change the model on the repository.
We will notify you if there are any issues with the submission.
''')

    result = client.predict(
        file=handle_file(file_path_to_upload),
        api_name="/add_submission"
    )
    print({"User": hf_username, "Task": "task1",
           "Submitted_time": str(datetime.fromtimestamp(int(current_timestamp), tz=timezone.utc)) + " UTC"
           })
    print(result)
    print("Visit this URL to see the entry: 👇")
    Client("SimulaMet/Medico-2026")


if os.environ.get("_MEDVQA_CHALLENGE_EVALUATE_FLAG_", "FALSE") == "TRUE":
    # === Post-processing predictions_1.json for organizer bookkeeping ===
    src_json = os.path.join(snap_dir, "predictions_1.json")
    if os.path.isfile(src_json):
        with open(src_json, "r", encoding="utf-8") as f:
            data = json.load(f)
        # Remove 'debug' key if present
        data.pop("debug", None)
        # Rename 'public_scores' to 'challenge_scores' if present
        if "public_scores" in data:
            data["challenge_scores"] = data.pop("public_scores")
        # Get Team_Name from submission_info; sanitize it for the filename
        team_name = data.get("submission_info", {}).get(
            "Team_Name", "unknown_team")
        team_name_safe = re.sub(r'[^a-zA-Z0-9_\-]', '_', team_name)
        out_json = os.path.join(os.getcwd(), f"task1_{team_name_safe}.json")
        with open(out_json, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        print(f"✅ Copied and processed predictions to: {out_json}")
    else:
        print("❌ predictions_1.json not found in snapshot directory!")
    # === End: Post-processing predictions_1.json ===
medvqa/competitions/medico-2026/task_2.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Medico 2026 Subtask 2 runner.

Downloads a participant's HuggingFace submission repo, validates the
`submission_task2.jsonl` predictions file (exactly 1500 JSON objects,
each carrying a `val_id`), reads `SUBMISSION_INFO` from the submission
script, and (when the submit flag is set) uploads the bundle to the
competition space and gates the model repo for the organizers.
"""
from gradio_client import Client, handle_file
from huggingface_hub import (HfApi, grant_access, login, snapshot_download,
                             whoami)
import argparse
import importlib.util
import json
import os
import re
import shutil
import subprocess as sp
import time
from datetime import datetime, timezone

# Organizer accounts that are granted gated access to submitted model repos.
HF_GATE_ACCESSLIST = ["SushantGautam", "stevenah", "vlbthambawita"]

# Set by the medvqa CLI wrapper when the run should end with a real submission.
MEDVQA_SUBMIT = os.environ.get('_MEDVQA_SUBMIT_FLAG_', 'FALSE') == 'TRUE'

parser = argparse.ArgumentParser(description='Run Medico 2026 Task 2 (Explainable and Safe Multimodal Reasoning)')
parser.add_argument('--repo_id', type=str, required=True, help='Path to the HF submission repository')
args, _ = parser.parse_known_args()

os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
submission_file = "submission_task2.py"
file_from_validation = "submission_task2.jsonl"  # one object per val_id (1500 lines)

print("🌟 MediaEval Medico 2026: Medical VQA for GI Imaging with Interpretability and Safety 🌟",
      "https://multimediaeval.github.io/editions/2026/tasks/medico/")
print("💬 Subtask 2: Explainable and Safe Multimodal Reasoning for GI VQA")

print(f"👀 Analyzing submission repository: {args.repo_id} 👀")

try:
    print(f"Logged in to HuggingFace as: {whoami()['name']}")
except Exception:
    print("⚠️⚠️ Not logged in to HuggingFace! Please get your login token from https://huggingface.co/settings/tokens 🌐")
    login()  # interactive token prompt

client = Client("SimulaMet/Medico-2026")
print("💓 Communicating with the Submission Server: Ping!")
result = client.predict(api_name="/refresh_page")
print(result)

hf_username = whoami()['name']
assert len(hf_username) > 0, "🚫 HuggingFace login failed for some reason"
current_timestamp = int(time.time())

# Download only what we need
snap_dir = snapshot_download(
    repo_id=args.repo_id,
    allow_patterns=[submission_file, file_from_validation]
)

subm_path = os.path.join(snap_dir, submission_file)
jsonl_path = os.path.join(snap_dir, file_from_validation)

if not os.path.isfile(subm_path):
    raise FileNotFoundError(f"Submission file '{submission_file}' not found in the repository!")

if not os.path.isfile(jsonl_path):
    raise FileNotFoundError(f"Required predictions file '{file_from_validation}' not found in the repository!")

# === Validation of submission_task2.jsonl ===
print(f"🧪 Validating '{file_from_validation}' formatting…")
results = []
with open(jsonl_path, "r", encoding="utf-8") as f:
    for line_num, line in enumerate(f, start=1):
        stripped = line.strip()
        if not stripped:
            # blank lines are tolerated and do not count toward the 1500
            continue
        try:
            obj = json.loads(stripped)
        except json.JSONDecodeError as e:
            # chain the original decode error so the bad offset is visible
            raise ValueError(f"Line {line_num} is not valid JSON: {e}") from e
        if "val_id" not in obj:
            raise ValueError(f"Line {line_num} missing required key 'val_id'.")
        results.append(obj)

if len(results) != 1500:
    raise ValueError(f"❌ '{file_from_validation}' must contain exactly 1500 valid JSON objects. Found: {len(results)}")
print(f"✅ JSONL formatting OK (exactly {len(results)} lines).")

# === Load SUBMISSION_INFO dict from submission_task2.py ===
print("📑 Loading SUBMISSION_INFO from submission_task2.py …")
# NOTE(security): exec_module runs arbitrary code from the participant's
# repo in this process — acceptable here because the same script is executed
# anyway during evaluation, but do not reuse this loader elsewhere.
spec = importlib.util.spec_from_file_location("subm2", subm_path)
subm_mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(subm_mod)
if not hasattr(subm_mod, "SUBMISSION_INFO") or not isinstance(subm_mod.SUBMISSION_INFO, dict):
    raise ValueError("submission_task2.py must contain a dict variable named SUBMISSION_INFO")

# Bundle everything the server needs into a single upload payload.
submission_data = {
    "submission_info": subm_mod.SUBMISSION_INFO,
    "public_scores": {"note": "will be rated by experts later"},
    "predictions": results,
    "repo_id": args.repo_id,
}


print(f"🎉 Validation checks complete. Snapshot dir: {snap_dir}")

if not MEDVQA_SUBMIT:
    print("\nYou can now run `medvqa validate_and_submit ...` to submit Subtask 2.")
else:
    print("🚀 Preparing for submission 🚀")
    file_path_to_upload = os.path.join(
        snap_dir, f"{hf_username}-_-_-{current_timestamp}-_-_-task2.json"
    )
    with open(file_path_to_upload, "w", encoding="utf-8") as f:
        json.dump(submission_data, f, ensure_ascii=False, indent=2)

    # Make the repo public (but gated) and grant access to organizers
    api = HfApi()
    api.update_repo_visibility(args.repo_id, private=False)
    api.update_repo_settings(args.repo_id, gated='manual')
    for user in HF_GATE_ACCESSLIST:
        try:
            grant_access(args.repo_id, user)  # best-effort per organizer
        except Exception as e:
            print(user, ":", e)

    print(
        f'''✅ {args.repo_id} model is now made public, but gated, and is shared with organizers.
You should not make the model private or remove/update it until the competition results are announced.
Feel free to re-submit Subtask 2 if you update the repository file(s).
We will notify you if there are any issues with the submission.
''')

    result = client.predict(
        file=handle_file(file_path_to_upload),
        api_name="/add_submission"
    )
    print({
        "User": hf_username,
        "Task": "task2",
        "Submitted_time": str(datetime.fromtimestamp(int(current_timestamp), tz=timezone.utc)) + " UTC"
    })
    print(result)
    print("Visit this URL to see the entry: 👇")
    Client("SimulaMet/Medico-2026")


# Optional challenge-evaluate hook intentionally omitted for Subtask 2 (no public scores).