jazeelmohd commited on
Commit
f0cf9fd
·
verified ·
1 Parent(s): d198250

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -33,3 +33,27 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ MediX-R1-8B-Q2_K.gguf filter=lfs diff=lfs merge=lfs -text
37
+ MediX-R1-8B-Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text
38
+ MediX-R1-8B-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
39
+ MediX-R1-8B-Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text
40
+ MediX-R1-8B-Q4_0.gguf filter=lfs diff=lfs merge=lfs -text
41
+ MediX-R1-8B-Q4_1.gguf filter=lfs diff=lfs merge=lfs -text
42
+ MediX-R1-8B-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
43
+ MediX-R1-8B-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
44
+ MediX-R1-8B-Q5_0.gguf filter=lfs diff=lfs merge=lfs -text
45
+ MediX-R1-8B-Q5_1.gguf filter=lfs diff=lfs merge=lfs -text
46
+ MediX-R1-8B-Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
47
+ MediX-R1-8B-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
48
+ MediX-R1-8B-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
49
+ MediX-R1-8B-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
50
+ MediX-R1-8B-IQ3_S.gguf filter=lfs diff=lfs merge=lfs -text
51
+ MediX-R1-8B-IQ3_M.gguf filter=lfs diff=lfs merge=lfs -text
52
+ MediX-R1-8B-IQ4_XS.gguf filter=lfs diff=lfs merge=lfs -text
53
+ MediX-R1-8B-IQ4_NL.gguf filter=lfs diff=lfs merge=lfs -text
54
+ mmproj-MediX-R1-8b-F16.gguf filter=lfs diff=lfs merge=lfs -text
55
+ assets/medix-r1_arch.png filter=lfs diff=lfs merge=lfs -text
56
+ assets/reward_design_graph.png filter=lfs diff=lfs merge=lfs -text
57
+ assets/microscopy_qualitative.png filter=lfs diff=lfs merge=lfs -text
58
+ assets/xray_qualitative.png filter=lfs diff=lfs merge=lfs -text
59
+ MediX-R1-8B-F16.gguf filter=lfs diff=lfs merge=lfs -text
MediX-R1-8B-F16.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71bfba6d155e3f7a9eb90ea04900389b6b274da464c59143b46769bcaa2cb6c5
3
+ size 16388045088
MediX-R1-8B-IQ3_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7157ae1d306b9a18d9bc24279d85645c9c671668176efeb6b4dfe1f8080a9113
3
+ size 3896621088
MediX-R1-8B-IQ3_S.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:756eae053820440015fcbae6ca01c21d3edd95e8f90db02d41177918f00dc2c8
3
+ size 3789666336
MediX-R1-8B-IQ4_NL.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a13c9fa58b68b7999a1f042a872878505337f724e5bfb64abb23aee21ad01569
3
+ size 4818790432
MediX-R1-8B-IQ4_XS.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66aff9e4de2b69f13c6afec950c035a10232aaa7759386a81c055768179f2afe
3
+ size 4593297440
MediX-R1-8B-Q2_K.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0e811b7a4b0315e6eeeeb81bb413c204ea143e934058157854e8313fe03e1b2
3
+ size 3281733664
MediX-R1-8B-Q3_K_L.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e1ba52e4aaa6e58a628e1c60e8c69d91a9defbc68c030316b54df75cead6393
3
+ size 4431394848
MediX-R1-8B-Q3_K_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:256937adf103049202d2d5c86d90c74dfcdbfc307c536db61573677c401aa74c
3
+ size 4124162080
MediX-R1-8B-Q3_K_S.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:666c38e6ffdbf29dc0b290696b87d8455487420fc6895cd662e27b19f288b18b
3
+ size 3769612320
MediX-R1-8B-Q4_0.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab882436f70e8365178e817b1dc6a79426137ae14f0370bc214c240080065670
3
+ size 4774750240
MediX-R1-8B-Q4_1.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc54373889d37ac15b5fa3ffe7bc1681dc0db804cc7a737eead7ec66bbc5720
3
+ size 5247756320
MediX-R1-8B-Q4_K_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6728943aa60f090e6396159e5064d3290d03c05060e3129b9271f474f0d7e88
3
+ size 5027784736
MediX-R1-8B-Q4_K_S.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5df976fe40738153d00173f1998e68369bba25c2c0f2754c48028c722bfe1641
3
+ size 4802013216
MediX-R1-8B-Q5_0.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d8930e32fdc54b1f42125cd68a8f932c0beae81b3d7942651a049b3544a7d6c
3
+ size 5720762400
MediX-R1-8B-Q5_1.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62b5b20bb7436d64028f17eed91c0d7e072a73aaef988644033f6d38fb0254ce
3
+ size 6193768480
MediX-R1-8B-Q5_K_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bf6849c38f39fdd341954aab94c2f415dea063df4ab56ac65c2193afa2a6afc
3
+ size 5851113504
MediX-R1-8B-Q5_K_S.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8c8bf5edfa53c981a99d6374204cea396a14980453a8d2acd11449ee3392a9a
3
+ size 5720762400
MediX-R1-8B-Q6_K.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4096c7bfa2ce29876be5765e4552a4de16d87afaa93ff41f9a7e5103f1b4241
3
+ size 6725900320
MediX-R1-8B-Q8_0.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2245342474d96a124e07d6590492a3f946712745331a9174bcde526d2c094485
3
+ size 8709519392
README.md ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc-by-nc-sa-4.0
3
+ language:
4
+ - en
5
+ tags:
6
+ - medical
7
+ - reinforcement-learning
8
+ - multimodal
9
+ - vision-language
10
+ - qwen3-vl
11
+ pipeline_tag: image-text-to-text
12
+ library_name: transformers
13
+ ---
14
+ # MediX-R1: Open-Ended Medical Reinforcement Learning
15
+
16
+ <p align="center">
17
+ <img src="assets/logo_white_no_bg.png" alt="MediX-R1" width="200">
18
+ </p>
19
+
20
+ <p align="center">
21
+ <img src="https://i.imgur.com/waxVImv.png" alt="MediX-R1">
22
+ </p>
23
+
24
+ #### [Sahal Shaji Mullappilly](https://scholar.google.com/citations?user=LJWxVpUAAAAJ&hl=en)\*, [Mohammed Irfan K](https://scholar.google.com/citations?user=GJp0keYAAAAJ&hl=en)\*, [Omair Mohamed](https://scholar.google.com), [Mohamed Zidan](https://scholar.google.com), [Fahad Khan](https://sites.google.com/view/fahadkhans/home), [Salman Khan](https://salman-h-khan.github.io/), [Rao Muhammad Anwer](https://scholar.google.com/citations?hl=en&authuser=1&user=_KlvMVoAAAAJ), and [Hisham Cholakkal](https://scholar.google.com/citations?hl=en&user=bZ3YBRcAAAAJ)
25
+
26
+ \**Equally contributing first authors*
27
+
28
+ #### **Mohamed Bin Zayed University of Artificial Intelligence (MBZUAI), UAE**
29
+
30
+ [![Website](https://img.shields.io/badge/Project-Website-87CEEB)](https://medix.cvmbzuai.com)
31
+ [![Paper](https://img.shields.io/badge/arXiv-Paper-red.svg)](https://arxiv.org/pdf/2602.23363)
32
+ [![HuggingFace](https://img.shields.io/badge/HuggingFace-Page-F9D371)](https://huggingface.co/collections/MBZUAI/medix-r1)
33
+ [![Leaderboard](https://img.shields.io/badge/MediX-Leaderboard-green)](https://medix.cvmbzuai.com/leaderboard)
34
+
35
+ ---
36
+
37
+ ## Overview
38
+
39
+ MediX-R1 is an open-ended Reinforcement Learning (RL) framework for medical multimodal large language models (MLLMs) that enables clinically grounded, free-form answers beyond multiple-choice formats. MediX-R1 fine-tunes vision-language backbones with Group-Based RL and a composite reward tailored for medical reasoning: an LLM-based accuracy reward, a medical embedding-based semantic reward, and lightweight format and modality rewards that enforce interpretable reasoning.
40
+
41
+ Despite using only ~50K instruction examples, MediX-R1 achieves excellent results across standard medical LLM and VLM benchmarks, outperforming strong open-source baselines.
42
+
43
+ **Highlights:**
44
+ - Our **8B** model achieves an overall average of **68.8%**, outperforming the much larger 27B MedGemma (68.4%).
45
+ - Our **30B** model achieves the best overall score of **73.6%**, demonstrating the effectiveness of our composite reward design.
46
+
47
+ ---
48
+
49
+ ## Contributions
50
+
51
+ - We introduce an **open-ended RL framework** for medical MLLMs that produces clinically grounded, free-form answers beyond MCQ formats.
52
+ - We design a **composite reward** combining LLM-based accuracy, embedding-based semantic similarity, format adherence, and modality recognition, providing stable and informative feedback where traditional verifiable or MCQ-only rewards fall short.
53
+ - We propose a **unified evaluation framework** for both text-only and image+text tasks using a Reference-based LLM-as-judge, capturing semantic correctness, reasoning, and contextual alignment.
54
+ - Despite using only **~50K** instruction examples, MediX-R1 achieves state-of-the-art results across diverse medical LLM and VLM benchmarks, with particularly large gains on open-ended clinical tasks.
55
+
56
+ ---
57
+
58
+ ## Architecture
59
+
60
+ <p align="center">
61
+ <img src="assets/medix-r1_arch.png" alt="MediX-R1 Architecture" width="100%">
62
+ </p>
63
+
64
+ ---
65
+
66
+ ## Composite Reward Design
67
+
68
+ MediX-R1 uses a multi-signal reward combining LLM-based accuracy, embedding-based semantic similarity, format adherence, and modality recognition. This stabilizes training and prevents reward hacking compared to single-signal approaches.
69
+
70
+ <p align="center">
71
+ <img src="assets/reward_design_graph.png" alt="Reward Design" width="60%">
72
+ </p>
73
+
74
+ ---
75
+
76
+ ## Qualitative Examples
77
+
78
+ <p align="center">
79
+ <img src="assets/microscopy_qualitative.png" alt="Microscopy Example" width="85%">
80
+ <img src="assets/xray_qualitative.png" alt="X-ray Example" width="85%">
81
+ </p>
82
+
83
+ ---
84
+
85
+ ## Training
86
+
87
+ We provide training configs for all model sizes using GRPO and DAPO algorithms. The training pipeline uses a vLLM-based reward server for LLM-as-judge scoring during RL training.
88
+
89
+ ```bash
90
+ cd training
91
+ pip install -e .
92
+ bash vllm_serve.sh # Step 1: Start the reward server
93
+ bash run_train.sh # Step 2: Launch RL training
94
+ bash merge_model.sh # Step 3: Merge FSDP checkpoints
95
+ ```
96
+
97
+ Training data: [MBZUAI/medix-rl-data](https://huggingface.co/datasets/MBZUAI/medix-rl-data) (~51K train, ~2.5K test samples)
98
+
99
+ See [`training/README.md`](training/README.md) for detailed setup, configuration options, and per-model scripts.
100
+
101
+ ## Evaluation
102
+
103
+ We propose a unified evaluation framework for both text-only (LLM) and image+text (VLM) tasks using a Reference-based LLM-as-judge across 17 medical benchmarks.
104
+
105
+ ```bash
106
+ cd eval
107
+ pip install uv && uv pip install -r requirements.txt
108
+ bash eval.sh # Run all phases: generate, evaluate, score
109
+ ```
110
+
111
+ Supports self-hosted judge models via vLLM or [OpenRouter](https://openrouter.ai/) as a remote alternative. Results can be submitted to the [MediX Leaderboard](https://medix.cvmbzuai.com/leaderboard).
112
+
113
+ See [`eval/README.md`](eval/README.md) for task selection, CLI reference, and MMMU-Medical evaluation.
114
+
115
+ ---
116
+
117
+ ## Model Zoo
118
+
119
+ | Model | HuggingFace |
120
+ |-------|-------------|
121
+ | MediX-R1-2B | [MBZUAI/MediX-R1-2B](https://huggingface.co/MBZUAI/MediX-R1-2B) |
122
+ | MediX-R1-8B | [MBZUAI/MediX-R1-8B](https://huggingface.co/MBZUAI/MediX-R1-8B) |
123
+ | MediX-R1-30B | [MBZUAI/MediX-R1-30B](https://huggingface.co/MBZUAI/MediX-R1-30B) |
124
+
125
+ ---
126
+
127
+ ## Citation
128
+
129
+ If you use MediX-R1 in your research, please cite our work as follows:
130
+
131
+ ```bibtex
132
+ @misc{mullappilly2026medixr1openendedmedical,
133
+ title={MediX-R1: Open Ended Medical Reinforcement Learning},
134
+ author={Sahal Shaji Mullappilly and Mohammed Irfan Kurpath and Omair Mohamed and Mohamed Zidan and Fahad Khan and Salman Khan and Rao Anwer and Hisham Cholakkal},
135
+ year={2026},
136
+ eprint={2602.23363},
137
+ archivePrefix={arXiv},
138
+ primaryClass={cs.CV},
139
+ url={https://arxiv.org/abs/2602.23363},
140
+ }
141
+ ```
142
+
143
+ ---
144
+
145
+ ## License
146
+
147
+ This project is released for **research purposes only** under the [*CC-BY-NC-SA 4.0*](https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode.en) license. It is not intended for clinical or commercial use.
148
+
149
+ Users are urged to employ MediX-R1 responsibly, especially when applying its outputs in real-world medical scenarios. Always verify the model's outputs with qualified healthcare professionals, and do not rely on the model for medical diagnoses or treatment decisions.
150
+
151
+ ---
152
+
153
+ ## Acknowledgements
154
+
155
+ We are thankful to [EasyR1](https://github.com/hiyouga/EasyR1) (a fork of [veRL](https://github.com/volcengine/verl)) for their open-source RL training framework.
156
+
157
+ This work was partially supported by the *NVIDIA Academic Grant 2025* and the *MBZUAI-IITD* Research Collaboration Seed Grant.
158
+
159
+ We are grateful to [MBZUAI](https://mbzuai.ac.ae/) for compute and support.
assets/logo_black_no_bg.png ADDED
assets/logo_white_no_bg.png ADDED
assets/medix-r1_arch.png ADDED

Git LFS Details

  • SHA256: c0f3d730d9ba0edfe5d28ece7aaf9660b4645d8bf7d616aced497ba9e3afc2d7
  • Pointer size: 131 Bytes
  • Size of remote file: 407 kB
assets/microscopy_qualitative.png ADDED

Git LFS Details

  • SHA256: 76debe9209a726d3ea4d2ae7c14e478474d3fea2e568ec03724f0935f064b2ff
  • Pointer size: 132 Bytes
  • Size of remote file: 2.34 MB
assets/reward_design_graph.png ADDED

Git LFS Details

  • SHA256: 90b55ae964d8a3020b77c0816841046a0753560d72cfa1a626f30484cdb056d7
  • Pointer size: 131 Bytes
  • Size of remote file: 104 kB
assets/xray_qualitative.png ADDED

Git LFS Details

  • SHA256: 83ffba4616b216208a5ff06d64a62a4a57c8acfda55f1e3337416bc90654d1df
  • Pointer size: 131 Bytes
  • Size of remote file: 692 kB
mmproj-MediX-R1-8b-F16.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d2b9604c5c483e7785003f630f99059f15ecd3db2592656296ed6e6f0bd1146
3
+ size 1159030080