Add files using upload-large-folder tool
Browse files- .gitattributes +24 -0
- MediX-R1-8B-F16.gguf +3 -0
- MediX-R1-8B-IQ3_M.gguf +3 -0
- MediX-R1-8B-IQ3_S.gguf +3 -0
- MediX-R1-8B-IQ4_NL.gguf +3 -0
- MediX-R1-8B-IQ4_XS.gguf +3 -0
- MediX-R1-8B-Q2_K.gguf +3 -0
- MediX-R1-8B-Q3_K_L.gguf +3 -0
- MediX-R1-8B-Q3_K_M.gguf +3 -0
- MediX-R1-8B-Q3_K_S.gguf +3 -0
- MediX-R1-8B-Q4_0.gguf +3 -0
- MediX-R1-8B-Q4_1.gguf +3 -0
- MediX-R1-8B-Q4_K_M.gguf +3 -0
- MediX-R1-8B-Q4_K_S.gguf +3 -0
- MediX-R1-8B-Q5_0.gguf +3 -0
- MediX-R1-8B-Q5_1.gguf +3 -0
- MediX-R1-8B-Q5_K_M.gguf +3 -0
- MediX-R1-8B-Q5_K_S.gguf +3 -0
- MediX-R1-8B-Q6_K.gguf +3 -0
- MediX-R1-8B-Q8_0.gguf +3 -0
- README.md +159 -0
- assets/logo_black_no_bg.png +0 -0
- assets/logo_white_no_bg.png +0 -0
- assets/medix-r1_arch.png +3 -0
- assets/microscopy_qualitative.png +3 -0
- assets/reward_design_graph.png +3 -0
- assets/xray_qualitative.png +3 -0
- mmproj-MediX-R1-8b-F16.gguf +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,27 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
MediX-R1-8B-Q2_K.gguf filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
MediX-R1-8B-Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
MediX-R1-8B-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
MediX-R1-8B-Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
MediX-R1-8B-Q4_0.gguf filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
MediX-R1-8B-Q4_1.gguf filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
MediX-R1-8B-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
MediX-R1-8B-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
MediX-R1-8B-Q5_0.gguf filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
MediX-R1-8B-Q5_1.gguf filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
MediX-R1-8B-Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
MediX-R1-8B-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
MediX-R1-8B-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
MediX-R1-8B-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
MediX-R1-8B-IQ3_S.gguf filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
MediX-R1-8B-IQ3_M.gguf filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
MediX-R1-8B-IQ4_XS.gguf filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
MediX-R1-8B-IQ4_NL.gguf filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
mmproj-MediX-R1-8b-F16.gguf filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
assets/medix-r1_arch.png filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
assets/reward_design_graph.png filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
assets/microscopy_qualitative.png filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
assets/xray_qualitative.png filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
MediX-R1-8B-F16.gguf filter=lfs diff=lfs merge=lfs -text
|
MediX-R1-8B-F16.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71bfba6d155e3f7a9eb90ea04900389b6b274da464c59143b46769bcaa2cb6c5
|
| 3 |
+
size 16388045088
|
MediX-R1-8B-IQ3_M.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7157ae1d306b9a18d9bc24279d85645c9c671668176efeb6b4dfe1f8080a9113
|
| 3 |
+
size 3896621088
|
MediX-R1-8B-IQ3_S.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:756eae053820440015fcbae6ca01c21d3edd95e8f90db02d41177918f00dc2c8
|
| 3 |
+
size 3789666336
|
MediX-R1-8B-IQ4_NL.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a13c9fa58b68b7999a1f042a872878505337f724e5bfb64abb23aee21ad01569
|
| 3 |
+
size 4818790432
|
MediX-R1-8B-IQ4_XS.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66aff9e4de2b69f13c6afec950c035a10232aaa7759386a81c055768179f2afe
|
| 3 |
+
size 4593297440
|
MediX-R1-8B-Q2_K.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e0e811b7a4b0315e6eeeeb81bb413c204ea143e934058157854e8313fe03e1b2
|
| 3 |
+
size 3281733664
|
MediX-R1-8B-Q3_K_L.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e1ba52e4aaa6e58a628e1c60e8c69d91a9defbc68c030316b54df75cead6393
|
| 3 |
+
size 4431394848
|
MediX-R1-8B-Q3_K_M.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:256937adf103049202d2d5c86d90c74dfcdbfc307c536db61573677c401aa74c
|
| 3 |
+
size 4124162080
|
MediX-R1-8B-Q3_K_S.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:666c38e6ffdbf29dc0b290696b87d8455487420fc6895cd662e27b19f288b18b
|
| 3 |
+
size 3769612320
|
MediX-R1-8B-Q4_0.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab882436f70e8365178e817b1dc6a79426137ae14f0370bc214c240080065670
|
| 3 |
+
size 4774750240
|
MediX-R1-8B-Q4_1.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfc54373889d37ac15b5fa3ffe7bc1681dc0db804cc7a737eead7ec66bbc5720
|
| 3 |
+
size 5247756320
|
MediX-R1-8B-Q4_K_M.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6728943aa60f090e6396159e5064d3290d03c05060e3129b9271f474f0d7e88
|
| 3 |
+
size 5027784736
|
MediX-R1-8B-Q4_K_S.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5df976fe40738153d00173f1998e68369bba25c2c0f2754c48028c722bfe1641
|
| 3 |
+
size 4802013216
|
MediX-R1-8B-Q5_0.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d8930e32fdc54b1f42125cd68a8f932c0beae81b3d7942651a049b3544a7d6c
|
| 3 |
+
size 5720762400
|
MediX-R1-8B-Q5_1.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62b5b20bb7436d64028f17eed91c0d7e072a73aaef988644033f6d38fb0254ce
|
| 3 |
+
size 6193768480
|
MediX-R1-8B-Q5_K_M.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8bf6849c38f39fdd341954aab94c2f415dea063df4ab56ac65c2193afa2a6afc
|
| 3 |
+
size 5851113504
|
MediX-R1-8B-Q5_K_S.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c8c8bf5edfa53c981a99d6374204cea396a14980453a8d2acd11449ee3392a9a
|
| 3 |
+
size 5720762400
|
MediX-R1-8B-Q6_K.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4096c7bfa2ce29876be5765e4552a4de16d87afaa93ff41f9a7e5103f1b4241
|
| 3 |
+
size 6725900320
|
MediX-R1-8B-Q8_0.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2245342474d96a124e07d6590492a3f946712745331a9174bcde526d2c094485
|
| 3 |
+
size 8709519392
|
README.md
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: cc-by-nc-sa-4.0
|
| 3 |
+
language:
|
| 4 |
+
- en
|
| 5 |
+
tags:
|
| 6 |
+
- medical
|
| 7 |
+
- reinforcement-learning
|
| 8 |
+
- multimodal
|
| 9 |
+
- vision-language
|
| 10 |
+
- qwen3-vl
|
| 11 |
+
pipeline_tag: image-text-to-text
|
| 12 |
+
library_name: transformers
|
| 13 |
+
---
|
| 14 |
+
# MediX-R1: Open-Ended Medical Reinforcement Learning
|
| 15 |
+
|
| 16 |
+
<p align="center">
|
| 17 |
+
<img src="assets/logo_white_no_bg.png" alt="MediX-R1" width="200">
|
| 18 |
+
</p>
|
| 19 |
+
|
| 20 |
+
<p align="center">
|
| 21 |
+
<img src="https://i.imgur.com/waxVImv.png" alt="MediX-R1">
|
| 22 |
+
</p>
|
| 23 |
+
|
| 24 |
+
#### [Sahal Shaji Mullappilly](https://scholar.google.com/citations?user=LJWxVpUAAAAJ&hl=en)\*, [Mohammed Irfan K](https://scholar.google.com/citations?user=GJp0keYAAAAJ&hl=en)\*, [Omair Mohamed](https://scholar.google.com), [Mohamed Zidan](https://scholar.google.com), [Fahad Khan](https://sites.google.com/view/fahadkhans/home), [Salman Khan](https://salman-h-khan.github.io/), [Rao Muhammad Anwer](https://scholar.google.com/citations?hl=en&authuser=1&user=_KlvMVoAAAAJ), and [Hisham Cholakkal](https://scholar.google.com/citations?hl=en&user=bZ3YBRcAAAAJ)
|
| 25 |
+
|
| 26 |
+
\**Equally contributing first authors*
|
| 27 |
+
|
| 28 |
+
#### **Mohamed Bin Zayed University of Artificial Intelligence (MBZUAI), UAE**
|
| 29 |
+
|
| 30 |
+
[Project Website](https://medix.cvmbzuai.com)
|
| 31 |
+
[Paper (arXiv)](https://arxiv.org/pdf/2602.23363)
|
| 32 |
+
[Hugging Face Model Collection](https://huggingface.co/collections/MBZUAI/medix-r1)
|
| 33 |
+
[Leaderboard](https://medix.cvmbzuai.com/leaderboard)
|
| 34 |
+
|
| 35 |
+
---
|
| 36 |
+
|
| 37 |
+
## Overview
|
| 38 |
+
|
| 39 |
+
MediX-R1 is an open-ended Reinforcement Learning (RL) framework for medical multimodal large language models (MLLMs) that enables clinically grounded, free-form answers beyond multiple-choice formats. MediX-R1 fine-tunes vision-language backbones with Group-Based RL and a composite reward tailored for medical reasoning: an LLM-based accuracy reward, a medical embedding-based semantic reward, and lightweight format and modality rewards that enforce interpretable reasoning.
|
| 40 |
+
|
| 41 |
+
Despite using only ~50K instruction examples, MediX-R1 achieves excellent results across standard medical LLM and VLM benchmarks, outperforming strong open-source baselines.
|
| 42 |
+
|
| 43 |
+
**Highlights:**
|
| 44 |
+
- Our **8B** model achieves an overall average of **68.8%**, outperforming the much larger 27B MedGemma (68.4%).
|
| 45 |
+
- Our **30B** model achieves the best overall score of **73.6%**, demonstrating the effectiveness of our composite reward design.
|
| 46 |
+
|
| 47 |
+
---
|
| 48 |
+
|
| 49 |
+
## Contributions
|
| 50 |
+
|
| 51 |
+
- We introduce an **open-ended RL framework** for medical MLLMs that produces clinically grounded, free-form answers beyond MCQ formats.
|
| 52 |
+
- We design a **composite reward** combining LLM-based accuracy, embedding-based semantic similarity, format adherence, and modality recognition, providing stable and informative feedback where traditional verifiable or MCQ-only rewards fall short.
|
| 53 |
+
- We propose a **unified evaluation framework** for both text-only and image+text tasks using a Reference-based LLM-as-judge, capturing semantic correctness, reasoning, and contextual alignment.
|
| 54 |
+
- Despite using only **~50K** instruction examples, MediX-R1 achieves state-of-the-art results across diverse medical LLM and VLM benchmarks, with particularly large gains on open-ended clinical tasks.
|
| 55 |
+
|
| 56 |
+
---
|
| 57 |
+
|
| 58 |
+
## Architecture
|
| 59 |
+
|
| 60 |
+
<p align="center">
|
| 61 |
+
<img src="assets/medix-r1_arch.png" alt="MediX-R1 Architecture" width="100%">
|
| 62 |
+
</p>
|
| 63 |
+
|
| 64 |
+
---
|
| 65 |
+
|
| 66 |
+
## Composite Reward Design
|
| 67 |
+
|
| 68 |
+
MediX-R1 uses a multi-signal reward combining LLM-based accuracy, embedding-based semantic similarity, format adherence, and modality recognition. This stabilizes training and prevents reward hacking compared to single-signal approaches.
|
| 69 |
+
|
| 70 |
+
<p align="center">
|
| 71 |
+
<img src="assets/reward_design_graph.png" alt="Reward Design" width="60%">
|
| 72 |
+
</p>
|
| 73 |
+
|
| 74 |
+
---
|
| 75 |
+
|
| 76 |
+
## Qualitative Examples
|
| 77 |
+
|
| 78 |
+
<p align="center">
|
| 79 |
+
<img src="assets/microscopy_qualitative.png" alt="Microscopy Example" width="85%">
|
| 80 |
+
<img src="assets/xray_qualitative.png" alt="X-ray Example" width="85%">
|
| 81 |
+
</p>
|
| 82 |
+
|
| 83 |
+
---
|
| 84 |
+
|
| 85 |
+
## Training
|
| 86 |
+
|
| 87 |
+
We provide training configs for all model sizes using the GRPO and DAPO algorithms. The training pipeline uses a vLLM-based reward server for LLM-as-judge scoring during RL training.
|
| 88 |
+
|
| 89 |
+
```bash
|
| 90 |
+
cd training
|
| 91 |
+
pip install -e .
|
| 92 |
+
bash vllm_serve.sh # Step 1: Start the reward server
|
| 93 |
+
bash run_train.sh # Step 2: Launch RL training
|
| 94 |
+
bash merge_model.sh # Step 3: Merge FSDP checkpoints
|
| 95 |
+
```
|
| 96 |
+
|
| 97 |
+
Training data: [MBZUAI/medix-rl-data](https://huggingface.co/datasets/MBZUAI/medix-rl-data) (~51K train, ~2.5K test samples)
|
| 98 |
+
|
| 99 |
+
See [`training/README.md`](training/README.md) for detailed setup, configuration options, and per-model scripts.
|
| 100 |
+
|
| 101 |
+
## Evaluation
|
| 102 |
+
|
| 103 |
+
We propose a unified evaluation framework for both text-only (LLM) and image+text (VLM) tasks using a Reference-based LLM-as-judge across 17 medical benchmarks.
|
| 104 |
+
|
| 105 |
+
```bash
|
| 106 |
+
cd eval
|
| 107 |
+
pip install uv && uv pip install -r requirements.txt
|
| 108 |
+
bash eval.sh # Run all phases: generate, evaluate, score
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
The evaluation pipeline supports self-hosted judge models via vLLM, with [OpenRouter](https://openrouter.ai/) as a remote alternative. Results can be submitted to the [MediX Leaderboard](https://medix.cvmbzuai.com/leaderboard).
|
| 112 |
+
|
| 113 |
+
See [`eval/README.md`](eval/README.md) for task selection, CLI reference, and MMMU-Medical evaluation.
|
| 114 |
+
|
| 115 |
+
---
|
| 116 |
+
|
| 117 |
+
## Model Zoo
|
| 118 |
+
|
| 119 |
+
| Model | HuggingFace |
|
| 120 |
+
|-------|-------------|
|
| 121 |
+
| MediX-R1-2B | [MBZUAI/MediX-R1-2B](https://huggingface.co/MBZUAI/MediX-R1-2B) |
|
| 122 |
+
| MediX-R1-8B | [MBZUAI/MediX-R1-8B](https://huggingface.co/MBZUAI/MediX-R1-8B) |
|
| 123 |
+
| MediX-R1-30B | [MBZUAI/MediX-R1-30B](https://huggingface.co/MBZUAI/MediX-R1-30B) |
|
| 124 |
+
|
| 125 |
+
---
|
| 126 |
+
|
| 127 |
+
## Citation
|
| 128 |
+
|
| 129 |
+
If you use MediX-R1 in your research, please cite our work as follows:
|
| 130 |
+
|
| 131 |
+
```bibtex
|
| 132 |
+
@misc{mullappilly2026medixr1openendedmedical,
|
| 133 |
+
title={MediX-R1: Open Ended Medical Reinforcement Learning},
|
| 134 |
+
author={Sahal Shaji Mullappilly and Mohammed Irfan Kurpath and Omair Mohamed and Mohamed Zidan and Fahad Khan and Salman Khan and Rao Anwer and Hisham Cholakkal},
|
| 135 |
+
year={2026},
|
| 136 |
+
eprint={2602.23363},
|
| 137 |
+
archivePrefix={arXiv},
|
| 138 |
+
primaryClass={cs.CV},
|
| 139 |
+
url={https://arxiv.org/abs/2602.23363},
|
| 140 |
+
}
|
| 141 |
+
```
|
| 142 |
+
|
| 143 |
+
---
|
| 144 |
+
|
| 145 |
+
## License
|
| 146 |
+
|
| 147 |
+
This project is released for **research purposes only** under the [*CC-BY-NC-SA 4.0*](https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode.en) License. It is not intended for clinical or commercial use.
|
| 148 |
+
|
| 149 |
+
Users are urged to employ MediX-R1 responsibly, especially when applying its outputs in real-world medical scenarios. It is imperative to verify the model's advice with qualified healthcare professionals and not rely on it for medical diagnoses or treatment decisions.
|
| 150 |
+
|
| 151 |
+
---
|
| 152 |
+
|
| 153 |
+
## Acknowledgements
|
| 154 |
+
|
| 155 |
+
We are thankful to [EasyR1](https://github.com/hiyouga/EasyR1) (a fork of [veRL](https://github.com/volcengine/verl)) for their open-source RL training framework.
|
| 156 |
+
|
| 157 |
+
This work was partially supported by the *NVIDIA Academic Grant 2025* and the *MBZUAI-IITD* Research Collaboration Seed Grant.
|
| 158 |
+
|
| 159 |
+
We are grateful to [MBZUAI](https://mbzuai.ac.ae/) for compute and support.
|
assets/logo_black_no_bg.png
ADDED
|
assets/logo_white_no_bg.png
ADDED
|
assets/medix-r1_arch.png
ADDED
|
Git LFS Details
|
assets/microscopy_qualitative.png
ADDED
|
Git LFS Details
|
assets/reward_design_graph.png
ADDED
|
Git LFS Details
|
assets/xray_qualitative.png
ADDED
|
Git LFS Details
|
mmproj-MediX-R1-8b-F16.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d2b9604c5c483e7785003f630f99059f15ecd3db2592656296ed6e6f0bd1146
|
| 3 |
+
size 1159030080
|