ChiPhan1110
/

ScaleReasoner-R1

@@ -1,14 +1,17 @@
 ---
-license: cc-by-nc-nd-4.0
-language:
-- en
 base_model:
 - Qwen/Qwen2.5-VL-7B-Instruct
 tags:
 - Pathology
 - VLM
 - Reasoning
 ---
 <h1 align="center">[MICCAI 2026] Enhancing Pathological VLMs with Cross-scale Reasoning</h1>
 <p align="center"> Chi Phan*, Tianyi Zhang*, Qiaochu Xue, Yufeng Wu, Dan Hu, Zeyu Liu, Sudong Wang, Yueming Jin </p>
@@ -213,7 +216,8 @@ Download **ScaleReasoner-R1** from [HuggingFace](https://huggingface.co/ChiPhan1
 SYSTEM_PROMPT = (
     "You are a pathology expert. Read the question and options about the image carefully. "
     "Think step by step inside <think> </think>. Then output ONLY the SINGLE best option letter "
-    "inside <answer> </answer>.\n"
     "Example: <think>Your reasoning</think> <answer>A</answer>. "
     "Do not include the option text or any extra words inside <answer> </answer> tags."
 )
@@ -251,7 +255,11 @@ response = client.chat.completions.create(
             {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image('low_mag.jpg')}"}},
             {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image('mid_mag.jpg')}"}},
             {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image('high_mag.jpg')}"}},
-            {"type": "text", "text": "<question>\n(A) ...\n(B) ...\n(C) ...\n(D) ..."},
         ]
     }],
     max_tokens=4096,
@@ -276,7 +284,11 @@ messages = [{
         {"type": "image", "image": "low_mag.jpg"},
         {"type": "image", "image": "mid_mag.jpg"},
         {"type": "image", "image": "high_mag.jpg"},
-        {"type": "text", "text": "<question>\n(A) ...\n(B) ...\n(C) ...\n(D) ..."},
     ]
 }]
@@ -308,9 +320,8 @@ If you find our work helpful, please consider citing our paper and the framework
 ```bibtex
 @article{phan2026enhancing,
   title={Enhancing Pathological VLMs with Cross-scale Reasoning},
-  author={Phan, Chi and Zhang, Tianyi and Xue, Qiaochu and Wu, Yufeng and Hu, Dan and Liu, Zeyu and Wang, Sudong and Jin, Yueming},
   journal={arXiv preprint arXiv:2606.17412},
   year={2026}
 }
 ```

 ---
 base_model:
 - Qwen/Qwen2.5-VL-7B-Instruct
+language:
+- en
+license: cc-by-nc-nd-4.0
+library_name: transformers
+pipeline_tag: image-text-to-text
 tags:
 - Pathology
 - VLM
 - Reasoning
 ---
 <h1 align="center">[MICCAI 2026] Enhancing Pathological VLMs with Cross-scale Reasoning</h1>
 <p align="center"> Chi Phan*, Tianyi Zhang*, Qiaochu Xue, Yufeng Wu, Dan Hu, Zeyu Liu, Sudong Wang, Yueming Jin </p>
 SYSTEM_PROMPT = (
     "You are a pathology expert. Read the question and options about the image carefully. "
     "Think step by step inside <think> </think>. Then output ONLY the SINGLE best option letter "
+    "inside <answer> </answer>.\n"
     "Example: <think>Your reasoning</think> <answer>A</answer>. "
     "Do not include the option text or any extra words inside <answer> </answer> tags."
 )
             {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image('low_mag.jpg')}"}},
             {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image('mid_mag.jpg')}"}},
             {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image('high_mag.jpg')}"}},
+            {"type": "text", "text": "<question>\n(A) ...\n(B) ...\n(C) ...\n(D) ..."},
         ]
     }],
     max_tokens=4096,
         {"type": "image", "image": "low_mag.jpg"},
         {"type": "image", "image": "mid_mag.jpg"},
         {"type": "image", "image": "high_mag.jpg"},
+        {"type": "text", "text": "<question>\n(A) ...\n(B) ...\n(C) ...\n(D) ..."},
     ]
 }]
 ```bibtex
 @article{phan2026enhancing,
   title={Enhancing Pathological VLMs with Cross-scale Reasoning},
+  author={Phan, Chi and Zhang, Tianyi and Xue, Qiaochu and Wu, Yufeng and Hu, Dan and Liu, Zeyu and Wang, Sudong and Jin, Yueming},
   journal={arXiv preprint arXiv:2606.17412},
   year={2026}
 }
 ```