| The following code snippet shows how to use the model with transformers for inference. | |
| ```python | |
| from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor | |
| from qwen_vl_utils import process_vision_info | |
| instruct_prompt = r"You FIRST think about the reasoning process as an internal monologue and then provide the final answer. The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}." | |
| model = Qwen2_5_VLForConditionalGeneration.from_pretrained( | |
| "russwang/ThinkLite-VL-7B", torch_dtype="auto", device_map="auto" | |
| ) | |
| processor = AutoProcessor.from_pretrained("russwang/ThinkLite-VL-7B") | |
| greedy_generation_config = GenerationConfig( | |
| do_sample=False, | |
| max_new_tokens=2048 | |
| ) | |
| messages = [ | |
| { | |
| "role": "user", | |
| "content": [ | |
| { | |
| "type": "image", | |
| "image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg", | |
| }, | |
| {"type": "text", "text": "Describe this image." + instruct_prompt}, | |
| ], | |
| } | |
| ] | |
| text = processor.apply_chat_template( | |
| messages, tokenize=False, add_generation_prompt=True | |
| ) | |
| inputs = processor( | |
| text=text, | |
| images=image_inputs, | |
| padding=True, | |
| return_tensors="pt", | |
| ).to("cuda") | |
| output = model.generate( | |
| **inputs, | |
| generation_config=greedy_generation_config, | |
| tokenizer=processor.tokenizer | |
| ) | |
| output_text = processor.decode( | |
| output[0], | |
| skip_special_tokens=True, | |
| clean_up_tokenization_spaces=False | |
| ) | |
| print(output_text) | |
| ``` | |
| If you find this work useful, please consider citing our paper as follows: | |
| ``` | |
| @article{wang2025sota, | |
| title={SoTA with Less: MCTS-Guided Sample Selection for Data-Efficient Visual Reasoning Self-Improvement}, | |
| author={Wang, Xiyao and Yang, Zhengyuan and Feng, Chao and Lu, Hongjin and Li, Linjie and Lin, Chung-Ching and Lin, Kevin and Huang, Furong and Wang, Lijuan}, | |
| journal={arXiv preprint arXiv:2504.07934}, | |
| year={2025} | |
| } | |
| ``` |