```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("RLHFlow/Qwen2.5-Math-7B-Zero-RAFTpp")
model = AutoModelForCausalLM.from_pretrained("RLHFlow/Qwen2.5-Math-7B-Zero-RAFTpp")

messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
# Decode only the newly generated tokens, not the prompt.
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```

This model is trained from Qwen-Math-7B-base.
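Since this checkpoint is tuned for math reasoning, a more representative prompt is a math problem. The following is a minimal sketch building on the snippet above; the question, the greedy decoding choice, and the token budget are illustrative assumptions, not values from the model card:

```python
# Hypothetical usage sketch: query the model with a math problem.
# The question and generation settings are illustrative assumptions.
math_messages = [
    {"role": "user", "content": "What is the sum of the first 10 positive odd integers?"},
]
math_inputs = tokenizer.apply_chat_template(
    math_messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

# Use a larger token budget so the reasoning trace can finish.
math_outputs = model.generate(**math_inputs, max_new_tokens=512, do_sample=False)
print(tokenizer.decode(
    math_outputs[0][math_inputs["input_ids"].shape[-1]:],
    skip_special_tokens=True,
))
```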
If you find this model useful, please consider citing:
```bibtex
@article{Xiong2025AMA,
  title   = {A Minimalist Approach to LLM Reasoning: from Rejection Sampling to Reinforce},
  author  = {Wei Xiong and Jiarui Yao and Yuhui Xu and Bo Pang and Lei Wang and Doyen Sahoo and Junnan Li and Nan Jiang and Tong Zhang and Caiming Xiong and Hanze Dong},
  journal = {arXiv preprint arXiv:2504.11343},
  year    = {2025},
}
```
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="RLHFlow/Qwen2.5-Math-7B-Zero-RAFTpp")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```
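Generation settings can be passed straight through the pipeline call; the values below are illustrative assumptions rather than recommended defaults. With chat-style input, `generated_text` holds the conversation including the new assistant turn:

```python
# Forward generation kwargs through the pipeline (values are assumptions).
result = pipe(messages, max_new_tokens=256, do_sample=False)
print(result[0]["generated_text"])
```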