omrisap
/

Qwen2.5-Math-1.5B-TreeRPO

Text Generation

reinforcement-learning

text-generation-inference

Model card | Files and versions

omrisap committed on Jul 20, 2025

Commit

4901dec

·

verified ·

1 Parent(s): 7a234fc

Update README.md

Files changed (1) hide show

README.md +1 -1

README.md CHANGED Viewed

@@ -51,7 +51,7 @@ Research on hierarchical RL for reasoning; math tutoring prototypes with human o
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-model_name = "your-namespace/TreeRPO-Qwen2.5-Math-1.5B"
 tok = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")

 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
+model_name = "omrisap/TreeRPO-Qwen2.5-Math-1.5B"
 tok = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")