# Pipeline configuration: exported so that child processes inherit it.
# NOTE(review): the previous text wrapped every line in table-cell pipes
# ("| ... | |"), which the shell rejects as a syntax error, and repeated this
# whole stanza twice with identical values. Both were treated as extraction
# artifacts and removed — confirm against the original document.

# Model identifiers (presumably consumed by downstream tooling — verify).
export GENERATOR_MODEL="qwen2.5-7b-instruct"
export VERIFIER_MODEL="qwen2.5-3b-instruct"
export REWARD_MODEL="nemotron-4-70b-reward-q4km"

# Numeric settings; their exact meaning is not visible here — TODO confirm.
export GENERATED_NUM=5
export GOLD_TOP=100

# Create the working directory relative to the current directory; -p makes
# this a no-op (and not an error) when the directory already exists.
mkdir -p cycle