Update README.md
Browse files
README.md
CHANGED
|
@@ -57,6 +57,7 @@ problem_field = "question"
|
|
| 57 |
solution_field = "answer"
|
| 58 |
dataloader_num_workers = 2
|
| 59 |
test_size = 0.1
|
|
|
|
| 60 |
|
| 61 |
[run]
|
| 62 |
run_name = "rl-zariman-7"
|
|
@@ -82,6 +83,10 @@ use_peft = true
|
|
| 82 |
# vllm_gpu_memory_utilization = 0.25
|
| 83 |
num_generations = 4
|
| 84 |
max_completion_length = 1024
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
[lora]
|
| 87 |
lora_target_modules = [
|
|
|
|
| 57 |
solution_field = "answer"
|
| 58 |
dataloader_num_workers = 2
|
| 59 |
test_size = 0.1
|
| 60 |
+
extract_hash = true
|
| 61 |
|
| 62 |
[run]
|
| 63 |
run_name = "rl-zariman-7"
|
|
|
|
| 83 |
# vllm_gpu_memory_utilization = 0.25
|
| 84 |
num_generations = 4
|
| 85 |
max_completion_length = 1024
|
| 86 |
+
num_iterations = 4 # see the TRL v0.16.0 release notes: https://github.com/huggingface/trl/releases/tag/v0.16.0
|
| 87 |
+
scale_rewards = false
|
| 88 |
+
beta = 0.0 # With beta = 0.0 the reference model is not loaded and the KL divergence is not minimized, which saves a significant amount of memory.
|
| 89 |
+
epsilon_high = 0.28 # Raising the upper epsilon bound leads to higher entropy during generation, which promotes better exploration.
|
| 90 |
|
| 91 |
[lora]
|
| 92 |
lora_target_modules = [
|