Upload grpo_training.py with huggingface_hub
Browse files
- grpo_training.py +3 -3
grpo_training.py
CHANGED
|
@@ -68,12 +68,12 @@ config = GRPOConfig(
|
|
| 68 |
)
|
| 69 |
|
| 70 |
# Define reward function for GRPO
|
| 71 |
-
def preference_reward_func(
|
| 72 |
"""Simple reward function based on response length preference"""
|
| 73 |
rewards = []
|
| 74 |
-
for
|
| 75 |
# Prefer shorter, more concise responses (addressing verbosity issue)
|
| 76 |
-
response_length = len(
|
| 77 |
# Reward shorter responses (up to a reasonable length)
|
| 78 |
if response_length < 50:
|
| 79 |
reward = 1.0
|
|
|
|
| 68 |
)
|
| 69 |
|
| 70 |
# Define reward function for GRPO
|
| 71 |
+
def preference_reward_func(inputs, prompts, completions, completion_ids_list):
|
| 72 |
"""Simple reward function based on response length preference"""
|
| 73 |
rewards = []
|
| 74 |
+
for completion in completions:
|
| 75 |
# Prefer shorter, more concise responses (addressing verbosity issue)
|
| 76 |
+
response_length = len(completion.split())
|
| 77 |
# Reward shorter responses (up to a reasonable length)
|
| 78 |
if response_length < 50:
|
| 79 |
reward = 1.0
|