Upload grpo_training.py with huggingface_hub
Browse files
- grpo_training.py +4 -1
grpo_training.py
CHANGED
|
@@ -68,8 +68,11 @@ config = GRPOConfig(
|
|
| 68 |
)
|
| 69 |
|
| 70 |
# Define reward function for GRPO
|
| 71 |
-
def preference_reward_func(
|
| 72 |
"""Simple reward function based on response length preference"""
|
|
|
|
|
|
|
|
|
|
| 73 |
rewards = []
|
| 74 |
for completion in completions:
|
| 75 |
# Prefer shorter, more concise responses (addressing verbosity issue)
|
|
|
|
| 68 |
)
|
| 69 |
|
| 70 |
# Define reward function for GRPO
|
| 71 |
+
def preference_reward_func(**kwargs):
|
| 72 |
"""Simple reward function based on response length preference"""
|
| 73 |
+
# Extract completions from kwargs
|
| 74 |
+
completions = kwargs.get('completions', [])
|
| 75 |
+
|
| 76 |
rewards = []
|
| 77 |
for completion in completions:
|
| 78 |
# Prefer shorter, more concise responses (addressing verbosity issue)
|