ligaments-dev committed on
Commit
d3ecd31
·
verified ·
1 Parent(s): 2ce6fb9

Upload grpo_training.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. grpo_training.py +3 -3
grpo_training.py CHANGED
@@ -68,12 +68,12 @@ config = GRPOConfig(
68
  )
69
 
70
  # Define reward function for GRPO
71
- def preference_reward_func(samples):
72
  """Simple reward function based on response length preference"""
73
  rewards = []
74
- for sample in samples:
75
  # Prefer shorter, more concise responses (addressing verbosity issue)
76
- response_length = len(sample["response"].split())
77
  # Reward shorter responses (up to a reasonable length)
78
  if response_length < 50:
79
  reward = 1.0
 
68
  )
69
 
70
  # Define reward function for GRPO
71
+ def preference_reward_func(inputs, prompts, completions, completion_ids_list):
72
  """Simple reward function based on response length preference"""
73
  rewards = []
74
+ for completion in completions:
75
  # Prefer shorter, more concise responses (addressing verbosity issue)
76
+ response_length = len(completion.split())
77
  # Reward shorter responses (up to a reasonable length)
78
  if response_length < 50:
79
  reward = 1.0