ligaments-dev committed on
Commit
a22a2fe
·
verified ·
1 Parent(s): d3ecd31

Upload grpo_training.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. grpo_training.py +4 -1
grpo_training.py CHANGED
@@ -68,8 +68,11 @@ config = GRPOConfig(
68
  )
69
 
70
  # Define reward function for GRPO
71
- def preference_reward_func(inputs, prompts, completions, completion_ids_list):
72
  """Simple reward function based on response length preference"""
 
 
 
73
  rewards = []
74
  for completion in completions:
75
  # Prefer shorter, more concise responses (addressing verbosity issue)
 
68
  )
69
 
70
  # Define reward function for GRPO
71
+ def preference_reward_func(**kwargs):
72
  """Simple reward function based on response length preference"""
73
+ # Extract completions from kwargs
74
+ completions = kwargs.get('completions', [])
75
+
76
  rewards = []
77
  for completion in completions:
78
  # Prefer shorter, more concise responses (addressing verbosity issue)