# Diagnostic script: print the parts of the installed ``verl`` package that
# matter when plugging in a custom reward function:
#   1. the source of ``get_custom_reward_fn`` in trainer/ppo/reward.py
#   2. the head of utils/reward_score/__init__.py
#   3. the head of utils/reward_score/geo3k.py (used here as a reference scorer)
#   4. the trainer config directory listing and the reward-related lines of
#      ppo_trainer.yaml
# Every section is best-effort: a missing file or symbol is reported and the
# remaining sections still run.
import os
from typing import List, Optional

# Substrings (matched case-insensitively) that mark a YAML line as reward-related.
REWARD_KEYWORDS = ("reward", "compute_score", "custom")


def read_text(path: str) -> Optional[str]:
    """Return the text of *path*, or ``None`` (after printing a note) if unreadable.

    Files are decoded as UTF-8 with undecodable bytes replaced, so the result
    does not depend on the platform's locale encoding.
    """
    try:
        with open(path, encoding="utf-8", errors="replace") as handle:
            return handle.read()
    except OSError as exc:
        print(f"  (could not read {path}: {exc})")
        return None


def extract_function_source(source: str, func_name: str) -> Optional[str]:
    """Return the text of function *func_name* up to the next top-level ``def``.

    The search is purely textual: it starts at the first occurrence of
    ``"def <func_name>"`` and stops just before the next ``"\\ndef "`` (or at
    the end of *source*). Returns ``None`` when *func_name* is not found.
    """
    needle = f"def {func_name}"
    start = source.find(needle)
    if start == -1:
        # Without this guard, slicing from -1 would print unrelated text.
        return None
    stop = source.find("\ndef ", start + len(needle))
    if stop == -1:
        stop = len(source)
    return source[start:stop]


def reward_related_lines(text: str) -> List[str]:
    """Return lines of *text* mentioning a reward keyword.

    Each returned line is right-stripped and truncated to 100 characters.
    """
    return [
        line.rstrip()[:100]
        for line in text.split("\n")
        if any(keyword in line.lower() for keyword in REWARD_KEYWORDS)
    ]


def print_file_head(title: str, path: str, limit: int) -> None:
    """Print a section header followed by the first *limit* characters of *path*."""
    print(f"\n=== {title} ===")
    text = read_text(path)
    if text is not None:
        print(text[:limit])


def main() -> None:
    """Run all four inspection steps against the installed ``verl`` package."""
    # Imported here so the helpers above stay importable without verl installed.
    import verl

    verl_dir = os.path.dirname(verl.__file__)
    reward_score_dir = os.path.join(verl_dir, "utils", "reward_score")

    # 1. Full code of get_custom_reward_fn.
    print("=== get_custom_reward_fn ===")
    reward_source = read_text(os.path.join(verl_dir, "trainer", "ppo", "reward.py"))
    if reward_source is not None:
        snippet = extract_function_source(reward_source, "get_custom_reward_fn")
        if snippet is None:
            print("  (get_custom_reward_fn not found in reward.py)")
        else:
            print(snippet[:1500])

    # 2. default_compute_score lives in the reward_score package __init__.
    print_file_head(
        "reward_score/__init__.py",
        os.path.join(reward_score_dir, "__init__.py"),
        1500,
    )

    # 3. geo3k.py (reference for physics tasks).
    print_file_head("geo3k.py", os.path.join(reward_score_dir, "geo3k.py"), 2000)

    # 4. Hydra config for reward.
    config_dir = os.path.join(verl_dir, "trainer", "config")
    if os.path.isdir(config_dir):
        print("\n=== config dir ===")
        # Sorted so the listing is stable across runs and filesystems.
        for entry in sorted(os.listdir(config_dir)):
            print(f" {entry}")
        ppo_yaml = os.path.join(config_dir, "ppo_trainer.yaml")
        if os.path.isfile(ppo_yaml):
            yaml_text = read_text(ppo_yaml)
            if yaml_text is not None:
                # Show reward-related parts only.
                for hit in reward_related_lines(yaml_text):
                    print(f" yaml: {hit}")


if __name__ == "__main__":
    main()