|
|
import os

import verl

# Directory that contains the installed verl package's __init__ file; the
# paths inspected by this script are built relative to it.
verl_dir = os.path.dirname(verl.__file__)
|
|
| |
# --- Print the source of get_custom_reward_fn from trainer/ppo/reward.py ---
reward_file = os.path.join(verl_dir, "trainer", "ppo", "reward.py")
print("=== get_custom_reward_fn ===")
try:
    # Read with an explicit encoding instead of the locale default.
    with open(reward_file, encoding="utf-8") as f:
        content = f.read()
except FileNotFoundError:
    # Report the missing file and carry on instead of aborting the script.
    content = ""
    print(f"  (missing: {reward_file})")

idx = content.find("def get_custom_reward_fn")
end = -1
if idx != -1:
    # Cut at the next line that starts with "def " (the next top-level
    # function); if there is none, the snippet runs to the end of the file.
    end = content.find("\ndef ", idx + 10)
    if end == -1:
        end = len(content)
    # Cap the dump at 1500 characters.
    print(content[idx:end][:1500])
elif content:
    # str.find returned -1: slicing from -1 would print an empty or
    # meaningless snippet, so say explicitly that nothing was found.
    print("  (def get_custom_reward_fn not found)")
|
|
| |
# --- Print the first 1500 characters of utils/reward_score/__init__.py ---
init = os.path.join(verl_dir, "utils", "reward_score", "__init__.py")
print("\n=== reward_score/__init__.py ===")
try:
    # Read with an explicit encoding instead of the locale default.
    with open(init, encoding="utf-8") as f:
        print(f.read()[:1500])
except FileNotFoundError:
    # Report the missing file and carry on instead of aborting the script.
    print(f"  (missing: {init})")
|
|
| |
# --- Print the first 2000 characters of utils/reward_score/geo3k.py ---
geo = os.path.join(verl_dir, "utils", "reward_score", "geo3k.py")
print("\n=== geo3k.py ===")
try:
    # Read with an explicit encoding instead of the locale default.
    with open(geo, encoding="utf-8") as f:
        print(f.read()[:2000])
except FileNotFoundError:
    # Report the missing file and carry on instead of aborting the script.
    print(f"  (missing: {geo})")
|
|
| |
# --- List trainer/config and show reward-related lines of ppo_trainer.yaml ---
config_dir = os.path.join(verl_dir, "trainer", "config")
# isdir rather than exists: a non-directory at this path would make
# os.listdir raise.
if os.path.isdir(config_dir):
    print("\n=== config dir ===")
    # os.listdir returns entries in arbitrary order; sort for stable output.
    for entry in sorted(os.listdir(config_dir)):
        print(f" {entry}")

    ppo = os.path.join(config_dir, "ppo_trainer.yaml")
    if os.path.isfile(ppo):
        # Read with an explicit encoding instead of the locale default.
        with open(ppo, encoding="utf-8") as fh:
            content = fh.read()

        # A line is shown when it contains any of these substrings,
        # compared case-insensitively.
        keywords = ("reward", "compute_score", "custom")
        for line in content.split("\n"):
            lowered = line.lower()
            if any(k in lowered for k in keywords):
                # Drop trailing whitespace and cap each line at 100 characters.
                print(f" yaml: {line.rstrip()[:100]}")
|
|