# File size: 1,462 Bytes | 12cc0d3
import verl, os
def extract_function_source(source, signature, limit=1500):
    """Return the text of the top-level function that starts at *signature*.

    The function is taken to run from the first occurrence of *signature*
    up to (not including) the next line that begins with ``def ``, or to
    the end of *source* when no such line follows.  At most *limit*
    characters are returned.

    Returns ``None`` when *signature* does not occur in *source*.  Without
    this check ``str.find`` yields -1 and the slice produces an empty or
    one-character string instead of reporting the miss.
    """
    start = source.find(signature)
    if start == -1:
        return None
    end = source.find("\ndef ", start + len(signature))
    if end == -1:
        end = len(source)
    return source[start:end][:limit]


def filter_lines(text, keywords, width=100):
    """Return the lines of *text* that mention any of *keywords*.

    Matching is case-insensitive on the line (keywords are expected in
    lower case).  Each returned line is right-stripped and then cut to
    *width* characters.
    """
    return [
        line.rstrip()[:width]
        for line in text.split("\n")
        if any(key in line.lower() for key in keywords)
    ]


def read_text(path, limit=-1):
    """Return up to *limit* characters of the file at *path*.

    A negative *limit* reads the whole file.  The file is decoded as UTF-8
    with undecodable bytes replaced, so the dump does not depend on the
    locale encoding.  When the file cannot be opened or read, a note is
    printed and ``None`` is returned so the remaining sections still run.
    """
    try:
        with open(path, encoding="utf-8", errors="replace") as handle:
            return handle.read(limit)
    except OSError as exc:
        print(f" (cannot read {path}: {exc})")
        return None


def main():
    """Print the reward-related parts of the installed ``verl`` package."""
    verl_dir = os.path.dirname(verl.__file__)

    # 1. Read get_custom_reward_fn full code
    print("=== get_custom_reward_fn ===")
    reward_source = read_text(
        os.path.join(verl_dir, "trainer", "ppo", "reward.py")
    )
    if reward_source is not None:
        snippet = extract_function_source(
            reward_source, "def get_custom_reward_fn"
        )
        if snippet is None:
            print(" (def get_custom_reward_fn not found)")
        else:
            print(snippet)

    # 2. Read default_compute_score
    # 3. Read geo3k.py (reference for physics tasks)
    score_dir = os.path.join(verl_dir, "utils", "reward_score")
    heads = (
        ("reward_score/__init__.py", "__init__.py", 1500),
        ("geo3k.py", "geo3k.py", 2000),
    )
    for title, filename, limit in heads:
        print(f"\n=== {title} ===")
        head = read_text(os.path.join(score_dir, filename), limit)
        if head is not None:
            print(head)

    # 4. Check Hydra config for reward
    config_dir = os.path.join(verl_dir, "trainer", "config")
    if os.path.isdir(config_dir):
        print("\n=== config dir ===")
        for entry in os.listdir(config_dir):
            print(f" {entry}")
        ppo = os.path.join(config_dir, "ppo_trainer.yaml")
        # Stay silent when the trainer YAML is absent, as before.
        if os.path.isfile(ppo):
            yaml_text = read_text(ppo)
            if yaml_text is not None:
                # Show reward-related parts
                keywords = ("reward", "compute_score", "custom")
                for line in filter_lines(yaml_text, keywords):
                    print(f" yaml: {line}")


if __name__ == "__main__":
    main()
|