# Source: rl4phyx-backup / root_scripts/read_reward_full.py
# Uploaded by YUNTA88 with huggingface_hub (commit 12cc0d3, verified).
import verl, os
# Substrings (matched case-insensitively) that mark a YAML line as
# reward-related when scanning ppo_trainer.yaml.
REWARD_KEYWORDS = ("reward", "compute_score", "custom")


def extract_definition(text, name, limit=1500):
    """Return the source of the function *name* as found in *text*.

    The snippet starts at the first occurrence of ``"def <name>"`` and runs
    up to (not including) the next line that starts with ``def``, or to the
    end of *text* when no such line follows. At most *limit* characters are
    returned.

    Returns ``None`` when ``"def <name>"`` does not occur in *text*, so the
    caller can report the miss instead of printing an arbitrary slice.
    """
    marker = "def " + name
    start = text.find(marker)
    if start == -1:
        return None
    # Search past the marker itself so the definition cannot terminate on
    # its own "def" keyword.
    end = text.find("\ndef ", start + len(marker))
    if end == -1:
        end = len(text)
    return text[start:end][:limit]


def reward_related_lines(text, keywords=REWARD_KEYWORDS, width=100):
    """Return the lines of *text* that mention any of *keywords*.

    Matching is done on the lower-cased line. Each returned line has its
    trailing whitespace removed and is truncated to *width* characters.
    """
    return [
        line.rstrip()[:width]
        for line in text.split("\n")
        if any(key in line.lower() for key in keywords)
    ]


def read_text(path):
    """Return the contents of *path* decoded as UTF-8, or ``None`` if absent.

    Undecodable bytes are replaced rather than raising, because this script
    only dumps the text for a human to read.
    """
    if not os.path.isfile(path):
        return None
    with open(path, encoding="utf-8", errors="replace") as handle:
        return handle.read()


def show_head(title, path, limit):
    """Print a ``=== title ===`` header and the first *limit* chars of *path*."""
    print(f"\n=== {title} ===")
    text = read_text(path)
    if text is None:
        print(f"(not found: {path})")
    else:
        print(text[:limit])


def main():
    """Dump the reward-related sources and config of the installed verl."""
    verl_dir = os.path.dirname(verl.__file__)

    # 1. Read get_custom_reward_fn full code
    reward_file = os.path.join(verl_dir, "trainer", "ppo", "reward.py")
    print("=== get_custom_reward_fn ===")
    reward_src = read_text(reward_file)
    if reward_src is None:
        print(f"(not found: {reward_file})")
    else:
        snippet = extract_definition(reward_src, "get_custom_reward_fn")
        if snippet is None:
            print("(get_custom_reward_fn is not defined in this file)")
        else:
            print(snippet)

    # 2. Read default_compute_score
    score_dir = os.path.join(verl_dir, "utils", "reward_score")
    show_head(
        "reward_score/__init__.py",
        os.path.join(score_dir, "__init__.py"),
        1500,
    )

    # 3. Read geo3k.py (reference for physics tasks)
    show_head("geo3k.py", os.path.join(score_dir, "geo3k.py"), 2000)

    # 4. Check Hydra config for reward
    config_dir = os.path.join(verl_dir, "trainer", "config")
    if not os.path.isdir(config_dir):
        return
    print("\n=== config dir ===")
    for entry in os.listdir(config_dir):
        print(f" {entry}")

    # Show only the reward-related parts of the PPO trainer config.
    ppo_text = read_text(os.path.join(config_dir, "ppo_trainer.yaml"))
    if ppo_text is not None:
        for line in reward_related_lines(ppo_text):
            print(f" yaml: {line}")


if __name__ == "__main__":
    main()