File size: 1,462 Bytes
12cc0d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44

import verl, os
verl_dir = os.path.dirname(verl.__file__)

# Diagnostic dump of the reward-related pieces of the installed `verl` package.
# Every section is best-effort: a missing or unreadable file is reported and
# the remaining sections still run, so one layout difference does not hide
# the rest of the output.

_FUNC_PREVIEW_CHARS = 1500   # max characters echoed for the extracted function
_INIT_PREVIEW_CHARS = 1500   # max characters echoed for reward_score/__init__.py
_GEO_PREVIEW_CHARS = 2000    # max characters echoed for geo3k.py
_YAML_LINE_CHARS = 100       # max characters echoed per matching YAML line
_REWARD_KEYWORDS = ("reward", "compute_score", "custom")


def _read_text(path):
    """Return the text of *path*, or None (after printing why) if unreadable.

    The file is decoded as UTF-8 with undecodable bytes replaced, so the dump
    does not depend on the platform's default encoding.
    """
    try:
        with open(path, encoding="utf-8", errors="replace") as fh:
            return fh.read()
    except OSError as exc:
        print(f"  (could not read {path}: {exc})")
        return None


def _extract_function_source(source, func_name):
    """Return the text from ``def func_name`` up to the next top-level ``def``.

    Returns None when ``def func_name`` does not occur in *source*. When no
    later top-level ``def`` exists, the text runs to the end of *source*.
    """
    marker = f"def {func_name}"
    start = source.find(marker)
    if start == -1:
        # Without this guard the -1 would be used as a slice start and the
        # caller would print the tail of the file instead of the function.
        return None
    stop = source.find("\ndef ", start + len(marker))
    if stop == -1:
        stop = len(source)
    return source[start:stop]


# 1. Read get_custom_reward_fn full code
reward_file = os.path.join(verl_dir, "trainer", "ppo", "reward.py")
print("=== get_custom_reward_fn ===")
reward_source = _read_text(reward_file)
if reward_source is not None:
    func_source = _extract_function_source(reward_source, "get_custom_reward_fn")
    if func_source is None:
        print(f"  (get_custom_reward_fn not found in {reward_file})")
    else:
        print(func_source[:_FUNC_PREVIEW_CHARS])

# 2. Read default_compute_score
init = os.path.join(verl_dir, "utils", "reward_score", "__init__.py")
print("\n=== reward_score/__init__.py ===")
init_source = _read_text(init)
if init_source is not None:
    print(init_source[:_INIT_PREVIEW_CHARS])

# 3. Read geo3k.py (reference for physics tasks)
geo = os.path.join(verl_dir, "utils", "reward_score", "geo3k.py")
print("\n=== geo3k.py ===")
geo_source = _read_text(geo)
if geo_source is not None:
    print(geo_source[:_GEO_PREVIEW_CHARS])

# 4. Check Hydra config for reward
config_dir = os.path.join(verl_dir, "trainer", "config")
if os.path.isdir(config_dir):
    print("\n=== config dir ===")
    # os.listdir returns entries in arbitrary order; sort for a stable listing.
    for entry in sorted(os.listdir(config_dir)):
        print(f"  {entry}")
    ppo = os.path.join(config_dir, "ppo_trainer.yaml")
    if os.path.exists(ppo):
        ppo_yaml = _read_text(ppo)
        if ppo_yaml is not None:
            # Show reward-related parts
            for line in ppo_yaml.split("\n"):
                lowered = line.lower()
                if any(key in lowered for key in _REWARD_KEYWORDS):
                    print(f"  yaml: {line.rstrip()[:_YAML_LINE_CHARS]}")