Upload root_scripts/read_reward_full.py with huggingface_hub

12cc0d3 verified about 2 months ago

1.46 kB


	import verl, os
	verl_dir = os.path.dirname(verl.__file__)

	# 1. Read get_custom_reward_fn full code
	reward_file = os.path.join(verl_dir, "trainer", "ppo", "reward.py")
	with open(reward_file) as f:
	content = f.read()
	# Find get_custom_reward_fn
	idx = content.find("def get_custom_reward_fn")
	print("=== get_custom_reward_fn ===")
	# Print from here to next function or end
	end = content.find("\ndef ", idx + 10)
	if end == -1:
	end = len(content)
	print(content[idx:end][:1500])

	# 2. Read default_compute_score
	init = os.path.join(verl_dir, "utils", "reward_score", "__init__.py")
	with open(init) as f:
	print("\n=== reward_score/__init__.py ===")
	print(f.read()[:1500])

	# 3. Read geo3k.py (reference for physics tasks)
	geo = os.path.join(verl_dir, "utils", "reward_score", "geo3k.py")
	with open(geo) as f:
	print("\n=== geo3k.py ===")
	print(f.read()[:2000])

	# 4. Check Hydra config for reward
	config_dir = os.path.join(verl_dir, "trainer", "config")
	if os.path.exists(config_dir):
	print("\n=== config dir ===")
	for f in os.listdir(config_dir):
	print(f" {f}")
	ppo = os.path.join(config_dir, "ppo_trainer.yaml")
	if os.path.exists(ppo):
	with open(ppo) as fh:
	content = fh.read()
	# Show reward-related parts
	for line in content.split("\n"):
	if any(k in line.lower() for k in ["reward", "compute_score", "custom"]):
	print(f" yaml: {line.rstrip()[:100]}")