harshraj22/croprl-workspace / code /tests /test_reward_decomposition.py
harshraj22's picture
download
raw
4.32 kB
"""Tests for the reward decomposition invariant.

The key property: the sum of the per-step Δ(net_worth) rewards should equal
the terminal profit (final_net_worth - initial_net_worth).
"""
import pytest
from cropRL.config import EnvConfig
from cropRL.models import CroprlAction
from cropRL.server.cropRL_environment import CroprlEnvironment
from cropRL.enums import ActionType
class TestRewardDecomposition:
    """Sanity checks around the reward telescoping invariant.

    The intended invariant is Σ r_t = final_value - initial_value. The tests
    in this class currently cover episode termination, finiteness of the
    reward stream, and a loose upper bound on passive income; none of them
    asserts the exact equality yet.
    """

    def test_telescoping_sum_wait_only(self):
        """Waiting every step must terminate the episode within ``max_steps``.

        TODO: assert the telescoping sum itself once the expected terminal
        value can be read back from the environment.
        """
        max_steps = 12
        cfg = EnvConfig(max_steps=max_steps, max_months=60)
        e = CroprlEnvironment(config=cfg)
        e.reset(seed=42)

        for _ in range(max_steps):
            obs = e.step(CroprlAction(action_id=ActionType.WAIT))
            if obs.done:
                break

        # NOTE(review): per the original author's notes, the final step's
        # reward carries the terminal bonus (final_net_worth -
        # initial_net_worth) rather than a plain Δ(net_worth). Confirm this
        # against the environment before adding an assertion on the sum.
        assert obs.done is True

    def test_telescoping_sum_with_trading(self):
        """Rewards stay finite and bounded over a plant/grow/sell schedule.

        Only NaN/magnitude checks are performed here; the sum of rewards is
        not compared against the terminal value.
        """
        cfg = EnvConfig(max_steps=20, max_months=60)
        e = CroprlEnvironment(config=cfg)
        e.reset(seed=42)

        actions = [
            ActionType.PLANT_CHICKPEA,  # plant
            ActionType.WAIT,            # grow
            ActionType.WAIT,            # grow
            ActionType.WAIT,            # mature
            ActionType.HARVEST_SELL,    # sell
            ActionType.PLANT_CORN,      # plant next
            ActionType.IRRIGATE,        # water
            ActionType.FERTILIZE,       # boost soil
            ActionType.WAIT,            # grow
            ActionType.WAIT,            # grow
            ActionType.WAIT,            # grow
            ActionType.WAIT,            # mature
            ActionType.HARVEST_SELL,    # sell
        ]

        rewards = []
        for a in actions:
            obs = e.step(CroprlAction(action_id=a))
            rewards.append(obs.reward)
            if obs.done:
                break

        # All rewards should be finite (no NaN/inf).
        for i, r in enumerate(rewards):
            # NaN is the only float value that compares unequal to itself.
            assert r == r, f"Reward at step {i} is NaN"
            # The magnitude bound also rejects +/-inf.
            assert abs(r) < 1e9, f"Reward at step {i} is too large: {r}"

    def test_no_free_money_from_waiting(self):
        """Passive waiting must not yield an outrageously positive return.

        The original expectation is that fixed costs make the total
        negative, but random price changes could push it positive, so only
        a loose upper bound is asserted.
        """
        max_steps = 5
        cfg = EnvConfig(max_steps=max_steps, max_months=60)
        e = CroprlEnvironment(config=cfg)
        e.reset(seed=42)

        rewards = []
        for _ in range(max_steps):
            obs = e.step(CroprlAction(action_id=ActionType.WAIT))
            rewards.append(obs.reward)
            if obs.done:
                break

        # Waiting only incurs fixed costs, so net should be negative
        # (unless random price changes on land/etc make it positive, but
        # over 5 months, fixed costs of ₹200/mo should dominate).
        # Just verify it's not outrageously positive.
        total = sum(rewards)
        assert total < 5000, f"Waiting passively shouldn't generate much profit: {total}"
class TestLandPriceReward:
    """Check that changes in land price show up in the step reward."""

    def test_fertilize_increases_net_worth(self):
        """A single FERTILIZE step from low soil nitrogen yields reward > 0."""
        env = CroprlEnvironment(
            config=EnvConfig(
                base_land_price=50000.0,
                initial_soil_nitrogen=0.3,
            )
        )
        env.reset(seed=42)

        fertilize = CroprlAction(action_id=ActionType.FERTILIZE)
        reward = env.step(fertilize).reward

        # Expected economics, per the original author's notes (not visible
        # in this file): fertilizing costs ₹400, while the nitrogen boost
        # raises land value by 0.15 × 50000 = ₹7500, so the net change in
        # worth should be positive.
        assert reward > 0, (
            "Fertilizing should increase net worth (land value gain > cost), "
            f"got reward={reward}"
        )

Xet Storage Details

Size:
4.32 kB
·
Xet hash:
59bc15b1b5c0bd651a6017f10f34fb31f5a2d7647f11b3b59db6b1a0199fad37

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.