Spaces:
Running
Running
| import pytest | |
| import torch | |
| import copy | |
| from unittest.mock import patch | |
| from ding.framework import OnlineRLContext, task | |
| from ding.framework.middleware import interaction_evaluator | |
| from ding.framework.middleware.tests import MockPolicy, MockEnv, CONFIG | |
def test_interaction_evaluator():
    """Check the evaluation schedule and reported value of ``interaction_evaluator``.

    The middleware is built and invoked once per simulated training iteration,
    for 30 iterations, using ``MockPolicy`` and ``MockEnv``. After every call
    the test checks ``ctx.last_eval_iter`` and ``ctx.eval_value``.
    """
    # Work on a private copy so the shared CONFIG object is not mutated.
    cfg = copy.deepcopy(CONFIG)
    ctx = OnlineRLContext()
    with patch("ding.policy.Policy", MockPolicy), patch("ding.envs.BaseEnvManagerV2", MockEnv):
        with task.start():
            policy = MockPolicy()
            env = MockEnv()
            for i in range(30):
                ctx.train_iter += 1
                interaction_evaluator(cfg, policy, env)(ctx)
                # Zero-based index of the most recent evaluation round.
                eval_round = i // 10
                # interaction_evaluator will run every 10 train_iter in the test
                assert ctx.last_eval_iter == eval_round * 10 + 1
                # the reward will increase 1.0 each step.
                # there are 2 env_num and 5 episodes in the test.
                # so when interaction_evaluator runs the first time, reward is [[1, 2, 3], [2, 3]] and the avg = 2.2
                # the second time, reward is [[4, 5, 6], [5, 6]] . . .
                # The expected value is a float average, so compare with a
                # tolerance instead of exact equality.
                assert ctx.eval_value == pytest.approx(2.2 + eval_round * 3.0)