| | import pytest |
| | import yaml |
| |
|
| |
|
| | from mlagents.trainers.exception import TrainerConfigError, TrainerConfigWarning |
| | from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager |
| | from mlagents.trainers.settings import ( |
| | RunOptions, |
| | UniformSettings, |
| | GaussianSettings, |
| | ConstantSettings, |
| | CompletionCriteriaSettings, |
| | ) |
| |
|
| |
|
# Config with a single environment parameter defined directly by a uniform
# sampler (no explicit curriculum).
test_sampler_config_yaml = """
environment_parameters:
  param_1:
    sampler_type: uniform
    sampler_parameters:
      min_value: 0.5
      max_value: 10
"""
| |
|
| |
|
def test_sampler_conversion():
    """Check that a bare sampler definition becomes a one-lesson curriculum.

    The parsed ``param_1`` must expose exactly one lesson, without completion
    criteria, whose value is a ``UniformSettings`` carrying the configured
    bounds.
    """
    run_options = RunOptions.from_dict(yaml.safe_load(test_sampler_config_yaml))
    assert run_options.environment_parameters is not None
    assert "param_1" in run_options.environment_parameters
    lessons = run_options.environment_parameters["param_1"].curriculum
    assert len(lessons) == 1
    assert lessons[0].completion_criteria is None
    assert isinstance(lessons[0].value, UniformSettings)
    assert lessons[0].value.min_value == 0.5
    assert lessons[0].value.max_value == 10
| |
|
| |
|
# Config mixing a gaussian-sampled parameter (param_1) with a plain scalar
# parameter (param_2).
test_sampler_and_constant_config_yaml = """
environment_parameters:
  param_1:
    sampler_type: gaussian
    sampler_parameters:
      mean: 4
      st_dev: 5
  param_2: 20
"""
| |
|
| |
|
def test_sampler_and_constant_conversion():
    """Check conversion of a sampler parameter next to a scalar parameter.

    ``param_1`` must yield a ``GaussianSettings`` with the configured mean and
    standard deviation; the scalar ``param_2`` must yield a
    ``ConstantSettings`` wrapping the value 20.
    """
    run_options = RunOptions.from_dict(
        yaml.safe_load(test_sampler_and_constant_config_yaml)
    )
    assert "param_1" in run_options.environment_parameters
    assert "param_2" in run_options.environment_parameters
    lessons_1 = run_options.environment_parameters["param_1"].curriculum
    lessons_2 = run_options.environment_parameters["param_2"].curriculum

    # The sampler definition is exposed as the first lesson's value.
    assert isinstance(lessons_1[0].value, GaussianSettings)
    assert lessons_1[0].value.mean == 4
    assert lessons_1[0].value.st_dev == 5

    # The bare scalar is exposed as a constant setting.
    assert isinstance(lessons_2[0].value, ConstantSettings)
    assert lessons_2[0].value.value == 20
| |
|
| |
|
| | test_curriculum_config_yaml = """ |
| | environment_parameters: |
| | param_1: |
| | curriculum: |
| | - name: Lesson1 |
| | completion_criteria: |
| | measure: reward |
| | behavior: fake_behavior |
| | threshold: 30 |
| | min_lesson_length: 100 |
| | require_reset: true |
| | value: 1 |
| | - name: Lesson2 |
| | completion_criteria: |
| | measure: reward |
| | behavior: fake_behavior |
| | threshold: 60 |
| | min_lesson_length: 100 |
| | require_reset: false |
| | value: 2 |
| | - name: Lesson3 |
| | value: |
| | sampler_type: uniform |
| | sampler_parameters: |
| | min_value: 1 |
| | max_value: 3 |
| | """ |
| |
|
| |
|
def test_curriculum_conversion():
    """Check that a three-lesson curriculum is parsed lesson by lesson.

    Lessons 1 and 2 must carry reward-based completion criteria with the
    configured behavior, threshold, minimum length and reset flag, and a
    constant value. Lesson 3 must have no completion criteria and a uniform
    sampler as its value.
    """
    run_options = RunOptions.from_dict(yaml.safe_load(test_curriculum_config_yaml))
    assert "param_1" in run_options.environment_parameters
    lessons = run_options.environment_parameters["param_1"].curriculum
    assert len(lessons) == 3

    # Lesson1: reward threshold 30, reset required, constant value 1.
    lesson = lessons[0]
    assert lesson.completion_criteria is not None
    assert (
        lesson.completion_criteria.measure
        == CompletionCriteriaSettings.MeasureType.REWARD
    )
    assert lesson.completion_criteria.behavior == "fake_behavior"
    assert lesson.completion_criteria.threshold == 30.0
    assert lesson.completion_criteria.min_lesson_length == 100
    assert lesson.completion_criteria.require_reset
    assert isinstance(lesson.value, ConstantSettings)
    assert lesson.value.value == 1

    # Lesson2: reward threshold 60, no reset required, constant value 2.
    lesson = lessons[1]
    assert lesson.completion_criteria is not None
    assert (
        lesson.completion_criteria.measure
        == CompletionCriteriaSettings.MeasureType.REWARD
    )
    assert lesson.completion_criteria.behavior == "fake_behavior"
    assert lesson.completion_criteria.threshold == 60.0
    assert lesson.completion_criteria.min_lesson_length == 100
    assert not lesson.completion_criteria.require_reset
    assert isinstance(lesson.value, ConstantSettings)
    assert lesson.value.value == 2

    # Lesson3: final lesson, no completion criteria, uniform sampler value.
    lesson = lessons[2]
    assert lesson.completion_criteria is None
    assert isinstance(lesson.value, UniformSettings)
    assert lesson.value.min_value == 1
    assert lesson.value.max_value == 3
| |
|
| |
|
# Invalid curriculum: Lesson2 is not the final lesson yet defines no
# completion_criteria.
# NOTE: the "competion" spelling in the name is kept as-is because the test
# that loads this config refers to it by this exact name.
test_bad_curriculum_no_competion_criteria_config_yaml = """
environment_parameters:
  param_1:
    curriculum:
      - name: Lesson1
        completion_criteria:
          measure: reward
          behavior: fake_behavior
          threshold: 30
          min_lesson_length: 100
          require_reset: true
        value: 1
      - name: Lesson2
        value: 2
      - name: Lesson3
        value:
          sampler_type: uniform
          sampler_parameters:
            min_value: 1
            max_value: 3
"""
| |
|
| |
|
# Questionable curriculum: every lesson, including the final Lesson3, defines
# completion_criteria.
# NOTE: the "competion" spelling in the name is kept as-is because the test
# that loads this config refers to it by this exact name.
test_bad_curriculum_all_competion_criteria_config_yaml = """
environment_parameters:
  param_1:
    curriculum:
      - name: Lesson1
        completion_criteria:
          measure: reward
          behavior: fake_behavior
          threshold: 30
          min_lesson_length: 100
          require_reset: true
        value: 1
      - name: Lesson2
        completion_criteria:
          measure: reward
          behavior: fake_behavior
          threshold: 30
          min_lesson_length: 100
          require_reset: true
        value: 2
      - name: Lesson3
        completion_criteria:
          measure: reward
          behavior: fake_behavior
          threshold: 30
          min_lesson_length: 100
          require_reset: true
        value:
          sampler_type: uniform
          sampler_parameters:
            min_value: 1
            max_value: 3
"""
| |
|
| |
|
def test_curriculum_raises_no_completion_criteria_conversion():
    """Check that a non-final lesson without completion criteria is rejected.

    Loading the config must raise ``TrainerConfigError``.
    """
    with pytest.raises(TrainerConfigError):
        RunOptions.from_dict(
            yaml.safe_load(test_bad_curriculum_no_competion_criteria_config_yaml)
        )
| |
|
| |
|
def test_curriculum_raises_all_completion_criteria_conversion():
    """Check handling of a curriculum whose final lesson has completion criteria.

    A ``TrainerConfigWarning`` must be emitted, and the manager must still be
    able to advance through the lessons: two updates move forward, a third
    does nothing, and ``param_1`` ends on lesson index 2.
    """
    # NOTE(review): the whole body is kept inside the ``pytest.warns`` context
    # so the check passes regardless of which call below emits the warning —
    # confirm against the settings implementation if it should be narrowed.
    with pytest.warns(TrainerConfigWarning):
        run_options = RunOptions.from_dict(
            yaml.safe_load(test_bad_curriculum_all_competion_criteria_config_yaml)
        )

        param_manager = EnvironmentParameterManager(
            run_options.environment_parameters, 1337, False
        )
        # Presumably the tuple is (lesson_changed, must_reset) — TODO confirm.
        # First update: Lesson1 -> Lesson2.
        assert param_manager.update_lessons(
            trainer_steps={"fake_behavior": 500},
            trainer_max_steps={"fake_behavior": 1000},
            trainer_reward_buffer={"fake_behavior": [1000] * 101},
        ) == (True, True)
        # Second update: Lesson2 -> Lesson3.
        assert param_manager.update_lessons(
            trainer_steps={"fake_behavior": 500},
            trainer_max_steps={"fake_behavior": 1000},
            trainer_reward_buffer={"fake_behavior": [1000] * 101},
        ) == (True, True)
        # Third update: already on the final lesson, so nothing changes.
        assert param_manager.update_lessons(
            trainer_steps={"fake_behavior": 500},
            trainer_max_steps={"fake_behavior": 1000},
            trainer_reward_buffer={"fake_behavior": [1000] * 101},
        ) == (False, False)
        assert param_manager.get_current_lesson_number() == {"param_1": 2}
| |
|
| |
|
| | test_everything_config_yaml = """ |
| | environment_parameters: |
| | param_1: |
| | curriculum: |
| | - name: Lesson1 |
| | completion_criteria: |
| | measure: reward |
| | behavior: fake_behavior |
| | threshold: 30 |
| | min_lesson_length: 100 |
| | require_reset: true |
| | value: 1 |
| | - name: Lesson2 |
| | completion_criteria: |
| | measure: progress |
| | behavior: fake_behavior |
| | threshold: 0.5 |
| | min_lesson_length: 100 |
| | require_reset: false |
| | value: 2 |
| | - name: Lesson3 |
| | value: |
| | sampler_type: uniform |
| | sampler_parameters: |
| | min_value: 1 |
| | max_value: 3 |
| | param_2: |
| | sampler_type: gaussian |
| | sampler_parameters: |
| | mean: 4 |
| | st_dev: 5 |
| | param_3: 20 |
| | """ |
| |
|
| |
|
def test_create_manager():
    """Exercise ``EnvironmentParameterManager`` on the combined config.

    Covers the initial lesson numbers and samplers, lesson advancement under
    different reward buffers and step counts, the lesson numbers reported by
    a second manager created with ``restore=True``, and the samplers reported
    once ``param_1`` reaches its final lesson.
    """
    run_options = RunOptions.from_dict(yaml.safe_load(test_everything_config_yaml))
    param_manager = EnvironmentParameterManager(
        run_options.environment_parameters, 1337, False
    )
    assert param_manager.get_minimum_reward_buffer_size("fake_behavior") == 100
    assert param_manager.get_current_lesson_number() == {
        "param_1": 0,
        "param_2": 0,
        "param_3": 0,
    }
    # Expected seeds are 1337, 1337 + 3 and 1337 + 3 + 1; presumably each
    # parameter's seed is offset by the number of lessons that precede it
    # (param_1 has three lessons, param_2 one) — TODO confirm.
    assert param_manager.get_current_samplers() == {
        "param_1": ConstantSettings(seed=1337, value=1),
        "param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5),
        "param_3": ConstantSettings(seed=1337 + 3 + 1, value=20),
    }

    # Only 99 rewards buffered (the reported minimum buffer size is 100):
    # no lesson change expected.
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 500},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [1000] * 99},
    ) == (False, False)

    # Enough rewards, but each is 1 (Lesson1 threshold is 30): no change.
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 500},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [1] * 101},
    ) == (False, False)
    # Enough rewards, each 1000: param_1 moves on to Lesson2.
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 500},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [1000] * 101},
    ) == (True, True)
    assert param_manager.get_current_lesson_number() == {
        "param_1": 1,
        "param_2": 0,
        "param_3": 0,
    }
    # A second manager built with restore=True must report the same lesson
    # numbers as the first one. NOTE(review): where the restored state comes
    # from is not visible in this file — verify against the manager.
    param_manager_2 = EnvironmentParameterManager(
        run_options.environment_parameters, 1337, restore=True
    )

    assert param_manager_2.get_current_lesson_number() == {
        "param_1": 1,
        "param_2": 0,
        "param_3": 0,
    }

    # Lesson2 is gated on "progress" with threshold 0.5; with 700 of 1000
    # steps done the lesson advances even though every reward is 0.
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 700},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [0] * 101},
    ) == (True, False)
    assert param_manager.get_current_samplers() == {
        "param_1": UniformSettings(seed=1337 + 2, min_value=1, max_value=3),
        "param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5),
        "param_3": ConstantSettings(seed=1337 + 3 + 1, value=20),
    }
| |
|
| |
|
# Invalid curriculum: Lesson1's completion_criteria omits the `behavior` key.
test_curriculum_no_behavior_yaml = """
environment_parameters:
  param_1:
    curriculum:
      - name: Lesson1
        completion_criteria:
          measure: reward
          threshold: 30
          min_lesson_length: 100
          require_reset: true
        value: 1
      - name: Lesson2
        value: 2
"""
| |
|
| |
|
def test_curriculum_no_behavior():
    """Check that completion criteria without a ``behavior`` raise ``TypeError``.

    Both loading the config and building the manager stay inside the
    ``pytest.raises`` context, so the test passes whichever of the two steps
    raises.
    """
    with pytest.raises(TypeError):
        run_options = RunOptions.from_dict(
            yaml.safe_load(test_curriculum_no_behavior_yaml)
        )
        EnvironmentParameterManager(run_options.environment_parameters, 1337, False)
| |
|