# QuadOpt-RL / environment / config.yaml
# Author: ropercha
# Mesh environment
# Commit: ba246bb
# --- Experiment metadata ---
project_name: "Quadmesh"
experiment_name: "full-dataset-obs36-10darts-v0"
description: "Apprentissage avec une observation de taille 36. Après optimisation de l'environnement. Nouvelle observatin avec dummy vertices et pas de temps augmenté à 150 "
# Total number of environment steps for the whole training run.
total_timesteps: 500000
# Output directories for logs, saved policies and recorded data.
paths:
  log_dir: "training/results/e1/"
  policy_saving_dir: "training/policy_saved/e1/"
  wandb_model_saving_dir: "training/wandb_models/e1/"
  episode_recording_dir: "training/results/e1/episode_recordings/"  # NOTE(review): marked "Not working" — confirm before relying on it
  observation_counts_dir: "training/results/e1/observation_counts/"
  parameters_saving_dir: "training/results/e1/parameters/"
# Mesh datasets used for training and evaluation.
dataset:
  training_dataset_dir: "training/dataset/training_dataset"
  exploit_dataset_dir: "training/dataset/test_dataset"
  # If the dataset directories above are set to null, the agent is trained or
  # evaluated on the single mesh file given below instead.
  training_mesh_file_path: "mesh_files/simple_quad.msh"
  evaluation_mesh_file_path: "mesh_files/simple_quad.msh"

# Global RNG seed for reproducibility.
seed: 1
# Training environment configuration.
env:
  env_id: "Quadmesh-v0"
  n_vec_envs: 0  # NOTE(review): 0 vectorized envs — presumably falls back to a single env; confirm in env setup code
  max_episode_steps: 75
  n_darts_selected: 10
  deep: 36
  # Must equal n_darts_selected * deep (10 * 36 = 360); computed manually.
  obs_size: 360
  action_restriction: false
  with_degree_observation: false
  render_mode: null
  obs_count: false
  analysis_type: "topo"  # only the "topo" analysis type is working
  debug: false
# PPO hyper-parameters (Stable-Baselines3 naming convention).
ppo:
  policy: "MlpPolicy"
  n_steps: 2048
  n_epochs: 5
  batch_size: 64
  learning_rate: 0.0001
  gamma: 0.9
  clip_range: 0.2
# Evaluation environment configuration.
eval:
  eval_env_id: "Quadmesh-v0"
  max_episode_steps: 300
  n_darts_selected: 5
  deep: 36
  # Must equal n_darts_selected * deep (5 * 36 = 180); computed manually.
  obs_size: 180
  render_mode: null  # set to "human" to render during evaluation
  action_restriction: false
  # NOTE(review): the env section uses `with_degree_observation` — key name
  # mismatch; confirm which key the evaluation code actually reads.
  with_quality_observation: false
  # The three parameters below are not used anymore
  min_evals: 5
  max_no_improvement_evals: 10
  eval_freq: 500
# Initial values for the tracked training metrics.
metrics:
  normalized_return: 0
  ep_len_mean: 0
  ep_reward_mean: 0