Spaces:
Sleeping
Sleeping
| exp_name: "ResNetEncoder" | |
| #the name of this experiment# | |
| seed: 1 | |
| #seed of the experiment# | |
| torch_deterministic: true | |
| #if toggled, `torch.backends.cudnn.deterministic=True`# | |
| device: "cuda" | |
| #if toggled, cuda will be enabled by default# | |
| track: false | |
| #if toggled, this experiment will be tracked with Weights and Biases# | |
| # Algorithm specific arguments | |
| total_timesteps: 20 | |
| #total timesteps of the experiments# | |
| buffer_size: 40000 | |
| #the replay memory buffer size# | |
| gamma: 0 | |
| #the discount factor gamma# | |
| tau: 0.005 | |
| #target smoothing coefficient (default: 0.005)# | |
| batch_size: 64 | |
| #the batch size of samples from the replay memory# | |
| learning_starts: 10 | |
| #timestep to start learning# | |
| policy_lr: 0.0003 | |
| #the learning rate of the policy network optimizer# | |
| q_lr: 0.0003 | |
| #the learning rate of the Q network optimizer# | |
| policy_frequency: 2 | |
| #the frequency of training policy (delayed)# | |
| target_network_frequency: 1 # Denis Yarats' implementation delays this by 2. | |
| #the frequency of updates for the target networks# | |
| alpha: 0.2 | |
| #Entropy regularization coefficient.# | |
| autotune: true | |
| #automatic tuning of the entropy coefficient# | |
| max_episode_timesteps: 1 | |