---
# Experiment configuration.
# Keep one key per line: anything after " #" on a line is a YAML comment, so a
# key written after a comment on the same line is never parsed.

# Name of this experiment.
exp_name: "ResNetEncoder"
# Seed of the experiment.
seed: 1
# NOTE(review): presumably forwarded to `torch.backends.cudnn.deterministic`.
# The original note read "if toggled, ...deterministic=False", which
# contradicts the value below - confirm against the training script.
torch_deterministic: true
# Compute device; the original note says cuda is enabled by default.
device: "cuda"
# If toggled, this experiment will be tracked with Weights and Biases.
track: false

# Algorithm specific arguments
# Total timesteps of the experiment.
total_timesteps: 20
# The replay memory buffer size.
buffer_size: 40000
# The discount factor gamma.
gamma: 0
# Target smoothing coefficient (default: 0.005).
tau: 0.005
# The batch size of samples drawn from the replay memory.
batch_size: 64
# Timestep at which learning starts.
learning_starts: 10
# The learning rate of the policy network optimizer.
policy_lr: 0.0003
# The learning rate of the Q network optimizer.
q_lr: 0.0003
# The frequency of training the policy (delayed).
policy_frequency: 2
# The frequency of updates for the target networks.
# Denis Yarats' implementation delays this by 2.
target_network_frequency: 1
# Entropy regularization coefficient.
alpha: 0.2
# Automatic tuning of the entropy coefficient.
autotune: true
# Presumably the maximum number of timesteps per episode - TODO confirm with
# the environment wrapper that reads this key.
max_episode_timesteps: 1