|
|
import numpy as np
|
|
|
import random
|
|
|
import torch
|
|
|
import torch.nn as nn
|
|
|
|
|
|
|
|
|
def AvgL1Norm(x, eps=1e-8):
    """Normalize `x` by the mean absolute value along its last dimension.

    The divisor is floored at `eps` so an all-zero row divides by `eps`
    instead of zero (yielding zeros, not NaNs).
    """
    scale = x.abs().mean(-1, keepdim=True)
    return x / scale.clamp(min=eps)
|
|
|
|
|
|
|
|
|
def LAP_huber(x, min_priority=1):
    """Huber-style loss used by LAP: quadratic below `min_priority`, linear above.

    Element-wise loss is summed over dim 1 and averaged over the batch.
    NOTE(review): assumes x is non-negative (e.g. absolute TD errors) —
    the linear branch `min_priority * x` is only a valid Huber tail then;
    confirm at call sites.
    """
    quadratic = 0.5 * x.pow(2)
    linear = min_priority * x
    per_element = torch.where(x < min_priority, quadratic, linear)
    return per_element.sum(1).mean()
|
|
|
|
|
|
|
|
|
def weight_init(m):
    """Custom weight init for Conv2D and Linear layers.

    Linear layers get orthogonal weights and zero bias. Conv layers get a
    delta-style init: all weights zeroed except the spatial center of the
    kernel, which is filled with an orthogonal matrix scaled for ReLU.

    Reference: https://github.com/MishaLaskin/rad/blob/master/curl_sac.py"""
    if isinstance(m, nn.Linear):
        nn.init.orthogonal_(m.weight.data)
        m.bias.data.fill_(0.0)
    elif isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
        # Only square kernels are supported — the center index below
        # assumes height == width.
        assert m.weight.size(2) == m.weight.size(3)
        m.weight.data.fill_(0.0)
        m.bias.data.fill_(0.0)
        center = m.weight.size(2) // 2
        relu_gain = nn.init.calculate_gain('relu')
        nn.init.orthogonal_(m.weight.data[:, :, center, center], relu_gain)
|
|
|
|
|
|
|
|
|
def hard_update(network, target_network):
    """Copy every parameter of `network` into `target_network` in place."""
    pairs = zip(network.parameters(), target_network.parameters())
    for src, dst in pairs:
        dst.data.copy_(src.data)
|
|
|
|
|
|
|
|
|
def soft_update(network, target_network, tau):
    """Polyak-average `network` into `target_network` in place.

    target <- (1 - tau) * target + tau * network, per parameter.
    """
    for src, dst in zip(network.parameters(), target_network.parameters()):
        dst.data.copy_(dst.data * (1.0 - tau) + src.data * tau)
|
|
|
|
|
|
|
|
|
def set_seed(random_seed):
    """Seed torch, numpy and the stdlib `random` module.

    Args:
        random_seed: desired seed. If <= 0, a random seed is drawn instead
            (np.random.randint's upper bound is exclusive, so the draw is
            from [1, 9998]).

    Returns:
        The seed actually used, so callers can log/reuse it.
    """
    if random_seed <= 0:
        random_seed = np.random.randint(1, 9999)
    # (The original had a no-op `else: random_seed = random_seed`; removed.)

    torch.manual_seed(random_seed)
    np.random.seed(random_seed)
    random.seed(random_seed)
    return random_seed
|
|
|
|
|
|
|
|
|
def make_env(env_name, random_seed):
    """Build a training env and an evaluation env for `env_name`.

    The eval env's action space is seeded with `random_seed + 100` so its
    sampled actions are decorrelated from the training env's.

    Returns:
        (train_env, eval_env) tuple of gymnasium environments.
    """
    # Imported lazily so the module loads without gymnasium installed.
    import gymnasium as gym

    train_env = gym.make(env_name)
    train_env.action_space.seed(random_seed)

    eval_env = gym.make(env_name)
    eval_env.action_space.seed(random_seed + 100)

    return train_env, eval_env
|
|
|
|