Commit
·
4c3e56c
1
Parent(s):
3abed59
add small-rl-gen9beta
Browse files- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_0.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_10.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_12.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_14.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_16.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_18.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_2.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_20.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_22.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_24.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_4.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_6.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_8.pt +3 -0
- small-rl-gen9beta/config.txt +130 -0
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a4c0e2750fb1d118c645a49f1c3dd3b6266f1d6e04e22fcbd56895a45d96b17e
|
| 3 |
+
size 56576177
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_10.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd9c1296b431039a8ccb5ad3ddd13bde8e55cccd3c84a93623f6920de53989fa
|
| 3 |
+
size 56576351
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_12.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c466284dd15acc688efae73e7e2f66ca9fc77070daaa28b58786091519ce82f7
|
| 3 |
+
size 56576351
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_14.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb33f5610cf213451343f04b73527591c7e2774e85402c88069168958e0859af
|
| 3 |
+
size 56576351
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_16.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9d97e58a357d2c3401f13ec5a05a22eb803ed26c716813cc866c7928bb1f204
|
| 3 |
+
size 56576351
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_18.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4e9b3a42f208c3e4538db8871b1a0d2c05f05266e2aaceda919f33181d4926a
|
| 3 |
+
size 56576351
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_2.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e8891b6c6a134a1bf6ea7d64c5e31a0c6ccc4e6fc34c1f6ecc378df833ddb6c
|
| 3 |
+
size 56576177
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_20.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:beaa0fa4a4c400defc0801d3b31e383e045a497d792f4f6a9f6b18650385f8e6
|
| 3 |
+
size 56576351
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_22.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d93a4598039280403913ec35c41dd87a489f9f381c26854481a7a3afc973f991
|
| 3 |
+
size 56576351
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_24.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:921d0c1bfcbc9a4319bccad8003ee4fbb22bee93c9bc588e0118e6deb3c6b144
|
| 3 |
+
size 56576351
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_4.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02904f0048c05b7008460aac2bfa07ed43e078669be6b1f3641aa4108eb13dec
|
| 3 |
+
size 56576177
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_6.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4fad78a293aff52a9ec310e24a464c37b0969c783efa464c60e59df40a152f3
|
| 3 |
+
size 56576177
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_8.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9124f620013bdaa82afe502454c0e89503507f31c8720b77cab2bbfe20e6d1f5
|
| 3 |
+
size 56576177
|
small-rl-gen9beta/config.txt
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import amago.agent
|
| 2 |
+
import amago.experiment as amago2
|
| 3 |
+
import amago.nets.actor_critic as amago3
|
| 4 |
+
import amago.nets.traj_encoders as amago4
|
| 5 |
+
|
| 6 |
+
# Parameters for Actor:
|
| 7 |
+
# ==============================================================================
|
| 8 |
+
# None.
|
| 9 |
+
|
| 10 |
+
# Parameters for Agent:
|
| 11 |
+
# ==============================================================================
|
| 12 |
+
# None.
|
| 13 |
+
|
| 14 |
+
# Parameters for Discrete:
|
| 15 |
+
# ==============================================================================
|
| 16 |
+
Discrete.clip_prob_high = 0.99
|
| 17 |
+
Discrete.clip_prob_low = 0.001
|
| 18 |
+
|
| 19 |
+
# Parameters for Experiment:
|
| 20 |
+
# ==============================================================================
|
| 21 |
+
Experiment.always_load_latest = False
|
| 22 |
+
Experiment.always_save_latest = True
|
| 23 |
+
Experiment.has_dset_edit_rights = True
|
| 24 |
+
Experiment.save_trajs_as = 'npz'
|
| 25 |
+
Experiment.stagger_traj_file_lengths = True
|
| 26 |
+
Experiment.traj_save_len = 10000000000.0
|
| 27 |
+
Experiment.wandb_group_name = None
|
| 28 |
+
|
| 29 |
+
# Parameters for FlashAttention:
|
| 30 |
+
# ==============================================================================
|
| 31 |
+
FlashAttention.window_size = (32, 0)
|
| 32 |
+
|
| 33 |
+
# Parameters for MetamonAMAGOExperiment:
|
| 34 |
+
# ==============================================================================
|
| 35 |
+
MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent
|
| 36 |
+
MetamonAMAGOExperiment.critic_loss_weight = 10.0
|
| 37 |
+
MetamonAMAGOExperiment.grad_clip = 1.5
|
| 38 |
+
MetamonAMAGOExperiment.l2_coeff = 0.0001
|
| 39 |
+
MetamonAMAGOExperiment.learning_rate = 0.00015
|
| 40 |
+
MetamonAMAGOExperiment.lr_warmup_steps = 1000
|
| 41 |
+
MetamonAMAGOExperiment.max_seq_len = 200
|
| 42 |
+
MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder
|
| 43 |
+
MetamonAMAGOExperiment.tstep_encoder_type = @MetamonTstepEncoder
|
| 44 |
+
|
| 45 |
+
# Parameters for MetamonMaskedActor:
|
| 46 |
+
# ==============================================================================
|
| 47 |
+
MetamonMaskedActor.activation = 'leaky_relu'
|
| 48 |
+
MetamonMaskedActor.continuous_dist_type = None
|
| 49 |
+
MetamonMaskedActor.d_hidden = 300
|
| 50 |
+
MetamonMaskedActor.dropout_p = 0.0
|
| 51 |
+
MetamonMaskedActor.mask_illegal_actions = True
|
| 52 |
+
MetamonMaskedActor.n_layers = 2
|
| 53 |
+
|
| 54 |
+
# Parameters for MetamonTstepEncoder:
|
| 55 |
+
# ==============================================================================
|
| 56 |
+
MetamonTstepEncoder.d_model = 100
|
| 57 |
+
MetamonTstepEncoder.dropout = 0.05
|
| 58 |
+
MetamonTstepEncoder.extra_emb_dim = 18
|
| 59 |
+
MetamonTstepEncoder.n_heads = 5
|
| 60 |
+
MetamonTstepEncoder.n_layers = 3
|
| 61 |
+
MetamonTstepEncoder.numerical_tokens = 6
|
| 62 |
+
MetamonTstepEncoder.scratch_tokens = 4
|
| 63 |
+
MetamonTstepEncoder.token_mask_aug = False
|
| 64 |
+
|
| 65 |
+
# Parameters for Multigammas:
|
| 66 |
+
# ==============================================================================
|
| 67 |
+
Multigammas.continuous = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995]
|
| 68 |
+
Multigammas.discrete = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995]
|
| 69 |
+
|
| 70 |
+
# Parameters for MultiTaskAgent:
|
| 71 |
+
# ==============================================================================
|
| 72 |
+
MultiTaskAgent.actor_type = @MetamonMaskedActor
|
| 73 |
+
MultiTaskAgent.critic_type = @actor_critic.NCriticsTwoHot
|
| 74 |
+
MultiTaskAgent.fake_filter = False
|
| 75 |
+
MultiTaskAgent.fbc_filter_func = @agent.exp_filter
|
| 76 |
+
MultiTaskAgent.gamma = 0.999
|
| 77 |
+
MultiTaskAgent.num_actions_for_value_in_actor_loss = 3
|
| 78 |
+
MultiTaskAgent.num_actions_for_value_in_critic_loss = 5
|
| 79 |
+
MultiTaskAgent.num_critics = 4
|
| 80 |
+
MultiTaskAgent.num_critics_td = 2
|
| 81 |
+
MultiTaskAgent.offline_coeff = 1.0
|
| 82 |
+
MultiTaskAgent.online_coeff = 0.0
|
| 83 |
+
MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions']
|
| 84 |
+
MultiTaskAgent.popart = True
|
| 85 |
+
MultiTaskAgent.reward_multiplier = 10.0
|
| 86 |
+
MultiTaskAgent.tau = 0.004
|
| 87 |
+
MultiTaskAgent.use_multigamma = True
|
| 88 |
+
MultiTaskAgent.use_target_actor = True
|
| 89 |
+
|
| 90 |
+
# Parameters for NCriticsTwoHot:
|
| 91 |
+
# ==============================================================================
|
| 92 |
+
NCriticsTwoHot.activation = 'leaky_relu'
|
| 93 |
+
NCriticsTwoHot.d_hidden = 300
|
| 94 |
+
NCriticsTwoHot.dropout_p = 0.0
|
| 95 |
+
NCriticsTwoHot.max_return = 1100
|
| 96 |
+
NCriticsTwoHot.min_return = -1100
|
| 97 |
+
NCriticsTwoHot.n_layers = 2
|
| 98 |
+
NCriticsTwoHot.output_bins = 64
|
| 99 |
+
NCriticsTwoHot.use_symlog = False
|
| 100 |
+
|
| 101 |
+
# Parameters for PopArtLayer:
|
| 102 |
+
# ==============================================================================
|
| 103 |
+
PopArtLayer.beta = 0.0005
|
| 104 |
+
PopArtLayer.init_nu = 100.0
|
| 105 |
+
|
| 106 |
+
# Parameters for SigmaReparam:
|
| 107 |
+
# ==============================================================================
|
| 108 |
+
SigmaReparam.bias = True
|
| 109 |
+
SigmaReparam.fast_init = False
|
| 110 |
+
|
| 111 |
+
# Parameters for TformerTrajEncoder:
|
| 112 |
+
# ==============================================================================
|
| 113 |
+
TformerTrajEncoder.activation = 'leaky_relu'
|
| 114 |
+
TformerTrajEncoder.d_ff = 2048
|
| 115 |
+
TformerTrajEncoder.d_model = 512
|
| 116 |
+
TformerTrajEncoder.dropout_attn = 0.0
|
| 117 |
+
TformerTrajEncoder.dropout_emb = 0.05
|
| 118 |
+
TformerTrajEncoder.dropout_ff = 0.05
|
| 119 |
+
TformerTrajEncoder.dropout_qkv = 0.0
|
| 120 |
+
TformerTrajEncoder.head_scaling = True
|
| 121 |
+
TformerTrajEncoder.n_heads = 8
|
| 122 |
+
TformerTrajEncoder.n_layers = 3
|
| 123 |
+
TformerTrajEncoder.norm = 'layer'
|
| 124 |
+
TformerTrajEncoder.normformer_norms = True
|
| 125 |
+
TformerTrajEncoder.pos_emb = 'fixed'
|
| 126 |
+
TformerTrajEncoder.sigma_reparam = True
|
| 127 |
+
|
| 128 |
+
# Parameters for TransformerTurnEmbedding:
|
| 129 |
+
# ==============================================================================
|
| 130 |
+
# None.
|