Commit
·
2de4913
1
Parent(s):
717dcf8
change model names
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- alakazam/ckpts/config.txt +31 -28
- alakazam/ckpts/latest/policy.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_0.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_10.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_12.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_14.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_16.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_18.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_2.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_20.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_22.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_24.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_26.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_28.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_30.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_32.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_34.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_36.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_38.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_4.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_40.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_42.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_44.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_46.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_48.pt +2 -2
- alakazam/ckpts/policy_weights/policy_epoch_50.pt +2 -2
- {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_52.pt +0 -0
- {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_54.pt +0 -0
- {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_56.pt +0 -0
- {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_58.pt +0 -0
- alakazam/ckpts/policy_weights/policy_epoch_6.pt +2 -2
- {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_60.pt +0 -0
- {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_62.pt +0 -0
- {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_64.pt +0 -0
- {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_66.pt +0 -0
- {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_68.pt +0 -0
- {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_70.pt +0 -0
- {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_72.pt +0 -0
- alakazam/ckpts/policy_weights/policy_epoch_8.pt +2 -2
- {alakazam → kadabra2}/alakazam/ckpts/config.txt +0 -0
- {alakazam_patched → kadabra2}/ckpts/config.txt +28 -31
- {alakazam_patched → kadabra2}/ckpts/latest/policy.pt +2 -2
- {alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_0.pt +2 -2
- {alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_10.pt +2 -2
- {alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_12.pt +2 -2
- {alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_14.pt +2 -2
- {alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_16.pt +2 -2
- {alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_18.pt +2 -2
- {alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_2.pt +2 -2
- {alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_20.pt +2 -2
alakazam/ckpts/config.txt
CHANGED
|
@@ -8,13 +8,6 @@ import amago.nets.transformer as amago5
|
|
| 8 |
# ==============================================================================
|
| 9 |
# None.
|
| 10 |
|
| 11 |
-
# Parameters for ClippedSlidingSinkAttention:
|
| 12 |
-
# ==============================================================================
|
| 13 |
-
ClippedSlidingSinkAttention.logit_clip = 50
|
| 14 |
-
ClippedSlidingSinkAttention.sink_bias = 0.0
|
| 15 |
-
ClippedSlidingSinkAttention.sink_size = 5
|
| 16 |
-
ClippedSlidingSinkAttention.window_size = 96
|
| 17 |
-
|
| 18 |
# Parameters for Discrete:
|
| 19 |
# ==============================================================================
|
| 20 |
Discrete.clip_prob_high = 0.99
|
|
@@ -30,6 +23,10 @@ Experiment.stagger_traj_file_lengths = True
|
|
| 30 |
Experiment.traj_save_len = 10000000000.0
|
| 31 |
Experiment.wandb_group_name = None
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
# Parameters for MetamonAMAGOExperiment:
|
| 34 |
# ==============================================================================
|
| 35 |
MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent
|
|
@@ -37,32 +34,33 @@ MetamonAMAGOExperiment.critic_loss_weight = 13.5
|
|
| 37 |
MetamonAMAGOExperiment.grad_clip = 1.5
|
| 38 |
MetamonAMAGOExperiment.l2_coeff = 0.0001
|
| 39 |
MetamonAMAGOExperiment.learning_rate = 0.000125
|
| 40 |
-
MetamonAMAGOExperiment.lr_warmup_steps =
|
| 41 |
-
MetamonAMAGOExperiment.max_seq_len =
|
| 42 |
MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder
|
| 43 |
-
MetamonAMAGOExperiment.tstep_encoder_type = @
|
| 44 |
|
| 45 |
# Parameters for MetamonMaskedResidualActor:
|
| 46 |
# ==============================================================================
|
| 47 |
MetamonMaskedResidualActor.activation = 'leaky_relu'
|
| 48 |
MetamonMaskedResidualActor.continuous_dist_type = None
|
| 49 |
MetamonMaskedResidualActor.dropout_p = 0.0
|
| 50 |
-
MetamonMaskedResidualActor.feature_dim =
|
| 51 |
MetamonMaskedResidualActor.mask_illegal_actions = True
|
| 52 |
MetamonMaskedResidualActor.normalization = 'layer'
|
| 53 |
MetamonMaskedResidualActor.residual_blocks = 2
|
| 54 |
MetamonMaskedResidualActor.residual_ff_dim = 512
|
| 55 |
|
| 56 |
-
# Parameters for
|
| 57 |
# ==============================================================================
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
| 66 |
|
| 67 |
# Parameters for Multigammas:
|
| 68 |
# ==============================================================================
|
|
@@ -81,7 +79,7 @@ MultiTaskAgent.num_actions_for_value_in_critic_loss = 4
|
|
| 81 |
MultiTaskAgent.num_critics = 6
|
| 82 |
MultiTaskAgent.num_critics_td = 2
|
| 83 |
MultiTaskAgent.offline_coeff = 1.0
|
| 84 |
-
MultiTaskAgent.online_coeff = 0.
|
| 85 |
MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions']
|
| 86 |
MultiTaskAgent.popart = True
|
| 87 |
MultiTaskAgent.reward_multiplier = 10.0
|
|
@@ -100,6 +98,10 @@ NCriticsTwoHot.n_layers = 2
|
|
| 100 |
NCriticsTwoHot.output_bins = 96
|
| 101 |
NCriticsTwoHot.use_symlog = False
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
# Parameters for PopArtLayer:
|
| 104 |
# ==============================================================================
|
| 105 |
PopArtLayer.beta = 0.0005
|
|
@@ -109,10 +111,15 @@ PopArtLayer.init_nu = 100.0
|
|
| 109 |
# ==============================================================================
|
| 110 |
# None.
|
| 111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
# Parameters for TformerTrajEncoder:
|
| 113 |
# ==============================================================================
|
| 114 |
TformerTrajEncoder.activation = 'leaky_relu'
|
| 115 |
-
TformerTrajEncoder.attention_type = @transformer.
|
| 116 |
TformerTrajEncoder.d_ff = 3072
|
| 117 |
TformerTrajEncoder.d_model = 768
|
| 118 |
TformerTrajEncoder.dropout_attn = 0.0
|
|
@@ -121,12 +128,8 @@ TformerTrajEncoder.dropout_ff = 0.05
|
|
| 121 |
TformerTrajEncoder.dropout_qkv = 0.0
|
| 122 |
TformerTrajEncoder.head_scaling = True
|
| 123 |
TformerTrajEncoder.n_heads = 12
|
| 124 |
-
TformerTrajEncoder.n_layers =
|
| 125 |
TformerTrajEncoder.norm = 'layer'
|
| 126 |
TformerTrajEncoder.normformer_norms = True
|
| 127 |
TformerTrajEncoder.pos_emb = 'fixed'
|
| 128 |
-
TformerTrajEncoder.sigma_reparam =
|
| 129 |
-
|
| 130 |
-
# Parameters for TransformerTurnEmbedding:
|
| 131 |
-
# ==============================================================================
|
| 132 |
-
# None.
|
|
|
|
| 8 |
# ==============================================================================
|
| 9 |
# None.
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
# Parameters for Discrete:
|
| 12 |
# ==============================================================================
|
| 13 |
Discrete.clip_prob_high = 0.99
|
|
|
|
| 23 |
Experiment.traj_save_len = 10000000000.0
|
| 24 |
Experiment.wandb_group_name = None
|
| 25 |
|
| 26 |
+
# Parameters for FlashAttention:
|
| 27 |
+
# ==============================================================================
|
| 28 |
+
FlashAttention.window_size = (96, 0)
|
| 29 |
+
|
| 30 |
# Parameters for MetamonAMAGOExperiment:
|
| 31 |
# ==============================================================================
|
| 32 |
MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent
|
|
|
|
| 34 |
MetamonAMAGOExperiment.grad_clip = 1.5
|
| 35 |
MetamonAMAGOExperiment.l2_coeff = 0.0001
|
| 36 |
MetamonAMAGOExperiment.learning_rate = 0.000125
|
| 37 |
+
MetamonAMAGOExperiment.lr_warmup_steps = 2000
|
| 38 |
+
MetamonAMAGOExperiment.max_seq_len = 128
|
| 39 |
MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder
|
| 40 |
+
MetamonAMAGOExperiment.tstep_encoder_type = @MetamonPerceiverTstepEncoder
|
| 41 |
|
| 42 |
# Parameters for MetamonMaskedResidualActor:
|
| 43 |
# ==============================================================================
|
| 44 |
MetamonMaskedResidualActor.activation = 'leaky_relu'
|
| 45 |
MetamonMaskedResidualActor.continuous_dist_type = None
|
| 46 |
MetamonMaskedResidualActor.dropout_p = 0.0
|
| 47 |
+
MetamonMaskedResidualActor.feature_dim = 450
|
| 48 |
MetamonMaskedResidualActor.mask_illegal_actions = True
|
| 49 |
MetamonMaskedResidualActor.normalization = 'layer'
|
| 50 |
MetamonMaskedResidualActor.residual_blocks = 2
|
| 51 |
MetamonMaskedResidualActor.residual_ff_dim = 512
|
| 52 |
|
| 53 |
+
# Parameters for MetamonPerceiverTstepEncoder:
|
| 54 |
# ==============================================================================
|
| 55 |
+
MetamonPerceiverTstepEncoder.d_model = 108
|
| 56 |
+
MetamonPerceiverTstepEncoder.dropout = 0.05
|
| 57 |
+
MetamonPerceiverTstepEncoder.extra_emb_dim = 18
|
| 58 |
+
MetamonPerceiverTstepEncoder.latent_tokens = 8
|
| 59 |
+
MetamonPerceiverTstepEncoder.max_tokens_per_turn = 128
|
| 60 |
+
MetamonPerceiverTstepEncoder.n_heads = 6
|
| 61 |
+
MetamonPerceiverTstepEncoder.n_layers = 8
|
| 62 |
+
MetamonPerceiverTstepEncoder.numerical_tokens = 6
|
| 63 |
+
MetamonPerceiverTstepEncoder.token_mask_aug = False
|
| 64 |
|
| 65 |
# Parameters for Multigammas:
|
| 66 |
# ==============================================================================
|
|
|
|
| 79 |
MultiTaskAgent.num_critics = 6
|
| 80 |
MultiTaskAgent.num_critics_td = 2
|
| 81 |
MultiTaskAgent.offline_coeff = 1.0
|
| 82 |
+
MultiTaskAgent.online_coeff = 0.2
|
| 83 |
MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions']
|
| 84 |
MultiTaskAgent.popart = True
|
| 85 |
MultiTaskAgent.reward_multiplier = 10.0
|
|
|
|
| 98 |
NCriticsTwoHot.output_bins = 96
|
| 99 |
NCriticsTwoHot.use_symlog = False
|
| 100 |
|
| 101 |
+
# Parameters for PerceiverTurnEmbedding:
|
| 102 |
+
# ==============================================================================
|
| 103 |
+
# None.
|
| 104 |
+
|
| 105 |
# Parameters for PopArtLayer:
|
| 106 |
# ==============================================================================
|
| 107 |
PopArtLayer.beta = 0.0005
|
|
|
|
| 111 |
# ==============================================================================
|
| 112 |
# None.
|
| 113 |
|
| 114 |
+
# Parameters for SigmaReparam:
|
| 115 |
+
# ==============================================================================
|
| 116 |
+
SigmaReparam.bias = True
|
| 117 |
+
SigmaReparam.fast_init = False
|
| 118 |
+
|
| 119 |
# Parameters for TformerTrajEncoder:
|
| 120 |
# ==============================================================================
|
| 121 |
TformerTrajEncoder.activation = 'leaky_relu'
|
| 122 |
+
TformerTrajEncoder.attention_type = @transformer.FlashAttention
|
| 123 |
TformerTrajEncoder.d_ff = 3072
|
| 124 |
TformerTrajEncoder.d_model = 768
|
| 125 |
TformerTrajEncoder.dropout_attn = 0.0
|
|
|
|
| 128 |
TformerTrajEncoder.dropout_qkv = 0.0
|
| 129 |
TformerTrajEncoder.head_scaling = True
|
| 130 |
TformerTrajEncoder.n_heads = 12
|
| 131 |
+
TformerTrajEncoder.n_layers = 8
|
| 132 |
TformerTrajEncoder.norm = 'layer'
|
| 133 |
TformerTrajEncoder.normformer_norms = True
|
| 134 |
TformerTrajEncoder.pos_emb = 'fixed'
|
| 135 |
+
TformerTrajEncoder.sigma_reparam = True
|
|
|
|
|
|
|
|
|
|
|
|
alakazam/ckpts/latest/policy.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:263e04fa26f1fde0df2fbe959f94b18245a3387d98918701f9f532202c4f888f
|
| 3 |
+
size 304148153
|
alakazam/ckpts/policy_weights/policy_epoch_0.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8fe0bcfa730996a37c126e4e7bd52d57f756293ba71ed1c458f9c5ab755d6dc9
|
| 3 |
+
size 304154601
|
alakazam/ckpts/policy_weights/policy_epoch_10.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51a69dc023d1f1068858bfe89b7aaafdb7b609ca18aa8a26a06f9e9a72a8722b
|
| 3 |
+
size 304155119
|
alakazam/ckpts/policy_weights/policy_epoch_12.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d82e78b0c9e622308056217288419278d1fdc94a14a0aaf8ad06e56b6e7eb3b
|
| 3 |
+
size 304155119
|
alakazam/ckpts/policy_weights/policy_epoch_14.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b258755bd773a01cfeb2fe87fd5512a2559aec9cff05220c4bfb41627cac61de
|
| 3 |
+
size 304155119
|
alakazam/ckpts/policy_weights/policy_epoch_16.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:422c854d0c144b43a851a09b4da32f674b207ca7b8f8eeb3c5d2e59431c4ef9f
|
| 3 |
+
size 304155119
|
alakazam/ckpts/policy_weights/policy_epoch_18.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bba20b8d8dd3b6429f7c532120e55a34737a5f6379ec5179de897ceed3ad3737
|
| 3 |
+
size 304155119
|
alakazam/ckpts/policy_weights/policy_epoch_2.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f72fb7645fa54b8fb4ae83f94444f3da49be1220f3a8ccd051375c1ce25541d
|
| 3 |
+
size 304154601
|
alakazam/ckpts/policy_weights/policy_epoch_20.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6639d34acc91cd0eddaa86028875bf6b59813e3a5faf64ea89fb85c35e16aef
|
| 3 |
+
size 304155119
|
alakazam/ckpts/policy_weights/policy_epoch_22.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:118f44a65b00d1dac61ca0cf4ae5ec9e136bced8a5a748b5aa3dfadcb2da5981
|
| 3 |
+
size 304155119
|
alakazam/ckpts/policy_weights/policy_epoch_24.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14c16320f64608e8700d50280587b9038e26b49180d04e15e98b6a60f4d55217
|
| 3 |
+
size 304155119
|
alakazam/ckpts/policy_weights/policy_epoch_26.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f26608bec87988179827df538c0f29d3df551a8da1942c0a9f59d66e8307b3cf
|
| 3 |
+
size 304155119
|
alakazam/ckpts/policy_weights/policy_epoch_28.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77320c39c973ce8413461708b59d0fdb19a84413ab06b86aef5ee44aa0e2e78c
|
| 3 |
+
size 304155119
|
alakazam/ckpts/policy_weights/policy_epoch_30.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a08830d881f36926d79d20c14bd82e16563385d9f7c0b724608851419d0d6e68
|
| 3 |
+
size 304155119
|
alakazam/ckpts/policy_weights/policy_epoch_32.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79b66f40eb8da147158d506579ddd0cebc5959e38c70ee4d853e17c234e68f05
|
| 3 |
+
size 304155119
|
alakazam/ckpts/policy_weights/policy_epoch_34.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7fd288a05ae901b98e6685b6a656190f50cb8d4eff614055527482063ea3841a
|
| 3 |
+
size 304155119
|
alakazam/ckpts/policy_weights/policy_epoch_36.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:023787563c166b5a163f3c9b13eb8fa7ecbd8089fe1a850ca412a82c1c35b1ba
|
| 3 |
+
size 304155119
|
alakazam/ckpts/policy_weights/policy_epoch_38.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61089d65dc794f5326a1de700363c201b6ac6de6d424363635fb87af0dad8835
|
| 3 |
+
size 304155119
|
alakazam/ckpts/policy_weights/policy_epoch_4.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:12d35584822a75d9b3886a1b9405a847e421386338c7398ea5ed40117b72d886
|
| 3 |
+
size 304154601
|
alakazam/ckpts/policy_weights/policy_epoch_40.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e6849caa9426c47885fb2834056d244b3137d3235bd30f0ec87309ec95641d4
|
| 3 |
+
size 304155119
|
alakazam/ckpts/policy_weights/policy_epoch_42.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c1cf6c7e5679788f20cebc4cabc3414c297e61a1b60b5fc875503bfd2da4e577
|
| 3 |
+
size 304155119
|
alakazam/ckpts/policy_weights/policy_epoch_44.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:177d63ce5452caf3e81809c0cf211e8816be8b79ae56f5a87a7d48d5a6587556
|
| 3 |
+
size 304155119
|
alakazam/ckpts/policy_weights/policy_epoch_46.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48764d855cded18a77977f198f3915991ba86e25a1f3ae5e9755b7c00a177ece
|
| 3 |
+
size 304155119
|
alakazam/ckpts/policy_weights/policy_epoch_48.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a16589fc9feb4da418b9c75bc86bde419f9a83b0ee2506a4e172580d3f4cb13
|
| 3 |
+
size 304155119
|
alakazam/ckpts/policy_weights/policy_epoch_50.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8b0db60998cb863039edaf0c31fdf0e409dcc11ac7fe36b3bd8b651e2e046e9
|
| 3 |
+
size 304155119
|
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_52.pt
RENAMED
|
File without changes
|
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_54.pt
RENAMED
|
File without changes
|
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_56.pt
RENAMED
|
File without changes
|
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_58.pt
RENAMED
|
File without changes
|
alakazam/ckpts/policy_weights/policy_epoch_6.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44d3bfcd674a29a7ea4e114103e4b4b7253a819b4b26ab349d91b00515dd67b2
|
| 3 |
+
size 304154601
|
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_60.pt
RENAMED
|
File without changes
|
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_62.pt
RENAMED
|
File without changes
|
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_64.pt
RENAMED
|
File without changes
|
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_66.pt
RENAMED
|
File without changes
|
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_68.pt
RENAMED
|
File without changes
|
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_70.pt
RENAMED
|
File without changes
|
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_72.pt
RENAMED
|
File without changes
|
alakazam/ckpts/policy_weights/policy_epoch_8.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:47a98516df7b346118fa824f9137e6aaaa1428d5a7150af01bf606c1ea935944
|
| 3 |
+
size 304154601
|
{alakazam → kadabra2}/alakazam/ckpts/config.txt
RENAMED
|
File without changes
|
{alakazam_patched → kadabra2}/ckpts/config.txt
RENAMED
|
@@ -8,6 +8,13 @@ import amago.nets.transformer as amago5
|
|
| 8 |
# ==============================================================================
|
| 9 |
# None.
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
# Parameters for Discrete:
|
| 12 |
# ==============================================================================
|
| 13 |
Discrete.clip_prob_high = 0.99
|
|
@@ -23,10 +30,6 @@ Experiment.stagger_traj_file_lengths = True
|
|
| 23 |
Experiment.traj_save_len = 10000000000.0
|
| 24 |
Experiment.wandb_group_name = None
|
| 25 |
|
| 26 |
-
# Parameters for FlashAttention:
|
| 27 |
-
# ==============================================================================
|
| 28 |
-
FlashAttention.window_size = (96, 0)
|
| 29 |
-
|
| 30 |
# Parameters for MetamonAMAGOExperiment:
|
| 31 |
# ==============================================================================
|
| 32 |
MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent
|
|
@@ -34,33 +37,32 @@ MetamonAMAGOExperiment.critic_loss_weight = 13.5
|
|
| 34 |
MetamonAMAGOExperiment.grad_clip = 1.5
|
| 35 |
MetamonAMAGOExperiment.l2_coeff = 0.0001
|
| 36 |
MetamonAMAGOExperiment.learning_rate = 0.000125
|
| 37 |
-
MetamonAMAGOExperiment.lr_warmup_steps =
|
| 38 |
-
MetamonAMAGOExperiment.max_seq_len =
|
| 39 |
MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder
|
| 40 |
-
MetamonAMAGOExperiment.tstep_encoder_type = @
|
| 41 |
|
| 42 |
# Parameters for MetamonMaskedResidualActor:
|
| 43 |
# ==============================================================================
|
| 44 |
MetamonMaskedResidualActor.activation = 'leaky_relu'
|
| 45 |
MetamonMaskedResidualActor.continuous_dist_type = None
|
| 46 |
MetamonMaskedResidualActor.dropout_p = 0.0
|
| 47 |
-
MetamonMaskedResidualActor.feature_dim =
|
| 48 |
MetamonMaskedResidualActor.mask_illegal_actions = True
|
| 49 |
MetamonMaskedResidualActor.normalization = 'layer'
|
| 50 |
MetamonMaskedResidualActor.residual_blocks = 2
|
| 51 |
MetamonMaskedResidualActor.residual_ff_dim = 512
|
| 52 |
|
| 53 |
-
# Parameters for
|
| 54 |
# ==============================================================================
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
MetamonPerceiverTstepEncoder.token_mask_aug = False
|
| 64 |
|
| 65 |
# Parameters for Multigammas:
|
| 66 |
# ==============================================================================
|
|
@@ -79,7 +81,7 @@ MultiTaskAgent.num_actions_for_value_in_critic_loss = 4
|
|
| 79 |
MultiTaskAgent.num_critics = 6
|
| 80 |
MultiTaskAgent.num_critics_td = 2
|
| 81 |
MultiTaskAgent.offline_coeff = 1.0
|
| 82 |
-
MultiTaskAgent.online_coeff = 0.
|
| 83 |
MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions']
|
| 84 |
MultiTaskAgent.popart = True
|
| 85 |
MultiTaskAgent.reward_multiplier = 10.0
|
|
@@ -98,10 +100,6 @@ NCriticsTwoHot.n_layers = 2
|
|
| 98 |
NCriticsTwoHot.output_bins = 96
|
| 99 |
NCriticsTwoHot.use_symlog = False
|
| 100 |
|
| 101 |
-
# Parameters for PerceiverTurnEmbedding:
|
| 102 |
-
# ==============================================================================
|
| 103 |
-
# None.
|
| 104 |
-
|
| 105 |
# Parameters for PopArtLayer:
|
| 106 |
# ==============================================================================
|
| 107 |
PopArtLayer.beta = 0.0005
|
|
@@ -111,15 +109,10 @@ PopArtLayer.init_nu = 100.0
|
|
| 111 |
# ==============================================================================
|
| 112 |
# None.
|
| 113 |
|
| 114 |
-
# Parameters for SigmaReparam:
|
| 115 |
-
# ==============================================================================
|
| 116 |
-
SigmaReparam.bias = True
|
| 117 |
-
SigmaReparam.fast_init = False
|
| 118 |
-
|
| 119 |
# Parameters for TformerTrajEncoder:
|
| 120 |
# ==============================================================================
|
| 121 |
TformerTrajEncoder.activation = 'leaky_relu'
|
| 122 |
-
TformerTrajEncoder.attention_type = @transformer.
|
| 123 |
TformerTrajEncoder.d_ff = 3072
|
| 124 |
TformerTrajEncoder.d_model = 768
|
| 125 |
TformerTrajEncoder.dropout_attn = 0.0
|
|
@@ -128,8 +121,12 @@ TformerTrajEncoder.dropout_ff = 0.05
|
|
| 128 |
TformerTrajEncoder.dropout_qkv = 0.0
|
| 129 |
TformerTrajEncoder.head_scaling = True
|
| 130 |
TformerTrajEncoder.n_heads = 12
|
| 131 |
-
TformerTrajEncoder.n_layers =
|
| 132 |
TformerTrajEncoder.norm = 'layer'
|
| 133 |
TformerTrajEncoder.normformer_norms = True
|
| 134 |
TformerTrajEncoder.pos_emb = 'fixed'
|
| 135 |
-
TformerTrajEncoder.sigma_reparam =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
# ==============================================================================
|
| 9 |
# None.
|
| 10 |
|
| 11 |
+
# Parameters for ClippedSlidingSinkAttention:
|
| 12 |
+
# ==============================================================================
|
| 13 |
+
ClippedSlidingSinkAttention.logit_clip = 50
|
| 14 |
+
ClippedSlidingSinkAttention.sink_bias = 0.0
|
| 15 |
+
ClippedSlidingSinkAttention.sink_size = 5
|
| 16 |
+
ClippedSlidingSinkAttention.window_size = 96
|
| 17 |
+
|
| 18 |
# Parameters for Discrete:
|
| 19 |
# ==============================================================================
|
| 20 |
Discrete.clip_prob_high = 0.99
|
|
|
|
| 30 |
Experiment.traj_save_len = 10000000000.0
|
| 31 |
Experiment.wandb_group_name = None
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
# Parameters for MetamonAMAGOExperiment:
|
| 34 |
# ==============================================================================
|
| 35 |
MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent
|
|
|
|
| 37 |
MetamonAMAGOExperiment.grad_clip = 1.5
|
| 38 |
MetamonAMAGOExperiment.l2_coeff = 0.0001
|
| 39 |
MetamonAMAGOExperiment.learning_rate = 0.000125
|
| 40 |
+
MetamonAMAGOExperiment.lr_warmup_steps = 1500
|
| 41 |
+
MetamonAMAGOExperiment.max_seq_len = 200
|
| 42 |
MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder
|
| 43 |
+
MetamonAMAGOExperiment.tstep_encoder_type = @MetamonTstepEncoder
|
| 44 |
|
| 45 |
# Parameters for MetamonMaskedResidualActor:
|
| 46 |
# ==============================================================================
|
| 47 |
MetamonMaskedResidualActor.activation = 'leaky_relu'
|
| 48 |
MetamonMaskedResidualActor.continuous_dist_type = None
|
| 49 |
MetamonMaskedResidualActor.dropout_p = 0.0
|
| 50 |
+
MetamonMaskedResidualActor.feature_dim = 256
|
| 51 |
MetamonMaskedResidualActor.mask_illegal_actions = True
|
| 52 |
MetamonMaskedResidualActor.normalization = 'layer'
|
| 53 |
MetamonMaskedResidualActor.residual_blocks = 2
|
| 54 |
MetamonMaskedResidualActor.residual_ff_dim = 512
|
| 55 |
|
| 56 |
+
# Parameters for MetamonTstepEncoder:
|
| 57 |
# ==============================================================================
|
| 58 |
+
MetamonTstepEncoder.d_model = 108
|
| 59 |
+
MetamonTstepEncoder.dropout = 0.05
|
| 60 |
+
MetamonTstepEncoder.extra_emb_dim = 18
|
| 61 |
+
MetamonTstepEncoder.n_heads = 6
|
| 62 |
+
MetamonTstepEncoder.n_layers = 4
|
| 63 |
+
MetamonTstepEncoder.numerical_tokens = 6
|
| 64 |
+
MetamonTstepEncoder.scratch_tokens = 6
|
| 65 |
+
MetamonTstepEncoder.token_mask_aug = False
|
|
|
|
| 66 |
|
| 67 |
# Parameters for Multigammas:
|
| 68 |
# ==============================================================================
|
|
|
|
| 81 |
MultiTaskAgent.num_critics = 6
|
| 82 |
MultiTaskAgent.num_critics_td = 2
|
| 83 |
MultiTaskAgent.offline_coeff = 1.0
|
| 84 |
+
MultiTaskAgent.online_coeff = 0.1
|
| 85 |
MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions']
|
| 86 |
MultiTaskAgent.popart = True
|
| 87 |
MultiTaskAgent.reward_multiplier = 10.0
|
|
|
|
| 100 |
NCriticsTwoHot.output_bins = 96
|
| 101 |
NCriticsTwoHot.use_symlog = False
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
# Parameters for PopArtLayer:
|
| 104 |
# ==============================================================================
|
| 105 |
PopArtLayer.beta = 0.0005
|
|
|
|
| 109 |
# ==============================================================================
|
| 110 |
# None.
|
| 111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
# Parameters for TformerTrajEncoder:
|
| 113 |
# ==============================================================================
|
| 114 |
TformerTrajEncoder.activation = 'leaky_relu'
|
| 115 |
+
TformerTrajEncoder.attention_type = @transformer.ClippedSlidingSinkAttention
|
| 116 |
TformerTrajEncoder.d_ff = 3072
|
| 117 |
TformerTrajEncoder.d_model = 768
|
| 118 |
TformerTrajEncoder.dropout_attn = 0.0
|
|
|
|
| 121 |
TformerTrajEncoder.dropout_qkv = 0.0
|
| 122 |
TformerTrajEncoder.head_scaling = True
|
| 123 |
TformerTrajEncoder.n_heads = 12
|
| 124 |
+
TformerTrajEncoder.n_layers = 6
|
| 125 |
TformerTrajEncoder.norm = 'layer'
|
| 126 |
TformerTrajEncoder.normformer_norms = True
|
| 127 |
TformerTrajEncoder.pos_emb = 'fixed'
|
| 128 |
+
TformerTrajEncoder.sigma_reparam = False
|
| 129 |
+
|
| 130 |
+
# Parameters for TransformerTurnEmbedding:
|
| 131 |
+
# ==============================================================================
|
| 132 |
+
# None.
|
{alakazam_patched → kadabra2}/ckpts/latest/policy.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68b9d7b7dbb389dc8fb66e92392384feb2a11b302b6e1130bfe32790cd1c177c
|
| 3 |
+
size 233767193
|
{alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_0.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:23066ec5e029b29b95d8c2a41c18e3793a69a765bfd630814b5eaaf5b290bf65
|
| 3 |
+
size 233772873
|
{alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_10.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb1dee5aa36e76bacdf1bb72c5428fa4c08865756573a27192f632df6b5634a3
|
| 3 |
+
size 233773103
|
{alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_12.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ed66cb98aa85736f05b94c939dc22e1885570d33326bf413bf73b938d26bca5
|
| 3 |
+
size 233773103
|
{alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_14.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de4aa18f93025d31bba32170207c1c09c712dd9b4ca0bd411bb23b3297900e01
|
| 3 |
+
size 233773103
|
{alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_16.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a07b872b14ff15a303a37a05beef6ffd72e56871056b465bfd012260b579b0f8
|
| 3 |
+
size 233773103
|
{alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_18.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4cffb486f4256c6a82be7df6ac68e275f4a121d2516e37a6f5ac6bbdd0608d56
|
| 3 |
+
size 233773103
|
{alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_2.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92421ef958cb85f73f43172431d7d648103efde6bddb6cc128258c284f874bd7
|
| 3 |
+
size 233772873
|
{alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_20.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9fe5a8dc3a349176445ae8a82ab605180de6bdfd431a6c692eb38d4a16a74b8b
|
| 3 |
+
size 233773103
|