diff --git a/alakazam/ckpts/config.txt b/alakazam/ckpts/config.txt index f1961e50085a6f3a24d443740fda8bd81080eb12..7664a71d55595a33192386637ee1c3b1682b53a3 100755 --- a/alakazam/ckpts/config.txt +++ b/alakazam/ckpts/config.txt @@ -8,13 +8,6 @@ import amago.nets.transformer as amago5 # ============================================================================== # None. -# Parameters for ClippedSlidingSinkAttention: -# ============================================================================== -ClippedSlidingSinkAttention.logit_clip = 50 -ClippedSlidingSinkAttention.sink_bias = 0.0 -ClippedSlidingSinkAttention.sink_size = 5 -ClippedSlidingSinkAttention.window_size = 96 - # Parameters for Discrete: # ============================================================================== Discrete.clip_prob_high = 0.99 @@ -30,6 +23,10 @@ Experiment.stagger_traj_file_lengths = True Experiment.traj_save_len = 10000000000.0 Experiment.wandb_group_name = None +# Parameters for FlashAttention: +# ============================================================================== +FlashAttention.window_size = (96, 0) + # Parameters for MetamonAMAGOExperiment: # ============================================================================== MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent @@ -37,32 +34,33 @@ MetamonAMAGOExperiment.critic_loss_weight = 13.5 MetamonAMAGOExperiment.grad_clip = 1.5 MetamonAMAGOExperiment.l2_coeff = 0.0001 MetamonAMAGOExperiment.learning_rate = 0.000125 -MetamonAMAGOExperiment.lr_warmup_steps = 1500 -MetamonAMAGOExperiment.max_seq_len = 200 +MetamonAMAGOExperiment.lr_warmup_steps = 2000 +MetamonAMAGOExperiment.max_seq_len = 128 MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder -MetamonAMAGOExperiment.tstep_encoder_type = @MetamonTstepEncoder +MetamonAMAGOExperiment.tstep_encoder_type = @MetamonPerceiverTstepEncoder # Parameters for MetamonMaskedResidualActor: # ============================================================================== MetamonMaskedResidualActor.activation = 'leaky_relu' MetamonMaskedResidualActor.continuous_dist_type = None MetamonMaskedResidualActor.dropout_p = 0.0 -MetamonMaskedResidualActor.feature_dim = 256 +MetamonMaskedResidualActor.feature_dim = 450 MetamonMaskedResidualActor.mask_illegal_actions = True MetamonMaskedResidualActor.normalization = 'layer' MetamonMaskedResidualActor.residual_blocks = 2 MetamonMaskedResidualActor.residual_ff_dim = 512 -# Parameters for MetamonTstepEncoder: +# Parameters for MetamonPerceiverTstepEncoder: # ============================================================================== -MetamonTstepEncoder.d_model = 108 -MetamonTstepEncoder.dropout = 0.05 -MetamonTstepEncoder.extra_emb_dim = 18 -MetamonTstepEncoder.n_heads = 6 -MetamonTstepEncoder.n_layers = 4 -MetamonTstepEncoder.numerical_tokens = 6 -MetamonTstepEncoder.scratch_tokens = 6 -MetamonTstepEncoder.token_mask_aug = False +MetamonPerceiverTstepEncoder.d_model = 108 +MetamonPerceiverTstepEncoder.dropout = 0.05 +MetamonPerceiverTstepEncoder.extra_emb_dim = 18 +MetamonPerceiverTstepEncoder.latent_tokens = 8 +MetamonPerceiverTstepEncoder.max_tokens_per_turn = 128 +MetamonPerceiverTstepEncoder.n_heads = 6 +MetamonPerceiverTstepEncoder.n_layers = 8 +MetamonPerceiverTstepEncoder.numerical_tokens = 6 +MetamonPerceiverTstepEncoder.token_mask_aug = False # Parameters for Multigammas: # ============================================================================== @@ -81,7 +79,7 @@ MultiTaskAgent.num_actions_for_value_in_critic_loss = 4 MultiTaskAgent.num_critics = 6 MultiTaskAgent.num_critics_td = 2 MultiTaskAgent.offline_coeff = 1.0 -MultiTaskAgent.online_coeff = 0.1 +MultiTaskAgent.online_coeff = 0.2 MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions'] MultiTaskAgent.popart = True MultiTaskAgent.reward_multiplier = 10.0 @@ -100,6 +98,10 @@ NCriticsTwoHot.n_layers = 2 NCriticsTwoHot.output_bins = 96 NCriticsTwoHot.use_symlog = False +# Parameters for PerceiverTurnEmbedding: +# ============================================================================== +# None. + # Parameters for PopArtLayer: # ============================================================================== PopArtLayer.beta = 0.0005 @@ -109,10 +111,15 @@ PopArtLayer.init_nu = 100.0 # ============================================================================== # None. +# Parameters for SigmaReparam: +# ============================================================================== +SigmaReparam.bias = True +SigmaReparam.fast_init = False + # Parameters for TformerTrajEncoder: # ============================================================================== TformerTrajEncoder.activation = 'leaky_relu' -TformerTrajEncoder.attention_type = @transformer.ClippedSlidingSinkAttention +TformerTrajEncoder.attention_type = @transformer.FlashAttention TformerTrajEncoder.d_ff = 3072 TformerTrajEncoder.d_model = 768 TformerTrajEncoder.dropout_attn = 0.0 @@ -121,12 +128,8 @@ TformerTrajEncoder.dropout_ff = 0.05 TformerTrajEncoder.dropout_qkv = 0.0 TformerTrajEncoder.head_scaling = True TformerTrajEncoder.n_heads = 12 -TformerTrajEncoder.n_layers = 6 +TformerTrajEncoder.n_layers = 8 TformerTrajEncoder.norm = 'layer' TformerTrajEncoder.normformer_norms = True TformerTrajEncoder.pos_emb = 'fixed' -TformerTrajEncoder.sigma_reparam = False - -# Parameters for TransformerTurnEmbedding: -# ============================================================================== -# None. +TformerTrajEncoder.sigma_reparam = True diff --git a/alakazam/ckpts/latest/policy.pt b/alakazam/ckpts/latest/policy.pt index d3c0c7e685f9de430f82275c0f7c7d26b93d5a78..e4b2feafa94abadfdf9edfc5e4292fb666d305ed 100755 --- a/alakazam/ckpts/latest/policy.pt +++ b/alakazam/ckpts/latest/policy.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:68b9d7b7dbb389dc8fb66e92392384feb2a11b302b6e1130bfe32790cd1c177c -size 233767193 +oid sha256:263e04fa26f1fde0df2fbe959f94b18245a3387d98918701f9f532202c4f888f +size 304148153 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_0.pt b/alakazam/ckpts/policy_weights/policy_epoch_0.pt index 67b27f7269a0793e722ecb4731c8d9bb171ec071..41605cbe5975d3660df6e5cd70fb1a2a7c1020e2 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_0.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_0.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:23066ec5e029b29b95d8c2a41c18e3793a69a765bfd630814b5eaaf5b290bf65 -size 233772873 +oid sha256:8fe0bcfa730996a37c126e4e7bd52d57f756293ba71ed1c458f9c5ab755d6dc9 +size 304154601 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_10.pt b/alakazam/ckpts/policy_weights/policy_epoch_10.pt index 9e44cc543c4ce9336131291ef5c517d502b97b50..838b2fb27b02947686f480c27271c21dd3196c1c 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_10.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_10.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cb1dee5aa36e76bacdf1bb72c5428fa4c08865756573a27192f632df6b5634a3 -size 233773103 +oid sha256:51a69dc023d1f1068858bfe89b7aaafdb7b609ca18aa8a26a06f9e9a72a8722b +size 304155119 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_12.pt b/alakazam/ckpts/policy_weights/policy_epoch_12.pt index dee1392283f80302f90c1defa73ba8c21c739389..336d3e09948360bb71b83b075c6f42d47b6130f8 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_12.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_12.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ed66cb98aa85736f05b94c939dc22e1885570d33326bf413bf73b938d26bca5 -size 233773103 +oid sha256:5d82e78b0c9e622308056217288419278d1fdc94a14a0aaf8ad06e56b6e7eb3b +size 304155119 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_14.pt b/alakazam/ckpts/policy_weights/policy_epoch_14.pt index fae750487bffb29190849ee9aba34a16088c4648..c71d7501c4aa816f128636c203b5f39f835a5261 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_14.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_14.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:de4aa18f93025d31bba32170207c1c09c712dd9b4ca0bd411bb23b3297900e01 -size 233773103 +oid sha256:b258755bd773a01cfeb2fe87fd5512a2559aec9cff05220c4bfb41627cac61de +size 304155119 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_16.pt b/alakazam/ckpts/policy_weights/policy_epoch_16.pt index 2749bfbc503f72b1d8e755b0a3e4dfbc6c491561..383cbd6d7f60da34507caacfed71e8753e78e49d 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_16.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_16.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a07b872b14ff15a303a37a05beef6ffd72e56871056b465bfd012260b579b0f8 -size 233773103 +oid sha256:422c854d0c144b43a851a09b4da32f674b207ca7b8f8eeb3c5d2e59431c4ef9f +size 304155119 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_18.pt b/alakazam/ckpts/policy_weights/policy_epoch_18.pt index 9e804ac1b0eeb482669093eb00b7cb7b592155d8..2c5275ae5560c5bba0de71fb1563705a5b2b5140 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_18.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_18.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4cffb486f4256c6a82be7df6ac68e275f4a121d2516e37a6f5ac6bbdd0608d56 -size 233773103 +oid sha256:bba20b8d8dd3b6429f7c532120e55a34737a5f6379ec5179de897ceed3ad3737 +size 304155119 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_2.pt b/alakazam/ckpts/policy_weights/policy_epoch_2.pt index c6dd8b1bc34560f82ebb242d3a2c50f94c29b15e..08cb5ce94501339ed342c2cd3fe779f735e9e05e 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_2.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_2.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:92421ef958cb85f73f43172431d7d648103efde6bddb6cc128258c284f874bd7 -size 233772873 +oid sha256:7f72fb7645fa54b8fb4ae83f94444f3da49be1220f3a8ccd051375c1ce25541d +size 304154601 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_20.pt b/alakazam/ckpts/policy_weights/policy_epoch_20.pt index d4660356d646e858a4f12c509f8bcfbd79a0ac54..11ec3e954421ffeed8d70d85b0c468910d2252de 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_20.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_20.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9fe5a8dc3a349176445ae8a82ab605180de6bdfd431a6c692eb38d4a16a74b8b -size 233773103 +oid sha256:e6639d34acc91cd0eddaa86028875bf6b59813e3a5faf64ea89fb85c35e16aef +size 304155119 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_22.pt b/alakazam/ckpts/policy_weights/policy_epoch_22.pt index 6f8a486c773c10fdc24f8dd08e25fb6bfb9923fd..b26d92e754bf944f1486cc84317d8cb9eb157472 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_22.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_22.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dbc1c4c1091337fa7eeb24adff88405d0794bc319ad536335922a2327fa44a68 -size 233773103 +oid sha256:118f44a65b00d1dac61ca0cf4ae5ec9e136bced8a5a748b5aa3dfadcb2da5981 +size 304155119 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_24.pt b/alakazam/ckpts/policy_weights/policy_epoch_24.pt index cdd3e43a1398d0ffdcc867fccb6cc35505595d26..200f313367ac707617831725b9dfe6d3f1625193 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_24.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_24.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3d85d042459182e9510e5c1d806fc7bec25510d76af846d87b6c6612e8d55c4d -size 233773103 +oid sha256:14c16320f64608e8700d50280587b9038e26b49180d04e15e98b6a60f4d55217 +size 304155119 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_26.pt b/alakazam/ckpts/policy_weights/policy_epoch_26.pt index ccc18af63284b2de688d55b0cc1f87e029a280f8..f7580774bd3f9e317bd6a18bea3b45ade1088270 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_26.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_26.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:134397c7107ce1278bf2021e571aa01340bb59270c17b7f4b052e79aee903e88 -size 233773103 +oid sha256:f26608bec87988179827df538c0f29d3df551a8da1942c0a9f59d66e8307b3cf +size 304155119 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_28.pt b/alakazam/ckpts/policy_weights/policy_epoch_28.pt index 8c48e56a4b009dc1d70f13df8223bcfc12fad18f..912d7aa7d3ee571a1f81f49d7c006834be0b3e8d 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_28.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_28.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:864d54aeeb99b5a78c9c2b447a4d20427c1f89ae0bff57476fe7ec14e9cc331d -size 233773103 +oid sha256:77320c39c973ce8413461708b59d0fdb19a84413ab06b86aef5ee44aa0e2e78c +size 304155119 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_30.pt b/alakazam/ckpts/policy_weights/policy_epoch_30.pt index dc4678c3131be02b264168c8d4843f119d56f91a..1555c03589456656d88065f24a4a4caed57a99d6 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_30.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_30.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0549977f771affff1b8509e44f24bf48990fea824ae4d287090442a06634a637 -size 233773103 +oid sha256:a08830d881f36926d79d20c14bd82e16563385d9f7c0b724608851419d0d6e68 +size 304155119 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_32.pt b/alakazam/ckpts/policy_weights/policy_epoch_32.pt index d67ff08f13f0ade509f80035083a7c2e77878e8b..e916e063f725b0a4ccfa370884bb7cf0ab71c748 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_32.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_32.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:04fd9a00c67e19b3aceff2248a6e5ba12aba59d56db9731ee40b6d03f5976a84 -size 233773103 +oid sha256:79b66f40eb8da147158d506579ddd0cebc5959e38c70ee4d853e17c234e68f05 +size 304155119 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_34.pt b/alakazam/ckpts/policy_weights/policy_epoch_34.pt index 482da30d51e97b2db952abdb62792f40ef3f3fdd..e31f38c6a3aac3e2c34f5358b2d46756bb0db672 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_34.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_34.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:35e3edcc5115e7e0911cf63171a0daa7f103ef5efb1235b19e09d8cc403921e1 -size 233773103 +oid sha256:7fd288a05ae901b98e6685b6a656190f50cb8d4eff614055527482063ea3841a +size 304155119 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_36.pt b/alakazam/ckpts/policy_weights/policy_epoch_36.pt index 544f69fff7c1b40353c980dd01b26c5f1fc19782..7ffcb86ae20c596379ecc68b117eb2d32ef2345d 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_36.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_36.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:68e79bf875d6ed5c0303504509f35d451c95bd1155f38b7dbe541c96dc8b4227 -size 233773103 +oid sha256:023787563c166b5a163f3c9b13eb8fa7ecbd8089fe1a850ca412a82c1c35b1ba +size 304155119 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_38.pt b/alakazam/ckpts/policy_weights/policy_epoch_38.pt index 32cca650f7afb80e9d409472cb8e4d7fb17a6195..708f1fbeb5672bfe243ce17c2fcba21b99aee09d 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_38.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_38.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f7c199ee69f72250c88d4d5ff69efd2931bbfad19fd3304bf7669cccbe0060b -size 233773103 +oid sha256:61089d65dc794f5326a1de700363c201b6ac6de6d424363635fb87af0dad8835 +size 304155119 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_4.pt b/alakazam/ckpts/policy_weights/policy_epoch_4.pt index 3f3260378f9e212773beb752a6bca1da9608316a..51b1757c71d57fed35c94f448dff6c8a6159cc25 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_4.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_4.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3af3e365fdc6db33abd761b7c192d84f4dad32adf0ab7979229c799d64a0fbce -size 233772873 +oid sha256:12d35584822a75d9b3886a1b9405a847e421386338c7398ea5ed40117b72d886 +size 304154601 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_40.pt b/alakazam/ckpts/policy_weights/policy_epoch_40.pt index 97c17107b0718930317a5f8dc35b5ae1eec73a7e..ec0cce54e58d9d03283424966eb750e6899b78d6 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_40.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_40.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5d14a7aecd49e3a201d9354f17c7d0b3d5161b8d1546168b31d878a0818b7e82 -size 233773103 +oid sha256:7e6849caa9426c47885fb2834056d244b3137d3235bd30f0ec87309ec95641d4 +size 304155119 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_42.pt b/alakazam/ckpts/policy_weights/policy_epoch_42.pt index 387e80fe9316d8f25a56bc48992d6566fe6f58fd..70675eb826ad30649c0c0d226f79714600d5979d 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_42.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_42.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:468d24a4ae25a6d5a1aa58e9234403259c8d4e544727da5f1f7c3083f8c6af66 -size 233773103 +oid sha256:c1cf6c7e5679788f20cebc4cabc3414c297e61a1b60b5fc875503bfd2da4e577 +size 304155119 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_44.pt b/alakazam/ckpts/policy_weights/policy_epoch_44.pt index f391934d8512fad663283193b3c6a6289ee3d2ae..ab04ea1ac651c6211ceb9b6d6060e4cc474daa2a 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_44.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_44.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba5a20cd3df727a11681e4a554ce8adc9e6cf5f81d38f0a59a3e661a987ec7f2 -size 233773103 +oid sha256:177d63ce5452caf3e81809c0cf211e8816be8b79ae56f5a87a7d48d5a6587556 +size 304155119 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_46.pt b/alakazam/ckpts/policy_weights/policy_epoch_46.pt index 8bea842695a5b087237cebe75f6521c0d0f24528..a6299f712df1473104482e4735ad973278eac2bf 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_46.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_46.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cc4ae28f50214281a494bebce96b85d3b58b20b07b7d742e9bb72f3f4f983c50 -size 233773103 +oid sha256:48764d855cded18a77977f198f3915991ba86e25a1f3ae5e9755b7c00a177ece +size 304155119 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_48.pt b/alakazam/ckpts/policy_weights/policy_epoch_48.pt index 4f1f42312454800f92eb9bc968ca2724c45ab48c..c0433a8073aab27d2a2848206cc15cce4c9818b2 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_48.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_48.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8d8f9eff4f41fc552c5e154fa0659bcf6b4038706f166d34d35370be614df3cd -size 233773103 +oid sha256:4a16589fc9feb4da418b9c75bc86bde419f9a83b0ee2506a4e172580d3f4cb13 +size 304155119 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_50.pt b/alakazam/ckpts/policy_weights/policy_epoch_50.pt index 175fee701df731ec7c3ab87a7373b2b404b79878..52413054b337ac18b73056f581791e3e1a0c83e0 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_50.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_50.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:af3a6fe709abf8c057c43c8e09672c9614a29808ef7cdf3277ed98a2502cd933 -size 233773103 +oid sha256:f8b0db60998cb863039edaf0c31fdf0e409dcc11ac7fe36b3bd8b651e2e046e9 +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_52.pt b/alakazam/ckpts/policy_weights/policy_epoch_52.pt similarity index 100% rename from alakazam_patched/ckpts/policy_weights/policy_epoch_52.pt rename to alakazam/ckpts/policy_weights/policy_epoch_52.pt diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_54.pt b/alakazam/ckpts/policy_weights/policy_epoch_54.pt similarity index 100% rename from alakazam_patched/ckpts/policy_weights/policy_epoch_54.pt rename to alakazam/ckpts/policy_weights/policy_epoch_54.pt diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_56.pt b/alakazam/ckpts/policy_weights/policy_epoch_56.pt similarity index 100% rename from alakazam_patched/ckpts/policy_weights/policy_epoch_56.pt rename to alakazam/ckpts/policy_weights/policy_epoch_56.pt diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_58.pt b/alakazam/ckpts/policy_weights/policy_epoch_58.pt similarity index 100% rename from alakazam_patched/ckpts/policy_weights/policy_epoch_58.pt rename to alakazam/ckpts/policy_weights/policy_epoch_58.pt diff --git a/alakazam/ckpts/policy_weights/policy_epoch_6.pt b/alakazam/ckpts/policy_weights/policy_epoch_6.pt index 83c467d590c0fc3db3bb71475558245196bc6dac..b0d1e0bf3ec01366c1ec9df47c4fc7b146369fb5 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_6.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_6.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4cc117703af7788d33cb011bbffd621ac26467298020e39262e22511dfba9751 -size 233772873 +oid sha256:44d3bfcd674a29a7ea4e114103e4b4b7253a819b4b26ab349d91b00515dd67b2 +size 304154601 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_60.pt b/alakazam/ckpts/policy_weights/policy_epoch_60.pt similarity index 100% rename from alakazam_patched/ckpts/policy_weights/policy_epoch_60.pt rename to alakazam/ckpts/policy_weights/policy_epoch_60.pt diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_62.pt b/alakazam/ckpts/policy_weights/policy_epoch_62.pt similarity index 100% rename from alakazam_patched/ckpts/policy_weights/policy_epoch_62.pt rename to alakazam/ckpts/policy_weights/policy_epoch_62.pt diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_64.pt b/alakazam/ckpts/policy_weights/policy_epoch_64.pt similarity index 100% rename from alakazam_patched/ckpts/policy_weights/policy_epoch_64.pt rename to alakazam/ckpts/policy_weights/policy_epoch_64.pt diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_66.pt b/alakazam/ckpts/policy_weights/policy_epoch_66.pt similarity index 100% rename from alakazam_patched/ckpts/policy_weights/policy_epoch_66.pt rename to alakazam/ckpts/policy_weights/policy_epoch_66.pt diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_68.pt b/alakazam/ckpts/policy_weights/policy_epoch_68.pt similarity index 100% rename from alakazam_patched/ckpts/policy_weights/policy_epoch_68.pt rename to alakazam/ckpts/policy_weights/policy_epoch_68.pt diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_70.pt b/alakazam/ckpts/policy_weights/policy_epoch_70.pt similarity index 100% rename from alakazam_patched/ckpts/policy_weights/policy_epoch_70.pt rename to alakazam/ckpts/policy_weights/policy_epoch_70.pt diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_72.pt b/alakazam/ckpts/policy_weights/policy_epoch_72.pt similarity index 100% rename from alakazam_patched/ckpts/policy_weights/policy_epoch_72.pt rename to alakazam/ckpts/policy_weights/policy_epoch_72.pt diff --git a/alakazam/ckpts/policy_weights/policy_epoch_8.pt b/alakazam/ckpts/policy_weights/policy_epoch_8.pt index 98cf07c617269ddb562432b4d037ef6f24c4b6b5..f81081abe4b12af4cb0953d3fb5760f8c0085dae 100755 --- a/alakazam/ckpts/policy_weights/policy_epoch_8.pt +++ b/alakazam/ckpts/policy_weights/policy_epoch_8.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:466bb352d16d9191fb32271e4504f43ba8d9aa7115c279006818984f7f1533e0 -size 233772873 +oid sha256:47a98516df7b346118fa824f9137e6aaaa1428d5a7150af01bf606c1ea935944 +size 304154601 diff --git a/alakazam_patched/ckpts/latest/policy.pt b/alakazam_patched/ckpts/latest/policy.pt deleted file mode 100755 index e4b2feafa94abadfdf9edfc5e4292fb666d305ed..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/latest/policy.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:263e04fa26f1fde0df2fbe959f94b18245a3387d98918701f9f532202c4f888f -size 304148153 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_0.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_0.pt deleted file mode 100755 index 41605cbe5975d3660df6e5cd70fb1a2a7c1020e2..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_0.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8fe0bcfa730996a37c126e4e7bd52d57f756293ba71ed1c458f9c5ab755d6dc9 -size 304154601 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_10.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_10.pt deleted file mode 100755 index 838b2fb27b02947686f480c27271c21dd3196c1c..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_10.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:51a69dc023d1f1068858bfe89b7aaafdb7b609ca18aa8a26a06f9e9a72a8722b -size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_12.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_12.pt deleted file mode 100755 index 336d3e09948360bb71b83b075c6f42d47b6130f8..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_12.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5d82e78b0c9e622308056217288419278d1fdc94a14a0aaf8ad06e56b6e7eb3b -size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_14.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_14.pt deleted file mode 100755 index c71d7501c4aa816f128636c203b5f39f835a5261..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_14.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b258755bd773a01cfeb2fe87fd5512a2559aec9cff05220c4bfb41627cac61de -size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_16.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_16.pt deleted file mode 100755 index 383cbd6d7f60da34507caacfed71e8753e78e49d..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_16.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:422c854d0c144b43a851a09b4da32f674b207ca7b8f8eeb3c5d2e59431c4ef9f -size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_18.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_18.pt deleted file mode 100755 index 2c5275ae5560c5bba0de71fb1563705a5b2b5140..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_18.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bba20b8d8dd3b6429f7c532120e55a34737a5f6379ec5179de897ceed3ad3737 -size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_2.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_2.pt deleted file mode 100755 index 08cb5ce94501339ed342c2cd3fe779f735e9e05e..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_2.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7f72fb7645fa54b8fb4ae83f94444f3da49be1220f3a8ccd051375c1ce25541d -size 304154601 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_20.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_20.pt deleted file mode 100755 index 11ec3e954421ffeed8d70d85b0c468910d2252de..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_20.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e6639d34acc91cd0eddaa86028875bf6b59813e3a5faf64ea89fb85c35e16aef -size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_22.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_22.pt deleted file mode 100755 index b26d92e754bf944f1486cc84317d8cb9eb157472..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_22.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:118f44a65b00d1dac61ca0cf4ae5ec9e136bced8a5a748b5aa3dfadcb2da5981 -size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_24.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_24.pt deleted file mode 100755 index 200f313367ac707617831725b9dfe6d3f1625193..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_24.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:14c16320f64608e8700d50280587b9038e26b49180d04e15e98b6a60f4d55217 -size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_26.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_26.pt deleted file mode 100755 index f7580774bd3f9e317bd6a18bea3b45ade1088270..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_26.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f26608bec87988179827df538c0f29d3df551a8da1942c0a9f59d66e8307b3cf -size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_28.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_28.pt deleted file mode 100755 index 912d7aa7d3ee571a1f81f49d7c006834be0b3e8d..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_28.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:77320c39c973ce8413461708b59d0fdb19a84413ab06b86aef5ee44aa0e2e78c -size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_30.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_30.pt deleted file mode 100755 index 1555c03589456656d88065f24a4a4caed57a99d6..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_30.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a08830d881f36926d79d20c14bd82e16563385d9f7c0b724608851419d0d6e68 -size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_32.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_32.pt deleted file mode 100755 index e916e063f725b0a4ccfa370884bb7cf0ab71c748..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_32.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:79b66f40eb8da147158d506579ddd0cebc5959e38c70ee4d853e17c234e68f05 -size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_34.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_34.pt deleted file mode 100755 index e31f38c6a3aac3e2c34f5358b2d46756bb0db672..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_34.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7fd288a05ae901b98e6685b6a656190f50cb8d4eff614055527482063ea3841a -size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_36.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_36.pt deleted file mode 100755 index 7ffcb86ae20c596379ecc68b117eb2d32ef2345d..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_36.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:023787563c166b5a163f3c9b13eb8fa7ecbd8089fe1a850ca412a82c1c35b1ba -size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_38.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_38.pt deleted file mode 100755 index 708f1fbeb5672bfe243ce17c2fcba21b99aee09d..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_38.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:61089d65dc794f5326a1de700363c201b6ac6de6d424363635fb87af0dad8835 -size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_4.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_4.pt deleted file mode 100755 index 51b1757c71d57fed35c94f448dff6c8a6159cc25..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_4.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:12d35584822a75d9b3886a1b9405a847e421386338c7398ea5ed40117b72d886 -size 304154601 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_40.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_40.pt deleted file mode 100755 index ec0cce54e58d9d03283424966eb750e6899b78d6..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_40.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7e6849caa9426c47885fb2834056d244b3137d3235bd30f0ec87309ec95641d4 -size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_42.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_42.pt deleted file mode 100755 index 70675eb826ad30649c0c0d226f79714600d5979d..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_42.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c1cf6c7e5679788f20cebc4cabc3414c297e61a1b60b5fc875503bfd2da4e577 -size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_44.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_44.pt deleted file mode 100755 index ab04ea1ac651c6211ceb9b6d6060e4cc474daa2a..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_44.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:177d63ce5452caf3e81809c0cf211e8816be8b79ae56f5a87a7d48d5a6587556 -size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_46.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_46.pt deleted file mode 100755 index a6299f712df1473104482e4735ad973278eac2bf..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_46.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:48764d855cded18a77977f198f3915991ba86e25a1f3ae5e9755b7c00a177ece -size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_48.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_48.pt deleted file mode 100755 index c0433a8073aab27d2a2848206cc15cce4c9818b2..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_48.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4a16589fc9feb4da418b9c75bc86bde419f9a83b0ee2506a4e172580d3f4cb13 -size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_50.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_50.pt deleted file mode 100755 index 52413054b337ac18b73056f581791e3e1a0c83e0..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_50.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f8b0db60998cb863039edaf0c31fdf0e409dcc11ac7fe36b3bd8b651e2e046e9 -size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_6.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_6.pt deleted file mode 100755 index b0d1e0bf3ec01366c1ec9df47c4fc7b146369fb5..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_6.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:44d3bfcd674a29a7ea4e114103e4b4b7253a819b4b26ab349d91b00515dd67b2 -size 304154601 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_8.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_8.pt deleted file mode 100755 index f81081abe4b12af4cb0953d3fb5760f8c0085dae..0000000000000000000000000000000000000000 --- a/alakazam_patched/ckpts/policy_weights/policy_epoch_8.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:47a98516df7b346118fa824f9137e6aaaa1428d5a7150af01bf606c1ea935944 -size 304154601 diff --git a/alakazam/alakazam/ckpts/config.txt b/kadabra2/alakazam/ckpts/config.txt similarity index 100% rename from alakazam/alakazam/ckpts/config.txt rename to kadabra2/alakazam/ckpts/config.txt diff --git a/alakazam_patched/ckpts/config.txt b/kadabra2/ckpts/config.txt similarity index 79% rename from alakazam_patched/ckpts/config.txt rename to kadabra2/ckpts/config.txt index 7664a71d55595a33192386637ee1c3b1682b53a3..f1961e50085a6f3a24d443740fda8bd81080eb12 100755 --- a/alakazam_patched/ckpts/config.txt +++ b/kadabra2/ckpts/config.txt @@ -8,6 +8,13 @@ import amago.nets.transformer as amago5 # ============================================================================== # None. +# Parameters for ClippedSlidingSinkAttention: +# ============================================================================== +ClippedSlidingSinkAttention.logit_clip = 50 +ClippedSlidingSinkAttention.sink_bias = 0.0 +ClippedSlidingSinkAttention.sink_size = 5 +ClippedSlidingSinkAttention.window_size = 96 + # Parameters for Discrete: # ============================================================================== Discrete.clip_prob_high = 0.99 @@ -23,10 +30,6 @@ Experiment.stagger_traj_file_lengths = True Experiment.traj_save_len = 10000000000.0 Experiment.wandb_group_name = None -# Parameters for FlashAttention: -# ============================================================================== -FlashAttention.window_size = (96, 0) - # Parameters for MetamonAMAGOExperiment: # ============================================================================== MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent @@ -34,33 +37,32 @@ MetamonAMAGOExperiment.critic_loss_weight = 13.5 MetamonAMAGOExperiment.grad_clip = 1.5 MetamonAMAGOExperiment.l2_coeff = 0.0001 MetamonAMAGOExperiment.learning_rate = 0.000125 -MetamonAMAGOExperiment.lr_warmup_steps = 2000 -MetamonAMAGOExperiment.max_seq_len = 128 +MetamonAMAGOExperiment.lr_warmup_steps = 1500 +MetamonAMAGOExperiment.max_seq_len = 200 MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder -MetamonAMAGOExperiment.tstep_encoder_type = @MetamonPerceiverTstepEncoder +MetamonAMAGOExperiment.tstep_encoder_type = @MetamonTstepEncoder # Parameters for MetamonMaskedResidualActor: # ============================================================================== MetamonMaskedResidualActor.activation = 'leaky_relu' MetamonMaskedResidualActor.continuous_dist_type = None MetamonMaskedResidualActor.dropout_p = 0.0 -MetamonMaskedResidualActor.feature_dim = 450 +MetamonMaskedResidualActor.feature_dim = 256 MetamonMaskedResidualActor.mask_illegal_actions = True MetamonMaskedResidualActor.normalization = 'layer' MetamonMaskedResidualActor.residual_blocks = 2 MetamonMaskedResidualActor.residual_ff_dim = 512 -# Parameters for MetamonPerceiverTstepEncoder: +# Parameters for MetamonTstepEncoder: # ============================================================================== -MetamonPerceiverTstepEncoder.d_model = 108 -MetamonPerceiverTstepEncoder.dropout = 0.05 -MetamonPerceiverTstepEncoder.extra_emb_dim = 18 -MetamonPerceiverTstepEncoder.latent_tokens = 8 -MetamonPerceiverTstepEncoder.max_tokens_per_turn = 128 -MetamonPerceiverTstepEncoder.n_heads = 6 -MetamonPerceiverTstepEncoder.n_layers = 8 -MetamonPerceiverTstepEncoder.numerical_tokens = 6 -MetamonPerceiverTstepEncoder.token_mask_aug = False +MetamonTstepEncoder.d_model = 108 +MetamonTstepEncoder.dropout = 0.05 +MetamonTstepEncoder.extra_emb_dim = 18 +MetamonTstepEncoder.n_heads = 6 +MetamonTstepEncoder.n_layers = 4 +MetamonTstepEncoder.numerical_tokens = 6 +MetamonTstepEncoder.scratch_tokens = 6 +MetamonTstepEncoder.token_mask_aug = False # Parameters for Multigammas: # ============================================================================== @@ -79,7 +81,7 @@ MultiTaskAgent.num_actions_for_value_in_critic_loss = 4 MultiTaskAgent.num_critics = 6 MultiTaskAgent.num_critics_td = 2 MultiTaskAgent.offline_coeff = 1.0 -MultiTaskAgent.online_coeff = 0.2 +MultiTaskAgent.online_coeff = 0.1 MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions'] MultiTaskAgent.popart = True MultiTaskAgent.reward_multiplier = 10.0 @@ -98,10 +100,6 @@ NCriticsTwoHot.n_layers = 2 NCriticsTwoHot.output_bins = 96 NCriticsTwoHot.use_symlog = False -# Parameters for PerceiverTurnEmbedding: -# ============================================================================== -# None. - # Parameters for PopArtLayer: # ============================================================================== PopArtLayer.beta = 0.0005 @@ -111,15 +109,10 @@ PopArtLayer.init_nu = 100.0 # ============================================================================== # None. -# Parameters for SigmaReparam: -# ============================================================================== -SigmaReparam.bias = True -SigmaReparam.fast_init = False - # Parameters for TformerTrajEncoder: # ============================================================================== TformerTrajEncoder.activation = 'leaky_relu' -TformerTrajEncoder.attention_type = @transformer.FlashAttention +TformerTrajEncoder.attention_type = @transformer.ClippedSlidingSinkAttention TformerTrajEncoder.d_ff = 3072 TformerTrajEncoder.d_model = 768 TformerTrajEncoder.dropout_attn = 0.0 @@ -128,8 +121,12 @@ TformerTrajEncoder.dropout_ff = 0.05 TformerTrajEncoder.dropout_qkv = 0.0 TformerTrajEncoder.head_scaling = True TformerTrajEncoder.n_heads = 12 -TformerTrajEncoder.n_layers = 8 +TformerTrajEncoder.n_layers = 6 TformerTrajEncoder.norm = 'layer' TformerTrajEncoder.normformer_norms = True TformerTrajEncoder.pos_emb = 'fixed' -TformerTrajEncoder.sigma_reparam = True +TformerTrajEncoder.sigma_reparam = False + +# Parameters for TransformerTurnEmbedding: +# ============================================================================== +# None. diff --git a/kadabra2/ckpts/latest/policy.pt b/kadabra2/ckpts/latest/policy.pt new file mode 100755 index 0000000000000000000000000000000000000000..d3c0c7e685f9de430f82275c0f7c7d26b93d5a78 --- /dev/null +++ b/kadabra2/ckpts/latest/policy.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68b9d7b7dbb389dc8fb66e92392384feb2a11b302b6e1130bfe32790cd1c177c +size 233767193 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_0.pt b/kadabra2/ckpts/policy_weights/policy_epoch_0.pt new file mode 100755 index 0000000000000000000000000000000000000000..67b27f7269a0793e722ecb4731c8d9bb171ec071 --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23066ec5e029b29b95d8c2a41c18e3793a69a765bfd630814b5eaaf5b290bf65 +size 233772873 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_10.pt b/kadabra2/ckpts/policy_weights/policy_epoch_10.pt new file mode 100755 index 0000000000000000000000000000000000000000..9e44cc543c4ce9336131291ef5c517d502b97b50 --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb1dee5aa36e76bacdf1bb72c5428fa4c08865756573a27192f632df6b5634a3 +size 233773103 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_12.pt b/kadabra2/ckpts/policy_weights/policy_epoch_12.pt new file mode 100755 index 0000000000000000000000000000000000000000..dee1392283f80302f90c1defa73ba8c21c739389 --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_12.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ed66cb98aa85736f05b94c939dc22e1885570d33326bf413bf73b938d26bca5 +size 233773103 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_14.pt b/kadabra2/ckpts/policy_weights/policy_epoch_14.pt new file mode 100755 index 0000000000000000000000000000000000000000..fae750487bffb29190849ee9aba34a16088c4648 --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de4aa18f93025d31bba32170207c1c09c712dd9b4ca0bd411bb23b3297900e01 +size 233773103 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_16.pt b/kadabra2/ckpts/policy_weights/policy_epoch_16.pt new file mode 100755 index 0000000000000000000000000000000000000000..2749bfbc503f72b1d8e755b0a3e4dfbc6c491561 --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a07b872b14ff15a303a37a05beef6ffd72e56871056b465bfd012260b579b0f8 +size 233773103 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_18.pt b/kadabra2/ckpts/policy_weights/policy_epoch_18.pt new file mode 100755 index 0000000000000000000000000000000000000000..9e804ac1b0eeb482669093eb00b7cb7b592155d8 --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cffb486f4256c6a82be7df6ac68e275f4a121d2516e37a6f5ac6bbdd0608d56 +size 233773103 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_2.pt b/kadabra2/ckpts/policy_weights/policy_epoch_2.pt new file mode 100755 index 0000000000000000000000000000000000000000..c6dd8b1bc34560f82ebb242d3a2c50f94c29b15e --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92421ef958cb85f73f43172431d7d648103efde6bddb6cc128258c284f874bd7 +size 233772873 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_20.pt b/kadabra2/ckpts/policy_weights/policy_epoch_20.pt new file mode 100755 index 0000000000000000000000000000000000000000..d4660356d646e858a4f12c509f8bcfbd79a0ac54 --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_20.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fe5a8dc3a349176445ae8a82ab605180de6bdfd431a6c692eb38d4a16a74b8b +size 233773103 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_22.pt b/kadabra2/ckpts/policy_weights/policy_epoch_22.pt new file mode 100755 index 0000000000000000000000000000000000000000..6f8a486c773c10fdc24f8dd08e25fb6bfb9923fd --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbc1c4c1091337fa7eeb24adff88405d0794bc319ad536335922a2327fa44a68 +size 233773103 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_24.pt b/kadabra2/ckpts/policy_weights/policy_epoch_24.pt new file mode 100755 index 0000000000000000000000000000000000000000..cdd3e43a1398d0ffdcc867fccb6cc35505595d26 --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_24.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d85d042459182e9510e5c1d806fc7bec25510d76af846d87b6c6612e8d55c4d +size 233773103 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_26.pt b/kadabra2/ckpts/policy_weights/policy_epoch_26.pt new file mode 100755 index 0000000000000000000000000000000000000000..ccc18af63284b2de688d55b0cc1f87e029a280f8 --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:134397c7107ce1278bf2021e571aa01340bb59270c17b7f4b052e79aee903e88 +size 233773103 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_28.pt b/kadabra2/ckpts/policy_weights/policy_epoch_28.pt new file mode 100755 index 0000000000000000000000000000000000000000..8c48e56a4b009dc1d70f13df8223bcfc12fad18f --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_28.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:864d54aeeb99b5a78c9c2b447a4d20427c1f89ae0bff57476fe7ec14e9cc331d +size 233773103 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_30.pt b/kadabra2/ckpts/policy_weights/policy_epoch_30.pt new file mode 100755 index 0000000000000000000000000000000000000000..dc4678c3131be02b264168c8d4843f119d56f91a --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_30.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0549977f771affff1b8509e44f24bf48990fea824ae4d287090442a06634a637 +size 233773103 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_32.pt b/kadabra2/ckpts/policy_weights/policy_epoch_32.pt new file mode 100755 index 0000000000000000000000000000000000000000..d67ff08f13f0ade509f80035083a7c2e77878e8b --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04fd9a00c67e19b3aceff2248a6e5ba12aba59d56db9731ee40b6d03f5976a84 +size 233773103 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_34.pt b/kadabra2/ckpts/policy_weights/policy_epoch_34.pt new file mode 100755 index 0000000000000000000000000000000000000000..482da30d51e97b2db952abdb62792f40ef3f3fdd --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_34.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35e3edcc5115e7e0911cf63171a0daa7f103ef5efb1235b19e09d8cc403921e1 +size 233773103 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_36.pt b/kadabra2/ckpts/policy_weights/policy_epoch_36.pt new file mode 100755 index 0000000000000000000000000000000000000000..544f69fff7c1b40353c980dd01b26c5f1fc19782 --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_36.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68e79bf875d6ed5c0303504509f35d451c95bd1155f38b7dbe541c96dc8b4227 +size 233773103 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_38.pt b/kadabra2/ckpts/policy_weights/policy_epoch_38.pt new file mode 100755 index 0000000000000000000000000000000000000000..32cca650f7afb80e9d409472cb8e4d7fb17a6195 --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_38.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f7c199ee69f72250c88d4d5ff69efd2931bbfad19fd3304bf7669cccbe0060b +size 233773103 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_4.pt b/kadabra2/ckpts/policy_weights/policy_epoch_4.pt new file mode 100755 index 0000000000000000000000000000000000000000..3f3260378f9e212773beb752a6bca1da9608316a --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3af3e365fdc6db33abd761b7c192d84f4dad32adf0ab7979229c799d64a0fbce +size 233772873 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_40.pt b/kadabra2/ckpts/policy_weights/policy_epoch_40.pt new file mode 100755 index 0000000000000000000000000000000000000000..97c17107b0718930317a5f8dc35b5ae1eec73a7e --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_40.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d14a7aecd49e3a201d9354f17c7d0b3d5161b8d1546168b31d878a0818b7e82 +size 233773103 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_42.pt b/kadabra2/ckpts/policy_weights/policy_epoch_42.pt new file mode 100755 index 0000000000000000000000000000000000000000..387e80fe9316d8f25a56bc48992d6566fe6f58fd --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_42.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:468d24a4ae25a6d5a1aa58e9234403259c8d4e544727da5f1f7c3083f8c6af66 +size 233773103 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_44.pt b/kadabra2/ckpts/policy_weights/policy_epoch_44.pt new file mode 100755 index 0000000000000000000000000000000000000000..f391934d8512fad663283193b3c6a6289ee3d2ae --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_44.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba5a20cd3df727a11681e4a554ce8adc9e6cf5f81d38f0a59a3e661a987ec7f2 +size 233773103 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_46.pt b/kadabra2/ckpts/policy_weights/policy_epoch_46.pt new file mode 100755 index 0000000000000000000000000000000000000000..8bea842695a5b087237cebe75f6521c0d0f24528 --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_46.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc4ae28f50214281a494bebce96b85d3b58b20b07b7d742e9bb72f3f4f983c50 +size 233773103 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_48.pt b/kadabra2/ckpts/policy_weights/policy_epoch_48.pt new file mode 100755 index 0000000000000000000000000000000000000000..4f1f42312454800f92eb9bc968ca2724c45ab48c --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_48.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d8f9eff4f41fc552c5e154fa0659bcf6b4038706f166d34d35370be614df3cd +size 233773103 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_50.pt b/kadabra2/ckpts/policy_weights/policy_epoch_50.pt new file mode 100755 index 0000000000000000000000000000000000000000..175fee701df731ec7c3ab87a7373b2b404b79878 --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_50.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af3a6fe709abf8c057c43c8e09672c9614a29808ef7cdf3277ed98a2502cd933 +size 233773103 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_6.pt b/kadabra2/ckpts/policy_weights/policy_epoch_6.pt new file mode 100755 index 0000000000000000000000000000000000000000..83c467d590c0fc3db3bb71475558245196bc6dac --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cc117703af7788d33cb011bbffd621ac26467298020e39262e22511dfba9751 +size 233772873 diff --git a/kadabra2/ckpts/policy_weights/policy_epoch_8.pt b/kadabra2/ckpts/policy_weights/policy_epoch_8.pt new file mode 100755 index 0000000000000000000000000000000000000000..98cf07c617269ddb562432b4d037ef6f24c4b6b5 --- /dev/null +++ b/kadabra2/ckpts/policy_weights/policy_epoch_8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:466bb352d16d9191fb32271e4504f43ba8d9aa7115c279006818984f7f1533e0 +size 233772873 diff --git a/alakazam2/ckpts/config.txt b/kadabra3/ckpts/config.txt similarity index 100% rename from alakazam2/ckpts/config.txt rename to kadabra3/ckpts/config.txt diff --git a/alakazam2/ckpts/latest/policy.pt b/kadabra3/ckpts/latest/policy.pt similarity index 100% rename from alakazam2/ckpts/latest/policy.pt rename to kadabra3/ckpts/latest/policy.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_0.pt b/kadabra3/ckpts/policy_weights/policy_epoch_0.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_0.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_0.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_10.pt b/kadabra3/ckpts/policy_weights/policy_epoch_10.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_10.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_10.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_12.pt b/kadabra3/ckpts/policy_weights/policy_epoch_12.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_12.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_12.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_14.pt b/kadabra3/ckpts/policy_weights/policy_epoch_14.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_14.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_14.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_16.pt b/kadabra3/ckpts/policy_weights/policy_epoch_16.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_16.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_16.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_18.pt b/kadabra3/ckpts/policy_weights/policy_epoch_18.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_18.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_18.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_2.pt b/kadabra3/ckpts/policy_weights/policy_epoch_2.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_2.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_2.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_20.pt b/kadabra3/ckpts/policy_weights/policy_epoch_20.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_20.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_20.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_22.pt b/kadabra3/ckpts/policy_weights/policy_epoch_22.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_22.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_22.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_24.pt b/kadabra3/ckpts/policy_weights/policy_epoch_24.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_24.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_24.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_26.pt b/kadabra3/ckpts/policy_weights/policy_epoch_26.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_26.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_26.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_28.pt b/kadabra3/ckpts/policy_weights/policy_epoch_28.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_28.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_28.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_30.pt b/kadabra3/ckpts/policy_weights/policy_epoch_30.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_30.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_30.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_32.pt b/kadabra3/ckpts/policy_weights/policy_epoch_32.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_32.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_32.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_34.pt b/kadabra3/ckpts/policy_weights/policy_epoch_34.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_34.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_34.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_36.pt b/kadabra3/ckpts/policy_weights/policy_epoch_36.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_36.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_36.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_38.pt b/kadabra3/ckpts/policy_weights/policy_epoch_38.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_38.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_38.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_4.pt b/kadabra3/ckpts/policy_weights/policy_epoch_4.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_4.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_4.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_40.pt b/kadabra3/ckpts/policy_weights/policy_epoch_40.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_40.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_40.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_42.pt b/kadabra3/ckpts/policy_weights/policy_epoch_42.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_42.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_42.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_44.pt b/kadabra3/ckpts/policy_weights/policy_epoch_44.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_44.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_44.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_46.pt b/kadabra3/ckpts/policy_weights/policy_epoch_46.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_46.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_46.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_48.pt b/kadabra3/ckpts/policy_weights/policy_epoch_48.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_48.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_48.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_6.pt b/kadabra3/ckpts/policy_weights/policy_epoch_6.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_6.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_6.pt diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_8.pt b/kadabra3/ckpts/policy_weights/policy_epoch_8.pt similarity index 100% rename from alakazam2/ckpts/policy_weights/policy_epoch_8.pt rename to kadabra3/ckpts/policy_weights/policy_epoch_8.pt diff --git a/alakazam3/ckpts/config.txt b/kadabra4/ckpts/config.txt similarity index 100% rename from alakazam3/ckpts/config.txt rename to kadabra4/ckpts/config.txt diff --git a/alakazam3/ckpts/latest/policy.pt b/kadabra4/ckpts/latest/policy.pt similarity index 100% rename from alakazam3/ckpts/latest/policy.pt rename to kadabra4/ckpts/latest/policy.pt diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_0.pt b/kadabra4/ckpts/policy_weights/policy_epoch_0.pt similarity index 100% rename from alakazam3/ckpts/policy_weights/policy_epoch_0.pt rename to kadabra4/ckpts/policy_weights/policy_epoch_0.pt diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_10.pt b/kadabra4/ckpts/policy_weights/policy_epoch_10.pt similarity index 100% rename from alakazam3/ckpts/policy_weights/policy_epoch_10.pt rename to kadabra4/ckpts/policy_weights/policy_epoch_10.pt diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_12.pt b/kadabra4/ckpts/policy_weights/policy_epoch_12.pt similarity index 100% rename from alakazam3/ckpts/policy_weights/policy_epoch_12.pt rename to kadabra4/ckpts/policy_weights/policy_epoch_12.pt diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_14.pt b/kadabra4/ckpts/policy_weights/policy_epoch_14.pt similarity index 100% rename from alakazam3/ckpts/policy_weights/policy_epoch_14.pt rename to kadabra4/ckpts/policy_weights/policy_epoch_14.pt diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_16.pt b/kadabra4/ckpts/policy_weights/policy_epoch_16.pt similarity index 100% rename from alakazam3/ckpts/policy_weights/policy_epoch_16.pt rename to kadabra4/ckpts/policy_weights/policy_epoch_16.pt diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_18.pt b/kadabra4/ckpts/policy_weights/policy_epoch_18.pt similarity index 100% rename from alakazam3/ckpts/policy_weights/policy_epoch_18.pt rename to kadabra4/ckpts/policy_weights/policy_epoch_18.pt diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_2.pt b/kadabra4/ckpts/policy_weights/policy_epoch_2.pt similarity index 100% rename from alakazam3/ckpts/policy_weights/policy_epoch_2.pt rename to kadabra4/ckpts/policy_weights/policy_epoch_2.pt diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_20.pt b/kadabra4/ckpts/policy_weights/policy_epoch_20.pt similarity index 100% rename from alakazam3/ckpts/policy_weights/policy_epoch_20.pt rename to kadabra4/ckpts/policy_weights/policy_epoch_20.pt diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_22.pt b/kadabra4/ckpts/policy_weights/policy_epoch_22.pt similarity index 100% rename from alakazam3/ckpts/policy_weights/policy_epoch_22.pt rename to kadabra4/ckpts/policy_weights/policy_epoch_22.pt diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_4.pt b/kadabra4/ckpts/policy_weights/policy_epoch_4.pt similarity index 100% rename from alakazam3/ckpts/policy_weights/policy_epoch_4.pt rename to kadabra4/ckpts/policy_weights/policy_epoch_4.pt diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_6.pt b/kadabra4/ckpts/policy_weights/policy_epoch_6.pt similarity index 100% rename from alakazam3/ckpts/policy_weights/policy_epoch_6.pt rename to kadabra4/ckpts/policy_weights/policy_epoch_6.pt diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_8.pt b/kadabra4/ckpts/policy_weights/policy_epoch_8.pt similarity index 100% rename from alakazam3/ckpts/policy_weights/policy_epoch_8.pt rename to kadabra4/ckpts/policy_weights/policy_epoch_8.pt diff --git a/alakazam4/ckpts/config.txt b/kadabra5/ckpts/config.txt similarity index 100% rename from alakazam4/ckpts/config.txt rename to kadabra5/ckpts/config.txt diff --git a/alakazam4/ckpts/latest/policy.pt b/kadabra5/ckpts/latest/policy.pt similarity index 100% rename from alakazam4/ckpts/latest/policy.pt rename to kadabra5/ckpts/latest/policy.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_0.pt b/kadabra5/ckpts/policy_weights/policy_epoch_0.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_0.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_0.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_10.pt b/kadabra5/ckpts/policy_weights/policy_epoch_10.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_10.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_10.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_12.pt b/kadabra5/ckpts/policy_weights/policy_epoch_12.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_12.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_12.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_14.pt b/kadabra5/ckpts/policy_weights/policy_epoch_14.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_14.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_14.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_16.pt b/kadabra5/ckpts/policy_weights/policy_epoch_16.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_16.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_16.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_18.pt b/kadabra5/ckpts/policy_weights/policy_epoch_18.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_18.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_18.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_2.pt b/kadabra5/ckpts/policy_weights/policy_epoch_2.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_2.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_2.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_20.pt b/kadabra5/ckpts/policy_weights/policy_epoch_20.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_20.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_20.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_22.pt b/kadabra5/ckpts/policy_weights/policy_epoch_22.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_22.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_22.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_24.pt b/kadabra5/ckpts/policy_weights/policy_epoch_24.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_24.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_24.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_26.pt b/kadabra5/ckpts/policy_weights/policy_epoch_26.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_26.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_26.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_28.pt b/kadabra5/ckpts/policy_weights/policy_epoch_28.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_28.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_28.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_30.pt b/kadabra5/ckpts/policy_weights/policy_epoch_30.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_30.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_30.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_32.pt b/kadabra5/ckpts/policy_weights/policy_epoch_32.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_32.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_32.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_34.pt b/kadabra5/ckpts/policy_weights/policy_epoch_34.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_34.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_34.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_36.pt b/kadabra5/ckpts/policy_weights/policy_epoch_36.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_36.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_36.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_38.pt b/kadabra5/ckpts/policy_weights/policy_epoch_38.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_38.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_38.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_4.pt b/kadabra5/ckpts/policy_weights/policy_epoch_4.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_4.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_4.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_40.pt b/kadabra5/ckpts/policy_weights/policy_epoch_40.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_40.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_40.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_42.pt b/kadabra5/ckpts/policy_weights/policy_epoch_42.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_42.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_42.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_44.pt b/kadabra5/ckpts/policy_weights/policy_epoch_44.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_44.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_44.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_46.pt b/kadabra5/ckpts/policy_weights/policy_epoch_46.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_46.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_46.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_48.pt b/kadabra5/ckpts/policy_weights/policy_epoch_48.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_48.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_48.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_50.pt b/kadabra5/ckpts/policy_weights/policy_epoch_50.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_50.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_50.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_52.pt b/kadabra5/ckpts/policy_weights/policy_epoch_52.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_52.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_52.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_54.pt b/kadabra5/ckpts/policy_weights/policy_epoch_54.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_54.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_54.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_6.pt b/kadabra5/ckpts/policy_weights/policy_epoch_6.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_6.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_6.pt diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_8.pt b/kadabra5/ckpts/policy_weights/policy_epoch_8.pt similarity index 100% rename from alakazam4/ckpts/policy_weights/policy_epoch_8.pt rename to kadabra5/ckpts/policy_weights/policy_epoch_8.pt