jakegrigsby committed on
Commit
2de4913
·
1 Parent(s): 717dcf8

change model names

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. alakazam/ckpts/config.txt +31 -28
  2. alakazam/ckpts/latest/policy.pt +2 -2
  3. alakazam/ckpts/policy_weights/policy_epoch_0.pt +2 -2
  4. alakazam/ckpts/policy_weights/policy_epoch_10.pt +2 -2
  5. alakazam/ckpts/policy_weights/policy_epoch_12.pt +2 -2
  6. alakazam/ckpts/policy_weights/policy_epoch_14.pt +2 -2
  7. alakazam/ckpts/policy_weights/policy_epoch_16.pt +2 -2
  8. alakazam/ckpts/policy_weights/policy_epoch_18.pt +2 -2
  9. alakazam/ckpts/policy_weights/policy_epoch_2.pt +2 -2
  10. alakazam/ckpts/policy_weights/policy_epoch_20.pt +2 -2
  11. alakazam/ckpts/policy_weights/policy_epoch_22.pt +2 -2
  12. alakazam/ckpts/policy_weights/policy_epoch_24.pt +2 -2
  13. alakazam/ckpts/policy_weights/policy_epoch_26.pt +2 -2
  14. alakazam/ckpts/policy_weights/policy_epoch_28.pt +2 -2
  15. alakazam/ckpts/policy_weights/policy_epoch_30.pt +2 -2
  16. alakazam/ckpts/policy_weights/policy_epoch_32.pt +2 -2
  17. alakazam/ckpts/policy_weights/policy_epoch_34.pt +2 -2
  18. alakazam/ckpts/policy_weights/policy_epoch_36.pt +2 -2
  19. alakazam/ckpts/policy_weights/policy_epoch_38.pt +2 -2
  20. alakazam/ckpts/policy_weights/policy_epoch_4.pt +2 -2
  21. alakazam/ckpts/policy_weights/policy_epoch_40.pt +2 -2
  22. alakazam/ckpts/policy_weights/policy_epoch_42.pt +2 -2
  23. alakazam/ckpts/policy_weights/policy_epoch_44.pt +2 -2
  24. alakazam/ckpts/policy_weights/policy_epoch_46.pt +2 -2
  25. alakazam/ckpts/policy_weights/policy_epoch_48.pt +2 -2
  26. alakazam/ckpts/policy_weights/policy_epoch_50.pt +2 -2
  27. {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_52.pt +0 -0
  28. {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_54.pt +0 -0
  29. {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_56.pt +0 -0
  30. {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_58.pt +0 -0
  31. alakazam/ckpts/policy_weights/policy_epoch_6.pt +2 -2
  32. {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_60.pt +0 -0
  33. {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_62.pt +0 -0
  34. {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_64.pt +0 -0
  35. {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_66.pt +0 -0
  36. {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_68.pt +0 -0
  37. {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_70.pt +0 -0
  38. {alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_72.pt +0 -0
  39. alakazam/ckpts/policy_weights/policy_epoch_8.pt +2 -2
  40. {alakazam → kadabra2}/alakazam/ckpts/config.txt +0 -0
  41. {alakazam_patched → kadabra2}/ckpts/config.txt +28 -31
  42. {alakazam_patched → kadabra2}/ckpts/latest/policy.pt +2 -2
  43. {alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_0.pt +2 -2
  44. {alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_10.pt +2 -2
  45. {alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_12.pt +2 -2
  46. {alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_14.pt +2 -2
  47. {alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_16.pt +2 -2
  48. {alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_18.pt +2 -2
  49. {alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_2.pt +2 -2
  50. {alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_20.pt +2 -2
alakazam/ckpts/config.txt CHANGED
@@ -8,13 +8,6 @@ import amago.nets.transformer as amago5
8
  # ==============================================================================
9
  # None.
10
 
11
- # Parameters for ClippedSlidingSinkAttention:
12
- # ==============================================================================
13
- ClippedSlidingSinkAttention.logit_clip = 50
14
- ClippedSlidingSinkAttention.sink_bias = 0.0
15
- ClippedSlidingSinkAttention.sink_size = 5
16
- ClippedSlidingSinkAttention.window_size = 96
17
-
18
  # Parameters for Discrete:
19
  # ==============================================================================
20
  Discrete.clip_prob_high = 0.99
@@ -30,6 +23,10 @@ Experiment.stagger_traj_file_lengths = True
30
  Experiment.traj_save_len = 10000000000.0
31
  Experiment.wandb_group_name = None
32
 
 
 
 
 
33
  # Parameters for MetamonAMAGOExperiment:
34
  # ==============================================================================
35
  MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent
@@ -37,32 +34,33 @@ MetamonAMAGOExperiment.critic_loss_weight = 13.5
37
  MetamonAMAGOExperiment.grad_clip = 1.5
38
  MetamonAMAGOExperiment.l2_coeff = 0.0001
39
  MetamonAMAGOExperiment.learning_rate = 0.000125
40
- MetamonAMAGOExperiment.lr_warmup_steps = 1500
41
- MetamonAMAGOExperiment.max_seq_len = 200
42
  MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder
43
- MetamonAMAGOExperiment.tstep_encoder_type = @MetamonTstepEncoder
44
 
45
  # Parameters for MetamonMaskedResidualActor:
46
  # ==============================================================================
47
  MetamonMaskedResidualActor.activation = 'leaky_relu'
48
  MetamonMaskedResidualActor.continuous_dist_type = None
49
  MetamonMaskedResidualActor.dropout_p = 0.0
50
- MetamonMaskedResidualActor.feature_dim = 256
51
  MetamonMaskedResidualActor.mask_illegal_actions = True
52
  MetamonMaskedResidualActor.normalization = 'layer'
53
  MetamonMaskedResidualActor.residual_blocks = 2
54
  MetamonMaskedResidualActor.residual_ff_dim = 512
55
 
56
- # Parameters for MetamonTstepEncoder:
57
  # ==============================================================================
58
- MetamonTstepEncoder.d_model = 108
59
- MetamonTstepEncoder.dropout = 0.05
60
- MetamonTstepEncoder.extra_emb_dim = 18
61
- MetamonTstepEncoder.n_heads = 6
62
- MetamonTstepEncoder.n_layers = 4
63
- MetamonTstepEncoder.numerical_tokens = 6
64
- MetamonTstepEncoder.scratch_tokens = 6
65
- MetamonTstepEncoder.token_mask_aug = False
 
66
 
67
  # Parameters for Multigammas:
68
  # ==============================================================================
@@ -81,7 +79,7 @@ MultiTaskAgent.num_actions_for_value_in_critic_loss = 4
81
  MultiTaskAgent.num_critics = 6
82
  MultiTaskAgent.num_critics_td = 2
83
  MultiTaskAgent.offline_coeff = 1.0
84
- MultiTaskAgent.online_coeff = 0.1
85
  MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions']
86
  MultiTaskAgent.popart = True
87
  MultiTaskAgent.reward_multiplier = 10.0
@@ -100,6 +98,10 @@ NCriticsTwoHot.n_layers = 2
100
  NCriticsTwoHot.output_bins = 96
101
  NCriticsTwoHot.use_symlog = False
102
 
 
 
 
 
103
  # Parameters for PopArtLayer:
104
  # ==============================================================================
105
  PopArtLayer.beta = 0.0005
@@ -109,10 +111,15 @@ PopArtLayer.init_nu = 100.0
109
  # ==============================================================================
110
  # None.
111
 
 
 
 
 
 
112
  # Parameters for TformerTrajEncoder:
113
  # ==============================================================================
114
  TformerTrajEncoder.activation = 'leaky_relu'
115
- TformerTrajEncoder.attention_type = @transformer.ClippedSlidingSinkAttention
116
  TformerTrajEncoder.d_ff = 3072
117
  TformerTrajEncoder.d_model = 768
118
  TformerTrajEncoder.dropout_attn = 0.0
@@ -121,12 +128,8 @@ TformerTrajEncoder.dropout_ff = 0.05
121
  TformerTrajEncoder.dropout_qkv = 0.0
122
  TformerTrajEncoder.head_scaling = True
123
  TformerTrajEncoder.n_heads = 12
124
- TformerTrajEncoder.n_layers = 6
125
  TformerTrajEncoder.norm = 'layer'
126
  TformerTrajEncoder.normformer_norms = True
127
  TformerTrajEncoder.pos_emb = 'fixed'
128
- TformerTrajEncoder.sigma_reparam = False
129
-
130
- # Parameters for TransformerTurnEmbedding:
131
- # ==============================================================================
132
- # None.
 
8
  # ==============================================================================
9
  # None.
10
 
 
 
 
 
 
 
 
11
  # Parameters for Discrete:
12
  # ==============================================================================
13
  Discrete.clip_prob_high = 0.99
 
23
  Experiment.traj_save_len = 10000000000.0
24
  Experiment.wandb_group_name = None
25
 
26
+ # Parameters for FlashAttention:
27
+ # ==============================================================================
28
+ FlashAttention.window_size = (96, 0)
29
+
30
  # Parameters for MetamonAMAGOExperiment:
31
  # ==============================================================================
32
  MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent
 
34
  MetamonAMAGOExperiment.grad_clip = 1.5
35
  MetamonAMAGOExperiment.l2_coeff = 0.0001
36
  MetamonAMAGOExperiment.learning_rate = 0.000125
37
+ MetamonAMAGOExperiment.lr_warmup_steps = 2000
38
+ MetamonAMAGOExperiment.max_seq_len = 128
39
  MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder
40
+ MetamonAMAGOExperiment.tstep_encoder_type = @MetamonPerceiverTstepEncoder
41
 
42
  # Parameters for MetamonMaskedResidualActor:
43
  # ==============================================================================
44
  MetamonMaskedResidualActor.activation = 'leaky_relu'
45
  MetamonMaskedResidualActor.continuous_dist_type = None
46
  MetamonMaskedResidualActor.dropout_p = 0.0
47
+ MetamonMaskedResidualActor.feature_dim = 450
48
  MetamonMaskedResidualActor.mask_illegal_actions = True
49
  MetamonMaskedResidualActor.normalization = 'layer'
50
  MetamonMaskedResidualActor.residual_blocks = 2
51
  MetamonMaskedResidualActor.residual_ff_dim = 512
52
 
53
+ # Parameters for MetamonPerceiverTstepEncoder:
54
  # ==============================================================================
55
+ MetamonPerceiverTstepEncoder.d_model = 108
56
+ MetamonPerceiverTstepEncoder.dropout = 0.05
57
+ MetamonPerceiverTstepEncoder.extra_emb_dim = 18
58
+ MetamonPerceiverTstepEncoder.latent_tokens = 8
59
+ MetamonPerceiverTstepEncoder.max_tokens_per_turn = 128
60
+ MetamonPerceiverTstepEncoder.n_heads = 6
61
+ MetamonPerceiverTstepEncoder.n_layers = 8
62
+ MetamonPerceiverTstepEncoder.numerical_tokens = 6
63
+ MetamonPerceiverTstepEncoder.token_mask_aug = False
64
 
65
  # Parameters for Multigammas:
66
  # ==============================================================================
 
79
  MultiTaskAgent.num_critics = 6
80
  MultiTaskAgent.num_critics_td = 2
81
  MultiTaskAgent.offline_coeff = 1.0
82
+ MultiTaskAgent.online_coeff = 0.2
83
  MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions']
84
  MultiTaskAgent.popart = True
85
  MultiTaskAgent.reward_multiplier = 10.0
 
98
  NCriticsTwoHot.output_bins = 96
99
  NCriticsTwoHot.use_symlog = False
100
 
101
+ # Parameters for PerceiverTurnEmbedding:
102
+ # ==============================================================================
103
+ # None.
104
+
105
  # Parameters for PopArtLayer:
106
  # ==============================================================================
107
  PopArtLayer.beta = 0.0005
 
111
  # ==============================================================================
112
  # None.
113
 
114
+ # Parameters for SigmaReparam:
115
+ # ==============================================================================
116
+ SigmaReparam.bias = True
117
+ SigmaReparam.fast_init = False
118
+
119
  # Parameters for TformerTrajEncoder:
120
  # ==============================================================================
121
  TformerTrajEncoder.activation = 'leaky_relu'
122
+ TformerTrajEncoder.attention_type = @transformer.FlashAttention
123
  TformerTrajEncoder.d_ff = 3072
124
  TformerTrajEncoder.d_model = 768
125
  TformerTrajEncoder.dropout_attn = 0.0
 
128
  TformerTrajEncoder.dropout_qkv = 0.0
129
  TformerTrajEncoder.head_scaling = True
130
  TformerTrajEncoder.n_heads = 12
131
+ TformerTrajEncoder.n_layers = 8
132
  TformerTrajEncoder.norm = 'layer'
133
  TformerTrajEncoder.normformer_norms = True
134
  TformerTrajEncoder.pos_emb = 'fixed'
135
+ TformerTrajEncoder.sigma_reparam = True
 
 
 
 
alakazam/ckpts/latest/policy.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68b9d7b7dbb389dc8fb66e92392384feb2a11b302b6e1130bfe32790cd1c177c
3
- size 233767193
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:263e04fa26f1fde0df2fbe959f94b18245a3387d98918701f9f532202c4f888f
3
+ size 304148153
alakazam/ckpts/policy_weights/policy_epoch_0.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23066ec5e029b29b95d8c2a41c18e3793a69a765bfd630814b5eaaf5b290bf65
3
- size 233772873
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fe0bcfa730996a37c126e4e7bd52d57f756293ba71ed1c458f9c5ab755d6dc9
3
+ size 304154601
alakazam/ckpts/policy_weights/policy_epoch_10.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb1dee5aa36e76bacdf1bb72c5428fa4c08865756573a27192f632df6b5634a3
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51a69dc023d1f1068858bfe89b7aaafdb7b609ca18aa8a26a06f9e9a72a8722b
3
+ size 304155119
alakazam/ckpts/policy_weights/policy_epoch_12.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ed66cb98aa85736f05b94c939dc22e1885570d33326bf413bf73b938d26bca5
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d82e78b0c9e622308056217288419278d1fdc94a14a0aaf8ad06e56b6e7eb3b
3
+ size 304155119
alakazam/ckpts/policy_weights/policy_epoch_14.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de4aa18f93025d31bba32170207c1c09c712dd9b4ca0bd411bb23b3297900e01
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b258755bd773a01cfeb2fe87fd5512a2559aec9cff05220c4bfb41627cac61de
3
+ size 304155119
alakazam/ckpts/policy_weights/policy_epoch_16.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a07b872b14ff15a303a37a05beef6ffd72e56871056b465bfd012260b579b0f8
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:422c854d0c144b43a851a09b4da32f674b207ca7b8f8eeb3c5d2e59431c4ef9f
3
+ size 304155119
alakazam/ckpts/policy_weights/policy_epoch_18.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cffb486f4256c6a82be7df6ac68e275f4a121d2516e37a6f5ac6bbdd0608d56
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bba20b8d8dd3b6429f7c532120e55a34737a5f6379ec5179de897ceed3ad3737
3
+ size 304155119
alakazam/ckpts/policy_weights/policy_epoch_2.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92421ef958cb85f73f43172431d7d648103efde6bddb6cc128258c284f874bd7
3
- size 233772873
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f72fb7645fa54b8fb4ae83f94444f3da49be1220f3a8ccd051375c1ce25541d
3
+ size 304154601
alakazam/ckpts/policy_weights/policy_epoch_20.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fe5a8dc3a349176445ae8a82ab605180de6bdfd431a6c692eb38d4a16a74b8b
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6639d34acc91cd0eddaa86028875bf6b59813e3a5faf64ea89fb85c35e16aef
3
+ size 304155119
alakazam/ckpts/policy_weights/policy_epoch_22.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbc1c4c1091337fa7eeb24adff88405d0794bc319ad536335922a2327fa44a68
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:118f44a65b00d1dac61ca0cf4ae5ec9e136bced8a5a748b5aa3dfadcb2da5981
3
+ size 304155119
alakazam/ckpts/policy_weights/policy_epoch_24.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d85d042459182e9510e5c1d806fc7bec25510d76af846d87b6c6612e8d55c4d
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14c16320f64608e8700d50280587b9038e26b49180d04e15e98b6a60f4d55217
3
+ size 304155119
alakazam/ckpts/policy_weights/policy_epoch_26.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:134397c7107ce1278bf2021e571aa01340bb59270c17b7f4b052e79aee903e88
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f26608bec87988179827df538c0f29d3df551a8da1942c0a9f59d66e8307b3cf
3
+ size 304155119
alakazam/ckpts/policy_weights/policy_epoch_28.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:864d54aeeb99b5a78c9c2b447a4d20427c1f89ae0bff57476fe7ec14e9cc331d
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77320c39c973ce8413461708b59d0fdb19a84413ab06b86aef5ee44aa0e2e78c
3
+ size 304155119
alakazam/ckpts/policy_weights/policy_epoch_30.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0549977f771affff1b8509e44f24bf48990fea824ae4d287090442a06634a637
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a08830d881f36926d79d20c14bd82e16563385d9f7c0b724608851419d0d6e68
3
+ size 304155119
alakazam/ckpts/policy_weights/policy_epoch_32.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04fd9a00c67e19b3aceff2248a6e5ba12aba59d56db9731ee40b6d03f5976a84
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79b66f40eb8da147158d506579ddd0cebc5959e38c70ee4d853e17c234e68f05
3
+ size 304155119
alakazam/ckpts/policy_weights/policy_epoch_34.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35e3edcc5115e7e0911cf63171a0daa7f103ef5efb1235b19e09d8cc403921e1
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fd288a05ae901b98e6685b6a656190f50cb8d4eff614055527482063ea3841a
3
+ size 304155119
alakazam/ckpts/policy_weights/policy_epoch_36.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68e79bf875d6ed5c0303504509f35d451c95bd1155f38b7dbe541c96dc8b4227
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:023787563c166b5a163f3c9b13eb8fa7ecbd8089fe1a850ca412a82c1c35b1ba
3
+ size 304155119
alakazam/ckpts/policy_weights/policy_epoch_38.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f7c199ee69f72250c88d4d5ff69efd2931bbfad19fd3304bf7669cccbe0060b
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61089d65dc794f5326a1de700363c201b6ac6de6d424363635fb87af0dad8835
3
+ size 304155119
alakazam/ckpts/policy_weights/policy_epoch_4.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3af3e365fdc6db33abd761b7c192d84f4dad32adf0ab7979229c799d64a0fbce
3
- size 233772873
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12d35584822a75d9b3886a1b9405a847e421386338c7398ea5ed40117b72d886
3
+ size 304154601
alakazam/ckpts/policy_weights/policy_epoch_40.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d14a7aecd49e3a201d9354f17c7d0b3d5161b8d1546168b31d878a0818b7e82
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e6849caa9426c47885fb2834056d244b3137d3235bd30f0ec87309ec95641d4
3
+ size 304155119
alakazam/ckpts/policy_weights/policy_epoch_42.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:468d24a4ae25a6d5a1aa58e9234403259c8d4e544727da5f1f7c3083f8c6af66
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1cf6c7e5679788f20cebc4cabc3414c297e61a1b60b5fc875503bfd2da4e577
3
+ size 304155119
alakazam/ckpts/policy_weights/policy_epoch_44.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba5a20cd3df727a11681e4a554ce8adc9e6cf5f81d38f0a59a3e661a987ec7f2
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:177d63ce5452caf3e81809c0cf211e8816be8b79ae56f5a87a7d48d5a6587556
3
+ size 304155119
alakazam/ckpts/policy_weights/policy_epoch_46.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc4ae28f50214281a494bebce96b85d3b58b20b07b7d742e9bb72f3f4f983c50
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48764d855cded18a77977f198f3915991ba86e25a1f3ae5e9755b7c00a177ece
3
+ size 304155119
alakazam/ckpts/policy_weights/policy_epoch_48.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d8f9eff4f41fc552c5e154fa0659bcf6b4038706f166d34d35370be614df3cd
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a16589fc9feb4da418b9c75bc86bde419f9a83b0ee2506a4e172580d3f4cb13
3
+ size 304155119
alakazam/ckpts/policy_weights/policy_epoch_50.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af3a6fe709abf8c057c43c8e09672c9614a29808ef7cdf3277ed98a2502cd933
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8b0db60998cb863039edaf0c31fdf0e409dcc11ac7fe36b3bd8b651e2e046e9
3
+ size 304155119
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_52.pt RENAMED
File without changes
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_54.pt RENAMED
File without changes
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_56.pt RENAMED
File without changes
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_58.pt RENAMED
File without changes
alakazam/ckpts/policy_weights/policy_epoch_6.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cc117703af7788d33cb011bbffd621ac26467298020e39262e22511dfba9751
3
- size 233772873
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44d3bfcd674a29a7ea4e114103e4b4b7253a819b4b26ab349d91b00515dd67b2
3
+ size 304154601
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_60.pt RENAMED
File without changes
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_62.pt RENAMED
File without changes
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_64.pt RENAMED
File without changes
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_66.pt RENAMED
File without changes
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_68.pt RENAMED
File without changes
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_70.pt RENAMED
File without changes
{alakazam_patched → alakazam}/ckpts/policy_weights/policy_epoch_72.pt RENAMED
File without changes
alakazam/ckpts/policy_weights/policy_epoch_8.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:466bb352d16d9191fb32271e4504f43ba8d9aa7115c279006818984f7f1533e0
3
- size 233772873
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47a98516df7b346118fa824f9137e6aaaa1428d5a7150af01bf606c1ea935944
3
+ size 304154601
{alakazam → kadabra2}/alakazam/ckpts/config.txt RENAMED
File without changes
{alakazam_patched → kadabra2}/ckpts/config.txt RENAMED
@@ -8,6 +8,13 @@ import amago.nets.transformer as amago5
8
  # ==============================================================================
9
  # None.
10
 
 
 
 
 
 
 
 
11
  # Parameters for Discrete:
12
  # ==============================================================================
13
  Discrete.clip_prob_high = 0.99
@@ -23,10 +30,6 @@ Experiment.stagger_traj_file_lengths = True
23
  Experiment.traj_save_len = 10000000000.0
24
  Experiment.wandb_group_name = None
25
 
26
- # Parameters for FlashAttention:
27
- # ==============================================================================
28
- FlashAttention.window_size = (96, 0)
29
-
30
  # Parameters for MetamonAMAGOExperiment:
31
  # ==============================================================================
32
  MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent
@@ -34,33 +37,32 @@ MetamonAMAGOExperiment.critic_loss_weight = 13.5
34
  MetamonAMAGOExperiment.grad_clip = 1.5
35
  MetamonAMAGOExperiment.l2_coeff = 0.0001
36
  MetamonAMAGOExperiment.learning_rate = 0.000125
37
- MetamonAMAGOExperiment.lr_warmup_steps = 2000
38
- MetamonAMAGOExperiment.max_seq_len = 128
39
  MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder
40
- MetamonAMAGOExperiment.tstep_encoder_type = @MetamonPerceiverTstepEncoder
41
 
42
  # Parameters for MetamonMaskedResidualActor:
43
  # ==============================================================================
44
  MetamonMaskedResidualActor.activation = 'leaky_relu'
45
  MetamonMaskedResidualActor.continuous_dist_type = None
46
  MetamonMaskedResidualActor.dropout_p = 0.0
47
- MetamonMaskedResidualActor.feature_dim = 450
48
  MetamonMaskedResidualActor.mask_illegal_actions = True
49
  MetamonMaskedResidualActor.normalization = 'layer'
50
  MetamonMaskedResidualActor.residual_blocks = 2
51
  MetamonMaskedResidualActor.residual_ff_dim = 512
52
 
53
- # Parameters for MetamonPerceiverTstepEncoder:
54
  # ==============================================================================
55
- MetamonPerceiverTstepEncoder.d_model = 108
56
- MetamonPerceiverTstepEncoder.dropout = 0.05
57
- MetamonPerceiverTstepEncoder.extra_emb_dim = 18
58
- MetamonPerceiverTstepEncoder.latent_tokens = 8
59
- MetamonPerceiverTstepEncoder.max_tokens_per_turn = 128
60
- MetamonPerceiverTstepEncoder.n_heads = 6
61
- MetamonPerceiverTstepEncoder.n_layers = 8
62
- MetamonPerceiverTstepEncoder.numerical_tokens = 6
63
- MetamonPerceiverTstepEncoder.token_mask_aug = False
64
 
65
  # Parameters for Multigammas:
66
  # ==============================================================================
@@ -79,7 +81,7 @@ MultiTaskAgent.num_actions_for_value_in_critic_loss = 4
79
  MultiTaskAgent.num_critics = 6
80
  MultiTaskAgent.num_critics_td = 2
81
  MultiTaskAgent.offline_coeff = 1.0
82
- MultiTaskAgent.online_coeff = 0.2
83
  MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions']
84
  MultiTaskAgent.popart = True
85
  MultiTaskAgent.reward_multiplier = 10.0
@@ -98,10 +100,6 @@ NCriticsTwoHot.n_layers = 2
98
  NCriticsTwoHot.output_bins = 96
99
  NCriticsTwoHot.use_symlog = False
100
 
101
- # Parameters for PerceiverTurnEmbedding:
102
- # ==============================================================================
103
- # None.
104
-
105
  # Parameters for PopArtLayer:
106
  # ==============================================================================
107
  PopArtLayer.beta = 0.0005
@@ -111,15 +109,10 @@ PopArtLayer.init_nu = 100.0
111
  # ==============================================================================
112
  # None.
113
 
114
- # Parameters for SigmaReparam:
115
- # ==============================================================================
116
- SigmaReparam.bias = True
117
- SigmaReparam.fast_init = False
118
-
119
  # Parameters for TformerTrajEncoder:
120
  # ==============================================================================
121
  TformerTrajEncoder.activation = 'leaky_relu'
122
- TformerTrajEncoder.attention_type = @transformer.FlashAttention
123
  TformerTrajEncoder.d_ff = 3072
124
  TformerTrajEncoder.d_model = 768
125
  TformerTrajEncoder.dropout_attn = 0.0
@@ -128,8 +121,12 @@ TformerTrajEncoder.dropout_ff = 0.05
128
  TformerTrajEncoder.dropout_qkv = 0.0
129
  TformerTrajEncoder.head_scaling = True
130
  TformerTrajEncoder.n_heads = 12
131
- TformerTrajEncoder.n_layers = 8
132
  TformerTrajEncoder.norm = 'layer'
133
  TformerTrajEncoder.normformer_norms = True
134
  TformerTrajEncoder.pos_emb = 'fixed'
135
- TformerTrajEncoder.sigma_reparam = True
 
 
 
 
 
8
  # ==============================================================================
9
  # None.
10
 
11
+ # Parameters for ClippedSlidingSinkAttention:
12
+ # ==============================================================================
13
+ ClippedSlidingSinkAttention.logit_clip = 50
14
+ ClippedSlidingSinkAttention.sink_bias = 0.0
15
+ ClippedSlidingSinkAttention.sink_size = 5
16
+ ClippedSlidingSinkAttention.window_size = 96
17
+
18
  # Parameters for Discrete:
19
  # ==============================================================================
20
  Discrete.clip_prob_high = 0.99
 
30
  Experiment.traj_save_len = 10000000000.0
31
  Experiment.wandb_group_name = None
32
 
 
 
 
 
33
  # Parameters for MetamonAMAGOExperiment:
34
  # ==============================================================================
35
  MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent
 
37
  MetamonAMAGOExperiment.grad_clip = 1.5
38
  MetamonAMAGOExperiment.l2_coeff = 0.0001
39
  MetamonAMAGOExperiment.learning_rate = 0.000125
40
+ MetamonAMAGOExperiment.lr_warmup_steps = 1500
41
+ MetamonAMAGOExperiment.max_seq_len = 200
42
  MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder
43
+ MetamonAMAGOExperiment.tstep_encoder_type = @MetamonTstepEncoder
44
 
45
  # Parameters for MetamonMaskedResidualActor:
46
  # ==============================================================================
47
  MetamonMaskedResidualActor.activation = 'leaky_relu'
48
  MetamonMaskedResidualActor.continuous_dist_type = None
49
  MetamonMaskedResidualActor.dropout_p = 0.0
50
+ MetamonMaskedResidualActor.feature_dim = 256
51
  MetamonMaskedResidualActor.mask_illegal_actions = True
52
  MetamonMaskedResidualActor.normalization = 'layer'
53
  MetamonMaskedResidualActor.residual_blocks = 2
54
  MetamonMaskedResidualActor.residual_ff_dim = 512
55
 
56
+ # Parameters for MetamonTstepEncoder:
57
  # ==============================================================================
58
+ MetamonTstepEncoder.d_model = 108
59
+ MetamonTstepEncoder.dropout = 0.05
60
+ MetamonTstepEncoder.extra_emb_dim = 18
61
+ MetamonTstepEncoder.n_heads = 6
62
+ MetamonTstepEncoder.n_layers = 4
63
+ MetamonTstepEncoder.numerical_tokens = 6
64
+ MetamonTstepEncoder.scratch_tokens = 6
65
+ MetamonTstepEncoder.token_mask_aug = False
 
66
 
67
  # Parameters for Multigammas:
68
  # ==============================================================================
 
81
  MultiTaskAgent.num_critics = 6
82
  MultiTaskAgent.num_critics_td = 2
83
  MultiTaskAgent.offline_coeff = 1.0
84
+ MultiTaskAgent.online_coeff = 0.1
85
  MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions']
86
  MultiTaskAgent.popart = True
87
  MultiTaskAgent.reward_multiplier = 10.0
 
100
  NCriticsTwoHot.output_bins = 96
101
  NCriticsTwoHot.use_symlog = False
102
 
 
 
 
 
103
  # Parameters for PopArtLayer:
104
  # ==============================================================================
105
  PopArtLayer.beta = 0.0005
 
109
  # ==============================================================================
110
  # None.
111
 
 
 
 
 
 
112
  # Parameters for TformerTrajEncoder:
113
  # ==============================================================================
114
  TformerTrajEncoder.activation = 'leaky_relu'
115
+ TformerTrajEncoder.attention_type = @transformer.ClippedSlidingSinkAttention
116
  TformerTrajEncoder.d_ff = 3072
117
  TformerTrajEncoder.d_model = 768
118
  TformerTrajEncoder.dropout_attn = 0.0
 
121
  TformerTrajEncoder.dropout_qkv = 0.0
122
  TformerTrajEncoder.head_scaling = True
123
  TformerTrajEncoder.n_heads = 12
124
+ TformerTrajEncoder.n_layers = 6
125
  TformerTrajEncoder.norm = 'layer'
126
  TformerTrajEncoder.normformer_norms = True
127
  TformerTrajEncoder.pos_emb = 'fixed'
128
+ TformerTrajEncoder.sigma_reparam = False
129
+
130
+ # Parameters for TransformerTurnEmbedding:
131
+ # ==============================================================================
132
+ # None.
{alakazam_patched → kadabra2}/ckpts/latest/policy.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:263e04fa26f1fde0df2fbe959f94b18245a3387d98918701f9f532202c4f888f
3
- size 304148153
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68b9d7b7dbb389dc8fb66e92392384feb2a11b302b6e1130bfe32790cd1c177c
3
+ size 233767193
{alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_0.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8fe0bcfa730996a37c126e4e7bd52d57f756293ba71ed1c458f9c5ab755d6dc9
3
- size 304154601
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23066ec5e029b29b95d8c2a41c18e3793a69a765bfd630814b5eaaf5b290bf65
3
+ size 233772873
{alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_10.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51a69dc023d1f1068858bfe89b7aaafdb7b609ca18aa8a26a06f9e9a72a8722b
3
- size 304155119
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb1dee5aa36e76bacdf1bb72c5428fa4c08865756573a27192f632df6b5634a3
3
+ size 233773103
{alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_12.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d82e78b0c9e622308056217288419278d1fdc94a14a0aaf8ad06e56b6e7eb3b
3
- size 304155119
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ed66cb98aa85736f05b94c939dc22e1885570d33326bf413bf73b938d26bca5
3
+ size 233773103
{alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_14.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b258755bd773a01cfeb2fe87fd5512a2559aec9cff05220c4bfb41627cac61de
3
- size 304155119
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de4aa18f93025d31bba32170207c1c09c712dd9b4ca0bd411bb23b3297900e01
3
+ size 233773103
{alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_16.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:422c854d0c144b43a851a09b4da32f674b207ca7b8f8eeb3c5d2e59431c4ef9f
3
- size 304155119
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a07b872b14ff15a303a37a05beef6ffd72e56871056b465bfd012260b579b0f8
3
+ size 233773103
{alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_18.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bba20b8d8dd3b6429f7c532120e55a34737a5f6379ec5179de897ceed3ad3737
3
- size 304155119
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cffb486f4256c6a82be7df6ac68e275f4a121d2516e37a6f5ac6bbdd0608d56
3
+ size 233773103
{alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_2.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f72fb7645fa54b8fb4ae83f94444f3da49be1220f3a8ccd051375c1ce25541d
3
- size 304154601
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92421ef958cb85f73f43172431d7d648103efde6bddb6cc128258c284f874bd7
3
+ size 233772873
{alakazam_patched → kadabra2}/ckpts/policy_weights/policy_epoch_20.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6639d34acc91cd0eddaa86028875bf6b59813e3a5faf64ea89fb85c35e16aef
3
- size 304155119
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fe5a8dc3a349176445ae8a82ab605180de6bdfd431a6c692eb38d4a16a74b8b
3
+ size 233773103