jakegrigsby commited on
Commit
d2329d5
·
1 Parent(s): 2de4913

remove failed model with experimental attn mechanism

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. kadabra2/alakazam/ckpts/config.txt +0 -134
  2. kadabra2/ckpts/config.txt +27 -24
  3. kadabra2/ckpts/latest/policy.pt +2 -2
  4. kadabra2/ckpts/policy_weights/policy_epoch_0.pt +2 -2
  5. kadabra2/ckpts/policy_weights/policy_epoch_10.pt +2 -2
  6. kadabra2/ckpts/policy_weights/policy_epoch_12.pt +2 -2
  7. kadabra2/ckpts/policy_weights/policy_epoch_14.pt +2 -2
  8. kadabra2/ckpts/policy_weights/policy_epoch_16.pt +2 -2
  9. kadabra2/ckpts/policy_weights/policy_epoch_18.pt +2 -2
  10. kadabra2/ckpts/policy_weights/policy_epoch_2.pt +2 -2
  11. kadabra2/ckpts/policy_weights/policy_epoch_20.pt +2 -2
  12. kadabra2/ckpts/policy_weights/policy_epoch_22.pt +2 -2
  13. kadabra2/ckpts/policy_weights/policy_epoch_24.pt +2 -2
  14. kadabra2/ckpts/policy_weights/policy_epoch_26.pt +2 -2
  15. kadabra2/ckpts/policy_weights/policy_epoch_28.pt +2 -2
  16. kadabra2/ckpts/policy_weights/policy_epoch_30.pt +2 -2
  17. kadabra2/ckpts/policy_weights/policy_epoch_32.pt +2 -2
  18. kadabra2/ckpts/policy_weights/policy_epoch_34.pt +2 -2
  19. kadabra2/ckpts/policy_weights/policy_epoch_36.pt +2 -2
  20. kadabra2/ckpts/policy_weights/policy_epoch_38.pt +2 -2
  21. kadabra2/ckpts/policy_weights/policy_epoch_4.pt +2 -2
  22. kadabra2/ckpts/policy_weights/policy_epoch_40.pt +2 -2
  23. kadabra2/ckpts/policy_weights/policy_epoch_42.pt +2 -2
  24. kadabra2/ckpts/policy_weights/policy_epoch_44.pt +2 -2
  25. kadabra2/ckpts/policy_weights/policy_epoch_46.pt +2 -2
  26. kadabra2/ckpts/policy_weights/policy_epoch_48.pt +2 -2
  27. kadabra2/ckpts/policy_weights/policy_epoch_50.pt +0 -3
  28. kadabra2/ckpts/policy_weights/policy_epoch_6.pt +2 -2
  29. kadabra2/ckpts/policy_weights/policy_epoch_8.pt +2 -2
  30. kadabra3/ckpts/config.txt +2 -2
  31. kadabra3/ckpts/latest/policy.pt +1 -1
  32. kadabra3/ckpts/policy_weights/policy_epoch_0.pt +1 -1
  33. kadabra3/ckpts/policy_weights/policy_epoch_10.pt +1 -1
  34. kadabra3/ckpts/policy_weights/policy_epoch_12.pt +1 -1
  35. kadabra3/ckpts/policy_weights/policy_epoch_14.pt +1 -1
  36. kadabra3/ckpts/policy_weights/policy_epoch_16.pt +1 -1
  37. kadabra3/ckpts/policy_weights/policy_epoch_18.pt +1 -1
  38. kadabra3/ckpts/policy_weights/policy_epoch_2.pt +1 -1
  39. kadabra3/ckpts/policy_weights/policy_epoch_20.pt +1 -1
  40. kadabra3/ckpts/policy_weights/policy_epoch_22.pt +1 -1
  41. kadabra3/ckpts/policy_weights/policy_epoch_24.pt +0 -3
  42. kadabra3/ckpts/policy_weights/policy_epoch_26.pt +0 -3
  43. kadabra3/ckpts/policy_weights/policy_epoch_28.pt +0 -3
  44. kadabra3/ckpts/policy_weights/policy_epoch_30.pt +0 -3
  45. kadabra3/ckpts/policy_weights/policy_epoch_32.pt +0 -3
  46. kadabra3/ckpts/policy_weights/policy_epoch_34.pt +0 -3
  47. kadabra3/ckpts/policy_weights/policy_epoch_36.pt +0 -3
  48. kadabra3/ckpts/policy_weights/policy_epoch_38.pt +0 -3
  49. kadabra3/ckpts/policy_weights/policy_epoch_4.pt +1 -1
  50. kadabra3/ckpts/policy_weights/policy_epoch_40.pt +0 -3
kadabra2/alakazam/ckpts/config.txt DELETED
@@ -1,134 +0,0 @@
1
- import amago.agent
2
- import amago.experiment as amago2
3
- import amago.nets.actor_critic as amago3
4
- import amago.nets.traj_encoders as amago4
5
- import amago.nets.transformer as amago5
6
-
7
- # Parameters for Agent:
8
- # ==============================================================================
9
- # None.
10
-
11
- # Parameters for ClippedSlidingSinkAttention:
12
- # ==============================================================================
13
- ClippedSlidingSinkAttention.logit_clip = 50
14
- ClippedSlidingSinkAttention.sink_bias = 0.0
15
- ClippedSlidingSinkAttention.sink_size = 5
16
- ClippedSlidingSinkAttention.window_size = 96
17
-
18
- # Parameters for Discrete:
19
- # ==============================================================================
20
- Discrete.clip_prob_high = 0.99
21
- Discrete.clip_prob_low = 0.001
22
-
23
- # Parameters for Experiment:
24
- # ==============================================================================
25
- Experiment.batches_per_update = 1
26
- Experiment.force_reset_train_envs_every = None
27
- Experiment.has_dset_edit_rights = True
28
- Experiment.mixed_precision = 'no'
29
- Experiment.padded_sampling = 'none'
30
- Experiment.sample_actions = True
31
- Experiment.save_trajs_as = 'npz'
32
- Experiment.traj_save_len = 10000000000.0
33
- Experiment.wandb_group_name = None
34
-
35
- # Parameters for MetamonAMAGOExperiment:
36
- # ==============================================================================
37
- MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent
38
- MetamonAMAGOExperiment.critic_loss_weight = 12.5
39
- MetamonAMAGOExperiment.grad_clip = 1.5
40
- MetamonAMAGOExperiment.l2_coeff = 0.0001
41
- MetamonAMAGOExperiment.learning_rate = 0.000125
42
- MetamonAMAGOExperiment.lr_warmup_steps = 1250
43
- MetamonAMAGOExperiment.max_seq_len = 200
44
- MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder
45
- MetamonAMAGOExperiment.tstep_encoder_type = @MetamonTstepEncoder
46
-
47
- # Parameters for MetamonMaskedResidualActor:
48
- # ==============================================================================
49
- MetamonMaskedResidualActor.activation = 'leaky_relu'
50
- MetamonMaskedResidualActor.continuous_dist_type = None
51
- MetamonMaskedResidualActor.dropout_p = 0.0
52
- MetamonMaskedResidualActor.feature_dim = 256
53
- MetamonMaskedResidualActor.mask_illegal_actions = True
54
- MetamonMaskedResidualActor.normalization = 'layer'
55
- MetamonMaskedResidualActor.residual_blocks = 2
56
- MetamonMaskedResidualActor.residual_ff_dim = 512
57
-
58
- # Parameters for MetamonTstepEncoder:
59
- # ==============================================================================
60
- MetamonTstepEncoder.d_model = 108
61
- MetamonTstepEncoder.dropout = 0.05
62
- MetamonTstepEncoder.extra_emb_dim = 18
63
- MetamonTstepEncoder.n_heads = 6
64
- MetamonTstepEncoder.n_layers = 4
65
- MetamonTstepEncoder.numerical_tokens = 6
66
- MetamonTstepEncoder.scratch_tokens = 6
67
- MetamonTstepEncoder.token_mask_aug = False
68
-
69
- # Parameters for Multigammas:
70
- # ==============================================================================
71
- Multigammas.continuous = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995]
72
- Multigammas.discrete = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995]
73
-
74
- # Parameters for MultiTaskAgent:
75
- # ==============================================================================
76
- MultiTaskAgent.actor_type = @MetamonMaskedResidualActor
77
- MultiTaskAgent.critic_type = @actor_critic.NCriticsTwoHot
78
- MultiTaskAgent.fake_filter = False
79
- MultiTaskAgent.fbc_filter_func = @agent.leaky_relu_filter
80
- MultiTaskAgent.gamma = 0.999
81
- MultiTaskAgent.num_actions_for_value_in_actor_loss = 4
82
- MultiTaskAgent.num_actions_for_value_in_critic_loss = 4
83
- MultiTaskAgent.num_critics = 6
84
- MultiTaskAgent.num_critics_td = 2
85
- MultiTaskAgent.offline_coeff = 1.0
86
- MultiTaskAgent.online_coeff = 0.0
87
- MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions']
88
- MultiTaskAgent.popart = True
89
- MultiTaskAgent.reward_multiplier = 10.0
90
- MultiTaskAgent.tau = 0.004
91
- MultiTaskAgent.use_multigamma = True
92
- MultiTaskAgent.use_target_actor = True
93
-
94
- # Parameters for NCriticsTwoHot:
95
- # ==============================================================================
96
- NCriticsTwoHot.activation = 'leaky_relu'
97
- NCriticsTwoHot.d_hidden = 512
98
- NCriticsTwoHot.dropout_p = 0.0
99
- NCriticsTwoHot.max_return = 2100
100
- NCriticsTwoHot.min_return = -100
101
- NCriticsTwoHot.n_layers = 2
102
- NCriticsTwoHot.output_bins = 96
103
- NCriticsTwoHot.use_symlog = False
104
-
105
- # Parameters for PopArtLayer:
106
- # ==============================================================================
107
- PopArtLayer.beta = 0.0005
108
- PopArtLayer.init_nu = 100.0
109
-
110
- # Parameters for ResidualActor:
111
- # ==============================================================================
112
- # None.
113
-
114
- # Parameters for TformerTrajEncoder:
115
- # ==============================================================================
116
- TformerTrajEncoder.activation = 'leaky_relu'
117
- TformerTrajEncoder.attention_type = @transformer.ClippedSlidingSinkAttention
118
- TformerTrajEncoder.d_ff = 3072
119
- TformerTrajEncoder.d_model = 768
120
- TformerTrajEncoder.dropout_attn = 0.0
121
- TformerTrajEncoder.dropout_emb = 0.05
122
- TformerTrajEncoder.dropout_ff = 0.05
123
- TformerTrajEncoder.dropout_qkv = 0.0
124
- TformerTrajEncoder.head_scaling = True
125
- TformerTrajEncoder.n_heads = 12
126
- TformerTrajEncoder.n_layers = 6
127
- TformerTrajEncoder.norm = 'layer'
128
- TformerTrajEncoder.normformer_norms = True
129
- TformerTrajEncoder.pos_emb = 'fixed'
130
- TformerTrajEncoder.sigma_reparam = False
131
-
132
- # Parameters for TransformerTurnEmbedding:
133
- # ==============================================================================
134
- # None.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
kadabra2/ckpts/config.txt CHANGED
@@ -8,13 +8,6 @@ import amago.nets.transformer as amago5
8
  # ==============================================================================
9
  # None.
10
 
11
- # Parameters for ClippedSlidingSinkAttention:
12
- # ==============================================================================
13
- ClippedSlidingSinkAttention.logit_clip = 50
14
- ClippedSlidingSinkAttention.sink_bias = 0.0
15
- ClippedSlidingSinkAttention.sink_size = 5
16
- ClippedSlidingSinkAttention.window_size = 96
17
-
18
  # Parameters for Discrete:
19
  # ==============================================================================
20
  Discrete.clip_prob_high = 0.99
@@ -30,6 +23,10 @@ Experiment.stagger_traj_file_lengths = True
30
  Experiment.traj_save_len = 10000000000.0
31
  Experiment.wandb_group_name = None
32
 
 
 
 
 
33
  # Parameters for MetamonAMAGOExperiment:
34
  # ==============================================================================
35
  MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent
@@ -40,29 +37,30 @@ MetamonAMAGOExperiment.learning_rate = 0.000125
40
  MetamonAMAGOExperiment.lr_warmup_steps = 1500
41
  MetamonAMAGOExperiment.max_seq_len = 200
42
  MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder
43
- MetamonAMAGOExperiment.tstep_encoder_type = @MetamonTstepEncoder
44
 
45
  # Parameters for MetamonMaskedResidualActor:
46
  # ==============================================================================
47
  MetamonMaskedResidualActor.activation = 'leaky_relu'
48
  MetamonMaskedResidualActor.continuous_dist_type = None
49
  MetamonMaskedResidualActor.dropout_p = 0.0
50
- MetamonMaskedResidualActor.feature_dim = 256
51
  MetamonMaskedResidualActor.mask_illegal_actions = True
52
  MetamonMaskedResidualActor.normalization = 'layer'
53
  MetamonMaskedResidualActor.residual_blocks = 2
54
  MetamonMaskedResidualActor.residual_ff_dim = 512
55
 
56
- # Parameters for MetamonTstepEncoder:
57
  # ==============================================================================
58
- MetamonTstepEncoder.d_model = 108
59
- MetamonTstepEncoder.dropout = 0.05
60
- MetamonTstepEncoder.extra_emb_dim = 18
61
- MetamonTstepEncoder.n_heads = 6
62
- MetamonTstepEncoder.n_layers = 4
63
- MetamonTstepEncoder.numerical_tokens = 6
64
- MetamonTstepEncoder.scratch_tokens = 6
65
- MetamonTstepEncoder.token_mask_aug = False
 
66
 
67
  # Parameters for Multigammas:
68
  # ==============================================================================
@@ -100,6 +98,10 @@ NCriticsTwoHot.n_layers = 2
100
  NCriticsTwoHot.output_bins = 96
101
  NCriticsTwoHot.use_symlog = False
102
 
 
 
 
 
103
  # Parameters for PopArtLayer:
104
  # ==============================================================================
105
  PopArtLayer.beta = 0.0005
@@ -109,10 +111,15 @@ PopArtLayer.init_nu = 100.0
109
  # ==============================================================================
110
  # None.
111
 
 
 
 
 
 
112
  # Parameters for TformerTrajEncoder:
113
  # ==============================================================================
114
  TformerTrajEncoder.activation = 'leaky_relu'
115
- TformerTrajEncoder.attention_type = @transformer.ClippedSlidingSinkAttention
116
  TformerTrajEncoder.d_ff = 3072
117
  TformerTrajEncoder.d_model = 768
118
  TformerTrajEncoder.dropout_attn = 0.0
@@ -125,8 +132,4 @@ TformerTrajEncoder.n_layers = 6
125
  TformerTrajEncoder.norm = 'layer'
126
  TformerTrajEncoder.normformer_norms = True
127
  TformerTrajEncoder.pos_emb = 'fixed'
128
- TformerTrajEncoder.sigma_reparam = False
129
-
130
- # Parameters for TransformerTurnEmbedding:
131
- # ==============================================================================
132
- # None.
 
8
  # ==============================================================================
9
  # None.
10
 
 
 
 
 
 
 
 
11
  # Parameters for Discrete:
12
  # ==============================================================================
13
  Discrete.clip_prob_high = 0.99
 
23
  Experiment.traj_save_len = 10000000000.0
24
  Experiment.wandb_group_name = None
25
 
26
+ # Parameters for FlashAttention:
27
+ # ==============================================================================
28
+ FlashAttention.window_size = (96, 0)
29
+
30
  # Parameters for MetamonAMAGOExperiment:
31
  # ==============================================================================
32
  MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent
 
37
  MetamonAMAGOExperiment.lr_warmup_steps = 1500
38
  MetamonAMAGOExperiment.max_seq_len = 200
39
  MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder
40
+ MetamonAMAGOExperiment.tstep_encoder_type = @MetamonPerceiverTstepEncoder
41
 
42
  # Parameters for MetamonMaskedResidualActor:
43
  # ==============================================================================
44
  MetamonMaskedResidualActor.activation = 'leaky_relu'
45
  MetamonMaskedResidualActor.continuous_dist_type = None
46
  MetamonMaskedResidualActor.dropout_p = 0.0
47
+ MetamonMaskedResidualActor.feature_dim = 400
48
  MetamonMaskedResidualActor.mask_illegal_actions = True
49
  MetamonMaskedResidualActor.normalization = 'layer'
50
  MetamonMaskedResidualActor.residual_blocks = 2
51
  MetamonMaskedResidualActor.residual_ff_dim = 512
52
 
53
+ # Parameters for MetamonPerceiverTstepEncoder:
54
  # ==============================================================================
55
+ MetamonPerceiverTstepEncoder.d_model = 108
56
+ MetamonPerceiverTstepEncoder.dropout = 0.05
57
+ MetamonPerceiverTstepEncoder.extra_emb_dim = 18
58
+ MetamonPerceiverTstepEncoder.latent_tokens = 8
59
+ MetamonPerceiverTstepEncoder.max_tokens_per_turn = 128
60
+ MetamonPerceiverTstepEncoder.n_heads = 6
61
+ MetamonPerceiverTstepEncoder.n_layers = 5
62
+ MetamonPerceiverTstepEncoder.numerical_tokens = 6
63
+ MetamonPerceiverTstepEncoder.token_mask_aug = False
64
 
65
  # Parameters for Multigammas:
66
  # ==============================================================================
 
98
  NCriticsTwoHot.output_bins = 96
99
  NCriticsTwoHot.use_symlog = False
100
 
101
+ # Parameters for PerceiverTurnEmbedding:
102
+ # ==============================================================================
103
+ # None.
104
+
105
  # Parameters for PopArtLayer:
106
  # ==============================================================================
107
  PopArtLayer.beta = 0.0005
 
111
  # ==============================================================================
112
  # None.
113
 
114
+ # Parameters for SigmaReparam:
115
+ # ==============================================================================
116
+ SigmaReparam.bias = True
117
+ SigmaReparam.fast_init = False
118
+
119
  # Parameters for TformerTrajEncoder:
120
  # ==============================================================================
121
  TformerTrajEncoder.activation = 'leaky_relu'
122
+ TformerTrajEncoder.attention_type = @transformer.FlashAttention
123
  TformerTrajEncoder.d_ff = 3072
124
  TformerTrajEncoder.d_model = 768
125
  TformerTrajEncoder.dropout_attn = 0.0
 
132
  TformerTrajEncoder.norm = 'layer'
133
  TformerTrajEncoder.normformer_norms = True
134
  TformerTrajEncoder.pos_emb = 'fixed'
135
+ TformerTrajEncoder.sigma_reparam = True
 
 
 
 
kadabra2/ckpts/latest/policy.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68b9d7b7dbb389dc8fb66e92392384feb2a11b302b6e1130bfe32790cd1c177c
3
- size 233767193
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5749e207e2844d32798abe1e50b4a8a9e0ceede488c78fb3d5eb7518af8f5bbd
3
+ size 242350603
kadabra2/ckpts/policy_weights/policy_epoch_0.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23066ec5e029b29b95d8c2a41c18e3793a69a765bfd630814b5eaaf5b290bf65
3
- size 233772873
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4af96830f111687d973c2fa35f169735cfdb9eb3e24bdfc63ff3ad37e4e2720c
3
+ size 242356043
kadabra2/ckpts/policy_weights/policy_epoch_10.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb1dee5aa36e76bacdf1bb72c5428fa4c08865756573a27192f632df6b5634a3
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d88f19f754cbce4fed328a67f2ad1c714ff1bcc598ff8891ee97e0758cc41d2
3
+ size 242356427
kadabra2/ckpts/policy_weights/policy_epoch_12.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ed66cb98aa85736f05b94c939dc22e1885570d33326bf413bf73b938d26bca5
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b24fe2be2ceea834e9744d1926f37d66653aaa00e9435735e16a4fb3ce625d8
3
+ size 242356427
kadabra2/ckpts/policy_weights/policy_epoch_14.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de4aa18f93025d31bba32170207c1c09c712dd9b4ca0bd411bb23b3297900e01
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b7e0f6bc0793d4354e5a4602e94d4a2af091349029b935545148036cef32419
3
+ size 242356427
kadabra2/ckpts/policy_weights/policy_epoch_16.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a07b872b14ff15a303a37a05beef6ffd72e56871056b465bfd012260b579b0f8
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79780c94dfad447bf7ece49ff9bbe5d39648dd79a50783d6681474391bc3b3e8
3
+ size 242356427
kadabra2/ckpts/policy_weights/policy_epoch_18.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cffb486f4256c6a82be7df6ac68e275f4a121d2516e37a6f5ac6bbdd0608d56
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7c8f76c362ba6c8c71259f9b7e0a0e8985dae57ced4613e30fb4f19061d4c65
3
+ size 242356427
kadabra2/ckpts/policy_weights/policy_epoch_2.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92421ef958cb85f73f43172431d7d648103efde6bddb6cc128258c284f874bd7
3
- size 233772873
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:907a87152482eb3301d29a1b47f0aac6a6bc10512e78831629f4e6f86600ae50
3
+ size 242356043
kadabra2/ckpts/policy_weights/policy_epoch_20.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fe5a8dc3a349176445ae8a82ab605180de6bdfd431a6c692eb38d4a16a74b8b
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b56aeb44738de6ffd9a8eb700033ce2bdb3d750255653aba3d35f1849721d09
3
+ size 242356427
kadabra2/ckpts/policy_weights/policy_epoch_22.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbc1c4c1091337fa7eeb24adff88405d0794bc319ad536335922a2327fa44a68
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:684c23b11feb1d3dd461686aa751801d477d326cdc02e64f9f407ca90fbb168b
3
+ size 242356427
kadabra2/ckpts/policy_weights/policy_epoch_24.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d85d042459182e9510e5c1d806fc7bec25510d76af846d87b6c6612e8d55c4d
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:666557150f2ccc31c9ae7cae90282720eb6fad874ea9c7fcbbbd913313b8112d
3
+ size 242356427
kadabra2/ckpts/policy_weights/policy_epoch_26.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:134397c7107ce1278bf2021e571aa01340bb59270c17b7f4b052e79aee903e88
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91227a839e1b49c18cec8390eb2b1393d04c00198b04f471472adf1588615167
3
+ size 242356427
kadabra2/ckpts/policy_weights/policy_epoch_28.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:864d54aeeb99b5a78c9c2b447a4d20427c1f89ae0bff57476fe7ec14e9cc331d
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0114dd16ae4a25de2c95d64a4e18dd457908cfb9d81e8da60cd4c57b4488f813
3
+ size 242356427
kadabra2/ckpts/policy_weights/policy_epoch_30.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0549977f771affff1b8509e44f24bf48990fea824ae4d287090442a06634a637
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:471900ef9eebfee75ccd6f8f4fec1aaa2e9362731781c63fb8ea6c2ed721bda9
3
+ size 242356427
kadabra2/ckpts/policy_weights/policy_epoch_32.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04fd9a00c67e19b3aceff2248a6e5ba12aba59d56db9731ee40b6d03f5976a84
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:081cf299f97c95bd2413943a1098741d565f25f449e2b030d47d40828be408c4
3
+ size 242356427
kadabra2/ckpts/policy_weights/policy_epoch_34.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35e3edcc5115e7e0911cf63171a0daa7f103ef5efb1235b19e09d8cc403921e1
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cb4198d9774888e130de045f20b49dc00067d1ff80be099ee7510f4ddf2d534
3
+ size 242356427
kadabra2/ckpts/policy_weights/policy_epoch_36.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68e79bf875d6ed5c0303504509f35d451c95bd1155f38b7dbe541c96dc8b4227
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d7acbd7c048a3112c6d6460aea15e69f3c38a66bb52fba18b469188ea767900
3
+ size 242356427
kadabra2/ckpts/policy_weights/policy_epoch_38.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f7c199ee69f72250c88d4d5ff69efd2931bbfad19fd3304bf7669cccbe0060b
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03724e7ee53ee469aeea2469233c89eef616361067dc9646b0a6fe4dd3b2bfb7
3
+ size 242356427
kadabra2/ckpts/policy_weights/policy_epoch_4.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3af3e365fdc6db33abd761b7c192d84f4dad32adf0ab7979229c799d64a0fbce
3
- size 233772873
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd907066db0bc80a556ccf983c129d5b2cfbb530ed4ba35bdfb17e71f3da8313
3
+ size 242356043
kadabra2/ckpts/policy_weights/policy_epoch_40.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d14a7aecd49e3a201d9354f17c7d0b3d5161b8d1546168b31d878a0818b7e82
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:deaf36d8d2f07b0f2d03539f4e248432a2f372682f38bc8ff9d37a1fe983a0d3
3
+ size 242356427
kadabra2/ckpts/policy_weights/policy_epoch_42.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:468d24a4ae25a6d5a1aa58e9234403259c8d4e544727da5f1f7c3083f8c6af66
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fe0b1324135a72207c4dc56a46273b9545f1059e16d4f04b260cb2e3d3e1d38
3
+ size 242356427
kadabra2/ckpts/policy_weights/policy_epoch_44.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba5a20cd3df727a11681e4a554ce8adc9e6cf5f81d38f0a59a3e661a987ec7f2
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1f1aa044874414b9af74fadd97a5a5a608b1d811f995409413f654d54c803d2
3
+ size 242356427
kadabra2/ckpts/policy_weights/policy_epoch_46.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc4ae28f50214281a494bebce96b85d3b58b20b07b7d742e9bb72f3f4f983c50
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7da34f42d2c93355818f2b231258e7eca8a1886bfa7adc6998291e154b46cabd
3
+ size 242356427
kadabra2/ckpts/policy_weights/policy_epoch_48.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d8f9eff4f41fc552c5e154fa0659bcf6b4038706f166d34d35370be614df3cd
3
- size 233773103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c8bc82f45ecfe4a840ac885e352a81db36cf9149a3c598d22f15ec3cc60bea0
3
+ size 242356427
kadabra2/ckpts/policy_weights/policy_epoch_50.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:af3a6fe709abf8c057c43c8e09672c9614a29808ef7cdf3277ed98a2502cd933
3
- size 233773103
 
 
 
 
kadabra2/ckpts/policy_weights/policy_epoch_6.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cc117703af7788d33cb011bbffd621ac26467298020e39262e22511dfba9751
3
- size 233772873
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb8ffb48da90b8cbf70e5367b97abd740614fcd3d8e0710f175513ea0490414a
3
+ size 242356043
kadabra2/ckpts/policy_weights/policy_epoch_8.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:466bb352d16d9191fb32271e4504f43ba8d9aa7115c279006818984f7f1533e0
3
- size 233772873
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b92405dd27f4533a381de89f401d939d027a549fdd0242a4063c942c67db934
3
+ size 242356043
kadabra3/ckpts/config.txt CHANGED
@@ -34,7 +34,7 @@ MetamonAMAGOExperiment.critic_loss_weight = 13.5
34
  MetamonAMAGOExperiment.grad_clip = 1.5
35
  MetamonAMAGOExperiment.l2_coeff = 0.0001
36
  MetamonAMAGOExperiment.learning_rate = 0.000125
37
- MetamonAMAGOExperiment.lr_warmup_steps = 1500
38
  MetamonAMAGOExperiment.max_seq_len = 200
39
  MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder
40
  MetamonAMAGOExperiment.tstep_encoder_type = @MetamonPerceiverTstepEncoder
@@ -79,7 +79,7 @@ MultiTaskAgent.num_actions_for_value_in_critic_loss = 4
79
  MultiTaskAgent.num_critics = 6
80
  MultiTaskAgent.num_critics_td = 2
81
  MultiTaskAgent.offline_coeff = 1.0
82
- MultiTaskAgent.online_coeff = 0.1
83
  MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions']
84
  MultiTaskAgent.popart = True
85
  MultiTaskAgent.reward_multiplier = 10.0
 
34
  MetamonAMAGOExperiment.grad_clip = 1.5
35
  MetamonAMAGOExperiment.l2_coeff = 0.0001
36
  MetamonAMAGOExperiment.learning_rate = 0.000125
37
+ MetamonAMAGOExperiment.lr_warmup_steps = 2000
38
  MetamonAMAGOExperiment.max_seq_len = 200
39
  MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder
40
  MetamonAMAGOExperiment.tstep_encoder_type = @MetamonPerceiverTstepEncoder
 
79
  MultiTaskAgent.num_critics = 6
80
  MultiTaskAgent.num_critics_td = 2
81
  MultiTaskAgent.offline_coeff = 1.0
82
+ MultiTaskAgent.online_coeff = 0.2
83
  MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions']
84
  MultiTaskAgent.popart = True
85
  MultiTaskAgent.reward_multiplier = 10.0
kadabra3/ckpts/latest/policy.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5749e207e2844d32798abe1e50b4a8a9e0ceede488c78fb3d5eb7518af8f5bbd
3
  size 242350603
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69c8350d37ca298fdeb6da01af1139cda58f76fe15de13ccad3c7ce3e8db411e
3
  size 242350603
kadabra3/ckpts/policy_weights/policy_epoch_0.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4af96830f111687d973c2fa35f169735cfdb9eb3e24bdfc63ff3ad37e4e2720c
3
  size 242356043
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b35d802e2eb7b93e9f14e948cff889185aa1d3c57e7690e1e7224f716e466aab
3
  size 242356043
kadabra3/ckpts/policy_weights/policy_epoch_10.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d88f19f754cbce4fed328a67f2ad1c714ff1bcc598ff8891ee97e0758cc41d2
3
  size 242356427
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:338ad5ee014194bff81d88c1fea6f2ada901d1346475ae99307fa991ec58f299
3
  size 242356427
kadabra3/ckpts/policy_weights/policy_epoch_12.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b24fe2be2ceea834e9744d1926f37d66653aaa00e9435735e16a4fb3ce625d8
3
  size 242356427
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:663c1a71b7ef93c8f9c1c8ef3590ada41d79901960e69ce08ced90de612580c8
3
  size 242356427
kadabra3/ckpts/policy_weights/policy_epoch_14.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b7e0f6bc0793d4354e5a4602e94d4a2af091349029b935545148036cef32419
3
  size 242356427
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2eb38fc0259716ca96108deadd949a8c0b084c51d9aecb4aa2a843d5928c679a
3
  size 242356427
kadabra3/ckpts/policy_weights/policy_epoch_16.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79780c94dfad447bf7ece49ff9bbe5d39648dd79a50783d6681474391bc3b3e8
3
  size 242356427
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43dfa1ca92e5d5c903fde654ff6dac6d54df8596b946bec141abcde3b0126a76
3
  size 242356427
kadabra3/ckpts/policy_weights/policy_epoch_18.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7c8f76c362ba6c8c71259f9b7e0a0e8985dae57ced4613e30fb4f19061d4c65
3
  size 242356427
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38ce128d73d8a7b9a8a7c0403dfb3c7cbf859d69395f46ed0d5d430ba16183e2
3
  size 242356427
kadabra3/ckpts/policy_weights/policy_epoch_2.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:907a87152482eb3301d29a1b47f0aac6a6bc10512e78831629f4e6f86600ae50
3
  size 242356043
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3686a5fc89a3d23c5b3a4da0ec0accd088318708c3b0ab2236b42a1e790d899
3
  size 242356043
kadabra3/ckpts/policy_weights/policy_epoch_20.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b56aeb44738de6ffd9a8eb700033ce2bdb3d750255653aba3d35f1849721d09
3
  size 242356427
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6531a3da103f12611a9e582376fd1b3abedd846f6b384473b63c3a79c03354a7
3
  size 242356427
kadabra3/ckpts/policy_weights/policy_epoch_22.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:684c23b11feb1d3dd461686aa751801d477d326cdc02e64f9f407ca90fbb168b
3
  size 242356427
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f896a0f123c0746ac24780b4b9db19234bc353641c83d304bf1fcec75589ebc
3
  size 242356427
kadabra3/ckpts/policy_weights/policy_epoch_24.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:666557150f2ccc31c9ae7cae90282720eb6fad874ea9c7fcbbbd913313b8112d
3
- size 242356427
 
 
 
 
kadabra3/ckpts/policy_weights/policy_epoch_26.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:91227a839e1b49c18cec8390eb2b1393d04c00198b04f471472adf1588615167
3
- size 242356427
 
 
 
 
kadabra3/ckpts/policy_weights/policy_epoch_28.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0114dd16ae4a25de2c95d64a4e18dd457908cfb9d81e8da60cd4c57b4488f813
3
- size 242356427
 
 
 
 
kadabra3/ckpts/policy_weights/policy_epoch_30.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:471900ef9eebfee75ccd6f8f4fec1aaa2e9362731781c63fb8ea6c2ed721bda9
3
- size 242356427
 
 
 
 
kadabra3/ckpts/policy_weights/policy_epoch_32.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:081cf299f97c95bd2413943a1098741d565f25f449e2b030d47d40828be408c4
3
- size 242356427
 
 
 
 
kadabra3/ckpts/policy_weights/policy_epoch_34.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cb4198d9774888e130de045f20b49dc00067d1ff80be099ee7510f4ddf2d534
3
- size 242356427
 
 
 
 
kadabra3/ckpts/policy_weights/policy_epoch_36.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d7acbd7c048a3112c6d6460aea15e69f3c38a66bb52fba18b469188ea767900
3
- size 242356427
 
 
 
 
kadabra3/ckpts/policy_weights/policy_epoch_38.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:03724e7ee53ee469aeea2469233c89eef616361067dc9646b0a6fe4dd3b2bfb7
3
- size 242356427
 
 
 
 
kadabra3/ckpts/policy_weights/policy_epoch_4.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd907066db0bc80a556ccf983c129d5b2cfbb530ed4ba35bdfb17e71f3da8313
3
  size 242356043
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5d9a6a446bb9d4009160473a206e2ccfd618f591bdf4f3b8a14a638a1f5e220
3
  size 242356043
kadabra3/ckpts/policy_weights/policy_epoch_40.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:deaf36d8d2f07b0f2d03539f4e248432a2f372682f38bc8ff9d37a1fe983a0d3
3
- size 242356427