jakegrigsby commited on
Commit
717dcf8
·
1 Parent(s): c5d1cb9
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. alakazam/alakazam/ckpts/config.txt +134 -0
  2. alakazam/ckpts/config.txt +132 -0
  3. alakazam/ckpts/latest/policy.pt +3 -0
  4. alakazam/ckpts/policy_weights/policy_epoch_0.pt +3 -0
  5. alakazam/ckpts/policy_weights/policy_epoch_10.pt +3 -0
  6. alakazam/ckpts/policy_weights/policy_epoch_12.pt +3 -0
  7. alakazam/ckpts/policy_weights/policy_epoch_14.pt +3 -0
  8. alakazam/ckpts/policy_weights/policy_epoch_16.pt +3 -0
  9. alakazam/ckpts/policy_weights/policy_epoch_18.pt +3 -0
  10. alakazam/ckpts/policy_weights/policy_epoch_2.pt +3 -0
  11. alakazam/ckpts/policy_weights/policy_epoch_20.pt +3 -0
  12. alakazam/ckpts/policy_weights/policy_epoch_22.pt +3 -0
  13. alakazam/ckpts/policy_weights/policy_epoch_24.pt +3 -0
  14. alakazam/ckpts/policy_weights/policy_epoch_26.pt +3 -0
  15. alakazam/ckpts/policy_weights/policy_epoch_28.pt +3 -0
  16. alakazam/ckpts/policy_weights/policy_epoch_30.pt +3 -0
  17. alakazam/ckpts/policy_weights/policy_epoch_32.pt +3 -0
  18. alakazam/ckpts/policy_weights/policy_epoch_34.pt +3 -0
  19. alakazam/ckpts/policy_weights/policy_epoch_36.pt +3 -0
  20. alakazam/ckpts/policy_weights/policy_epoch_38.pt +3 -0
  21. alakazam/ckpts/policy_weights/policy_epoch_4.pt +3 -0
  22. alakazam/ckpts/policy_weights/policy_epoch_40.pt +3 -0
  23. alakazam/ckpts/policy_weights/policy_epoch_42.pt +3 -0
  24. alakazam/ckpts/policy_weights/policy_epoch_44.pt +3 -0
  25. alakazam/ckpts/policy_weights/policy_epoch_46.pt +3 -0
  26. alakazam/ckpts/policy_weights/policy_epoch_48.pt +3 -0
  27. alakazam/ckpts/policy_weights/policy_epoch_50.pt +3 -0
  28. alakazam/ckpts/policy_weights/policy_epoch_6.pt +3 -0
  29. alakazam/ckpts/policy_weights/policy_epoch_8.pt +3 -0
  30. alakazam2/ckpts/config.txt +135 -0
  31. alakazam2/ckpts/latest/policy.pt +3 -0
  32. alakazam2/ckpts/policy_weights/policy_epoch_0.pt +3 -0
  33. alakazam2/ckpts/policy_weights/policy_epoch_10.pt +3 -0
  34. alakazam2/ckpts/policy_weights/policy_epoch_12.pt +3 -0
  35. alakazam2/ckpts/policy_weights/policy_epoch_14.pt +3 -0
  36. alakazam2/ckpts/policy_weights/policy_epoch_16.pt +3 -0
  37. alakazam2/ckpts/policy_weights/policy_epoch_18.pt +3 -0
  38. alakazam2/ckpts/policy_weights/policy_epoch_2.pt +3 -0
  39. alakazam2/ckpts/policy_weights/policy_epoch_20.pt +3 -0
  40. alakazam2/ckpts/policy_weights/policy_epoch_22.pt +3 -0
  41. alakazam2/ckpts/policy_weights/policy_epoch_24.pt +3 -0
  42. alakazam2/ckpts/policy_weights/policy_epoch_26.pt +3 -0
  43. alakazam2/ckpts/policy_weights/policy_epoch_28.pt +3 -0
  44. alakazam2/ckpts/policy_weights/policy_epoch_30.pt +3 -0
  45. alakazam2/ckpts/policy_weights/policy_epoch_32.pt +3 -0
  46. alakazam2/ckpts/policy_weights/policy_epoch_34.pt +3 -0
  47. alakazam2/ckpts/policy_weights/policy_epoch_36.pt +3 -0
  48. alakazam2/ckpts/policy_weights/policy_epoch_38.pt +3 -0
  49. alakazam2/ckpts/policy_weights/policy_epoch_4.pt +3 -0
  50. alakazam2/ckpts/policy_weights/policy_epoch_40.pt +3 -0
alakazam/alakazam/ckpts/config.txt ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import amago.agent
2
+ import amago.experiment as amago2
3
+ import amago.nets.actor_critic as amago3
4
+ import amago.nets.traj_encoders as amago4
5
+ import amago.nets.transformer as amago5
6
+
7
+ # Parameters for Agent:
8
+ # ==============================================================================
9
+ # None.
10
+
11
+ # Parameters for ClippedSlidingSinkAttention:
12
+ # ==============================================================================
13
+ ClippedSlidingSinkAttention.logit_clip = 50
14
+ ClippedSlidingSinkAttention.sink_bias = 0.0
15
+ ClippedSlidingSinkAttention.sink_size = 5
16
+ ClippedSlidingSinkAttention.window_size = 96
17
+
18
+ # Parameters for Discrete:
19
+ # ==============================================================================
20
+ Discrete.clip_prob_high = 0.99
21
+ Discrete.clip_prob_low = 0.001
22
+
23
+ # Parameters for Experiment:
24
+ # ==============================================================================
25
+ Experiment.batches_per_update = 1
26
+ Experiment.force_reset_train_envs_every = None
27
+ Experiment.has_dset_edit_rights = True
28
+ Experiment.mixed_precision = 'no'
29
+ Experiment.padded_sampling = 'none'
30
+ Experiment.sample_actions = True
31
+ Experiment.save_trajs_as = 'npz'
32
+ Experiment.traj_save_len = 10000000000.0
33
+ Experiment.wandb_group_name = None
34
+
35
+ # Parameters for MetamonAMAGOExperiment:
36
+ # ==============================================================================
37
+ MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent
38
+ MetamonAMAGOExperiment.critic_loss_weight = 12.5
39
+ MetamonAMAGOExperiment.grad_clip = 1.5
40
+ MetamonAMAGOExperiment.l2_coeff = 0.0001
41
+ MetamonAMAGOExperiment.learning_rate = 0.000125
42
+ MetamonAMAGOExperiment.lr_warmup_steps = 1250
43
+ MetamonAMAGOExperiment.max_seq_len = 200
44
+ MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder
45
+ MetamonAMAGOExperiment.tstep_encoder_type = @MetamonTstepEncoder
46
+
47
+ # Parameters for MetamonMaskedResidualActor:
48
+ # ==============================================================================
49
+ MetamonMaskedResidualActor.activation = 'leaky_relu'
50
+ MetamonMaskedResidualActor.continuous_dist_type = None
51
+ MetamonMaskedResidualActor.dropout_p = 0.0
52
+ MetamonMaskedResidualActor.feature_dim = 256
53
+ MetamonMaskedResidualActor.mask_illegal_actions = True
54
+ MetamonMaskedResidualActor.normalization = 'layer'
55
+ MetamonMaskedResidualActor.residual_blocks = 2
56
+ MetamonMaskedResidualActor.residual_ff_dim = 512
57
+
58
+ # Parameters for MetamonTstepEncoder:
59
+ # ==============================================================================
60
+ MetamonTstepEncoder.d_model = 108
61
+ MetamonTstepEncoder.dropout = 0.05
62
+ MetamonTstepEncoder.extra_emb_dim = 18
63
+ MetamonTstepEncoder.n_heads = 6
64
+ MetamonTstepEncoder.n_layers = 4
65
+ MetamonTstepEncoder.numerical_tokens = 6
66
+ MetamonTstepEncoder.scratch_tokens = 6
67
+ MetamonTstepEncoder.token_mask_aug = False
68
+
69
+ # Parameters for Multigammas:
70
+ # ==============================================================================
71
+ Multigammas.continuous = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995]
72
+ Multigammas.discrete = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995]
73
+
74
+ # Parameters for MultiTaskAgent:
75
+ # ==============================================================================
76
+ MultiTaskAgent.actor_type = @MetamonMaskedResidualActor
77
+ MultiTaskAgent.critic_type = @actor_critic.NCriticsTwoHot
78
+ MultiTaskAgent.fake_filter = False
79
+ MultiTaskAgent.fbc_filter_func = @agent.leaky_relu_filter
80
+ MultiTaskAgent.gamma = 0.999
81
+ MultiTaskAgent.num_actions_for_value_in_actor_loss = 4
82
+ MultiTaskAgent.num_actions_for_value_in_critic_loss = 4
83
+ MultiTaskAgent.num_critics = 6
84
+ MultiTaskAgent.num_critics_td = 2
85
+ MultiTaskAgent.offline_coeff = 1.0
86
+ MultiTaskAgent.online_coeff = 0.0
87
+ MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions']
88
+ MultiTaskAgent.popart = True
89
+ MultiTaskAgent.reward_multiplier = 10.0
90
+ MultiTaskAgent.tau = 0.004
91
+ MultiTaskAgent.use_multigamma = True
92
+ MultiTaskAgent.use_target_actor = True
93
+
94
+ # Parameters for NCriticsTwoHot:
95
+ # ==============================================================================
96
+ NCriticsTwoHot.activation = 'leaky_relu'
97
+ NCriticsTwoHot.d_hidden = 512
98
+ NCriticsTwoHot.dropout_p = 0.0
99
+ NCriticsTwoHot.max_return = 2100
100
+ NCriticsTwoHot.min_return = -100
101
+ NCriticsTwoHot.n_layers = 2
102
+ NCriticsTwoHot.output_bins = 96
103
+ NCriticsTwoHot.use_symlog = False
104
+
105
+ # Parameters for PopArtLayer:
106
+ # ==============================================================================
107
+ PopArtLayer.beta = 0.0005
108
+ PopArtLayer.init_nu = 100.0
109
+
110
+ # Parameters for ResidualActor:
111
+ # ==============================================================================
112
+ # None.
113
+
114
+ # Parameters for TformerTrajEncoder:
115
+ # ==============================================================================
116
+ TformerTrajEncoder.activation = 'leaky_relu'
117
+ TformerTrajEncoder.attention_type = @transformer.ClippedSlidingSinkAttention
118
+ TformerTrajEncoder.d_ff = 3072
119
+ TformerTrajEncoder.d_model = 768
120
+ TformerTrajEncoder.dropout_attn = 0.0
121
+ TformerTrajEncoder.dropout_emb = 0.05
122
+ TformerTrajEncoder.dropout_ff = 0.05
123
+ TformerTrajEncoder.dropout_qkv = 0.0
124
+ TformerTrajEncoder.head_scaling = True
125
+ TformerTrajEncoder.n_heads = 12
126
+ TformerTrajEncoder.n_layers = 6
127
+ TformerTrajEncoder.norm = 'layer'
128
+ TformerTrajEncoder.normformer_norms = True
129
+ TformerTrajEncoder.pos_emb = 'fixed'
130
+ TformerTrajEncoder.sigma_reparam = False
131
+
132
+ # Parameters for TransformerTurnEmbedding:
133
+ # ==============================================================================
134
+ # None.
alakazam/ckpts/config.txt ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import amago.agent
2
+ import amago.experiment as amago2
3
+ import amago.nets.actor_critic as amago3
4
+ import amago.nets.traj_encoders as amago4
5
+ import amago.nets.transformer as amago5
6
+
7
+ # Parameters for Agent:
8
+ # ==============================================================================
9
+ # None.
10
+
11
+ # Parameters for ClippedSlidingSinkAttention:
12
+ # ==============================================================================
13
+ ClippedSlidingSinkAttention.logit_clip = 50
14
+ ClippedSlidingSinkAttention.sink_bias = 0.0
15
+ ClippedSlidingSinkAttention.sink_size = 5
16
+ ClippedSlidingSinkAttention.window_size = 96
17
+
18
+ # Parameters for Discrete:
19
+ # ==============================================================================
20
+ Discrete.clip_prob_high = 0.99
21
+ Discrete.clip_prob_low = 0.001
22
+
23
+ # Parameters for Experiment:
24
+ # ==============================================================================
25
+ Experiment.always_load_latest = False
26
+ Experiment.always_save_latest = True
27
+ Experiment.has_dset_edit_rights = True
28
+ Experiment.save_trajs_as = 'npz'
29
+ Experiment.stagger_traj_file_lengths = True
30
+ Experiment.traj_save_len = 10000000000.0
31
+ Experiment.wandb_group_name = None
32
+
33
+ # Parameters for MetamonAMAGOExperiment:
34
+ # ==============================================================================
35
+ MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent
36
+ MetamonAMAGOExperiment.critic_loss_weight = 13.5
37
+ MetamonAMAGOExperiment.grad_clip = 1.5
38
+ MetamonAMAGOExperiment.l2_coeff = 0.0001
39
+ MetamonAMAGOExperiment.learning_rate = 0.000125
40
+ MetamonAMAGOExperiment.lr_warmup_steps = 1500
41
+ MetamonAMAGOExperiment.max_seq_len = 200
42
+ MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder
43
+ MetamonAMAGOExperiment.tstep_encoder_type = @MetamonTstepEncoder
44
+
45
+ # Parameters for MetamonMaskedResidualActor:
46
+ # ==============================================================================
47
+ MetamonMaskedResidualActor.activation = 'leaky_relu'
48
+ MetamonMaskedResidualActor.continuous_dist_type = None
49
+ MetamonMaskedResidualActor.dropout_p = 0.0
50
+ MetamonMaskedResidualActor.feature_dim = 256
51
+ MetamonMaskedResidualActor.mask_illegal_actions = True
52
+ MetamonMaskedResidualActor.normalization = 'layer'
53
+ MetamonMaskedResidualActor.residual_blocks = 2
54
+ MetamonMaskedResidualActor.residual_ff_dim = 512
55
+
56
+ # Parameters for MetamonTstepEncoder:
57
+ # ==============================================================================
58
+ MetamonTstepEncoder.d_model = 108
59
+ MetamonTstepEncoder.dropout = 0.05
60
+ MetamonTstepEncoder.extra_emb_dim = 18
61
+ MetamonTstepEncoder.n_heads = 6
62
+ MetamonTstepEncoder.n_layers = 4
63
+ MetamonTstepEncoder.numerical_tokens = 6
64
+ MetamonTstepEncoder.scratch_tokens = 6
65
+ MetamonTstepEncoder.token_mask_aug = False
66
+
67
+ # Parameters for Multigammas:
68
+ # ==============================================================================
69
+ Multigammas.continuous = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995]
70
+ Multigammas.discrete = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995]
71
+
72
+ # Parameters for MultiTaskAgent:
73
+ # ==============================================================================
74
+ MultiTaskAgent.actor_type = @MetamonMaskedResidualActor
75
+ MultiTaskAgent.critic_type = @actor_critic.NCriticsTwoHot
76
+ MultiTaskAgent.fake_filter = False
77
+ MultiTaskAgent.fbc_filter_func = @agent.leaky_relu_filter
78
+ MultiTaskAgent.gamma = 0.999
79
+ MultiTaskAgent.num_actions_for_value_in_actor_loss = 4
80
+ MultiTaskAgent.num_actions_for_value_in_critic_loss = 4
81
+ MultiTaskAgent.num_critics = 6
82
+ MultiTaskAgent.num_critics_td = 2
83
+ MultiTaskAgent.offline_coeff = 1.0
84
+ MultiTaskAgent.online_coeff = 0.1
85
+ MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions']
86
+ MultiTaskAgent.popart = True
87
+ MultiTaskAgent.reward_multiplier = 10.0
88
+ MultiTaskAgent.tau = 0.008
89
+ MultiTaskAgent.use_multigamma = True
90
+ MultiTaskAgent.use_target_actor = True
91
+
92
+ # Parameters for NCriticsTwoHot:
93
+ # ==============================================================================
94
+ NCriticsTwoHot.activation = 'leaky_relu'
95
+ NCriticsTwoHot.d_hidden = 512
96
+ NCriticsTwoHot.dropout_p = 0.0
97
+ NCriticsTwoHot.max_return = 2100
98
+ NCriticsTwoHot.min_return = -100
99
+ NCriticsTwoHot.n_layers = 2
100
+ NCriticsTwoHot.output_bins = 96
101
+ NCriticsTwoHot.use_symlog = False
102
+
103
+ # Parameters for PopArtLayer:
104
+ # ==============================================================================
105
+ PopArtLayer.beta = 0.0005
106
+ PopArtLayer.init_nu = 100.0
107
+
108
+ # Parameters for ResidualActor:
109
+ # ==============================================================================
110
+ # None.
111
+
112
+ # Parameters for TformerTrajEncoder:
113
+ # ==============================================================================
114
+ TformerTrajEncoder.activation = 'leaky_relu'
115
+ TformerTrajEncoder.attention_type = @transformer.ClippedSlidingSinkAttention
116
+ TformerTrajEncoder.d_ff = 3072
117
+ TformerTrajEncoder.d_model = 768
118
+ TformerTrajEncoder.dropout_attn = 0.0
119
+ TformerTrajEncoder.dropout_emb = 0.05
120
+ TformerTrajEncoder.dropout_ff = 0.05
121
+ TformerTrajEncoder.dropout_qkv = 0.0
122
+ TformerTrajEncoder.head_scaling = True
123
+ TformerTrajEncoder.n_heads = 12
124
+ TformerTrajEncoder.n_layers = 6
125
+ TformerTrajEncoder.norm = 'layer'
126
+ TformerTrajEncoder.normformer_norms = True
127
+ TformerTrajEncoder.pos_emb = 'fixed'
128
+ TformerTrajEncoder.sigma_reparam = False
129
+
130
+ # Parameters for TransformerTurnEmbedding:
131
+ # ==============================================================================
132
+ # None.
alakazam/ckpts/latest/policy.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68b9d7b7dbb389dc8fb66e92392384feb2a11b302b6e1130bfe32790cd1c177c
3
+ size 233767193
alakazam/ckpts/policy_weights/policy_epoch_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23066ec5e029b29b95d8c2a41c18e3793a69a765bfd630814b5eaaf5b290bf65
3
+ size 233772873
alakazam/ckpts/policy_weights/policy_epoch_10.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb1dee5aa36e76bacdf1bb72c5428fa4c08865756573a27192f632df6b5634a3
3
+ size 233773103
alakazam/ckpts/policy_weights/policy_epoch_12.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ed66cb98aa85736f05b94c939dc22e1885570d33326bf413bf73b938d26bca5
3
+ size 233773103
alakazam/ckpts/policy_weights/policy_epoch_14.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de4aa18f93025d31bba32170207c1c09c712dd9b4ca0bd411bb23b3297900e01
3
+ size 233773103
alakazam/ckpts/policy_weights/policy_epoch_16.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a07b872b14ff15a303a37a05beef6ffd72e56871056b465bfd012260b579b0f8
3
+ size 233773103
alakazam/ckpts/policy_weights/policy_epoch_18.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cffb486f4256c6a82be7df6ac68e275f4a121d2516e37a6f5ac6bbdd0608d56
3
+ size 233773103
alakazam/ckpts/policy_weights/policy_epoch_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92421ef958cb85f73f43172431d7d648103efde6bddb6cc128258c284f874bd7
3
+ size 233772873
alakazam/ckpts/policy_weights/policy_epoch_20.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fe5a8dc3a349176445ae8a82ab605180de6bdfd431a6c692eb38d4a16a74b8b
3
+ size 233773103
alakazam/ckpts/policy_weights/policy_epoch_22.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbc1c4c1091337fa7eeb24adff88405d0794bc319ad536335922a2327fa44a68
3
+ size 233773103
alakazam/ckpts/policy_weights/policy_epoch_24.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d85d042459182e9510e5c1d806fc7bec25510d76af846d87b6c6612e8d55c4d
3
+ size 233773103
alakazam/ckpts/policy_weights/policy_epoch_26.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:134397c7107ce1278bf2021e571aa01340bb59270c17b7f4b052e79aee903e88
3
+ size 233773103
alakazam/ckpts/policy_weights/policy_epoch_28.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:864d54aeeb99b5a78c9c2b447a4d20427c1f89ae0bff57476fe7ec14e9cc331d
3
+ size 233773103
alakazam/ckpts/policy_weights/policy_epoch_30.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0549977f771affff1b8509e44f24bf48990fea824ae4d287090442a06634a637
3
+ size 233773103
alakazam/ckpts/policy_weights/policy_epoch_32.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04fd9a00c67e19b3aceff2248a6e5ba12aba59d56db9731ee40b6d03f5976a84
3
+ size 233773103
alakazam/ckpts/policy_weights/policy_epoch_34.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35e3edcc5115e7e0911cf63171a0daa7f103ef5efb1235b19e09d8cc403921e1
3
+ size 233773103
alakazam/ckpts/policy_weights/policy_epoch_36.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68e79bf875d6ed5c0303504509f35d451c95bd1155f38b7dbe541c96dc8b4227
3
+ size 233773103
alakazam/ckpts/policy_weights/policy_epoch_38.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f7c199ee69f72250c88d4d5ff69efd2931bbfad19fd3304bf7669cccbe0060b
3
+ size 233773103
alakazam/ckpts/policy_weights/policy_epoch_4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3af3e365fdc6db33abd761b7c192d84f4dad32adf0ab7979229c799d64a0fbce
3
+ size 233772873
alakazam/ckpts/policy_weights/policy_epoch_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d14a7aecd49e3a201d9354f17c7d0b3d5161b8d1546168b31d878a0818b7e82
3
+ size 233773103
alakazam/ckpts/policy_weights/policy_epoch_42.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:468d24a4ae25a6d5a1aa58e9234403259c8d4e544727da5f1f7c3083f8c6af66
3
+ size 233773103
alakazam/ckpts/policy_weights/policy_epoch_44.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba5a20cd3df727a11681e4a554ce8adc9e6cf5f81d38f0a59a3e661a987ec7f2
3
+ size 233773103
alakazam/ckpts/policy_weights/policy_epoch_46.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc4ae28f50214281a494bebce96b85d3b58b20b07b7d742e9bb72f3f4f983c50
3
+ size 233773103
alakazam/ckpts/policy_weights/policy_epoch_48.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d8f9eff4f41fc552c5e154fa0659bcf6b4038706f166d34d35370be614df3cd
3
+ size 233773103
alakazam/ckpts/policy_weights/policy_epoch_50.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af3a6fe709abf8c057c43c8e09672c9614a29808ef7cdf3277ed98a2502cd933
3
+ size 233773103
alakazam/ckpts/policy_weights/policy_epoch_6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cc117703af7788d33cb011bbffd621ac26467298020e39262e22511dfba9751
3
+ size 233772873
alakazam/ckpts/policy_weights/policy_epoch_8.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:466bb352d16d9191fb32271e4504f43ba8d9aa7115c279006818984f7f1533e0
3
+ size 233772873
alakazam2/ckpts/config.txt ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import amago.agent
2
+ import amago.experiment as amago2
3
+ import amago.nets.actor_critic as amago3
4
+ import amago.nets.traj_encoders as amago4
5
+ import amago.nets.transformer as amago5
6
+
7
+ # Parameters for Agent:
8
+ # ==============================================================================
9
+ # None.
10
+
11
+ # Parameters for Discrete:
12
+ # ==============================================================================
13
+ Discrete.clip_prob_high = 0.99
14
+ Discrete.clip_prob_low = 0.001
15
+
16
+ # Parameters for Experiment:
17
+ # ==============================================================================
18
+ Experiment.always_load_latest = False
19
+ Experiment.always_save_latest = True
20
+ Experiment.has_dset_edit_rights = True
21
+ Experiment.save_trajs_as = 'npz'
22
+ Experiment.stagger_traj_file_lengths = True
23
+ Experiment.traj_save_len = 10000000000.0
24
+ Experiment.wandb_group_name = None
25
+
26
+ # Parameters for FlashAttention:
27
+ # ==============================================================================
28
+ FlashAttention.window_size = (96, 0)
29
+
30
+ # Parameters for MetamonAMAGOExperiment:
31
+ # ==============================================================================
32
+ MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent
33
+ MetamonAMAGOExperiment.critic_loss_weight = 13.5
34
+ MetamonAMAGOExperiment.grad_clip = 1.5
35
+ MetamonAMAGOExperiment.l2_coeff = 0.0001
36
+ MetamonAMAGOExperiment.learning_rate = 0.000125
37
+ MetamonAMAGOExperiment.lr_warmup_steps = 1500
38
+ MetamonAMAGOExperiment.max_seq_len = 200
39
+ MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder
40
+ MetamonAMAGOExperiment.tstep_encoder_type = @MetamonPerceiverTstepEncoder
41
+
42
+ # Parameters for MetamonMaskedResidualActor:
43
+ # ==============================================================================
44
+ MetamonMaskedResidualActor.activation = 'leaky_relu'
45
+ MetamonMaskedResidualActor.continuous_dist_type = None
46
+ MetamonMaskedResidualActor.dropout_p = 0.0
47
+ MetamonMaskedResidualActor.feature_dim = 400
48
+ MetamonMaskedResidualActor.mask_illegal_actions = True
49
+ MetamonMaskedResidualActor.normalization = 'layer'
50
+ MetamonMaskedResidualActor.residual_blocks = 2
51
+ MetamonMaskedResidualActor.residual_ff_dim = 512
52
+
53
+ # Parameters for MetamonPerceiverTstepEncoder:
54
+ # ==============================================================================
55
+ MetamonPerceiverTstepEncoder.d_model = 108
56
+ MetamonPerceiverTstepEncoder.dropout = 0.05
57
+ MetamonPerceiverTstepEncoder.extra_emb_dim = 18
58
+ MetamonPerceiverTstepEncoder.latent_tokens = 8
59
+ MetamonPerceiverTstepEncoder.max_tokens_per_turn = 128
60
+ MetamonPerceiverTstepEncoder.n_heads = 6
61
+ MetamonPerceiverTstepEncoder.n_layers = 5
62
+ MetamonPerceiverTstepEncoder.numerical_tokens = 6
63
+ MetamonPerceiverTstepEncoder.token_mask_aug = False
64
+
65
+ # Parameters for Multigammas:
66
+ # ==============================================================================
67
+ Multigammas.continuous = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995]
68
+ Multigammas.discrete = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995]
69
+
70
+ # Parameters for MultiTaskAgent:
71
+ # ==============================================================================
72
+ MultiTaskAgent.actor_type = @MetamonMaskedResidualActor
73
+ MultiTaskAgent.critic_type = @actor_critic.NCriticsTwoHot
74
+ MultiTaskAgent.fake_filter = False
75
+ MultiTaskAgent.fbc_filter_func = @agent.leaky_relu_filter
76
+ MultiTaskAgent.gamma = 0.999
77
+ MultiTaskAgent.num_actions_for_value_in_actor_loss = 4
78
+ MultiTaskAgent.num_actions_for_value_in_critic_loss = 4
79
+ MultiTaskAgent.num_critics = 6
80
+ MultiTaskAgent.num_critics_td = 2
81
+ MultiTaskAgent.offline_coeff = 1.0
82
+ MultiTaskAgent.online_coeff = 0.1
83
+ MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions']
84
+ MultiTaskAgent.popart = True
85
+ MultiTaskAgent.reward_multiplier = 10.0
86
+ MultiTaskAgent.tau = 0.008
87
+ MultiTaskAgent.use_multigamma = True
88
+ MultiTaskAgent.use_target_actor = True
89
+
90
+ # Parameters for NCriticsTwoHot:
91
+ # ==============================================================================
92
+ NCriticsTwoHot.activation = 'leaky_relu'
93
+ NCriticsTwoHot.d_hidden = 512
94
+ NCriticsTwoHot.dropout_p = 0.0
95
+ NCriticsTwoHot.max_return = 2100
96
+ NCriticsTwoHot.min_return = -100
97
+ NCriticsTwoHot.n_layers = 2
98
+ NCriticsTwoHot.output_bins = 96
99
+ NCriticsTwoHot.use_symlog = False
100
+
101
+ # Parameters for PerceiverTurnEmbedding:
102
+ # ==============================================================================
103
+ # None.
104
+
105
+ # Parameters for PopArtLayer:
106
+ # ==============================================================================
107
+ PopArtLayer.beta = 0.0005
108
+ PopArtLayer.init_nu = 100.0
109
+
110
+ # Parameters for ResidualActor:
111
+ # ==============================================================================
112
+ # None.
113
+
114
+ # Parameters for SigmaReparam:
115
+ # ==============================================================================
116
+ SigmaReparam.bias = True
117
+ SigmaReparam.fast_init = False
118
+
119
+ # Parameters for TformerTrajEncoder:
120
+ # ==============================================================================
121
+ TformerTrajEncoder.activation = 'leaky_relu'
122
+ TformerTrajEncoder.attention_type = @transformer.FlashAttention
123
+ TformerTrajEncoder.d_ff = 3072
124
+ TformerTrajEncoder.d_model = 768
125
+ TformerTrajEncoder.dropout_attn = 0.0
126
+ TformerTrajEncoder.dropout_emb = 0.05
127
+ TformerTrajEncoder.dropout_ff = 0.05
128
+ TformerTrajEncoder.dropout_qkv = 0.0
129
+ TformerTrajEncoder.head_scaling = True
130
+ TformerTrajEncoder.n_heads = 12
131
+ TformerTrajEncoder.n_layers = 6
132
+ TformerTrajEncoder.norm = 'layer'
133
+ TformerTrajEncoder.normformer_norms = True
134
+ TformerTrajEncoder.pos_emb = 'fixed'
135
+ TformerTrajEncoder.sigma_reparam = True
alakazam2/ckpts/latest/policy.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5749e207e2844d32798abe1e50b4a8a9e0ceede488c78fb3d5eb7518af8f5bbd
3
+ size 242350603
alakazam2/ckpts/policy_weights/policy_epoch_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4af96830f111687d973c2fa35f169735cfdb9eb3e24bdfc63ff3ad37e4e2720c
3
+ size 242356043
alakazam2/ckpts/policy_weights/policy_epoch_10.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d88f19f754cbce4fed328a67f2ad1c714ff1bcc598ff8891ee97e0758cc41d2
3
+ size 242356427
alakazam2/ckpts/policy_weights/policy_epoch_12.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b24fe2be2ceea834e9744d1926f37d66653aaa00e9435735e16a4fb3ce625d8
3
+ size 242356427
alakazam2/ckpts/policy_weights/policy_epoch_14.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b7e0f6bc0793d4354e5a4602e94d4a2af091349029b935545148036cef32419
3
+ size 242356427
alakazam2/ckpts/policy_weights/policy_epoch_16.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79780c94dfad447bf7ece49ff9bbe5d39648dd79a50783d6681474391bc3b3e8
3
+ size 242356427
alakazam2/ckpts/policy_weights/policy_epoch_18.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7c8f76c362ba6c8c71259f9b7e0a0e8985dae57ced4613e30fb4f19061d4c65
3
+ size 242356427
alakazam2/ckpts/policy_weights/policy_epoch_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:907a87152482eb3301d29a1b47f0aac6a6bc10512e78831629f4e6f86600ae50
3
+ size 242356043
alakazam2/ckpts/policy_weights/policy_epoch_20.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b56aeb44738de6ffd9a8eb700033ce2bdb3d750255653aba3d35f1849721d09
3
+ size 242356427
alakazam2/ckpts/policy_weights/policy_epoch_22.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:684c23b11feb1d3dd461686aa751801d477d326cdc02e64f9f407ca90fbb168b
3
+ size 242356427
alakazam2/ckpts/policy_weights/policy_epoch_24.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:666557150f2ccc31c9ae7cae90282720eb6fad874ea9c7fcbbbd913313b8112d
3
+ size 242356427
alakazam2/ckpts/policy_weights/policy_epoch_26.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91227a839e1b49c18cec8390eb2b1393d04c00198b04f471472adf1588615167
3
+ size 242356427
alakazam2/ckpts/policy_weights/policy_epoch_28.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0114dd16ae4a25de2c95d64a4e18dd457908cfb9d81e8da60cd4c57b4488f813
3
+ size 242356427
alakazam2/ckpts/policy_weights/policy_epoch_30.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:471900ef9eebfee75ccd6f8f4fec1aaa2e9362731781c63fb8ea6c2ed721bda9
3
+ size 242356427
alakazam2/ckpts/policy_weights/policy_epoch_32.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:081cf299f97c95bd2413943a1098741d565f25f449e2b030d47d40828be408c4
3
+ size 242356427
alakazam2/ckpts/policy_weights/policy_epoch_34.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cb4198d9774888e130de045f20b49dc00067d1ff80be099ee7510f4ddf2d534
3
+ size 242356427
alakazam2/ckpts/policy_weights/policy_epoch_36.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d7acbd7c048a3112c6d6460aea15e69f3c38a66bb52fba18b469188ea767900
3
+ size 242356427
alakazam2/ckpts/policy_weights/policy_epoch_38.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03724e7ee53ee469aeea2469233c89eef616361067dc9646b0a6fe4dd3b2bfb7
3
+ size 242356427
alakazam2/ckpts/policy_weights/policy_epoch_4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd907066db0bc80a556ccf983c129d5b2cfbb530ed4ba35bdfb17e71f3da8313
3
+ size 242356043
alakazam2/ckpts/policy_weights/policy_epoch_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:deaf36d8d2f07b0f2d03539f4e248432a2f372682f38bc8ff9d37a1fe983a0d3
3
+ size 242356427