diff --git a/alakazam/alakazam/ckpts/config.txt b/alakazam/alakazam/ckpts/config.txt new file mode 100755 index 0000000000000000000000000000000000000000..978c33778add791d2e229f370bc71e06b25be9ff --- /dev/null +++ b/alakazam/alakazam/ckpts/config.txt @@ -0,0 +1,134 @@ +import amago.agent +import amago.experiment as amago2 +import amago.nets.actor_critic as amago3 +import amago.nets.traj_encoders as amago4 +import amago.nets.transformer as amago5 + +# Parameters for Agent: +# ============================================================================== +# None. + +# Parameters for ClippedSlidingSinkAttention: +# ============================================================================== +ClippedSlidingSinkAttention.logit_clip = 50 +ClippedSlidingSinkAttention.sink_bias = 0.0 +ClippedSlidingSinkAttention.sink_size = 5 +ClippedSlidingSinkAttention.window_size = 96 + +# Parameters for Discrete: +# ============================================================================== +Discrete.clip_prob_high = 0.99 +Discrete.clip_prob_low = 0.001 + +# Parameters for Experiment: +# ============================================================================== +Experiment.batches_per_update = 1 +Experiment.force_reset_train_envs_every = None +Experiment.has_dset_edit_rights = True +Experiment.mixed_precision = 'no' +Experiment.padded_sampling = 'none' +Experiment.sample_actions = True +Experiment.save_trajs_as = 'npz' +Experiment.traj_save_len = 10000000000.0 +Experiment.wandb_group_name = None + +# Parameters for MetamonAMAGOExperiment: +# ============================================================================== +MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent +MetamonAMAGOExperiment.critic_loss_weight = 12.5 +MetamonAMAGOExperiment.grad_clip = 1.5 +MetamonAMAGOExperiment.l2_coeff = 0.0001 +MetamonAMAGOExperiment.learning_rate = 0.000125 +MetamonAMAGOExperiment.lr_warmup_steps = 1250 +MetamonAMAGOExperiment.max_seq_len = 200 +MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder +MetamonAMAGOExperiment.tstep_encoder_type = @MetamonTstepEncoder + +# Parameters for MetamonMaskedResidualActor: +# ============================================================================== +MetamonMaskedResidualActor.activation = 'leaky_relu' +MetamonMaskedResidualActor.continuous_dist_type = None +MetamonMaskedResidualActor.dropout_p = 0.0 +MetamonMaskedResidualActor.feature_dim = 256 +MetamonMaskedResidualActor.mask_illegal_actions = True +MetamonMaskedResidualActor.normalization = 'layer' +MetamonMaskedResidualActor.residual_blocks = 2 +MetamonMaskedResidualActor.residual_ff_dim = 512 + +# Parameters for MetamonTstepEncoder: +# ============================================================================== +MetamonTstepEncoder.d_model = 108 +MetamonTstepEncoder.dropout = 0.05 +MetamonTstepEncoder.extra_emb_dim = 18 +MetamonTstepEncoder.n_heads = 6 +MetamonTstepEncoder.n_layers = 4 +MetamonTstepEncoder.numerical_tokens = 6 +MetamonTstepEncoder.scratch_tokens = 6 +MetamonTstepEncoder.token_mask_aug = False + +# Parameters for Multigammas: +# ============================================================================== +Multigammas.continuous = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995] +Multigammas.discrete = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995] + +# Parameters for MultiTaskAgent: +# ============================================================================== +MultiTaskAgent.actor_type = @MetamonMaskedResidualActor +MultiTaskAgent.critic_type = @actor_critic.NCriticsTwoHot +MultiTaskAgent.fake_filter = False +MultiTaskAgent.fbc_filter_func = @agent.leaky_relu_filter +MultiTaskAgent.gamma = 0.999 +MultiTaskAgent.num_actions_for_value_in_actor_loss = 4 +MultiTaskAgent.num_actions_for_value_in_critic_loss = 4 +MultiTaskAgent.num_critics = 6 +MultiTaskAgent.num_critics_td = 2 +MultiTaskAgent.offline_coeff = 1.0 +MultiTaskAgent.online_coeff = 0.0 +MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions'] +MultiTaskAgent.popart = True +MultiTaskAgent.reward_multiplier = 10.0 +MultiTaskAgent.tau = 0.004 +MultiTaskAgent.use_multigamma = True +MultiTaskAgent.use_target_actor = True + +# Parameters for NCriticsTwoHot: +# ============================================================================== +NCriticsTwoHot.activation = 'leaky_relu' +NCriticsTwoHot.d_hidden = 512 +NCriticsTwoHot.dropout_p = 0.0 +NCriticsTwoHot.max_return = 2100 +NCriticsTwoHot.min_return = -100 +NCriticsTwoHot.n_layers = 2 +NCriticsTwoHot.output_bins = 96 +NCriticsTwoHot.use_symlog = False + +# Parameters for PopArtLayer: +# ============================================================================== +PopArtLayer.beta = 0.0005 +PopArtLayer.init_nu = 100.0 + +# Parameters for ResidualActor: +# ============================================================================== +# None. + +# Parameters for TformerTrajEncoder: +# ============================================================================== +TformerTrajEncoder.activation = 'leaky_relu' +TformerTrajEncoder.attention_type = @transformer.ClippedSlidingSinkAttention +TformerTrajEncoder.d_ff = 3072 +TformerTrajEncoder.d_model = 768 +TformerTrajEncoder.dropout_attn = 0.0 +TformerTrajEncoder.dropout_emb = 0.05 +TformerTrajEncoder.dropout_ff = 0.05 +TformerTrajEncoder.dropout_qkv = 0.0 +TformerTrajEncoder.head_scaling = True +TformerTrajEncoder.n_heads = 12 +TformerTrajEncoder.n_layers = 6 +TformerTrajEncoder.norm = 'layer' +TformerTrajEncoder.normformer_norms = True +TformerTrajEncoder.pos_emb = 'fixed' +TformerTrajEncoder.sigma_reparam = False + +# Parameters for TransformerTurnEmbedding: +# ============================================================================== +# None. diff --git a/alakazam/ckpts/config.txt b/alakazam/ckpts/config.txt new file mode 100755 index 0000000000000000000000000000000000000000..f1961e50085a6f3a24d443740fda8bd81080eb12 --- /dev/null +++ b/alakazam/ckpts/config.txt @@ -0,0 +1,132 @@ +import amago.agent +import amago.experiment as amago2 +import amago.nets.actor_critic as amago3 +import amago.nets.traj_encoders as amago4 +import amago.nets.transformer as amago5 + +# Parameters for Agent: +# ============================================================================== +# None. + +# Parameters for ClippedSlidingSinkAttention: +# ============================================================================== +ClippedSlidingSinkAttention.logit_clip = 50 +ClippedSlidingSinkAttention.sink_bias = 0.0 +ClippedSlidingSinkAttention.sink_size = 5 +ClippedSlidingSinkAttention.window_size = 96 + +# Parameters for Discrete: +# ============================================================================== +Discrete.clip_prob_high = 0.99 +Discrete.clip_prob_low = 0.001 + +# Parameters for Experiment: +# ============================================================================== +Experiment.always_load_latest = False +Experiment.always_save_latest = True +Experiment.has_dset_edit_rights = True +Experiment.save_trajs_as = 'npz' +Experiment.stagger_traj_file_lengths = True +Experiment.traj_save_len = 10000000000.0 +Experiment.wandb_group_name = None + +# Parameters for MetamonAMAGOExperiment: +# ============================================================================== +MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent +MetamonAMAGOExperiment.critic_loss_weight = 13.5 +MetamonAMAGOExperiment.grad_clip = 1.5 +MetamonAMAGOExperiment.l2_coeff = 0.0001 +MetamonAMAGOExperiment.learning_rate = 0.000125 +MetamonAMAGOExperiment.lr_warmup_steps = 1500 +MetamonAMAGOExperiment.max_seq_len = 200 +MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder +MetamonAMAGOExperiment.tstep_encoder_type = @MetamonTstepEncoder + +# Parameters for MetamonMaskedResidualActor: +# ============================================================================== +MetamonMaskedResidualActor.activation = 'leaky_relu' +MetamonMaskedResidualActor.continuous_dist_type = None +MetamonMaskedResidualActor.dropout_p = 0.0 +MetamonMaskedResidualActor.feature_dim = 256 +MetamonMaskedResidualActor.mask_illegal_actions = True +MetamonMaskedResidualActor.normalization = 'layer' +MetamonMaskedResidualActor.residual_blocks = 2 +MetamonMaskedResidualActor.residual_ff_dim = 512 + +# Parameters for MetamonTstepEncoder: +# ============================================================================== +MetamonTstepEncoder.d_model = 108 +MetamonTstepEncoder.dropout = 0.05 +MetamonTstepEncoder.extra_emb_dim = 18 +MetamonTstepEncoder.n_heads = 6 +MetamonTstepEncoder.n_layers = 4 +MetamonTstepEncoder.numerical_tokens = 6 +MetamonTstepEncoder.scratch_tokens = 6 +MetamonTstepEncoder.token_mask_aug = False + +# Parameters for Multigammas: +# ============================================================================== +Multigammas.continuous = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995] +Multigammas.discrete = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995] + +# Parameters for MultiTaskAgent: +# ============================================================================== +MultiTaskAgent.actor_type = @MetamonMaskedResidualActor +MultiTaskAgent.critic_type = @actor_critic.NCriticsTwoHot +MultiTaskAgent.fake_filter = False +MultiTaskAgent.fbc_filter_func = @agent.leaky_relu_filter +MultiTaskAgent.gamma = 0.999 +MultiTaskAgent.num_actions_for_value_in_actor_loss = 4 +MultiTaskAgent.num_actions_for_value_in_critic_loss = 4 +MultiTaskAgent.num_critics = 6 +MultiTaskAgent.num_critics_td = 2 +MultiTaskAgent.offline_coeff = 1.0 +MultiTaskAgent.online_coeff = 0.1 +MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions'] +MultiTaskAgent.popart = True +MultiTaskAgent.reward_multiplier = 10.0 +MultiTaskAgent.tau = 0.008 +MultiTaskAgent.use_multigamma = True +MultiTaskAgent.use_target_actor = True + +# Parameters for NCriticsTwoHot: +# ============================================================================== +NCriticsTwoHot.activation = 'leaky_relu' +NCriticsTwoHot.d_hidden = 512 +NCriticsTwoHot.dropout_p = 0.0 +NCriticsTwoHot.max_return = 2100 +NCriticsTwoHot.min_return = -100 +NCriticsTwoHot.n_layers = 2 +NCriticsTwoHot.output_bins = 96 +NCriticsTwoHot.use_symlog = False + +# Parameters for PopArtLayer: +# ============================================================================== +PopArtLayer.beta = 0.0005 +PopArtLayer.init_nu = 100.0 + +# Parameters for ResidualActor: +# ============================================================================== +# None. + +# Parameters for TformerTrajEncoder: +# ============================================================================== +TformerTrajEncoder.activation = 'leaky_relu' +TformerTrajEncoder.attention_type = @transformer.ClippedSlidingSinkAttention +TformerTrajEncoder.d_ff = 3072 +TformerTrajEncoder.d_model = 768 +TformerTrajEncoder.dropout_attn = 0.0 +TformerTrajEncoder.dropout_emb = 0.05 +TformerTrajEncoder.dropout_ff = 0.05 +TformerTrajEncoder.dropout_qkv = 0.0 +TformerTrajEncoder.head_scaling = True +TformerTrajEncoder.n_heads = 12 +TformerTrajEncoder.n_layers = 6 +TformerTrajEncoder.norm = 'layer' +TformerTrajEncoder.normformer_norms = True +TformerTrajEncoder.pos_emb = 'fixed' +TformerTrajEncoder.sigma_reparam = False + +# Parameters for TransformerTurnEmbedding: +# ============================================================================== +# None. diff --git a/alakazam/ckpts/latest/policy.pt b/alakazam/ckpts/latest/policy.pt new file mode 100755 index 0000000000000000000000000000000000000000..d3c0c7e685f9de430f82275c0f7c7d26b93d5a78 --- /dev/null +++ b/alakazam/ckpts/latest/policy.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68b9d7b7dbb389dc8fb66e92392384feb2a11b302b6e1130bfe32790cd1c177c +size 233767193 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_0.pt b/alakazam/ckpts/policy_weights/policy_epoch_0.pt new file mode 100755 index 0000000000000000000000000000000000000000..67b27f7269a0793e722ecb4731c8d9bb171ec071 --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23066ec5e029b29b95d8c2a41c18e3793a69a765bfd630814b5eaaf5b290bf65 +size 233772873 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_10.pt b/alakazam/ckpts/policy_weights/policy_epoch_10.pt new file mode 100755 index 0000000000000000000000000000000000000000..9e44cc543c4ce9336131291ef5c517d502b97b50 --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb1dee5aa36e76bacdf1bb72c5428fa4c08865756573a27192f632df6b5634a3 +size 233773103 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_12.pt b/alakazam/ckpts/policy_weights/policy_epoch_12.pt new file mode 100755 index 0000000000000000000000000000000000000000..dee1392283f80302f90c1defa73ba8c21c739389 --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_12.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ed66cb98aa85736f05b94c939dc22e1885570d33326bf413bf73b938d26bca5 +size 233773103 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_14.pt b/alakazam/ckpts/policy_weights/policy_epoch_14.pt new file mode 100755 index 0000000000000000000000000000000000000000..fae750487bffb29190849ee9aba34a16088c4648 --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de4aa18f93025d31bba32170207c1c09c712dd9b4ca0bd411bb23b3297900e01 +size 233773103 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_16.pt b/alakazam/ckpts/policy_weights/policy_epoch_16.pt new file mode 100755 index 0000000000000000000000000000000000000000..2749bfbc503f72b1d8e755b0a3e4dfbc6c491561 --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a07b872b14ff15a303a37a05beef6ffd72e56871056b465bfd012260b579b0f8 +size 233773103 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_18.pt b/alakazam/ckpts/policy_weights/policy_epoch_18.pt new file mode 100755 index 0000000000000000000000000000000000000000..9e804ac1b0eeb482669093eb00b7cb7b592155d8 --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cffb486f4256c6a82be7df6ac68e275f4a121d2516e37a6f5ac6bbdd0608d56 +size 233773103 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_2.pt b/alakazam/ckpts/policy_weights/policy_epoch_2.pt new file mode 100755 index 0000000000000000000000000000000000000000..c6dd8b1bc34560f82ebb242d3a2c50f94c29b15e --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92421ef958cb85f73f43172431d7d648103efde6bddb6cc128258c284f874bd7 +size 233772873 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_20.pt b/alakazam/ckpts/policy_weights/policy_epoch_20.pt new file mode 100755 index 0000000000000000000000000000000000000000..d4660356d646e858a4f12c509f8bcfbd79a0ac54 --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_20.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fe5a8dc3a349176445ae8a82ab605180de6bdfd431a6c692eb38d4a16a74b8b +size 233773103 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_22.pt b/alakazam/ckpts/policy_weights/policy_epoch_22.pt new file mode 100755 index 0000000000000000000000000000000000000000..6f8a486c773c10fdc24f8dd08e25fb6bfb9923fd --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbc1c4c1091337fa7eeb24adff88405d0794bc319ad536335922a2327fa44a68 +size 233773103 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_24.pt b/alakazam/ckpts/policy_weights/policy_epoch_24.pt new file mode 100755 index 0000000000000000000000000000000000000000..cdd3e43a1398d0ffdcc867fccb6cc35505595d26 --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_24.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d85d042459182e9510e5c1d806fc7bec25510d76af846d87b6c6612e8d55c4d +size 233773103 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_26.pt b/alakazam/ckpts/policy_weights/policy_epoch_26.pt new file mode 100755 index 0000000000000000000000000000000000000000..ccc18af63284b2de688d55b0cc1f87e029a280f8 --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:134397c7107ce1278bf2021e571aa01340bb59270c17b7f4b052e79aee903e88 +size 233773103 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_28.pt b/alakazam/ckpts/policy_weights/policy_epoch_28.pt new file mode 100755 index 0000000000000000000000000000000000000000..8c48e56a4b009dc1d70f13df8223bcfc12fad18f --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_28.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:864d54aeeb99b5a78c9c2b447a4d20427c1f89ae0bff57476fe7ec14e9cc331d +size 233773103 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_30.pt b/alakazam/ckpts/policy_weights/policy_epoch_30.pt new file mode 100755 index 0000000000000000000000000000000000000000..dc4678c3131be02b264168c8d4843f119d56f91a --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_30.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0549977f771affff1b8509e44f24bf48990fea824ae4d287090442a06634a637 +size 233773103 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_32.pt b/alakazam/ckpts/policy_weights/policy_epoch_32.pt new file mode 100755 index 0000000000000000000000000000000000000000..d67ff08f13f0ade509f80035083a7c2e77878e8b --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04fd9a00c67e19b3aceff2248a6e5ba12aba59d56db9731ee40b6d03f5976a84 +size 233773103 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_34.pt b/alakazam/ckpts/policy_weights/policy_epoch_34.pt new file mode 100755 index 0000000000000000000000000000000000000000..482da30d51e97b2db952abdb62792f40ef3f3fdd --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_34.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35e3edcc5115e7e0911cf63171a0daa7f103ef5efb1235b19e09d8cc403921e1 +size 233773103 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_36.pt b/alakazam/ckpts/policy_weights/policy_epoch_36.pt new file mode 100755 index 0000000000000000000000000000000000000000..544f69fff7c1b40353c980dd01b26c5f1fc19782 --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_36.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68e79bf875d6ed5c0303504509f35d451c95bd1155f38b7dbe541c96dc8b4227 +size 233773103 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_38.pt b/alakazam/ckpts/policy_weights/policy_epoch_38.pt new file mode 100755 index 0000000000000000000000000000000000000000..32cca650f7afb80e9d409472cb8e4d7fb17a6195 --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_38.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f7c199ee69f72250c88d4d5ff69efd2931bbfad19fd3304bf7669cccbe0060b +size 233773103 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_4.pt b/alakazam/ckpts/policy_weights/policy_epoch_4.pt new file mode 100755 index 0000000000000000000000000000000000000000..3f3260378f9e212773beb752a6bca1da9608316a --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3af3e365fdc6db33abd761b7c192d84f4dad32adf0ab7979229c799d64a0fbce +size 233772873 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_40.pt b/alakazam/ckpts/policy_weights/policy_epoch_40.pt new file mode 100755 index 0000000000000000000000000000000000000000..97c17107b0718930317a5f8dc35b5ae1eec73a7e --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_40.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d14a7aecd49e3a201d9354f17c7d0b3d5161b8d1546168b31d878a0818b7e82 +size 233773103 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_42.pt b/alakazam/ckpts/policy_weights/policy_epoch_42.pt new file mode 100755 index 0000000000000000000000000000000000000000..387e80fe9316d8f25a56bc48992d6566fe6f58fd --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_42.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:468d24a4ae25a6d5a1aa58e9234403259c8d4e544727da5f1f7c3083f8c6af66 +size 233773103 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_44.pt b/alakazam/ckpts/policy_weights/policy_epoch_44.pt new file mode 100755 index 0000000000000000000000000000000000000000..f391934d8512fad663283193b3c6a6289ee3d2ae --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_44.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba5a20cd3df727a11681e4a554ce8adc9e6cf5f81d38f0a59a3e661a987ec7f2 +size 233773103 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_46.pt b/alakazam/ckpts/policy_weights/policy_epoch_46.pt new file mode 100755 index 0000000000000000000000000000000000000000..8bea842695a5b087237cebe75f6521c0d0f24528 --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_46.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc4ae28f50214281a494bebce96b85d3b58b20b07b7d742e9bb72f3f4f983c50 +size 233773103 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_48.pt b/alakazam/ckpts/policy_weights/policy_epoch_48.pt new file mode 100755 index 0000000000000000000000000000000000000000..4f1f42312454800f92eb9bc968ca2724c45ab48c --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_48.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d8f9eff4f41fc552c5e154fa0659bcf6b4038706f166d34d35370be614df3cd +size 233773103 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_50.pt b/alakazam/ckpts/policy_weights/policy_epoch_50.pt new file mode 100755 index 0000000000000000000000000000000000000000..175fee701df731ec7c3ab87a7373b2b404b79878 --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_50.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af3a6fe709abf8c057c43c8e09672c9614a29808ef7cdf3277ed98a2502cd933 +size 233773103 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_6.pt b/alakazam/ckpts/policy_weights/policy_epoch_6.pt new file mode 100755 index 0000000000000000000000000000000000000000..83c467d590c0fc3db3bb71475558245196bc6dac --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cc117703af7788d33cb011bbffd621ac26467298020e39262e22511dfba9751 +size 233772873 diff --git a/alakazam/ckpts/policy_weights/policy_epoch_8.pt b/alakazam/ckpts/policy_weights/policy_epoch_8.pt new file mode 100755 index 0000000000000000000000000000000000000000..98cf07c617269ddb562432b4d037ef6f24c4b6b5 --- /dev/null +++ b/alakazam/ckpts/policy_weights/policy_epoch_8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:466bb352d16d9191fb32271e4504f43ba8d9aa7115c279006818984f7f1533e0 +size 233772873 diff --git a/alakazam2/ckpts/config.txt b/alakazam2/ckpts/config.txt new file mode 100755 index 0000000000000000000000000000000000000000..7420865f963dccd1cfd5249d2380c846c6fc638c --- /dev/null +++ b/alakazam2/ckpts/config.txt @@ -0,0 +1,135 @@ +import amago.agent +import amago.experiment as amago2 +import amago.nets.actor_critic as amago3 +import amago.nets.traj_encoders as amago4 +import amago.nets.transformer as amago5 + +# Parameters for Agent: +# ============================================================================== +# None. + +# Parameters for Discrete: +# ============================================================================== +Discrete.clip_prob_high = 0.99 +Discrete.clip_prob_low = 0.001 + +# Parameters for Experiment: +# ============================================================================== +Experiment.always_load_latest = False +Experiment.always_save_latest = True +Experiment.has_dset_edit_rights = True +Experiment.save_trajs_as = 'npz' +Experiment.stagger_traj_file_lengths = True +Experiment.traj_save_len = 10000000000.0 +Experiment.wandb_group_name = None + +# Parameters for FlashAttention: +# ============================================================================== +FlashAttention.window_size = (96, 0) + +# Parameters for MetamonAMAGOExperiment: +# ============================================================================== +MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent +MetamonAMAGOExperiment.critic_loss_weight = 13.5 +MetamonAMAGOExperiment.grad_clip = 1.5 +MetamonAMAGOExperiment.l2_coeff = 0.0001 +MetamonAMAGOExperiment.learning_rate = 0.000125 +MetamonAMAGOExperiment.lr_warmup_steps = 1500 +MetamonAMAGOExperiment.max_seq_len = 200 +MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder +MetamonAMAGOExperiment.tstep_encoder_type = @MetamonPerceiverTstepEncoder + +# Parameters for MetamonMaskedResidualActor: +# ============================================================================== +MetamonMaskedResidualActor.activation = 'leaky_relu' +MetamonMaskedResidualActor.continuous_dist_type = None +MetamonMaskedResidualActor.dropout_p = 0.0 +MetamonMaskedResidualActor.feature_dim = 400 +MetamonMaskedResidualActor.mask_illegal_actions = True +MetamonMaskedResidualActor.normalization = 'layer' +MetamonMaskedResidualActor.residual_blocks = 2 +MetamonMaskedResidualActor.residual_ff_dim = 512 + +# Parameters for MetamonPerceiverTstepEncoder: +# ============================================================================== +MetamonPerceiverTstepEncoder.d_model = 108 +MetamonPerceiverTstepEncoder.dropout = 0.05 +MetamonPerceiverTstepEncoder.extra_emb_dim = 18 +MetamonPerceiverTstepEncoder.latent_tokens = 8 +MetamonPerceiverTstepEncoder.max_tokens_per_turn = 128 +MetamonPerceiverTstepEncoder.n_heads = 6 +MetamonPerceiverTstepEncoder.n_layers = 5 +MetamonPerceiverTstepEncoder.numerical_tokens = 6 +MetamonPerceiverTstepEncoder.token_mask_aug = False + +# Parameters for Multigammas: +# ============================================================================== +Multigammas.continuous = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995] +Multigammas.discrete = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995] + +# Parameters for MultiTaskAgent: +# ============================================================================== +MultiTaskAgent.actor_type = @MetamonMaskedResidualActor +MultiTaskAgent.critic_type = @actor_critic.NCriticsTwoHot +MultiTaskAgent.fake_filter = False +MultiTaskAgent.fbc_filter_func = @agent.leaky_relu_filter +MultiTaskAgent.gamma = 0.999 +MultiTaskAgent.num_actions_for_value_in_actor_loss = 4 +MultiTaskAgent.num_actions_for_value_in_critic_loss = 4 +MultiTaskAgent.num_critics = 6 +MultiTaskAgent.num_critics_td = 2 +MultiTaskAgent.offline_coeff = 1.0 +MultiTaskAgent.online_coeff = 0.1 +MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions'] +MultiTaskAgent.popart = True +MultiTaskAgent.reward_multiplier = 10.0 +MultiTaskAgent.tau = 0.008 +MultiTaskAgent.use_multigamma = True +MultiTaskAgent.use_target_actor = True + +# Parameters for NCriticsTwoHot: +# ============================================================================== +NCriticsTwoHot.activation = 'leaky_relu' +NCriticsTwoHot.d_hidden = 512 +NCriticsTwoHot.dropout_p = 0.0 +NCriticsTwoHot.max_return = 2100 +NCriticsTwoHot.min_return = -100 +NCriticsTwoHot.n_layers = 2 +NCriticsTwoHot.output_bins = 96 +NCriticsTwoHot.use_symlog = False + +# Parameters for PerceiverTurnEmbedding: +# ============================================================================== +# None. + +# Parameters for PopArtLayer: +# ============================================================================== +PopArtLayer.beta = 0.0005 +PopArtLayer.init_nu = 100.0 + +# Parameters for ResidualActor: +# ============================================================================== +# None. + +# Parameters for SigmaReparam: +# ============================================================================== +SigmaReparam.bias = True +SigmaReparam.fast_init = False + +# Parameters for TformerTrajEncoder: +# ============================================================================== +TformerTrajEncoder.activation = 'leaky_relu' +TformerTrajEncoder.attention_type = @transformer.FlashAttention +TformerTrajEncoder.d_ff = 3072 +TformerTrajEncoder.d_model = 768 +TformerTrajEncoder.dropout_attn = 0.0 +TformerTrajEncoder.dropout_emb = 0.05 +TformerTrajEncoder.dropout_ff = 0.05 +TformerTrajEncoder.dropout_qkv = 0.0 +TformerTrajEncoder.head_scaling = True +TformerTrajEncoder.n_heads = 12 +TformerTrajEncoder.n_layers = 6 +TformerTrajEncoder.norm = 'layer' +TformerTrajEncoder.normformer_norms = True +TformerTrajEncoder.pos_emb = 'fixed' +TformerTrajEncoder.sigma_reparam = True diff --git a/alakazam2/ckpts/latest/policy.pt b/alakazam2/ckpts/latest/policy.pt new file mode 100755 index 0000000000000000000000000000000000000000..8d5f646b9384eab781af009a17d0a083133b7428 --- /dev/null +++ b/alakazam2/ckpts/latest/policy.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5749e207e2844d32798abe1e50b4a8a9e0ceede488c78fb3d5eb7518af8f5bbd +size 242350603 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_0.pt b/alakazam2/ckpts/policy_weights/policy_epoch_0.pt new file mode 100755 index 0000000000000000000000000000000000000000..7fa520bf8d79d3a7f18d45f9158ac637c084a3f2 --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4af96830f111687d973c2fa35f169735cfdb9eb3e24bdfc63ff3ad37e4e2720c +size 242356043 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_10.pt b/alakazam2/ckpts/policy_weights/policy_epoch_10.pt new file mode 100755 index 0000000000000000000000000000000000000000..2c211269bcb5d6aba6533511be39c2ca1ca1ee99 --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d88f19f754cbce4fed328a67f2ad1c714ff1bcc598ff8891ee97e0758cc41d2 +size 242356427 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_12.pt b/alakazam2/ckpts/policy_weights/policy_epoch_12.pt new file mode 100755 index 0000000000000000000000000000000000000000..c60c87a3abc2a1634d1af880ec9f3fddb503bdbb --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_12.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b24fe2be2ceea834e9744d1926f37d66653aaa00e9435735e16a4fb3ce625d8 +size 242356427 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_14.pt b/alakazam2/ckpts/policy_weights/policy_epoch_14.pt new file mode 100755 index 0000000000000000000000000000000000000000..dca22dc5dda427b174652d5cf2de1b634ea9e724 --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b7e0f6bc0793d4354e5a4602e94d4a2af091349029b935545148036cef32419 +size 242356427 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_16.pt b/alakazam2/ckpts/policy_weights/policy_epoch_16.pt new file mode 100755 index 0000000000000000000000000000000000000000..5b5c4258939bbaf4c1e77ed2504f12f73e02afe6 --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79780c94dfad447bf7ece49ff9bbe5d39648dd79a50783d6681474391bc3b3e8 +size 242356427 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_18.pt b/alakazam2/ckpts/policy_weights/policy_epoch_18.pt new file mode 100755 index 0000000000000000000000000000000000000000..a182e3d18347b44ccaf931c197a4128cd19860e5 --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7c8f76c362ba6c8c71259f9b7e0a0e8985dae57ced4613e30fb4f19061d4c65 +size 242356427 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_2.pt b/alakazam2/ckpts/policy_weights/policy_epoch_2.pt new file mode 100755 index 0000000000000000000000000000000000000000..5f2d76519101dfc7b6020ba568bf82e9c6746899 --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:907a87152482eb3301d29a1b47f0aac6a6bc10512e78831629f4e6f86600ae50 +size 242356043 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_20.pt b/alakazam2/ckpts/policy_weights/policy_epoch_20.pt new file mode 100755 index 0000000000000000000000000000000000000000..31ca13288b4e6e929c7518a25671f3dc712c4a61 --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_20.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b56aeb44738de6ffd9a8eb700033ce2bdb3d750255653aba3d35f1849721d09 +size 242356427 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_22.pt b/alakazam2/ckpts/policy_weights/policy_epoch_22.pt new file mode 100755 index 0000000000000000000000000000000000000000..205678a93367196c8eac77d46e79876d93167e0a --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:684c23b11feb1d3dd461686aa751801d477d326cdc02e64f9f407ca90fbb168b +size 242356427 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_24.pt b/alakazam2/ckpts/policy_weights/policy_epoch_24.pt new file mode 100755 index 0000000000000000000000000000000000000000..96cd683045b8957775c5b917f5a44ddff77a0118 --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_24.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:666557150f2ccc31c9ae7cae90282720eb6fad874ea9c7fcbbbd913313b8112d +size 242356427 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_26.pt b/alakazam2/ckpts/policy_weights/policy_epoch_26.pt new file mode 100755 index 0000000000000000000000000000000000000000..830c07fa5ca38ee2bad3c695a41d561d32dd4f0a --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91227a839e1b49c18cec8390eb2b1393d04c00198b04f471472adf1588615167 +size 242356427 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_28.pt b/alakazam2/ckpts/policy_weights/policy_epoch_28.pt new file mode 100755 index 0000000000000000000000000000000000000000..c2a16955544ff3d8121296d814a1363b1f4f0996 --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_28.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0114dd16ae4a25de2c95d64a4e18dd457908cfb9d81e8da60cd4c57b4488f813 +size 242356427 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_30.pt b/alakazam2/ckpts/policy_weights/policy_epoch_30.pt new file mode 100755 index 0000000000000000000000000000000000000000..3891ac01aabe2c10583f199077c536f5c306e495 --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_30.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:471900ef9eebfee75ccd6f8f4fec1aaa2e9362731781c63fb8ea6c2ed721bda9 +size 242356427 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_32.pt b/alakazam2/ckpts/policy_weights/policy_epoch_32.pt new file mode 100755 index 0000000000000000000000000000000000000000..51ec4abc90c4bd7741441d15452b9d09cbee3e2c --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:081cf299f97c95bd2413943a1098741d565f25f449e2b030d47d40828be408c4 +size 242356427 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_34.pt b/alakazam2/ckpts/policy_weights/policy_epoch_34.pt new file mode 100755 index 0000000000000000000000000000000000000000..adaeb5f87c592180e77618235edca76f8d43c3f1 --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_34.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cb4198d9774888e130de045f20b49dc00067d1ff80be099ee7510f4ddf2d534 +size 242356427 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_36.pt b/alakazam2/ckpts/policy_weights/policy_epoch_36.pt new file mode 100755 index 0000000000000000000000000000000000000000..7cd110c07aee1711fe58d4e8f24c544428d145fa --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_36.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d7acbd7c048a3112c6d6460aea15e69f3c38a66bb52fba18b469188ea767900 +size 242356427 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_38.pt b/alakazam2/ckpts/policy_weights/policy_epoch_38.pt new file mode 100755 index 0000000000000000000000000000000000000000..2f17b7dcb9eafdfe11a985b67ce5c8ebaacd6589 --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_38.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03724e7ee53ee469aeea2469233c89eef616361067dc9646b0a6fe4dd3b2bfb7 +size 242356427 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_4.pt b/alakazam2/ckpts/policy_weights/policy_epoch_4.pt new file mode 100755 index 0000000000000000000000000000000000000000..ca628c00188e9b36fc0bcce39a306173112daa96 --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd907066db0bc80a556ccf983c129d5b2cfbb530ed4ba35bdfb17e71f3da8313 +size 242356043 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_40.pt b/alakazam2/ckpts/policy_weights/policy_epoch_40.pt new file mode 100755 index 0000000000000000000000000000000000000000..e7679c2b93a5a83876f219985112106130086164 --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_40.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deaf36d8d2f07b0f2d03539f4e248432a2f372682f38bc8ff9d37a1fe983a0d3 +size 242356427 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_42.pt b/alakazam2/ckpts/policy_weights/policy_epoch_42.pt new file mode 100755 index 0000000000000000000000000000000000000000..ba3097da7b20bcb75a4f1084a79f8eb14a299406 --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_42.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fe0b1324135a72207c4dc56a46273b9545f1059e16d4f04b260cb2e3d3e1d38 +size 242356427 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_44.pt b/alakazam2/ckpts/policy_weights/policy_epoch_44.pt new file mode 100755 index 0000000000000000000000000000000000000000..79bb13dcbe37feea6a801ce77a43c01b6f031b06 --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_44.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1f1aa044874414b9af74fadd97a5a5a608b1d811f995409413f654d54c803d2 +size 242356427 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_46.pt b/alakazam2/ckpts/policy_weights/policy_epoch_46.pt new file mode 100755 index 0000000000000000000000000000000000000000..f028fa263f0904b3f9adc03190148c17cfa1ca63 --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_46.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da34f42d2c93355818f2b231258e7eca8a1886bfa7adc6998291e154b46cabd +size 242356427 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_48.pt b/alakazam2/ckpts/policy_weights/policy_epoch_48.pt new file mode 100755 index 0000000000000000000000000000000000000000..39fb065ab91d8cca7b9332e0f9d6747f09aa27b0 --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_48.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c8bc82f45ecfe4a840ac885e352a81db36cf9149a3c598d22f15ec3cc60bea0 +size 242356427 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_6.pt b/alakazam2/ckpts/policy_weights/policy_epoch_6.pt new file mode 100755 index 0000000000000000000000000000000000000000..ad5d274babfb7b51eaaa986b743723e76c430c54 --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb8ffb48da90b8cbf70e5367b97abd740614fcd3d8e0710f175513ea0490414a +size 242356043 diff --git a/alakazam2/ckpts/policy_weights/policy_epoch_8.pt b/alakazam2/ckpts/policy_weights/policy_epoch_8.pt new file mode 100755 index 0000000000000000000000000000000000000000..778fa5a9c4b0f56c07593084113904cf9ea742ca --- /dev/null +++ b/alakazam2/ckpts/policy_weights/policy_epoch_8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b92405dd27f4533a381de89f401d939d027a549fdd0242a4063c942c67db934 +size 242356043 diff --git a/alakazam3/ckpts/config.txt b/alakazam3/ckpts/config.txt new file mode 100755 index 0000000000000000000000000000000000000000..7880806d78dcf0070e214533dfed99b20208520b --- /dev/null +++ b/alakazam3/ckpts/config.txt @@ -0,0 +1,135 @@ +import amago.agent +import amago.experiment as amago2 +import amago.nets.actor_critic as amago3 +import amago.nets.traj_encoders as amago4 +import amago.nets.transformer as amago5 + +# Parameters for Agent: +# ============================================================================== +# None. + +# Parameters for Discrete: +# ============================================================================== +Discrete.clip_prob_high = 0.99 +Discrete.clip_prob_low = 0.001 + +# Parameters for Experiment: +# ============================================================================== +Experiment.always_load_latest = False +Experiment.always_save_latest = True +Experiment.has_dset_edit_rights = True +Experiment.save_trajs_as = 'npz' +Experiment.stagger_traj_file_lengths = True +Experiment.traj_save_len = 10000000000.0 +Experiment.wandb_group_name = None + +# Parameters for FlashAttention: +# ============================================================================== +FlashAttention.window_size = (96, 0) + +# Parameters for MetamonAMAGOExperiment: +# ============================================================================== +MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent +MetamonAMAGOExperiment.critic_loss_weight = 13.5 +MetamonAMAGOExperiment.grad_clip = 1.5 +MetamonAMAGOExperiment.l2_coeff = 0.0001 +MetamonAMAGOExperiment.learning_rate = 0.000125 +MetamonAMAGOExperiment.lr_warmup_steps = 2000 +MetamonAMAGOExperiment.max_seq_len = 200 +MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder +MetamonAMAGOExperiment.tstep_encoder_type = @MetamonPerceiverTstepEncoder + +# Parameters for MetamonMaskedResidualActor: +# ============================================================================== +MetamonMaskedResidualActor.activation = 'leaky_relu' +MetamonMaskedResidualActor.continuous_dist_type = None +MetamonMaskedResidualActor.dropout_p = 0.0 +MetamonMaskedResidualActor.feature_dim = 400 +MetamonMaskedResidualActor.mask_illegal_actions = True +MetamonMaskedResidualActor.normalization = 'layer' +MetamonMaskedResidualActor.residual_blocks = 2 +MetamonMaskedResidualActor.residual_ff_dim = 512 + +# Parameters for MetamonPerceiverTstepEncoder: +# ============================================================================== +MetamonPerceiverTstepEncoder.d_model = 108 +MetamonPerceiverTstepEncoder.dropout = 0.05 +MetamonPerceiverTstepEncoder.extra_emb_dim = 18 +MetamonPerceiverTstepEncoder.latent_tokens = 8 +MetamonPerceiverTstepEncoder.max_tokens_per_turn = 128 +MetamonPerceiverTstepEncoder.n_heads = 6 +MetamonPerceiverTstepEncoder.n_layers = 5 +MetamonPerceiverTstepEncoder.numerical_tokens = 6 +MetamonPerceiverTstepEncoder.token_mask_aug = False + +# Parameters for Multigammas: +# ============================================================================== +Multigammas.continuous = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995] +Multigammas.discrete = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995] + +# Parameters for MultiTaskAgent: +# ============================================================================== +MultiTaskAgent.actor_type = @MetamonMaskedResidualActor +MultiTaskAgent.critic_type = @actor_critic.NCriticsTwoHot +MultiTaskAgent.fake_filter = False +MultiTaskAgent.fbc_filter_func = @agent.leaky_relu_filter +MultiTaskAgent.gamma = 0.999 +MultiTaskAgent.num_actions_for_value_in_actor_loss = 4 +MultiTaskAgent.num_actions_for_value_in_critic_loss = 4 +MultiTaskAgent.num_critics = 6 +MultiTaskAgent.num_critics_td = 2 +MultiTaskAgent.offline_coeff = 1.0 +MultiTaskAgent.online_coeff = 0.2 +MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions'] +MultiTaskAgent.popart = True +MultiTaskAgent.reward_multiplier = 10.0 +MultiTaskAgent.tau = 0.008 +MultiTaskAgent.use_multigamma = True +MultiTaskAgent.use_target_actor = True + +# Parameters for NCriticsTwoHot: +# ============================================================================== +NCriticsTwoHot.activation = 'leaky_relu' +NCriticsTwoHot.d_hidden = 512 +NCriticsTwoHot.dropout_p = 0.0 +NCriticsTwoHot.max_return = 2100 +NCriticsTwoHot.min_return = -100 +NCriticsTwoHot.n_layers = 2 +NCriticsTwoHot.output_bins = 96 +NCriticsTwoHot.use_symlog = False + +# Parameters for PerceiverTurnEmbedding: +# ============================================================================== +# None. + +# Parameters for PopArtLayer: +# ============================================================================== +PopArtLayer.beta = 0.0005 +PopArtLayer.init_nu = 100.0 + +# Parameters for ResidualActor: +# ============================================================================== +# None. + +# Parameters for SigmaReparam: +# ============================================================================== +SigmaReparam.bias = True +SigmaReparam.fast_init = False + +# Parameters for TformerTrajEncoder: +# ============================================================================== +TformerTrajEncoder.activation = 'leaky_relu' +TformerTrajEncoder.attention_type = @transformer.FlashAttention +TformerTrajEncoder.d_ff = 3072 +TformerTrajEncoder.d_model = 768 +TformerTrajEncoder.dropout_attn = 0.0 +TformerTrajEncoder.dropout_emb = 0.05 +TformerTrajEncoder.dropout_ff = 0.05 +TformerTrajEncoder.dropout_qkv = 0.0 +TformerTrajEncoder.head_scaling = True +TformerTrajEncoder.n_heads = 12 +TformerTrajEncoder.n_layers = 6 +TformerTrajEncoder.norm = 'layer' +TformerTrajEncoder.normformer_norms = True +TformerTrajEncoder.pos_emb = 'fixed' +TformerTrajEncoder.sigma_reparam = True diff --git a/alakazam3/ckpts/latest/policy.pt b/alakazam3/ckpts/latest/policy.pt new file mode 100755 index 0000000000000000000000000000000000000000..95bf97ab9c285b26496e490e4a300800342f44bc --- /dev/null +++ b/alakazam3/ckpts/latest/policy.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69c8350d37ca298fdeb6da01af1139cda58f76fe15de13ccad3c7ce3e8db411e +size 242350603 diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_0.pt b/alakazam3/ckpts/policy_weights/policy_epoch_0.pt new file mode 100755 index 0000000000000000000000000000000000000000..b96b1fe6607f1313f644a64e620a40dffd363c6d --- /dev/null +++ b/alakazam3/ckpts/policy_weights/policy_epoch_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b35d802e2eb7b93e9f14e948cff889185aa1d3c57e7690e1e7224f716e466aab +size 242356043 diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_10.pt b/alakazam3/ckpts/policy_weights/policy_epoch_10.pt new file mode 100755 index 0000000000000000000000000000000000000000..c7328396f86a8aa27184b3881696190931c27778 --- /dev/null +++ b/alakazam3/ckpts/policy_weights/policy_epoch_10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:338ad5ee014194bff81d88c1fea6f2ada901d1346475ae99307fa991ec58f299 +size 242356427 diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_12.pt b/alakazam3/ckpts/policy_weights/policy_epoch_12.pt new file mode 100755 index 0000000000000000000000000000000000000000..150b6615de39b2444ad5d9930aca20abfd2900fc --- /dev/null +++ b/alakazam3/ckpts/policy_weights/policy_epoch_12.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:663c1a71b7ef93c8f9c1c8ef3590ada41d79901960e69ce08ced90de612580c8 +size 242356427 diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_14.pt b/alakazam3/ckpts/policy_weights/policy_epoch_14.pt new file mode 100755 index 0000000000000000000000000000000000000000..ef39dacf6430e2a575def071da4dfbf722ef4757 --- /dev/null +++ b/alakazam3/ckpts/policy_weights/policy_epoch_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eb38fc0259716ca96108deadd949a8c0b084c51d9aecb4aa2a843d5928c679a +size 242356427 diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_16.pt b/alakazam3/ckpts/policy_weights/policy_epoch_16.pt new file mode 100755 index 0000000000000000000000000000000000000000..5c431999934846748e8a8d983d5c8605b8b6e532 --- /dev/null +++ b/alakazam3/ckpts/policy_weights/policy_epoch_16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43dfa1ca92e5d5c903fde654ff6dac6d54df8596b946bec141abcde3b0126a76 +size 242356427 diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_18.pt b/alakazam3/ckpts/policy_weights/policy_epoch_18.pt new file mode 100755 index 0000000000000000000000000000000000000000..2f212f0913f2e1755d84b970b437e845c425dec8 --- /dev/null +++ b/alakazam3/ckpts/policy_weights/policy_epoch_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38ce128d73d8a7b9a8a7c0403dfb3c7cbf859d69395f46ed0d5d430ba16183e2 +size 242356427 diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_2.pt b/alakazam3/ckpts/policy_weights/policy_epoch_2.pt new file mode 100755 index 0000000000000000000000000000000000000000..2a3cfda8d596e060a309c985b6014d4a6e453d84 --- /dev/null +++ b/alakazam3/ckpts/policy_weights/policy_epoch_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3686a5fc89a3d23c5b3a4da0ec0accd088318708c3b0ab2236b42a1e790d899 +size 242356043 diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_20.pt b/alakazam3/ckpts/policy_weights/policy_epoch_20.pt new file mode 100755 index 0000000000000000000000000000000000000000..217b2f9b7a4b9408827da38e7ce3c17489555279 --- /dev/null +++ b/alakazam3/ckpts/policy_weights/policy_epoch_20.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6531a3da103f12611a9e582376fd1b3abedd846f6b384473b63c3a79c03354a7 +size 242356427 diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_22.pt b/alakazam3/ckpts/policy_weights/policy_epoch_22.pt new file mode 100755 index 0000000000000000000000000000000000000000..73f297b7efbedfc521e3783ed50d4c99259d1b5d --- /dev/null +++ b/alakazam3/ckpts/policy_weights/policy_epoch_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f896a0f123c0746ac24780b4b9db19234bc353641c83d304bf1fcec75589ebc +size 242356427 diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_4.pt b/alakazam3/ckpts/policy_weights/policy_epoch_4.pt new file mode 100755 index 0000000000000000000000000000000000000000..7089e789c0ca71340a74b1a096f651e6f2b7bff4 --- /dev/null +++ b/alakazam3/ckpts/policy_weights/policy_epoch_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5d9a6a446bb9d4009160473a206e2ccfd618f591bdf4f3b8a14a638a1f5e220 +size 242356043 diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_6.pt b/alakazam3/ckpts/policy_weights/policy_epoch_6.pt new file mode 100755 index 0000000000000000000000000000000000000000..04a3d1ba2322ddf6fb4e4e757cc47a551c483ca4 --- /dev/null +++ b/alakazam3/ckpts/policy_weights/policy_epoch_6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db01e5836b0100954478b7e0693aace13aa1be934f85e732b37a6ebe23caead1 +size 242356043 diff --git a/alakazam3/ckpts/policy_weights/policy_epoch_8.pt b/alakazam3/ckpts/policy_weights/policy_epoch_8.pt new file mode 100755 index 0000000000000000000000000000000000000000..875166057b1d0877ed112f76efb6034367759ea0 --- /dev/null +++ b/alakazam3/ckpts/policy_weights/policy_epoch_8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:955ada9176ca90e764049f789e47c51ba8c3abfb783761d8a03feb9c3844bd21 +size 242356043 diff --git a/alakazam4/ckpts/config.txt b/alakazam4/ckpts/config.txt new file mode 100755 index 0000000000000000000000000000000000000000..7664a71d55595a33192386637ee1c3b1682b53a3 --- /dev/null +++ b/alakazam4/ckpts/config.txt @@ -0,0 +1,135 @@ +import amago.agent +import amago.experiment as amago2 +import amago.nets.actor_critic as amago3 +import amago.nets.traj_encoders as amago4 +import amago.nets.transformer as amago5 + +# Parameters for Agent: +# ============================================================================== +# None. + +# Parameters for Discrete: +# ============================================================================== +Discrete.clip_prob_high = 0.99 +Discrete.clip_prob_low = 0.001 + +# Parameters for Experiment: +# ============================================================================== +Experiment.always_load_latest = False +Experiment.always_save_latest = True +Experiment.has_dset_edit_rights = True +Experiment.save_trajs_as = 'npz' +Experiment.stagger_traj_file_lengths = True +Experiment.traj_save_len = 10000000000.0 +Experiment.wandb_group_name = None + +# Parameters for FlashAttention: +# ============================================================================== +FlashAttention.window_size = (96, 0) + +# Parameters for MetamonAMAGOExperiment: +# ============================================================================== +MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent +MetamonAMAGOExperiment.critic_loss_weight = 13.5 +MetamonAMAGOExperiment.grad_clip = 1.5 +MetamonAMAGOExperiment.l2_coeff = 0.0001 +MetamonAMAGOExperiment.learning_rate = 0.000125 +MetamonAMAGOExperiment.lr_warmup_steps = 2000 +MetamonAMAGOExperiment.max_seq_len = 128 +MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder +MetamonAMAGOExperiment.tstep_encoder_type = @MetamonPerceiverTstepEncoder + +# Parameters for MetamonMaskedResidualActor: +# ============================================================================== +MetamonMaskedResidualActor.activation = 'leaky_relu' +MetamonMaskedResidualActor.continuous_dist_type = None +MetamonMaskedResidualActor.dropout_p = 0.0 +MetamonMaskedResidualActor.feature_dim = 450 +MetamonMaskedResidualActor.mask_illegal_actions = True +MetamonMaskedResidualActor.normalization = 'layer' +MetamonMaskedResidualActor.residual_blocks = 2 +MetamonMaskedResidualActor.residual_ff_dim = 512 + +# Parameters for MetamonPerceiverTstepEncoder: +# ============================================================================== +MetamonPerceiverTstepEncoder.d_model = 108 +MetamonPerceiverTstepEncoder.dropout = 0.05 +MetamonPerceiverTstepEncoder.extra_emb_dim = 18 +MetamonPerceiverTstepEncoder.latent_tokens = 8 +MetamonPerceiverTstepEncoder.max_tokens_per_turn = 128 +MetamonPerceiverTstepEncoder.n_heads = 6 +MetamonPerceiverTstepEncoder.n_layers = 8 +MetamonPerceiverTstepEncoder.numerical_tokens = 6 +MetamonPerceiverTstepEncoder.token_mask_aug = False + +# Parameters for Multigammas: +# ============================================================================== +Multigammas.continuous = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995] +Multigammas.discrete = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995] + +# Parameters for MultiTaskAgent: +# ============================================================================== +MultiTaskAgent.actor_type = @MetamonMaskedResidualActor +MultiTaskAgent.critic_type = @actor_critic.NCriticsTwoHot +MultiTaskAgent.fake_filter = False +MultiTaskAgent.fbc_filter_func = @agent.leaky_relu_filter +MultiTaskAgent.gamma = 0.999 +MultiTaskAgent.num_actions_for_value_in_actor_loss = 4 +MultiTaskAgent.num_actions_for_value_in_critic_loss = 4 +MultiTaskAgent.num_critics = 6 +MultiTaskAgent.num_critics_td = 2 +MultiTaskAgent.offline_coeff = 1.0 +MultiTaskAgent.online_coeff = 0.2 +MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions'] +MultiTaskAgent.popart = True +MultiTaskAgent.reward_multiplier = 10.0 +MultiTaskAgent.tau = 0.008 +MultiTaskAgent.use_multigamma = True +MultiTaskAgent.use_target_actor = True + +# Parameters for NCriticsTwoHot: +# ============================================================================== +NCriticsTwoHot.activation = 'leaky_relu' +NCriticsTwoHot.d_hidden = 512 +NCriticsTwoHot.dropout_p = 0.0 +NCriticsTwoHot.max_return = 2100 +NCriticsTwoHot.min_return = -100 +NCriticsTwoHot.n_layers = 2 +NCriticsTwoHot.output_bins = 96 +NCriticsTwoHot.use_symlog = False + +# Parameters for PerceiverTurnEmbedding: +# ============================================================================== +# None. + +# Parameters for PopArtLayer: +# ============================================================================== +PopArtLayer.beta = 0.0005 +PopArtLayer.init_nu = 100.0 + +# Parameters for ResidualActor: +# ============================================================================== +# None. + +# Parameters for SigmaReparam: +# ============================================================================== +SigmaReparam.bias = True +SigmaReparam.fast_init = False + +# Parameters for TformerTrajEncoder: +# ============================================================================== +TformerTrajEncoder.activation = 'leaky_relu' +TformerTrajEncoder.attention_type = @transformer.FlashAttention +TformerTrajEncoder.d_ff = 3072 +TformerTrajEncoder.d_model = 768 +TformerTrajEncoder.dropout_attn = 0.0 +TformerTrajEncoder.dropout_emb = 0.05 +TformerTrajEncoder.dropout_ff = 0.05 +TformerTrajEncoder.dropout_qkv = 0.0 +TformerTrajEncoder.head_scaling = True +TformerTrajEncoder.n_heads = 12 +TformerTrajEncoder.n_layers = 8 +TformerTrajEncoder.norm = 'layer' +TformerTrajEncoder.normformer_norms = True +TformerTrajEncoder.pos_emb = 'fixed' +TformerTrajEncoder.sigma_reparam = True diff --git a/alakazam4/ckpts/latest/policy.pt b/alakazam4/ckpts/latest/policy.pt new file mode 100755 index 0000000000000000000000000000000000000000..284c5d433ed8600f3887da42eb4fee23e22d1b95 --- /dev/null +++ b/alakazam4/ckpts/latest/policy.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5601c8ee0d6f72f92cf23cab55de04ac0864ead2fda4677e0f2a3207ae24a28 +size 304148153 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_0.pt b/alakazam4/ckpts/policy_weights/policy_epoch_0.pt new file mode 100755 index 0000000000000000000000000000000000000000..6712fb5f8660003deae20b57c7ef46c1a1eb5791 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78005d9fdeff0f09b0d8712637f09727fecf6b2a4a960efcbd8e8b3e7260c813 +size 304154601 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_10.pt b/alakazam4/ckpts/policy_weights/policy_epoch_10.pt new file mode 100755 index 0000000000000000000000000000000000000000..e324cc848c25640cde8a3da05899b0a7a35d9575 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77ebed55e52054402b510aaa4c5372d0789c24c4ab11ac41e721bbd1e5dc5d35 +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_12.pt b/alakazam4/ckpts/policy_weights/policy_epoch_12.pt new file mode 100755 index 0000000000000000000000000000000000000000..dcf1b02be6c661d2efd0c6cb74d8d154d18faa1e --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_12.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fd533a9c3e931eb332d9b2c94930d7b08749137065534832316981a75d17a1f +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_14.pt b/alakazam4/ckpts/policy_weights/policy_epoch_14.pt new file mode 100755 index 0000000000000000000000000000000000000000..330bff4c30e7fd74969d78467712aa64a0ae416b --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f6421b358ee8d2f824323422181a03a150c1402b34dfbc196f850954c1509bb +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_16.pt b/alakazam4/ckpts/policy_weights/policy_epoch_16.pt new file mode 100755 index 0000000000000000000000000000000000000000..80fd2c27cd030841da5f8a71a6783dea3c25d141 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7e3c503ae61d380eaf438e4fb6fb860f58d6983b92d04af0a62534e108140fa +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_18.pt b/alakazam4/ckpts/policy_weights/policy_epoch_18.pt new file mode 100755 index 0000000000000000000000000000000000000000..d3c5759f9a63d55c0124fc2ce713e506530bf9ef --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c67f39fd9e538fa76af1c33ae971d30bdbed3e101eabca73d9ed8b0266cd1924 +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_2.pt b/alakazam4/ckpts/policy_weights/policy_epoch_2.pt new file mode 100755 index 0000000000000000000000000000000000000000..0725aa1fd6aa99af97f9ef5a2c49f96fdef0bb8a --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c20fef8bb762133687107334e8b105fab9a80e63a7aea66e3930936cb1252503 +size 304154601 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_20.pt b/alakazam4/ckpts/policy_weights/policy_epoch_20.pt new file mode 100755 index 0000000000000000000000000000000000000000..4571dd2557bba538a07397ecf96e6160c28d6fb7 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_20.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe48b436c542b1e18d9766259873af2d3ac98f67983f6df16f0341846bac4474 +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_22.pt b/alakazam4/ckpts/policy_weights/policy_epoch_22.pt new file mode 100755 index 0000000000000000000000000000000000000000..d9ad304a0c76230933eb892918bbba9983a72be5 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b78191528682688dcc570a5c9232c4effec516b2e48fc75cb86fe76c6cbe2bb +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_24.pt b/alakazam4/ckpts/policy_weights/policy_epoch_24.pt new file mode 100755 index 0000000000000000000000000000000000000000..2f1256ad71bdba509ead3eb0b12928bf12e5d57d --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_24.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e24b63fe3c2524b1510e051805980992f8f847bd69e82e111b12c6794a3abdb +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_26.pt b/alakazam4/ckpts/policy_weights/policy_epoch_26.pt new file mode 100755 index 0000000000000000000000000000000000000000..08fd7ad6d7756b049e167aeefffece15c477c731 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfb5f01964f0a22b99fb751aa26594182733f7224dffd59ea262d7fae306ab48 +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_28.pt b/alakazam4/ckpts/policy_weights/policy_epoch_28.pt new file mode 100755 index 0000000000000000000000000000000000000000..dc207ff6d9f6f6e5cfc96f518cc42ce66727c431 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_28.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f9b20c3d91c7d935a3d4abe02d98633494292b2f4294446c989cbebcce6a400 +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_30.pt b/alakazam4/ckpts/policy_weights/policy_epoch_30.pt new file mode 100755 index 0000000000000000000000000000000000000000..ab642520592e99aed831ac93dbfa07198f8854a6 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_30.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ad02e39ac72fc50d97e4f2a368640a37f5b394b130daa9e35ac2bd523dcca81 +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_32.pt b/alakazam4/ckpts/policy_weights/policy_epoch_32.pt new file mode 100755 index 0000000000000000000000000000000000000000..d426b111f7312786e7a9de6148533a7c8eeb36b7 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:710e89ed49fbe69c2f1f85643c23d5bf0a3691e3d4d52c974d022b36f108d3e4 +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_34.pt b/alakazam4/ckpts/policy_weights/policy_epoch_34.pt new file mode 100755 index 0000000000000000000000000000000000000000..1c2ac2fce314185f28bb5ff747e1ab1a7d526a42 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_34.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d33078a31edd1dab9298ed97bc1e9b07bcecb2c3b80adb5f8262211fafd0afc +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_36.pt b/alakazam4/ckpts/policy_weights/policy_epoch_36.pt new file mode 100755 index 0000000000000000000000000000000000000000..505171e491079f186e1e325a670bf2b95eaa7d75 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_36.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d18bf41e058c2e0458d3d54e9f39f3102fd073412009dd71b3024602a9b4da1 +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_38.pt b/alakazam4/ckpts/policy_weights/policy_epoch_38.pt new file mode 100755 index 0000000000000000000000000000000000000000..600de2bc283e3b1c5e1ec3a8299ee618cd54cfa5 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_38.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7e29be87b95d42ed73c0467ba19be54a7284f4f9da76d629b21c6b9e655f2a4 +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_4.pt b/alakazam4/ckpts/policy_weights/policy_epoch_4.pt new file mode 100755 index 0000000000000000000000000000000000000000..5c65eb0b53b30240432802f4bca291396fc5e081 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc051f0c45cd5b80b963ed56e9386f53c701bf443c524668a6d76686ba77fd0f +size 304154601 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_40.pt b/alakazam4/ckpts/policy_weights/policy_epoch_40.pt new file mode 100755 index 0000000000000000000000000000000000000000..1e4f40badac3ed0300432024e625174c80558fd2 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_40.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30936b45a508545acf3f9775217f7824d2e85102d5930e7b4316c49c105f0cc0 +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_42.pt b/alakazam4/ckpts/policy_weights/policy_epoch_42.pt new file mode 100755 index 0000000000000000000000000000000000000000..e9b1029db8b7c4b0e97c25457b6b26de984c7dc7 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_42.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd6387452732fd6d7ee8d8432d5e31621da4b4d1f5a1999bad01d9486b99b51b +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_44.pt b/alakazam4/ckpts/policy_weights/policy_epoch_44.pt new file mode 100755 index 0000000000000000000000000000000000000000..bb16c06b1cd329e5ad55b3335c8e6ec432acec66 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_44.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1348d652f9813e228acf71f6bd4cb5cf341b0648d25e942141f5667202c3ba5 +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_46.pt b/alakazam4/ckpts/policy_weights/policy_epoch_46.pt new file mode 100755 index 0000000000000000000000000000000000000000..7b0f9481a6fdd7e92a084962bb636f6ffbabd952 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_46.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae885fb42bc9b0a5d1707a100ec1f65b1af57e05e8c1bbc3dcd2131d2daf939 +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_48.pt b/alakazam4/ckpts/policy_weights/policy_epoch_48.pt new file mode 100755 index 0000000000000000000000000000000000000000..035c8dd4fe1ad9743caf0319c5763a151443b274 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_48.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbae93d41495eab651e6972e16d957a9e5c92e941ebb8354504a287389b921fd +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_50.pt b/alakazam4/ckpts/policy_weights/policy_epoch_50.pt new file mode 100755 index 0000000000000000000000000000000000000000..6226f1723f7e82e84d94a365ab6f17c6b27982f4 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_50.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f266999409eb4c8cb9bf18a8bbd873585a3dca0b275e95017ab63d7fb92eab2 +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_52.pt b/alakazam4/ckpts/policy_weights/policy_epoch_52.pt new file mode 100755 index 0000000000000000000000000000000000000000..1ce46c40da3eecef69039a02570518f1c255affc --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_52.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8ab6b1243c8004b617dce6be97276d7f8a4a02c910c9ae33b36e5e43bef4302 +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_54.pt b/alakazam4/ckpts/policy_weights/policy_epoch_54.pt new file mode 100755 index 0000000000000000000000000000000000000000..a0d2b51696afafcab81e35b4ebef80183a4d8144 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_54.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a958d42d59ca1f96feb41cb3aad60b49233ce93260d3aa9a9bf70b5776e6355 +size 304155119 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_6.pt b/alakazam4/ckpts/policy_weights/policy_epoch_6.pt new file mode 100755 index 0000000000000000000000000000000000000000..b06437dd7960f5684a8cdc49c63df388acdfb8f3 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dee52fe5ca609258287bd33723ce6e7ca19dc0d0791499ffb2b0c2c635a5064 +size 304154601 diff --git a/alakazam4/ckpts/policy_weights/policy_epoch_8.pt b/alakazam4/ckpts/policy_weights/policy_epoch_8.pt new file mode 100755 index 0000000000000000000000000000000000000000..a01d11d0b914672fa1b7a9ad8ad314ca97a2c440 --- /dev/null +++ b/alakazam4/ckpts/policy_weights/policy_epoch_8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81030d00db190fc7a7b934cbd18758f34c2ae2feac8a7fffec62e5f1857fb344 +size 304154601 diff --git a/alakazam_patched/ckpts/config.txt b/alakazam_patched/ckpts/config.txt new file mode 100755 index 0000000000000000000000000000000000000000..7664a71d55595a33192386637ee1c3b1682b53a3 --- /dev/null +++ b/alakazam_patched/ckpts/config.txt @@ -0,0 +1,135 @@ +import amago.agent +import amago.experiment as amago2 +import amago.nets.actor_critic as amago3 +import amago.nets.traj_encoders as amago4 +import amago.nets.transformer as amago5 + +# Parameters for Agent: +# ============================================================================== +# None. + +# Parameters for Discrete: +# ============================================================================== +Discrete.clip_prob_high = 0.99 +Discrete.clip_prob_low = 0.001 + +# Parameters for Experiment: +# ============================================================================== +Experiment.always_load_latest = False +Experiment.always_save_latest = True +Experiment.has_dset_edit_rights = True +Experiment.save_trajs_as = 'npz' +Experiment.stagger_traj_file_lengths = True +Experiment.traj_save_len = 10000000000.0 +Experiment.wandb_group_name = None + +# Parameters for FlashAttention: +# ============================================================================== +FlashAttention.window_size = (96, 0) + +# Parameters for MetamonAMAGOExperiment: +# ============================================================================== +MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent +MetamonAMAGOExperiment.critic_loss_weight = 13.5 +MetamonAMAGOExperiment.grad_clip = 1.5 +MetamonAMAGOExperiment.l2_coeff = 0.0001 +MetamonAMAGOExperiment.learning_rate = 0.000125 +MetamonAMAGOExperiment.lr_warmup_steps = 2000 +MetamonAMAGOExperiment.max_seq_len = 128 +MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder +MetamonAMAGOExperiment.tstep_encoder_type = @MetamonPerceiverTstepEncoder + +# Parameters for MetamonMaskedResidualActor: +# ============================================================================== +MetamonMaskedResidualActor.activation = 'leaky_relu' +MetamonMaskedResidualActor.continuous_dist_type = None +MetamonMaskedResidualActor.dropout_p = 0.0 +MetamonMaskedResidualActor.feature_dim = 450 +MetamonMaskedResidualActor.mask_illegal_actions = True +MetamonMaskedResidualActor.normalization = 'layer' +MetamonMaskedResidualActor.residual_blocks = 2 +MetamonMaskedResidualActor.residual_ff_dim = 512 + +# Parameters for MetamonPerceiverTstepEncoder: +# ============================================================================== +MetamonPerceiverTstepEncoder.d_model = 108 +MetamonPerceiverTstepEncoder.dropout = 0.05 +MetamonPerceiverTstepEncoder.extra_emb_dim = 18 +MetamonPerceiverTstepEncoder.latent_tokens = 8 +MetamonPerceiverTstepEncoder.max_tokens_per_turn = 128 +MetamonPerceiverTstepEncoder.n_heads = 6 +MetamonPerceiverTstepEncoder.n_layers = 8 +MetamonPerceiverTstepEncoder.numerical_tokens = 6 +MetamonPerceiverTstepEncoder.token_mask_aug = False + +# Parameters for Multigammas: +# ============================================================================== +Multigammas.continuous = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995] +Multigammas.discrete = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995] + +# Parameters for MultiTaskAgent: +# ============================================================================== +MultiTaskAgent.actor_type = @MetamonMaskedResidualActor +MultiTaskAgent.critic_type = @actor_critic.NCriticsTwoHot +MultiTaskAgent.fake_filter = False +MultiTaskAgent.fbc_filter_func = @agent.leaky_relu_filter +MultiTaskAgent.gamma = 0.999 +MultiTaskAgent.num_actions_for_value_in_actor_loss = 4 +MultiTaskAgent.num_actions_for_value_in_critic_loss = 4 +MultiTaskAgent.num_critics = 6 +MultiTaskAgent.num_critics_td = 2 +MultiTaskAgent.offline_coeff = 1.0 +MultiTaskAgent.online_coeff = 0.2 +MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions'] +MultiTaskAgent.popart = True +MultiTaskAgent.reward_multiplier = 10.0 +MultiTaskAgent.tau = 0.008 +MultiTaskAgent.use_multigamma = True +MultiTaskAgent.use_target_actor = True + +# Parameters for NCriticsTwoHot: +# ============================================================================== +NCriticsTwoHot.activation = 'leaky_relu' +NCriticsTwoHot.d_hidden = 512 +NCriticsTwoHot.dropout_p = 0.0 +NCriticsTwoHot.max_return = 2100 +NCriticsTwoHot.min_return = -100 +NCriticsTwoHot.n_layers = 2 +NCriticsTwoHot.output_bins = 96 +NCriticsTwoHot.use_symlog = False + +# Parameters for PerceiverTurnEmbedding: +# ============================================================================== +# None. + +# Parameters for PopArtLayer: +# ============================================================================== +PopArtLayer.beta = 0.0005 +PopArtLayer.init_nu = 100.0 + +# Parameters for ResidualActor: +# ============================================================================== +# None. + +# Parameters for SigmaReparam: +# ============================================================================== +SigmaReparam.bias = True +SigmaReparam.fast_init = False + +# Parameters for TformerTrajEncoder: +# ============================================================================== +TformerTrajEncoder.activation = 'leaky_relu' +TformerTrajEncoder.attention_type = @transformer.FlashAttention +TformerTrajEncoder.d_ff = 3072 +TformerTrajEncoder.d_model = 768 +TformerTrajEncoder.dropout_attn = 0.0 +TformerTrajEncoder.dropout_emb = 0.05 +TformerTrajEncoder.dropout_ff = 0.05 +TformerTrajEncoder.dropout_qkv = 0.0 +TformerTrajEncoder.head_scaling = True +TformerTrajEncoder.n_heads = 12 +TformerTrajEncoder.n_layers = 8 +TformerTrajEncoder.norm = 'layer' +TformerTrajEncoder.normformer_norms = True +TformerTrajEncoder.pos_emb = 'fixed' +TformerTrajEncoder.sigma_reparam = True diff --git a/alakazam_patched/ckpts/latest/policy.pt b/alakazam_patched/ckpts/latest/policy.pt new file mode 100755 index 0000000000000000000000000000000000000000..e4b2feafa94abadfdf9edfc5e4292fb666d305ed --- /dev/null +++ b/alakazam_patched/ckpts/latest/policy.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:263e04fa26f1fde0df2fbe959f94b18245a3387d98918701f9f532202c4f888f +size 304148153 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_0.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_0.pt new file mode 100755 index 0000000000000000000000000000000000000000..41605cbe5975d3660df6e5cd70fb1a2a7c1020e2 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fe0bcfa730996a37c126e4e7bd52d57f756293ba71ed1c458f9c5ab755d6dc9 +size 304154601 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_10.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_10.pt new file mode 100755 index 0000000000000000000000000000000000000000..838b2fb27b02947686f480c27271c21dd3196c1c --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51a69dc023d1f1068858bfe89b7aaafdb7b609ca18aa8a26a06f9e9a72a8722b +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_12.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_12.pt new file mode 100755 index 0000000000000000000000000000000000000000..336d3e09948360bb71b83b075c6f42d47b6130f8 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_12.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d82e78b0c9e622308056217288419278d1fdc94a14a0aaf8ad06e56b6e7eb3b +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_14.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_14.pt new file mode 100755 index 0000000000000000000000000000000000000000..c71d7501c4aa816f128636c203b5f39f835a5261 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b258755bd773a01cfeb2fe87fd5512a2559aec9cff05220c4bfb41627cac61de +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_16.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_16.pt new file mode 100755 index 0000000000000000000000000000000000000000..383cbd6d7f60da34507caacfed71e8753e78e49d --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:422c854d0c144b43a851a09b4da32f674b207ca7b8f8eeb3c5d2e59431c4ef9f +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_18.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_18.pt new file mode 100755 index 0000000000000000000000000000000000000000..2c5275ae5560c5bba0de71fb1563705a5b2b5140 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bba20b8d8dd3b6429f7c532120e55a34737a5f6379ec5179de897ceed3ad3737 +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_2.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_2.pt new file mode 100755 index 0000000000000000000000000000000000000000..08cb5ce94501339ed342c2cd3fe779f735e9e05e --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f72fb7645fa54b8fb4ae83f94444f3da49be1220f3a8ccd051375c1ce25541d +size 304154601 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_20.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_20.pt new file mode 100755 index 0000000000000000000000000000000000000000..11ec3e954421ffeed8d70d85b0c468910d2252de --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_20.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6639d34acc91cd0eddaa86028875bf6b59813e3a5faf64ea89fb85c35e16aef +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_22.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_22.pt new file mode 100755 index 0000000000000000000000000000000000000000..b26d92e754bf944f1486cc84317d8cb9eb157472 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:118f44a65b00d1dac61ca0cf4ae5ec9e136bced8a5a748b5aa3dfadcb2da5981 +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_24.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_24.pt new file mode 100755 index 0000000000000000000000000000000000000000..200f313367ac707617831725b9dfe6d3f1625193 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_24.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14c16320f64608e8700d50280587b9038e26b49180d04e15e98b6a60f4d55217 +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_26.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_26.pt new file mode 100755 index 0000000000000000000000000000000000000000..f7580774bd3f9e317bd6a18bea3b45ade1088270 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f26608bec87988179827df538c0f29d3df551a8da1942c0a9f59d66e8307b3cf +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_28.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_28.pt new file mode 100755 index 0000000000000000000000000000000000000000..912d7aa7d3ee571a1f81f49d7c006834be0b3e8d --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_28.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77320c39c973ce8413461708b59d0fdb19a84413ab06b86aef5ee44aa0e2e78c +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_30.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_30.pt new file mode 100755 index 0000000000000000000000000000000000000000..1555c03589456656d88065f24a4a4caed57a99d6 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_30.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a08830d881f36926d79d20c14bd82e16563385d9f7c0b724608851419d0d6e68 +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_32.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_32.pt new file mode 100755 index 0000000000000000000000000000000000000000..e916e063f725b0a4ccfa370884bb7cf0ab71c748 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b66f40eb8da147158d506579ddd0cebc5959e38c70ee4d853e17c234e68f05 +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_34.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_34.pt new file mode 100755 index 0000000000000000000000000000000000000000..e31f38c6a3aac3e2c34f5358b2d46756bb0db672 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_34.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fd288a05ae901b98e6685b6a656190f50cb8d4eff614055527482063ea3841a +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_36.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_36.pt new file mode 100755 index 0000000000000000000000000000000000000000..7ffcb86ae20c596379ecc68b117eb2d32ef2345d --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_36.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:023787563c166b5a163f3c9b13eb8fa7ecbd8089fe1a850ca412a82c1c35b1ba +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_38.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_38.pt new file mode 100755 index 0000000000000000000000000000000000000000..708f1fbeb5672bfe243ce17c2fcba21b99aee09d --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_38.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61089d65dc794f5326a1de700363c201b6ac6de6d424363635fb87af0dad8835 +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_4.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_4.pt new file mode 100755 index 0000000000000000000000000000000000000000..51b1757c71d57fed35c94f448dff6c8a6159cc25 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12d35584822a75d9b3886a1b9405a847e421386338c7398ea5ed40117b72d886 +size 304154601 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_40.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_40.pt new file mode 100755 index 0000000000000000000000000000000000000000..ec0cce54e58d9d03283424966eb750e6899b78d6 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_40.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e6849caa9426c47885fb2834056d244b3137d3235bd30f0ec87309ec95641d4 +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_42.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_42.pt new file mode 100755 index 0000000000000000000000000000000000000000..70675eb826ad30649c0c0d226f79714600d5979d --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_42.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1cf6c7e5679788f20cebc4cabc3414c297e61a1b60b5fc875503bfd2da4e577 +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_44.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_44.pt new file mode 100755 index 0000000000000000000000000000000000000000..ab04ea1ac651c6211ceb9b6d6060e4cc474daa2a --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_44.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:177d63ce5452caf3e81809c0cf211e8816be8b79ae56f5a87a7d48d5a6587556 +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_46.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_46.pt new file mode 100755 index 0000000000000000000000000000000000000000..a6299f712df1473104482e4735ad973278eac2bf --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_46.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48764d855cded18a77977f198f3915991ba86e25a1f3ae5e9755b7c00a177ece +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_48.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_48.pt new file mode 100755 index 0000000000000000000000000000000000000000..c0433a8073aab27d2a2848206cc15cce4c9818b2 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_48.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a16589fc9feb4da418b9c75bc86bde419f9a83b0ee2506a4e172580d3f4cb13 +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_50.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_50.pt new file mode 100755 index 0000000000000000000000000000000000000000..52413054b337ac18b73056f581791e3e1a0c83e0 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_50.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8b0db60998cb863039edaf0c31fdf0e409dcc11ac7fe36b3bd8b651e2e046e9 +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_52.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_52.pt new file mode 100755 index 0000000000000000000000000000000000000000..21737dfb79a63294adcc77c85a84a45863e256dd --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_52.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d00814f1ff3acab01760d3847f488dfb42f7c955cd01d2acf335eb15157ee6 +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_54.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_54.pt new file mode 100755 index 0000000000000000000000000000000000000000..92e5d1dbf67dab398eac79e8f96419b8088f1201 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_54.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7abdc23f44bdd8619e77aa637837cb7a7fe23aff6376d774e5aa8f1b13f94a37 +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_56.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_56.pt new file mode 100755 index 0000000000000000000000000000000000000000..34e558dbb1a3b08e29b472de0a002a9768f706a8 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_56.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5370202fd19b768f83e5c3b8bfd3e8e19a0bf54795e077315db2ae72b387039 +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_58.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_58.pt new file mode 100755 index 0000000000000000000000000000000000000000..9956316cc8d5752bdafb77743bde92f9ef3b0fab --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_58.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d000d54c33941f7426b7078aeb4bcb70dcc03df1d3fe9a1dc2c66333af0eb3aa +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_6.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_6.pt new file mode 100755 index 0000000000000000000000000000000000000000..b0d1e0bf3ec01366c1ec9df47c4fc7b146369fb5 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44d3bfcd674a29a7ea4e114103e4b4b7253a819b4b26ab349d91b00515dd67b2 +size 304154601 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_60.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_60.pt new file mode 100755 index 0000000000000000000000000000000000000000..5c1c7ee582baf8e4bedb2dab230c41f00f621a01 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_60.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7be99472b3077a9fd0250e2f2e49ecee0774a70c7f86016c42a016c8166ff12e +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_62.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_62.pt new file mode 100755 index 0000000000000000000000000000000000000000..72d569ac0fbd16047ee1a6613acab3f6e1b49661 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_62.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:079912afcd995cae7185c6a7d0259612475012bd892bcb3360fa4089c48fc582 +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_64.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_64.pt new file mode 100755 index 0000000000000000000000000000000000000000..daaf465bc6b818822770c40bb48e96aff58c6c7a --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_64.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:492b05952ed05de841d05085b69e8addfad977bee4ef859b605cb48d62cbef99 +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_66.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_66.pt new file mode 100755 index 0000000000000000000000000000000000000000..31b5ade90cd49f8c08a84769a8f6d65b18e96450 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_66.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dc5cce9e67ca9b4c35e61955eb6d719d0db2e88d860276c078d2abf29be27a0 +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_68.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_68.pt new file mode 100755 index 0000000000000000000000000000000000000000..41a13a4d36d295e272a48c91743b53c6eeb1e617 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_68.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22d585efaf8cb533f385f64c162a4e13e52806bec693f598b1aab9d80b603af6 +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_70.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_70.pt new file mode 100755 index 0000000000000000000000000000000000000000..eed9b8e0ae4b8e5b6593a3ab6c98a513dc14ad20 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_70.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd7761e66b4ded72420fe1c45fcaa98a93ff86db17d7464be18b05fa60546f50 +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_72.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_72.pt new file mode 100755 index 0000000000000000000000000000000000000000..fe3aafd7b908ab2fb181f2bb5bd98219811c9a07 --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_72.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fafb0fedaf1aeec719918c0713fdfdb3bbd874831c544f8304433cd02da12d5 +size 304155119 diff --git a/alakazam_patched/ckpts/policy_weights/policy_epoch_8.pt b/alakazam_patched/ckpts/policy_weights/policy_epoch_8.pt new file mode 100755 index 0000000000000000000000000000000000000000..f81081abe4b12af4cb0953d3fb5760f8c0085dae --- /dev/null +++ b/alakazam_patched/ckpts/policy_weights/policy_epoch_8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47a98516df7b346118fa824f9137e6aaaa1428d5a7150af01bf606c1ea935944 +size 304154601 diff --git a/kadabra/ckpts/config.txt b/kadabra/ckpts/config.txt new file mode 100755 index 0000000000000000000000000000000000000000..1ed3a585a460fb303a93fde35aea5adaccca0cbf --- /dev/null +++ b/kadabra/ckpts/config.txt @@ -0,0 +1,130 @@ +import amago.agent +import amago.experiment as amago2 +import amago.nets.actor_critic as amago3 +import amago.nets.traj_encoders as amago4 + +# Parameters for Actor: +# ============================================================================== +# None. + +# Parameters for Agent: +# ============================================================================== +# None. + +# Parameters for Discrete: +# ============================================================================== +Discrete.clip_prob_high = 0.99 +Discrete.clip_prob_low = 0.001 + +# Parameters for Experiment: +# ============================================================================== +Experiment.always_load_latest = False +Experiment.always_save_latest = True +Experiment.has_dset_edit_rights = True +Experiment.save_trajs_as = 'npz' +Experiment.stagger_traj_file_lengths = True +Experiment.traj_save_len = 10000000000.0 +Experiment.wandb_group_name = None + +# Parameters for FlashAttention: +# ============================================================================== +FlashAttention.window_size = (32, 0) + +# Parameters for MetamonAMAGOExperiment: +# ============================================================================== +MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent +MetamonAMAGOExperiment.critic_loss_weight = 10.0 +MetamonAMAGOExperiment.grad_clip = 1.5 +MetamonAMAGOExperiment.l2_coeff = 0.0001 +MetamonAMAGOExperiment.learning_rate = 0.00015 +MetamonAMAGOExperiment.lr_warmup_steps = 1000 +MetamonAMAGOExperiment.max_seq_len = 200 +MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder +MetamonAMAGOExperiment.tstep_encoder_type = @MetamonTstepEncoder + +# Parameters for MetamonMaskedActor: +# ============================================================================== +MetamonMaskedActor.activation = 'leaky_relu' +MetamonMaskedActor.continuous_dist_type = None +MetamonMaskedActor.d_hidden = 400 +MetamonMaskedActor.dropout_p = 0.0 +MetamonMaskedActor.mask_illegal_actions = True +MetamonMaskedActor.n_layers = 2 + +# Parameters for MetamonTstepEncoder: +# ============================================================================== +MetamonTstepEncoder.d_model = 100 +MetamonTstepEncoder.dropout = 0.05 +MetamonTstepEncoder.extra_emb_dim = 18 +MetamonTstepEncoder.n_heads = 5 +MetamonTstepEncoder.n_layers = 3 +MetamonTstepEncoder.numerical_tokens = 6 +MetamonTstepEncoder.scratch_tokens = 6 +MetamonTstepEncoder.token_mask_aug = False + +# Parameters for Multigammas: +# ============================================================================== +Multigammas.continuous = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995] +Multigammas.discrete = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995] + +# Parameters for MultiTaskAgent: +# ============================================================================== +MultiTaskAgent.actor_type = @MetamonMaskedActor +MultiTaskAgent.critic_type = @actor_critic.NCriticsTwoHot +MultiTaskAgent.fake_filter = False +MultiTaskAgent.fbc_filter_func = @agent.binary_filter +MultiTaskAgent.gamma = 0.999 +MultiTaskAgent.num_actions_for_value_in_actor_loss = 3 +MultiTaskAgent.num_actions_for_value_in_critic_loss = 3 +MultiTaskAgent.num_critics = 6 +MultiTaskAgent.num_critics_td = 2 +MultiTaskAgent.offline_coeff = 1.0 +MultiTaskAgent.online_coeff = 0.0 +MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions'] +MultiTaskAgent.popart = True +MultiTaskAgent.reward_multiplier = 10.0 +MultiTaskAgent.tau = 0.004 +MultiTaskAgent.use_multigamma = True +MultiTaskAgent.use_target_actor = True + +# Parameters for NCriticsTwoHot: +# ============================================================================== +NCriticsTwoHot.activation = 'leaky_relu' +NCriticsTwoHot.d_hidden = 512 +NCriticsTwoHot.dropout_p = 0.0 +NCriticsTwoHot.max_return = 1100 +NCriticsTwoHot.min_return = -1100 +NCriticsTwoHot.n_layers = 2 +NCriticsTwoHot.output_bins = 96 +NCriticsTwoHot.use_symlog = False + +# Parameters for PopArtLayer: +# ============================================================================== +PopArtLayer.beta = 0.0005 +PopArtLayer.init_nu = 100.0 + +# Parameters for SigmaReparam: +# ============================================================================== +SigmaReparam.bias = True +SigmaReparam.fast_init = False + +# Parameters for TformerTrajEncoder: +# ============================================================================== +TformerTrajEncoder.activation = 'leaky_relu' +TformerTrajEncoder.d_ff = 3072 +TformerTrajEncoder.d_model = 768 +TformerTrajEncoder.dropout_attn = 0.0 +TformerTrajEncoder.dropout_emb = 0.05 +TformerTrajEncoder.dropout_ff = 0.05 +TformerTrajEncoder.dropout_qkv = 0.0 +TformerTrajEncoder.head_scaling = True +TformerTrajEncoder.n_heads = 8 +TformerTrajEncoder.n_layers = 6 +TformerTrajEncoder.norm = 'layer' +TformerTrajEncoder.normformer_norms = True +TformerTrajEncoder.pos_emb = 'fixed' +TformerTrajEncoder.sigma_reparam = True + +# Parameters for TransformerTurnEmbedding: +# ============================================================================== +# None. diff --git a/kadabra/ckpts/latest/policy.pt b/kadabra/ckpts/latest/policy.pt new file mode 100755 index 0000000000000000000000000000000000000000..da49803abe96098ec4601fbfbf1263f83fa1ea79 --- /dev/null +++ b/kadabra/ckpts/latest/policy.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8633fe34e8c6c7c8fb81b82c0f987fc82e41372a7f07d229aa81f951127f6c15 +size 230529797 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_0.pt b/kadabra/ckpts/policy_weights/policy_epoch_0.pt new file mode 100755 index 0000000000000000000000000000000000000000..d0280b686e17822c62c08d545403e4515990c0a1 --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55d88df6e5b158a2238f982752191383dbfb0331165daa1e5624fc0b3448a306 +size 230535701 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_10.pt b/kadabra/ckpts/policy_weights/policy_epoch_10.pt new file mode 100755 index 0000000000000000000000000000000000000000..5903202ede1a638bb2309a7889afd7a57e66cd41 --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f75bdaccab2c4a40dd7c3a143c582f539ef01130adc87d97166fa4c6bed03538 +size 230535959 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_12.pt b/kadabra/ckpts/policy_weights/policy_epoch_12.pt new file mode 100755 index 0000000000000000000000000000000000000000..f53a07146ae264ad54192c8ad44cef7994aee9c1 --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_12.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeeca95c924680525014d576b575e9c8f34e3eacec67d9b659fcab143805231c +size 230535959 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_14.pt b/kadabra/ckpts/policy_weights/policy_epoch_14.pt new file mode 100755 index 0000000000000000000000000000000000000000..c790488ac9cd1d5903235377ac6e7b5cf31baab9 --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf223d9811f056397a3b6a97274afdd7df9e431a9da64f3bf9e8e1493482ff38 +size 230535959 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_16.pt b/kadabra/ckpts/policy_weights/policy_epoch_16.pt new file mode 100755 index 0000000000000000000000000000000000000000..356e9140bbcfae73979b908582c3d51688b4cb97 --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8a2161f6b3c4a60ef56d1badd3e9180a8a104ac9d2286740eac28200bdd6825 +size 230535959 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_18.pt b/kadabra/ckpts/policy_weights/policy_epoch_18.pt new file mode 100755 index 0000000000000000000000000000000000000000..f6ef3b727b01bce6d78d2cccdb248eaaeb2f3f39 --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5420ac47c9430ff823bf4728c3ec9bb81695d121f4940eeeb84f1342c284fb95 +size 230535959 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_2.pt b/kadabra/ckpts/policy_weights/policy_epoch_2.pt new file mode 100755 index 0000000000000000000000000000000000000000..5ac67fd6b5dc18572d05fb73732f5eaf067ae4b0 --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:111f08ae7fbe33b2cb436b7a266f7d0cce1481a5713ba1c2cda77c8858863523 +size 230535701 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_20.pt b/kadabra/ckpts/policy_weights/policy_epoch_20.pt new file mode 100755 index 0000000000000000000000000000000000000000..5c44340c32f39ae598f1c6001c2cfba931144b22 --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_20.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d85e49b026dbff400296a2eec1d7dfe49823d58cb2332e3e8f20c1bd7654de08 +size 230535959 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_22.pt b/kadabra/ckpts/policy_weights/policy_epoch_22.pt new file mode 100755 index 0000000000000000000000000000000000000000..366c003e424de3b48e62b8d55f1b5677b9904a2b --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:885647dde1f70f17d6f2495b2925dac4db25763a1a2c85685bc96b44657a409d +size 230535959 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_24.pt b/kadabra/ckpts/policy_weights/policy_epoch_24.pt new file mode 100755 index 0000000000000000000000000000000000000000..5936abd9f2e4833a88018e25fa14eccebef45596 --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_24.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c52bf02640c06ca07f1bc4ce156fd4c18df844cbd544a70ce0421a00f46f6d80 +size 230535959 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_26.pt b/kadabra/ckpts/policy_weights/policy_epoch_26.pt new file mode 100755 index 0000000000000000000000000000000000000000..12ecd19d0bd38cd832ee777aba281157894151d9 --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15e85f4bc8d955ecabc2204b452acfcc007315cacb856977089d6f566f31465a +size 230535959 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_28.pt b/kadabra/ckpts/policy_weights/policy_epoch_28.pt new file mode 100755 index 0000000000000000000000000000000000000000..c16a347af09b3d771d03c1ce0c82fb8001f91d4f --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_28.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f33d904d6fe182db9ae79b9c1ec090732756a5454c7a27fc4fd9d9df30b519de +size 230535959 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_30.pt b/kadabra/ckpts/policy_weights/policy_epoch_30.pt new file mode 100755 index 0000000000000000000000000000000000000000..952849cbe19bd16be5410e3e2b951900e457cd2f --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_30.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c45a76eb4a2f843ca1614a45decaf1186ffb45e1b530c10ef07933fba8ec845 +size 230535959 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_32.pt b/kadabra/ckpts/policy_weights/policy_epoch_32.pt new file mode 100755 index 0000000000000000000000000000000000000000..17df0e13974b50e6301b85af6ca51e23f4e85fbb --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c894cbb2a386737811dd633110484a1591d7a0cca632dbec426abe44d785d173 +size 230535959 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_34.pt b/kadabra/ckpts/policy_weights/policy_epoch_34.pt new file mode 100755 index 0000000000000000000000000000000000000000..5a9c64c49276788782814f819162bac6daea5196 --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_34.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:674b3207682b6b517d28dadac441727ba5bf023e4c3d273c3aace5867592559e +size 230535959 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_36.pt b/kadabra/ckpts/policy_weights/policy_epoch_36.pt new file mode 100755 index 0000000000000000000000000000000000000000..7c2c60f1e629df57a011a09e52af96a6faa8b6c7 --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_36.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3769678f10c5cd985eea6450bdecd76e003b404bfdf87e020a5226e17897e3d1 +size 230535959 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_38.pt b/kadabra/ckpts/policy_weights/policy_epoch_38.pt new file mode 100755 index 0000000000000000000000000000000000000000..2a7519feaa60083be13289bbb8ae0a181e1fee9a --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_38.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddb063483ca977f6c8b08df1da04093a1daa2608d6d8b9556724cd9903104614 +size 230535959 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_4.pt b/kadabra/ckpts/policy_weights/policy_epoch_4.pt new file mode 100755 index 0000000000000000000000000000000000000000..40d0d894e292816c328a5c9e361e22534709dd67 --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:030e691185b20504520d532ccc80d11d6b305a2e37d67b47e8b9c163b98c2b45 +size 230535701 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_40.pt b/kadabra/ckpts/policy_weights/policy_epoch_40.pt new file mode 100755 index 0000000000000000000000000000000000000000..df8cb70cd1751e3b22a7b39802eea4620c425305 --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_40.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18a0d793cbe28034a7a69a6bbb2f1a3beca36aaa8a26fe8f102914d10d4e00db +size 230535959 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_42.pt b/kadabra/ckpts/policy_weights/policy_epoch_42.pt new file mode 100755 index 0000000000000000000000000000000000000000..8b6ddeb772ba8ce874886a7f1a329dd970369fcd --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_42.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b242ca62eac248bb8da0c070e478374fafa0692813dec6428db682aa2ce4ce66 +size 230535959 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_44.pt b/kadabra/ckpts/policy_weights/policy_epoch_44.pt new file mode 100755 index 0000000000000000000000000000000000000000..d6979667ebccadf5f32afca01523b6de514c6ef8 --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_44.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc708b6ff40bbe2748066148b02d3c5a59c0170e6f736e916266cbb34cd2c334 +size 230535959 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_46.pt b/kadabra/ckpts/policy_weights/policy_epoch_46.pt new file mode 100755 index 0000000000000000000000000000000000000000..221febb35f65b4467622921069d098af16efaf52 --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_46.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b070cbeb427833a3d4a30aa6cb901e314c33600c48d7829d9b383ea8902dfcb +size 230535959 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_48.pt b/kadabra/ckpts/policy_weights/policy_epoch_48.pt new file mode 100755 index 0000000000000000000000000000000000000000..89f08aee9136902d3005df247ef5043bc31686f3 --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_48.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdcf01dfdc377272c87f04791eaeb17546a68647fe975b490578b3e8ffff5575 +size 230535959 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_6.pt b/kadabra/ckpts/policy_weights/policy_epoch_6.pt new file mode 100755 index 0000000000000000000000000000000000000000..0458cc1d560f0c48354acfe22e05d4c08d9958f2 --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc2444e2083612a598cd4cb3d5b4e893e931aef5a8aba62e801574b6f8f2918b +size 230535701 diff --git a/kadabra/ckpts/policy_weights/policy_epoch_8.pt b/kadabra/ckpts/policy_weights/policy_epoch_8.pt new file mode 100755 index 0000000000000000000000000000000000000000..c7125698b742de42afc111d3b8fd9311b57ac12b --- /dev/null +++ b/kadabra/ckpts/policy_weights/policy_epoch_8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d541a6c5cfe236db8bff57ea479fd13308b826899ae5261798f07d9dae5163 +size 230535701 diff --git a/kakuna/ckpts/config.txt b/kakuna/ckpts/config.txt new file mode 100755 index 0000000000000000000000000000000000000000..f206a4a1fb492ad6efa8f739d58bfeab58c88973 --- /dev/null +++ b/kakuna/ckpts/config.txt @@ -0,0 +1,135 @@ +import amago.agent +import amago.experiment as amago2 +import amago.nets.actor_critic as amago3 +import amago.nets.traj_encoders as amago4 +import amago.nets.transformer as amago5 + +# Parameters for Agent: +# ============================================================================== +# None. + +# Parameters for Discrete: +# ============================================================================== +Discrete.clip_prob_high = 0.99 +Discrete.clip_prob_low = 0.001 + +# Parameters for Experiment: +# ============================================================================== +Experiment.always_load_latest = False +Experiment.always_save_latest = True +Experiment.has_dset_edit_rights = True +Experiment.save_trajs_as = 'npz' +Experiment.stagger_traj_file_lengths = True +Experiment.traj_save_len = 10000000000.0 +Experiment.wandb_group_name = None + +# Parameters for FlashAttention: +# ============================================================================== +FlashAttention.window_size = (96, 0) + +# Parameters for MetamonAMAGOExperiment: +# ============================================================================== +MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent +MetamonAMAGOExperiment.critic_loss_weight = 12.5 +MetamonAMAGOExperiment.grad_clip = 1.0 +MetamonAMAGOExperiment.l2_coeff = 0.0001 +MetamonAMAGOExperiment.learning_rate = 0.0001 +MetamonAMAGOExperiment.lr_warmup_steps = 10000 +MetamonAMAGOExperiment.max_seq_len = 128 +MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder +MetamonAMAGOExperiment.tstep_encoder_type = @MetamonPerceiverTstepEncoder + +# Parameters for MetamonMaskedResidualActor: +# ============================================================================== +MetamonMaskedResidualActor.activation = 'leaky_relu' +MetamonMaskedResidualActor.continuous_dist_type = None +MetamonMaskedResidualActor.dropout_p = 0.0 +MetamonMaskedResidualActor.feature_dim = 500 +MetamonMaskedResidualActor.mask_illegal_actions = True +MetamonMaskedResidualActor.normalization = 'layer' +MetamonMaskedResidualActor.residual_blocks = 3 +MetamonMaskedResidualActor.residual_ff_dim = 800 + +# Parameters for MetamonPerceiverTstepEncoder: +# ============================================================================== +MetamonPerceiverTstepEncoder.d_model = 168 +MetamonPerceiverTstepEncoder.dropout = 0.08 +MetamonPerceiverTstepEncoder.extra_emb_dim = 18 +MetamonPerceiverTstepEncoder.latent_tokens = 8 +MetamonPerceiverTstepEncoder.max_tokens_per_turn = 128 +MetamonPerceiverTstepEncoder.n_heads = 8 +MetamonPerceiverTstepEncoder.n_layers = 10 +MetamonPerceiverTstepEncoder.numerical_tokens = 6 +MetamonPerceiverTstepEncoder.token_mask_aug = True + +# Parameters for Multigammas: +# ============================================================================== +Multigammas.continuous = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995] +Multigammas.discrete = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995] + +# Parameters for MultiTaskAgent: +# ============================================================================== +MultiTaskAgent.actor_type = @MetamonMaskedResidualActor +MultiTaskAgent.critic_type = @actor_critic.NCriticsTwoHot +MultiTaskAgent.fake_filter = False +MultiTaskAgent.fbc_filter_func = @agent.binary_filter +MultiTaskAgent.gamma = 0.999 +MultiTaskAgent.num_actions_for_value_in_actor_loss = 4 +MultiTaskAgent.num_actions_for_value_in_critic_loss = 4 +MultiTaskAgent.num_critics = 6 +MultiTaskAgent.num_critics_td = 2 +MultiTaskAgent.offline_coeff = 1.0 +MultiTaskAgent.online_coeff = 0.5 +MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions'] +MultiTaskAgent.popart = True +MultiTaskAgent.reward_multiplier = 10.0 +MultiTaskAgent.tau = 0.008 +MultiTaskAgent.use_multigamma = True +MultiTaskAgent.use_target_actor = True + +# Parameters for NCriticsTwoHot: +# ============================================================================== +NCriticsTwoHot.activation = 'leaky_relu' +NCriticsTwoHot.d_hidden = 700 +NCriticsTwoHot.dropout_p = 0.0 +NCriticsTwoHot.max_return = 2100 +NCriticsTwoHot.min_return = -100 +NCriticsTwoHot.n_layers = 3 +NCriticsTwoHot.output_bins = 96 +NCriticsTwoHot.use_symlog = False + +# Parameters for PerceiverTurnEmbedding: +# ============================================================================== +# None. + +# Parameters for PopArtLayer: +# ============================================================================== +PopArtLayer.beta = 0.0005 +PopArtLayer.init_nu = 100.0 + +# Parameters for ResidualActor: +# ============================================================================== +# None. + +# Parameters for SigmaReparam: +# ============================================================================== +SigmaReparam.bias = True +SigmaReparam.fast_init = False + +# Parameters for TformerTrajEncoder: +# ============================================================================== +TformerTrajEncoder.activation = 'leaky_relu' +TformerTrajEncoder.attention_type = @transformer.FlashAttention +TformerTrajEncoder.d_ff = 3600 +TformerTrajEncoder.d_model = 900 +TformerTrajEncoder.dropout_attn = 0.0 +TformerTrajEncoder.dropout_emb = 0.05 +TformerTrajEncoder.dropout_ff = 0.05 +TformerTrajEncoder.dropout_qkv = 0.0 +TformerTrajEncoder.head_scaling = True +TformerTrajEncoder.n_heads = 12 +TformerTrajEncoder.n_layers = 10 +TformerTrajEncoder.norm = 'layer' +TformerTrajEncoder.normformer_norms = True +TformerTrajEncoder.pos_emb = 'fixed' +TformerTrajEncoder.sigma_reparam = True diff --git a/kakuna/ckpts/latest/policy.pt b/kakuna/ckpts/latest/policy.pt new file mode 100755 index 0000000000000000000000000000000000000000..5caf2dc7833e6dddb9773cf7df45e0b458c5f626 --- /dev/null +++ b/kakuna/ckpts/latest/policy.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:653d4bc5ce2374897ed364be6694ff4ef1377602248bbda8516e4d80fecee776 +size 571573427 diff --git a/kakuna/ckpts/policy_weights/policy_epoch_0.pt b/kakuna/ckpts/policy_weights/policy_epoch_0.pt new file mode 100755 index 0000000000000000000000000000000000000000..deabbed079467d26f182b0a19b2cf45da4f9ed56 --- /dev/null +++ b/kakuna/ckpts/policy_weights/policy_epoch_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fe503e9b45a1fb759e8477f0e7a3d0895007d277cdc4ad9a4f039a820353c3a +size 571580979 diff --git a/kakuna/ckpts/policy_weights/policy_epoch_10.pt b/kakuna/ckpts/policy_weights/policy_epoch_10.pt new file mode 100755 index 0000000000000000000000000000000000000000..b2adc8fa5676fe92399308b7cdf91263dcfcc6b5 --- /dev/null +++ b/kakuna/ckpts/policy_weights/policy_epoch_10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e06076dd3295f193cfd4535c68a60fe835e2215c8ff24a5591514398d3d36f22 +size 571581627 diff --git a/kakuna/ckpts/policy_weights/policy_epoch_12.pt b/kakuna/ckpts/policy_weights/policy_epoch_12.pt new file mode 100755 index 0000000000000000000000000000000000000000..65cfc0c13fd49a625d4bf9b737255529c30e2049 --- /dev/null +++ b/kakuna/ckpts/policy_weights/policy_epoch_12.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:463c9646c26c71870d684433ce5ff01f8ece0eb31a9ed759500ebd0ebcb673c8 +size 571581627 diff --git a/kakuna/ckpts/policy_weights/policy_epoch_14.pt b/kakuna/ckpts/policy_weights/policy_epoch_14.pt new file mode 100755 index 0000000000000000000000000000000000000000..0b37866620b13b185cf71192bdbfd05914e48c95 --- /dev/null +++ b/kakuna/ckpts/policy_weights/policy_epoch_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c4a5e40aebb4a5bfc990da6dc0cc64b1c9516d0b346cc1234ecd4212ebebf17 +size 571581627 diff --git a/kakuna/ckpts/policy_weights/policy_epoch_16.pt b/kakuna/ckpts/policy_weights/policy_epoch_16.pt new file mode 100755 index 0000000000000000000000000000000000000000..30708ac96a4f4c9f139959e59ea4d1d1e6b246bc --- /dev/null +++ b/kakuna/ckpts/policy_weights/policy_epoch_16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bb67e733818ec808472e2b13e4d59f282a179f9124d6b8783e1414082d51543 +size 571581627 diff --git a/kakuna/ckpts/policy_weights/policy_epoch_18.pt b/kakuna/ckpts/policy_weights/policy_epoch_18.pt new file mode 100755 index 0000000000000000000000000000000000000000..860d49f1ad87baf3b9399bbb0519f0979bd1e95f --- /dev/null +++ b/kakuna/ckpts/policy_weights/policy_epoch_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2284d735e415b328c0ef5658c5cb42f890cceafa6b7a22123d1f52d3325a8e53 +size 571581627 diff --git a/kakuna/ckpts/policy_weights/policy_epoch_2.pt b/kakuna/ckpts/policy_weights/policy_epoch_2.pt new file mode 100755 index 0000000000000000000000000000000000000000..ff8638ffd5203a29efbd82f35de3aee290bad4d6 --- /dev/null +++ b/kakuna/ckpts/policy_weights/policy_epoch_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:597db0f727b39100f906d526fa8f4a779b3a5d6e6b3e7931e40cf0b1d4170e00 +size 571580979 diff --git a/kakuna/ckpts/policy_weights/policy_epoch_20.pt b/kakuna/ckpts/policy_weights/policy_epoch_20.pt new file mode 100755 index 0000000000000000000000000000000000000000..e01b02e7e76ac4cdf2f71aa3e6a404878c5f62df --- /dev/null +++ b/kakuna/ckpts/policy_weights/policy_epoch_20.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f945772a880a3f7a1ea66804b6ddc41f0f0cad3dd02cd2dd6015f065017988cc +size 571581627 diff --git a/kakuna/ckpts/policy_weights/policy_epoch_22.pt b/kakuna/ckpts/policy_weights/policy_epoch_22.pt new file mode 100755 index 0000000000000000000000000000000000000000..7f9cf32ae7efd45bb6a35a81a2a7ec0a07ecc27d --- /dev/null +++ b/kakuna/ckpts/policy_weights/policy_epoch_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbf48f7880f30b96da04e5e23919badc84aca3bf7d63de2d8910e3edbbc47bbd +size 571581627 diff --git a/kakuna/ckpts/policy_weights/policy_epoch_24.pt b/kakuna/ckpts/policy_weights/policy_epoch_24.pt new file mode 100755 index 0000000000000000000000000000000000000000..c31874f89bc09036200e7642c47e558f17a7cf2e --- /dev/null +++ b/kakuna/ckpts/policy_weights/policy_epoch_24.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63ebfa741c6be5aea1fb11ef7135244bf33c424ee942ce77ceb8458dce1e16b8 +size 571581627 diff --git a/kakuna/ckpts/policy_weights/policy_epoch_26.pt b/kakuna/ckpts/policy_weights/policy_epoch_26.pt new file mode 100755 index 0000000000000000000000000000000000000000..e224fa3762c35854ef925d1b1c7aa5fadd161633 --- /dev/null +++ b/kakuna/ckpts/policy_weights/policy_epoch_26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4436dfd9c0bf7f5c9836bd6d596486e4b56a15ae767ce0f6638288c18e991e4c +size 571581627 diff --git a/kakuna/ckpts/policy_weights/policy_epoch_28.pt b/kakuna/ckpts/policy_weights/policy_epoch_28.pt new file mode 100755 index 0000000000000000000000000000000000000000..b467d3bc5697adb4f1e65dbdd31acefa127ccc30 --- /dev/null +++ b/kakuna/ckpts/policy_weights/policy_epoch_28.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87c5450e41ddfadc6b17f7f14324d7fe67e7d74b27625d4b05276a9ad1893ef6 +size 571581627 diff --git a/kakuna/ckpts/policy_weights/policy_epoch_30.pt b/kakuna/ckpts/policy_weights/policy_epoch_30.pt new file mode 100755 index 0000000000000000000000000000000000000000..399c344f6664a325f647be66e6075190736155b3 --- /dev/null +++ b/kakuna/ckpts/policy_weights/policy_epoch_30.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d66a65ea74f1b02a6d3cf540d22a6ae14d77f9bd2d30e568062569415e9adc5 +size 571581627 diff --git a/kakuna/ckpts/policy_weights/policy_epoch_32.pt b/kakuna/ckpts/policy_weights/policy_epoch_32.pt new file mode 100755 index 0000000000000000000000000000000000000000..21d92d8fde62f4d44e24a6909178e522c247b197 --- /dev/null +++ b/kakuna/ckpts/policy_weights/policy_epoch_32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:388f2102cc194548fc874a30b96684d4b52597f38745a0db5f1a423a5215facc +size 571581627 diff --git a/kakuna/ckpts/policy_weights/policy_epoch_34.pt b/kakuna/ckpts/policy_weights/policy_epoch_34.pt new file mode 100755 index 0000000000000000000000000000000000000000..51cff7322a2014f35a8b67f8dab319e1dbe595bc --- /dev/null +++ b/kakuna/ckpts/policy_weights/policy_epoch_34.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e7474d5b2ec6624afdeb29a7cf1dc5c84817d5aee8ec39c1d35a153b18df26c +size 571581627 diff --git a/kakuna/ckpts/policy_weights/policy_epoch_36.pt b/kakuna/ckpts/policy_weights/policy_epoch_36.pt new file mode 100755 index 0000000000000000000000000000000000000000..6da616ea3196cd510dd82f91cc9516cff383d439 --- /dev/null +++ b/kakuna/ckpts/policy_weights/policy_epoch_36.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8330a569e4d99e136a6d3e55d562ecb9795b8785da57aadebef6553db0c45af +size 571581627 diff --git a/kakuna/ckpts/policy_weights/policy_epoch_38.pt b/kakuna/ckpts/policy_weights/policy_epoch_38.pt new file mode 100755 index 0000000000000000000000000000000000000000..cece4e0436af1e3aa313cc019986f7d43a8ef79a --- /dev/null +++ b/kakuna/ckpts/policy_weights/policy_epoch_38.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7a7bf8a81b1852d20c71bb026c0f79d7cc1ec1a546d8f5e4ce687faf7ae8f79 +size 571581627 diff --git a/kakuna/ckpts/policy_weights/policy_epoch_4.pt b/kakuna/ckpts/policy_weights/policy_epoch_4.pt new file mode 100755 index 0000000000000000000000000000000000000000..d73d9ed241b753cfee8dd9f4540d77120f785caa --- /dev/null +++ b/kakuna/ckpts/policy_weights/policy_epoch_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0c2ae3fa1e2f93d34bffd6e069dec2af4daafb85f8602be7e22670541edeaa7 +size 571580979 diff --git a/kakuna/ckpts/policy_weights/policy_epoch_40.pt b/kakuna/ckpts/policy_weights/policy_epoch_40.pt new file mode 100755 index 0000000000000000000000000000000000000000..46f82de6f9dce13b7c38bd7c8dc2ac078b724525 --- /dev/null +++ b/kakuna/ckpts/policy_weights/policy_epoch_40.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e4725120d874384e14b7029240634376a2b85e86ee1658667fcd1e1ea03437a +size 571581627 diff --git a/kakuna/ckpts/policy_weights/policy_epoch_6.pt b/kakuna/ckpts/policy_weights/policy_epoch_6.pt new file mode 100755 index 0000000000000000000000000000000000000000..c78af083ce318da601c275dc0652871852ec80f9 --- /dev/null +++ b/kakuna/ckpts/policy_weights/policy_epoch_6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:371e2b9e24639db89a4559b7978fc36f42622947be90da6f0f9422e39df44929 +size 571580979 diff --git a/kakuna/ckpts/policy_weights/policy_epoch_8.pt b/kakuna/ckpts/policy_weights/policy_epoch_8.pt new file mode 100755 index 0000000000000000000000000000000000000000..84965dd735fecbf6e27da9ff33edf0869fe7e183 --- /dev/null +++ b/kakuna/ckpts/policy_weights/policy_epoch_8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3d6bfced21bd71caa38baecb4495e91a1437af0858346de4cb43a98186340e4 +size 571580979 diff --git a/superkazam/ckpts/config.txt b/superkazam/ckpts/config.txt new file mode 100755 index 0000000000000000000000000000000000000000..432703b12dbbd3708ee33eaa28c91feb24239278 --- /dev/null +++ b/superkazam/ckpts/config.txt @@ -0,0 +1,135 @@ +import amago.agent +import amago.experiment as amago2 +import amago.nets.actor_critic as amago3 +import amago.nets.traj_encoders as amago4 +import amago.nets.transformer as amago5 + +# Parameters for Agent: +# ============================================================================== +# None. + +# Parameters for Discrete: +# ============================================================================== +Discrete.clip_prob_high = 0.99 +Discrete.clip_prob_low = 0.001 + +# Parameters for Experiment: +# ============================================================================== +Experiment.always_load_latest = False +Experiment.always_save_latest = True +Experiment.has_dset_edit_rights = True +Experiment.save_trajs_as = 'npz' +Experiment.stagger_traj_file_lengths = True +Experiment.traj_save_len = 10000000000.0 +Experiment.wandb_group_name = None + +# Parameters for FlashAttention: +# ============================================================================== +FlashAttention.window_size = (96, 0) + +# Parameters for MetamonAMAGOExperiment: +# ============================================================================== +MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent +MetamonAMAGOExperiment.critic_loss_weight = 13.5 +MetamonAMAGOExperiment.grad_clip = 1.5 +MetamonAMAGOExperiment.l2_coeff = 0.0001 +MetamonAMAGOExperiment.learning_rate = 0.000125 +MetamonAMAGOExperiment.lr_warmup_steps = 2000 +MetamonAMAGOExperiment.max_seq_len = 128 +MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder +MetamonAMAGOExperiment.tstep_encoder_type = @MetamonPerceiverTstepEncoder + +# Parameters for MetamonMaskedResidualActor: +# ============================================================================== +MetamonMaskedResidualActor.activation = 'leaky_relu' +MetamonMaskedResidualActor.continuous_dist_type = None +MetamonMaskedResidualActor.dropout_p = 0.0 +MetamonMaskedResidualActor.feature_dim = 500 +MetamonMaskedResidualActor.mask_illegal_actions = True +MetamonMaskedResidualActor.normalization = 'layer' +MetamonMaskedResidualActor.residual_blocks = 3 +MetamonMaskedResidualActor.residual_ff_dim = 800 + +# Parameters for MetamonPerceiverTstepEncoder: +# ============================================================================== +MetamonPerceiverTstepEncoder.d_model = 168 +MetamonPerceiverTstepEncoder.dropout = 0.08 +MetamonPerceiverTstepEncoder.extra_emb_dim = 18 +MetamonPerceiverTstepEncoder.latent_tokens = 8 +MetamonPerceiverTstepEncoder.max_tokens_per_turn = 128 +MetamonPerceiverTstepEncoder.n_heads = 8 +MetamonPerceiverTstepEncoder.n_layers = 10 +MetamonPerceiverTstepEncoder.numerical_tokens = 6 +MetamonPerceiverTstepEncoder.token_mask_aug = True + +# Parameters for Multigammas: +# ============================================================================== +Multigammas.continuous = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995] +Multigammas.discrete = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995] + +# Parameters for MultiTaskAgent: +# ============================================================================== +MultiTaskAgent.actor_type = @MetamonMaskedResidualActor +MultiTaskAgent.critic_type = @actor_critic.NCriticsTwoHot +MultiTaskAgent.fake_filter = False +MultiTaskAgent.fbc_filter_func = @agent.leaky_relu_filter +MultiTaskAgent.gamma = 0.999 +MultiTaskAgent.num_actions_for_value_in_actor_loss = 4 +MultiTaskAgent.num_actions_for_value_in_critic_loss = 4 +MultiTaskAgent.num_critics = 6 +MultiTaskAgent.num_critics_td = 2 +MultiTaskAgent.offline_coeff = 1.0 +MultiTaskAgent.online_coeff = 0.2 +MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions'] +MultiTaskAgent.popart = True +MultiTaskAgent.reward_multiplier = 10.0 +MultiTaskAgent.tau = 0.008 +MultiTaskAgent.use_multigamma = True +MultiTaskAgent.use_target_actor = True + +# Parameters for NCriticsTwoHot: +# ============================================================================== +NCriticsTwoHot.activation = 'leaky_relu' +NCriticsTwoHot.d_hidden = 700 +NCriticsTwoHot.dropout_p = 0.0 +NCriticsTwoHot.max_return = 2100 +NCriticsTwoHot.min_return = -100 +NCriticsTwoHot.n_layers = 3 +NCriticsTwoHot.output_bins = 96 +NCriticsTwoHot.use_symlog = False + +# Parameters for PerceiverTurnEmbedding: +# ============================================================================== +# None. + +# Parameters for PopArtLayer: +# ============================================================================== +PopArtLayer.beta = 0.0005 +PopArtLayer.init_nu = 100.0 + +# Parameters for ResidualActor: +# ============================================================================== +# None. + +# Parameters for SigmaReparam: +# ============================================================================== +SigmaReparam.bias = True +SigmaReparam.fast_init = False + +# Parameters for TformerTrajEncoder: +# ============================================================================== +TformerTrajEncoder.activation = 'leaky_relu' +TformerTrajEncoder.attention_type = @transformer.FlashAttention +TformerTrajEncoder.d_ff = 3600 +TformerTrajEncoder.d_model = 900 +TformerTrajEncoder.dropout_attn = 0.0 +TformerTrajEncoder.dropout_emb = 0.05 +TformerTrajEncoder.dropout_ff = 0.05 +TformerTrajEncoder.dropout_qkv = 0.0 +TformerTrajEncoder.head_scaling = True +TformerTrajEncoder.n_heads = 12 +TformerTrajEncoder.n_layers = 10 +TformerTrajEncoder.norm = 'layer' +TformerTrajEncoder.normformer_norms = True +TformerTrajEncoder.pos_emb = 'fixed' +TformerTrajEncoder.sigma_reparam = True diff --git a/superkazam/ckpts/latest/policy.pt b/superkazam/ckpts/latest/policy.pt new file mode 100755 index 0000000000000000000000000000000000000000..87907247a56f92a63d2840a0516fc7146e358fdc --- /dev/null +++ b/superkazam/ckpts/latest/policy.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caf9af399d8fb372d7819ebe0127c6716cd204214299cc093ee7ab578af52343 +size 571573427 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_0.pt b/superkazam/ckpts/policy_weights/policy_epoch_0.pt new file mode 100755 index 0000000000000000000000000000000000000000..ea334637a51afb761bac81a97ac112c1aa40fcac --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c699c7d7f8b5322b1bd4a00492ea8a008e81353a478738a913f416182cef8ed9 +size 571580979 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_10.pt b/superkazam/ckpts/policy_weights/policy_epoch_10.pt new file mode 100755 index 0000000000000000000000000000000000000000..f0bfbba0c9403a6f0c9189898fd957a93415be56 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:434246d0cd6c06573e714a4b32aa722d72c6aac6a4215e6fd9f4dff2f70cf366 +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_12.pt b/superkazam/ckpts/policy_weights/policy_epoch_12.pt new file mode 100755 index 0000000000000000000000000000000000000000..c2adcbd3830ff472fbf76d77a2d5039d1df28570 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_12.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb784b867353312abebb5a75c92813fba25b5236b296e519dfb0368eab1556fc +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_14.pt b/superkazam/ckpts/policy_weights/policy_epoch_14.pt new file mode 100755 index 0000000000000000000000000000000000000000..3bbdc0bb532ed539a6f44e89fe5a672493062201 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a9310420355425239c36a13fe4478f5ee07c377f1f20033ffd0090fdbaf7a4b +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_16.pt b/superkazam/ckpts/policy_weights/policy_epoch_16.pt new file mode 100755 index 0000000000000000000000000000000000000000..28c994e5f80b1521f826aef97e8d6403b882ce93 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6a875cf38fb83dd8697228cb641952cd119b4c319b0b0e5bd2f42ff8560f9db +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_18.pt b/superkazam/ckpts/policy_weights/policy_epoch_18.pt new file mode 100755 index 0000000000000000000000000000000000000000..7a202687afafba59f2abe612c051bdd63bb3cea1 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e22e831b61f4a2640bdebce3d7c11a9cfb973b87652d78f08770f0b46a6ddda +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_2.pt b/superkazam/ckpts/policy_weights/policy_epoch_2.pt new file mode 100755 index 0000000000000000000000000000000000000000..78bc9e29f602598b9c7128f3d871637aba17bc6c --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cd4853d32f0bc3b593090e4c9c39079e7537ccef7f6506fba1236d8c75aaf05 +size 571580979 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_20.pt b/superkazam/ckpts/policy_weights/policy_epoch_20.pt new file mode 100755 index 0000000000000000000000000000000000000000..c9477e994dee799ad36eb92301fcbda6efae4505 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_20.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fac6472169bd035359312315c27148d55738d703d7a284677fd9d22c8c99068b +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_22.pt b/superkazam/ckpts/policy_weights/policy_epoch_22.pt new file mode 100755 index 0000000000000000000000000000000000000000..549cab4b0adfaf7b91e2d9880a37ca64af60c8a5 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:596705b7489bc7ac536cbfa101263766dc006b518250a8274b13a7dc43c02587 +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_24.pt b/superkazam/ckpts/policy_weights/policy_epoch_24.pt new file mode 100755 index 0000000000000000000000000000000000000000..d10620c0d74c799254c5812dd1af8e7dd80f6706 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_24.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dd40c2d5292f16cf1032b141b96f062468e0317b53cbfea5fa7965cb265f677 +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_26.pt b/superkazam/ckpts/policy_weights/policy_epoch_26.pt new file mode 100755 index 0000000000000000000000000000000000000000..bb478a461b28536882adf27606477bc1424c56f7 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c5e04f0451757e70b6d876fbbcd617437e13347c113d12ceda3bdebb6b7cba7 +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_28.pt b/superkazam/ckpts/policy_weights/policy_epoch_28.pt new file mode 100755 index 0000000000000000000000000000000000000000..31723903acb922aa7613fa2c420bf2ce1af8eb19 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_28.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdf40c9405c755a5eedf2a9a7ab86814955399f55a0c08ad5aca43f1f13ac85f +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_30.pt b/superkazam/ckpts/policy_weights/policy_epoch_30.pt new file mode 100755 index 0000000000000000000000000000000000000000..a5ca75a9af269960d77a8df009f00eb07e5969a2 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_30.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a174e448ae4fc17fbc08bc9bb8ce7b6981b15f89d37a97358f32f4ebd8ecd9a9 +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_32.pt b/superkazam/ckpts/policy_weights/policy_epoch_32.pt new file mode 100755 index 0000000000000000000000000000000000000000..60aaa92279a9c0d12d5996efd6088131b038d9b1 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4552e9ab284fe34f6d9bbea2f3c70a8d324b3fbe8ba5aa148b3298ca357aa373 +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_34.pt b/superkazam/ckpts/policy_weights/policy_epoch_34.pt new file mode 100755 index 0000000000000000000000000000000000000000..8e0efe1d22fed7464263caf43ba0b580ff414627 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_34.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:922b49e88f34cb1a4660ab4050269a1e99c913570e01bbf5684673822fd26a10 +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_36.pt b/superkazam/ckpts/policy_weights/policy_epoch_36.pt new file mode 100755 index 0000000000000000000000000000000000000000..0926e5e9eab46fbf3319b4bf6d48324a05a6537f --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_36.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7174d99b102d0359db8ae04820461ee12dbd2e4fae7d849e810f88e28faea0b3 +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_38.pt b/superkazam/ckpts/policy_weights/policy_epoch_38.pt new file mode 100755 index 0000000000000000000000000000000000000000..77391666cf9881f42f63e9a5701d6140cd9bb34a --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_38.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a649c4701dd5de98d257c0c6346c4c50a74f1968384bb6e2181e1ebcd0ceeec5 +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_4.pt b/superkazam/ckpts/policy_weights/policy_epoch_4.pt new file mode 100755 index 0000000000000000000000000000000000000000..80757536dcc8e3962915941f3020e4b2bd3e30d4 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:803525011f482227224d421f66ed40f6a0006298cc43f662ddd06e917436aec6 +size 571580979 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_40.pt b/superkazam/ckpts/policy_weights/policy_epoch_40.pt new file mode 100755 index 0000000000000000000000000000000000000000..6ab57d0d8384c594ff642de86159b8894541bef3 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_40.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ba7e9af0e713e9e2583560d87750b704ee2f4d817d32be2ab6675e78d473226 +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_42.pt b/superkazam/ckpts/policy_weights/policy_epoch_42.pt new file mode 100755 index 0000000000000000000000000000000000000000..f6a678b36844fa68a8270460652fd44db9cb5745 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_42.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a84ed5d1f9a234e34a2fbb0597803d6aa44a262efcf50b5489f61bce98e01c3c +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_44.pt b/superkazam/ckpts/policy_weights/policy_epoch_44.pt new file mode 100755 index 0000000000000000000000000000000000000000..a1734f93e4154062a6e8a637c49e272ec196f7d1 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_44.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9426c5eeb62154b81585f21eebab7cd6b22bcaf56d28c5c9e85c77ff5ca6436 +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_46.pt b/superkazam/ckpts/policy_weights/policy_epoch_46.pt new file mode 100755 index 0000000000000000000000000000000000000000..7ba52482c3f09eb75e4f3699ab656884c1b666fc --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_46.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f85c571b4ee7617f0b618d424fd1fde613ba4c3385cbe905b7d8649143703ffa +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_48.pt b/superkazam/ckpts/policy_weights/policy_epoch_48.pt new file mode 100755 index 0000000000000000000000000000000000000000..02c66023e6e6a80ce01b688569ed11ea14b44209 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_48.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91921ab97009e807bbe22c2284c4f5bcc7c9ee0d7e8d1fd5c182a093fd3d7d12 +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_50.pt b/superkazam/ckpts/policy_weights/policy_epoch_50.pt new file mode 100755 index 0000000000000000000000000000000000000000..827e47f839bba4c86a049fef7b9a687e90cac288 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_50.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb8d4b36168048b1912548b8c110c334d5e3683ba7e17ea15056fdfbfbd07297 +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_52.pt b/superkazam/ckpts/policy_weights/policy_epoch_52.pt new file mode 100755 index 0000000000000000000000000000000000000000..090fc812fc7db2b289e9a9f64b5022c209467f82 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_52.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f4501a0ef5e2c702b559669705db6c88fa5026fd5ea137a6aa584c862be6d30 +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_54.pt b/superkazam/ckpts/policy_weights/policy_epoch_54.pt new file mode 100755 index 0000000000000000000000000000000000000000..56e841efb213072964e964f1f67c8a39857dc528 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_54.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f8c941cfda419743af9832b10394c4f64b17c57876b98021414fd4bcb87533a +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_56.pt b/superkazam/ckpts/policy_weights/policy_epoch_56.pt new file mode 100755 index 0000000000000000000000000000000000000000..b63505be8d111b1524cdb545be9b9250cfbf1e31 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_56.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dfab0318f9ade01a5239f7e1990062c1f9a99923790529fad6c3cb8a827b397 +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_58.pt b/superkazam/ckpts/policy_weights/policy_epoch_58.pt new file mode 100755 index 0000000000000000000000000000000000000000..749c27d7c95c2e1b505030efbbb775ffa6e22db0 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_58.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3240388a1b1f6c0b5440bec2c12e9b1996b109b588e91e5fff25259b7775cd1 +size 571581627 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_6.pt b/superkazam/ckpts/policy_weights/policy_epoch_6.pt new file mode 100755 index 0000000000000000000000000000000000000000..298880980595a115becff33b61ed228dff36f8e2 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:730b67d36ba59d471590cedfd467766c0b6059dba3f1057712eaa2a2ab56cb76 +size 571580979 diff --git a/superkazam/ckpts/policy_weights/policy_epoch_8.pt b/superkazam/ckpts/policy_weights/policy_epoch_8.pt new file mode 100755 index 0000000000000000000000000000000000000000..f6f26691a50faaf1ca5a1033ab22803890f62ae2 --- /dev/null +++ b/superkazam/ckpts/policy_weights/policy_epoch_8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90f4c8be659accfc73ad53adf8ea738fef5a54641440b85ccf278393e9b26132 +size 571580979