diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e2c7c61fc569c1929e04e482e25f012495c7e571 --- /dev/null +++ b/config.json @@ -0,0 +1,211 @@ +{ + "architectures": [ + "PITForConditionGeneration" + ], + "hidden_sizes": [ + 1280, + 2048 + ], + "ignore_index": -100, + "model_type": "pit", + "moe_config": { + "capacity_factor": 1.25, + "drop_tokens": true, + "ep_size": 4, + "eval_capacity_factor": 2.0, + "k": 1, + "num_experts": 4, + "use_residual": false, + "use_rts": false, + "use_tutel": true + }, + "pad_token_id": 2, + "projector_hidden_act": "gelu", + "protein_config": { + "_name_or_path": "facebook/esm2_t33_650M_UR50D", + "add_cross_attention": false, + "architectures": [ + "EsmModel" + ], + "attention_probs_dropout_prob": 0.0, + "bad_words_ids": null, + "begin_suppress_tokens": null, + "bos_token_id": null, + "chunk_size_feed_forward": 0, + "classifier_dropout": null, + "cross_attention_hidden_size": null, + "decoder_start_token_id": null, + "diversity_penalty": 0.0, + "do_sample": false, + "early_stopping": false, + "emb_layer_norm_before": false, + "encoder_no_repeat_ngram_size": 0, + "eos_token_id": null, + "esmfold_config": null, + "exponential_decay_length_penalty": null, + "finetuning_task": null, + "forced_bos_token_id": null, + "forced_eos_token_id": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_size": 1280, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1" + }, + "initializer_range": 0.02, + "intermediate_size": 5120, + "is_decoder": false, + "is_encoder_decoder": false, + "is_folding_model": false, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "layer_norm_eps": 1e-05, + "length_penalty": 1.0, + "mask_token_id": 32, + "max_length": 20, + "max_position_embeddings": 1026, + "min_length": 0, + "model_type": "esm", + "no_repeat_ngram_size": 0, + "num_attention_heads": 20, + "num_beam_groups": 1, + "num_beams": 1, + "num_hidden_layers": 33, + "num_return_sequences": 1, + "output_attentions": false, + "output_hidden_states": false, + "output_scores": false, + "pad_token_id": 1, + "position_embedding_type": "rotary", + "prefix": null, + "problem_type": null, + "pruned_heads": {}, + "remove_invalid_values": false, + "repetition_penalty": 1.0, + "return_dict": true, + "return_dict_in_generate": false, + "sep_token_id": null, + "suppress_tokens": null, + "task_specific_params": null, + "temperature": 1.0, + "tf_legacy_loss": false, + "tie_encoder_decoder": false, + "tie_word_embeddings": true, + "token_dropout": true, + "tokenizer_class": null, + "top_k": 50, + "top_p": 1.0, + "torch_dtype": "float32", + "torchscript": false, + "typical_p": 1.0, + "use_bfloat16": false, + "use_cache": true, + "vocab_list": null, + "vocab_size": 33 + }, + "protein_feature_layer": -2, + "protein_feature_select_strategy": "full", + "protein_token_index": 32000, + "sequence_only": false, + "text_config": { + "_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", + "add_cross_attention": false, + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bad_words_ids": null, + "begin_suppress_tokens": null, + "bos_token_id": 1, + "chunk_size_feed_forward": 0, + "cross_attention_hidden_size": null, + "decoder_start_token_id": null, + "diversity_penalty": 0.0, + "do_sample": false, + "early_stopping": false, + "encoder_no_repeat_ngram_size": 0, + "eos_token_id": 2, + "exponential_decay_length_penalty": null, + "finetuning_task": null, + "forced_bos_token_id": null, + "forced_eos_token_id": null, + "hidden_act": "silu", + "hidden_size": 2048, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1" + }, + "initializer_range": 0.02, + "intermediate_size": 5632, + "is_decoder": false, + "is_encoder_decoder": false, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "length_penalty": 1.0, + "max_length": 20, + "max_position_embeddings": 2048, + "min_length": 0, + "model_type": "llama", + "moe_config": { + "capacity_factor": 1.25, + "drop_tokens": true, + "ep_size": 4, + "eval_capacity_factor": 2.0, + "k": 1, + "num_experts": 4, + "use_residual": false, + "use_rts": false, + "use_tutel": true + }, + "no_repeat_ngram_size": 0, + "num_attention_heads": 32, + "num_beam_groups": 1, + "num_beams": 1, + "num_experts": 4, + "num_hidden_layers": 22, + "num_key_value_heads": 4, + "num_return_sequences": 1, + "output_attentions": false, + "output_hidden_states": false, + "output_scores": false, + "pad_token_id": null, + "prefix": null, + "pretraining_tp": 1, + "problem_type": null, + "pruned_heads": {}, + "remove_invalid_values": false, + "repetition_penalty": 1.0, + "return_dict": true, + "return_dict_in_generate": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.01, + "sep_token_id": null, + "suppress_tokens": null, + "task_specific_params": null, + "temperature": 1.0, + "tf_legacy_loss": false, + "tie_encoder_decoder": false, + "tie_word_embeddings": false, + "tokenizer_class": null, + "top_k": 50, + "top_p": 1.0, + "torch_dtype": "bfloat16", + "torchscript": false, + "typical_p": 1.0, + "use_bfloat16": false, + "use_cache": true, + "vocab_size": 32008 + }, + "torch_dtype": "float16", + "transformers_version": "4.39.0", + "use_moe": true, + "vocab_size": 32008 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b2a4fd0744b1e7bfd88ec1905321de7924e1acc --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "pad_token_id": 2, + "transformers_version": "4.39.0" +} diff --git a/global_step42881/layer_0_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_0_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e83c42ebdf747e9e92356c077cd5ce777e9469ed --- /dev/null +++ b/global_step42881/layer_0_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f9334720cb66bb8edae70792d157e6e6bbeba2a0c4ad189bfe6837d29d8c626 +size 69208315 diff --git a/global_step42881/layer_0_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_0_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec9941a991e829bdb3a6eac53d38f2849c4cea64 --- /dev/null +++ b/global_step42881/layer_0_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14d5aa5e2db8fe12b6a3e3b7f2b9d3b8aadc4f314cfad3988f2989973a030495 +size 69208315 diff --git a/global_step42881/layer_0_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_0_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..908ef843f0775855d2f831d2d49b63689218e1ae --- /dev/null +++ b/global_step42881/layer_0_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf7fddea22017a1c4efc8af528b7304c5ece0045c17db20fccde990fb63c648a +size 69208315 diff --git a/global_step42881/layer_0_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_0_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..27e45027f8d37e16c7d5fe6461f86f2643eefd8b --- /dev/null +++ b/global_step42881/layer_0_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c207efa2d023042e3859864cd2eac2edce6555e4e79cb46b6c24adb8d8e3f0b3 +size 69208315 diff --git a/global_step42881/layer_10_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_10_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ffcf099c8118f97f0f77e01390ceca8b303d1bd0 --- /dev/null +++ b/global_step42881/layer_10_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fccb20750d77538e132e0665cce699e2a9ff09ffda40ff1eb5009f54e2178ae2 +size 69208322 diff --git a/global_step42881/layer_10_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_10_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fc8d4bdf96c2a1ad41d53e924690001de836a29e --- /dev/null +++ b/global_step42881/layer_10_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75c72c0e12bd231d014b242482263ddf798a98fddcb31ab59080c86c85bf52e6 +size 69208322 diff --git a/global_step42881/layer_10_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_10_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3628aa4aec7090e2265a6e837009355a293d8b2 --- /dev/null +++ b/global_step42881/layer_10_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf77f521c80c62cf9bcae6a670ac81792b8a056485b6d6bcea2f4169047dec78 +size 69208322 diff --git a/global_step42881/layer_10_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_10_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ace452f32a553a7594d407408b84bc90a1711904 --- /dev/null +++ b/global_step42881/layer_10_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65ef14de307d63a8c2f800736caee7e0d478592e0294ec2378756c9ca3c5aca2 +size 69208322 diff --git a/global_step42881/layer_11_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_11_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..eed9e6b4beff54f386cb73c43a991d1f9980d7ba --- /dev/null +++ b/global_step42881/layer_11_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bc11da963d58cf4258fb7a12507d21db61534b7a6ca9f99769afcf36efd6982 +size 69208322 diff --git a/global_step42881/layer_11_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_11_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc12c28abf626df762dc0919e3c9dc0c17ad95ed --- /dev/null +++ b/global_step42881/layer_11_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6155c974492d363923d83762428b44e29620b5481d5247b2a9604d31593dccc +size 69208322 diff --git a/global_step42881/layer_11_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_11_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a82753327a674f50608d399ad76d79d7bf3ac218 --- /dev/null +++ b/global_step42881/layer_11_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c010f792a5df1e4c183052a2026306e4a8582324f691ee199a8da70e82bd82ff +size 69208322 diff --git a/global_step42881/layer_11_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_11_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d947b91fa9a2898d64b27b722f048d9057cc368c --- /dev/null +++ b/global_step42881/layer_11_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4925edf9301027256ca84d9e6abbeaff3123750c7a8a43134b8bdadbe9bc5179 +size 69208322 diff --git a/global_step42881/layer_12_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_12_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..daa6494ec28f806abe3932cf7f660e1a88a8fcb8 --- /dev/null +++ b/global_step42881/layer_12_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f7c7d838aff342c56745bad832b01cd8e80e54bc7511719da50734ec55eac0b +size 69208322 diff --git a/global_step42881/layer_12_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_12_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..486062838a9a6b20e9ac907e6777ea40884e54b5 --- /dev/null +++ b/global_step42881/layer_12_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c016ef7424a64903b3aba4e932f03f8c8d512e7c787f4f5cdb8db7df9a8db94 +size 69208322 diff --git a/global_step42881/layer_12_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_12_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe0d99f8d5d814590631452d26367859dee4fb02 --- /dev/null +++ b/global_step42881/layer_12_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c360984aaaa3096ddf23303fa85fca6a323e78ccf02f6b6a6308a2ddbacfda98 +size 69208322 diff --git a/global_step42881/layer_12_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_12_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3eda8fa52be911b6f76d8f816f0d53f60ff83c4 --- /dev/null +++ b/global_step42881/layer_12_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72abf67665416daf2a9853a67304e3802e77d823b8f7ea9b9ed515555226e995 +size 69208322 diff --git a/global_step42881/layer_13_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_13_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d46db16ae6206a17bb5cc405921837b7809faacb --- /dev/null +++ b/global_step42881/layer_13_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0afc36387a7336e5825be287a5229cc9c43c0566b65b4bae05e662db698fdae +size 69208322 diff --git a/global_step42881/layer_13_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_13_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..023c59956632b39e016da3f0c0ca3cd2cc16dd9f --- /dev/null +++ b/global_step42881/layer_13_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:654f813074b2efd6bcb4b1f724d00ed0c26814a49eaa8a8b1f399dfa653ea03d +size 69208322 diff --git a/global_step42881/layer_13_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_13_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..04a5924e42aba761ffa5f71453cdc4aadf1e1a65 --- /dev/null +++ b/global_step42881/layer_13_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f8f8c1322551827f36fd22ea3dffbfce17ffd6a9152440954e903741c2b8ff6 +size 69208322 diff --git a/global_step42881/layer_13_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_13_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..762027677587cb15d9acce0383397ee684cd342f --- /dev/null +++ b/global_step42881/layer_13_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:338222fc4f05b6ded3e074216ff65d3bfe7f1abf2153e7559450d5503fc6d65f +size 69208322 diff --git a/global_step42881/layer_14_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_14_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..17424910defb53446b8d674e5fb45f1dbc92607b --- /dev/null +++ b/global_step42881/layer_14_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fce3063158fe47437da08ecda9ec18fd755783964fa99225c97d8bf6f6a1d908 +size 69208322 diff --git a/global_step42881/layer_14_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_14_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..342f47da588646998aa6d8fb855d7601b71e3f26 --- /dev/null +++ b/global_step42881/layer_14_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b732ff8112ad4664205ceacde240f4e0234959d2a4969bd90edc79ff42ffeb4 +size 69208322 diff --git a/global_step42881/layer_14_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_14_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bbc0b5212084b61d216803fee460a1fb1ea7e923 --- /dev/null +++ b/global_step42881/layer_14_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67ea53fbe3b9b1161ef29f419a9c60712a362911a722c9cb72932fbd13f7f8f0 +size 69208322 diff --git a/global_step42881/layer_14_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_14_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2fa0d48ccad35947d77af3effe5f9ec3e2c4bf37 --- /dev/null +++ b/global_step42881/layer_14_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c75999f108f897b5642c0dd90b0f869b7f9be306bd583b8395944e008295146b +size 69208322 diff --git a/global_step42881/layer_15_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_15_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2146ecf57b408058df9693371e575e10b88e3c10 --- /dev/null +++ b/global_step42881/layer_15_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d9c4859174be3c44487b10d44253d04f116336ae3fc5330899569682cb6fb43 +size 69208322 diff --git a/global_step42881/layer_15_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_15_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d90113fe70b3cdfcc0e547b4384458a81140b710 --- /dev/null +++ b/global_step42881/layer_15_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05c26d0fd0b001872c537a059fed702039c6c9a0378404af769a52745a2bef7b +size 69208322 diff --git a/global_step42881/layer_15_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_15_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8770448821f96396c8b6016e3ccf32c167894a9c --- /dev/null +++ b/global_step42881/layer_15_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:038c735113cff2b55fc102e81a968566df32844a4b845c41fb8b96c531522c76 +size 69208322 diff --git a/global_step42881/layer_15_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_15_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c35e542e0cc08f3cdb7e95c5ac2746f2df38fba6 --- /dev/null +++ b/global_step42881/layer_15_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad542b2d39a61cb8ff58b9de5ceb196aa1f71e084e1e849cb04ea0375f292db0 +size 69208322 diff --git a/global_step42881/layer_16_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_16_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b053818c26d5139f48fe74fa45a1784373c36a3 --- /dev/null +++ b/global_step42881/layer_16_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:901cebab41eeb993a66ebd4266baebb0546860afced1d9ed1f77415547185425 +size 69208322 diff --git a/global_step42881/layer_16_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_16_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d87b25beb10c3c060aa219e83cc406d3250aa29 --- /dev/null +++ b/global_step42881/layer_16_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4dbbc721c38e100be41b044d8fae0a4d91cea1176dce5f66b12398d235fca2c +size 69208322 diff --git a/global_step42881/layer_16_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_16_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc8f42f2b6adc71be58aec4178d514d177cf1737 --- /dev/null +++ b/global_step42881/layer_16_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41ead3d1b9babfe071fb51f3115cadb7e843aaee3d4ab1015bcb33a9ac789c15 +size 69208322 diff --git a/global_step42881/layer_16_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_16_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..127ba272b3f62684f89ecb81e6c2e3270fd1949b --- /dev/null +++ b/global_step42881/layer_16_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5574932be5c9851644df1627e9853c9694071b5f2c21fabbb306d01207a858e0 +size 69208322 diff --git a/global_step42881/layer_17_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_17_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a4710dd66ba80b4465b6cefc8158c5f6cca820c --- /dev/null +++ b/global_step42881/layer_17_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b26a37aefd9d022485c77d6554978e761884bdcb43e2a26419fa8460d66d74bd +size 69208322 diff --git a/global_step42881/layer_17_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_17_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..96450119fbe209cb88dfe7363c04d36655761fe9 --- /dev/null +++ b/global_step42881/layer_17_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92fd05577db1e58912ac6997c2cf3f6f6a221f58df5f96e00d702351322ab860 +size 69208322 diff --git a/global_step42881/layer_17_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_17_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..22bd58cc9a9b722cd8fc2047e527780db9816738 --- /dev/null +++ b/global_step42881/layer_17_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b400b11af8709dc291516d9c0d262b946b450f0cfbe914d280a263b5e9e7914c +size 69208322 diff --git a/global_step42881/layer_17_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_17_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..22a03c96f7cc895d77a68cb2fb7eef7c3938767c --- /dev/null +++ b/global_step42881/layer_17_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ee0b0c5d2c2d651b51c57549b21a6ea49044bf35971db24920d00410ca4fcbd +size 69208322 diff --git a/global_step42881/layer_18_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_18_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..652373c350c7b16f1e400c11c6180c3922f9617b --- /dev/null +++ b/global_step42881/layer_18_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdc5f0fe1647ca825d6988478802b3501d073be328d78a1522ccf277c091356e +size 69208322 diff --git a/global_step42881/layer_18_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_18_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a705a5a6eed80a666b1601957948b0888d5f351 --- /dev/null +++ b/global_step42881/layer_18_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84fff54421c19f82f1429f4a00bf4ff3d95fc0bfab8b943dc66a0de674cf3dbd +size 69208322 diff --git a/global_step42881/layer_18_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_18_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..341c356e5256c6b1c452e47e254c3ede202952d4 --- /dev/null +++ b/global_step42881/layer_18_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ebd64076ba54cf054d3ad5bdc2972e245a0181bbe48326c2b047439c0884e68 +size 69208322 diff --git a/global_step42881/layer_18_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_18_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..29e368e7c5e1f43d34b8364f9ba635eb4bbf3b7a --- /dev/null +++ b/global_step42881/layer_18_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4d5846a0f1ad01ddc6494edac21df741725011234c506a5dbd7b315dc0b2374 +size 69208322 diff --git a/global_step42881/layer_19_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_19_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a5707815cdb8cbb6234f63c684604404db6d762 --- /dev/null +++ b/global_step42881/layer_19_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef0e54bd119089ffa03847710707047ee47bf2359e328f0a3e19f3e196aba527 +size 69208322 diff --git a/global_step42881/layer_19_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_19_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb25a2eeae622201adf34ed497e4b248b1e30c53 --- /dev/null +++ b/global_step42881/layer_19_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd4f8c23a6c8ed6316847bbf3b5328406ec518181d8e70ea6f130efef0d22e88 +size 69208322 diff --git a/global_step42881/layer_19_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_19_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0642ee01b51a238c9a8a5ed3ca9833e68c79e017 --- /dev/null +++ b/global_step42881/layer_19_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32990e1b9eb56847fc6a169dc36aad6b432d1ec6b61e497d7c70bf4a1293e9a3 +size 69208322 diff --git a/global_step42881/layer_19_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_19_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1fead5fce375c67b01400fcb84ccb2dfb1a6fb0 --- /dev/null +++ b/global_step42881/layer_19_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3aa166dd16d7d05cfcf65e354e15c1ad60b10d04fa549cc6ac56b88b96d21b3 +size 69208322 diff --git a/global_step42881/layer_1_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_1_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fc75ee244fd1dd2e51acfec7d87a5523b5845303 --- /dev/null +++ b/global_step42881/layer_1_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2281a926c4358d4ac1277acdd7a7c2e831127bb7cdb37b7c85855f438b561222 +size 69208315 diff --git a/global_step42881/layer_1_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_1_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..32aef6ebebfb2e9fd27b0e024b872e235cfa32cb --- /dev/null +++ b/global_step42881/layer_1_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b6237f9b8bca602afbcf97a01215f462aeb639a2e2d2c10fcf89bbc48a5f891 +size 69208315 diff --git a/global_step42881/layer_1_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_1_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5a188a201d9a0b410df3c88a8300b327a632c55 --- /dev/null +++ b/global_step42881/layer_1_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f788e8d4e86beb03b9736250c0f2894d76fcc5d1f242b05641730be5affa7fd8 +size 69208315 diff --git a/global_step42881/layer_1_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_1_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..83068fc213828d9d98f84ac2e26c26b7ec173b1d --- /dev/null +++ b/global_step42881/layer_1_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf02ac780ab9243c6fb49cf777b3bd0d53d739909b0684b4eb3a5e976ba80da3 +size 69208315 diff --git a/global_step42881/layer_20_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_20_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6282e831abe4b9125903b7d54f9603027149678a --- /dev/null +++ b/global_step42881/layer_20_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64307ccd6a66087471f2acc63f27fab16395492c7e43b625e1024dca789ffcc3 +size 69208322 diff --git a/global_step42881/layer_20_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_20_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c4a0deb857dc3fcffda8c48fb3a42ff3dc420ea --- /dev/null +++ b/global_step42881/layer_20_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfa1daa9680633259652ae9ef142a11fb24178ba95b2a84ebcb4ab6fd5691832 +size 69208322 diff --git a/global_step42881/layer_20_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_20_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a10385c79481b5d43a9e42bb1161c09c5bd1046 --- /dev/null +++ b/global_step42881/layer_20_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47af7de2d94e71450a4c8e2b4c04f3a289e08dba46d2b135ab2872870b57a311 +size 69208322 diff --git a/global_step42881/layer_20_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_20_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e142c22dbe5ae714b02e213a918d1977e2ba1a9 --- /dev/null +++ b/global_step42881/layer_20_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad852f311d93f6e10ebdddcee3777b8f2fe37fe5e2b51fec040dcf4436cc1551 +size 69208322 diff --git a/global_step42881/layer_21_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_21_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c89b3af62b3e832394f978e650c31edb8a0920b3 --- /dev/null +++ b/global_step42881/layer_21_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f7c4f86758541537e5ce4a1f6cea098a21380794eac310fab00da6b603e7bbf +size 69208322 diff --git a/global_step42881/layer_21_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_21_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1e35dc0fbabadd58d65ce93b4bdea9a24aede05 --- /dev/null +++ b/global_step42881/layer_21_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a7b50060620606528937be94ca911cd01781a3deb4a3ae75c562f45b9320fc7 +size 69208322 diff --git a/global_step42881/layer_21_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_21_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5bb134327c64b81f3699f8c64c666a0d1c6debb0 --- /dev/null +++ b/global_step42881/layer_21_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65adc16892ed21bc1cb2d258137fc272abba9fb0b58e8984b74f5963ecfd3bca +size 69208322 diff --git a/global_step42881/layer_21_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_21_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..510278e3e7799ce9d2169a4c3bca0e546440a5f9 --- /dev/null +++ b/global_step42881/layer_21_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed099fd0034c55b961b37310a986f4aa39ec8155c4cbd9c870cf99984bfc9d32 +size 69208322 diff --git a/global_step42881/layer_2_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_2_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed5100e01ae3a0e828e7d0af9aded5e182e2c2f2 --- /dev/null +++ b/global_step42881/layer_2_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d7f268b29604471e2fa0b4adfa546eb66e1141004f0fedde0239a1ef7508ed2 +size 69208315 diff --git a/global_step42881/layer_2_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_2_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c268711904f167b1602101db62d13a07d92ed51 --- /dev/null +++ b/global_step42881/layer_2_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bfec87739808268fad81008c76992209ee060900c7eb9a5574483c6057a2174 +size 69208315 diff --git a/global_step42881/layer_2_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_2_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..22cc05c6300c1322cd0bb409b376fd412f339257 --- /dev/null +++ b/global_step42881/layer_2_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d48b29d98cdc393bac1bf16ea89b66a80f249df0e43476cc6eb281faab39fe0d +size 69208315 diff --git a/global_step42881/layer_2_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_2_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..36317084ec41fcd95b041c7bb656d0827b09abf0 --- /dev/null +++ b/global_step42881/layer_2_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86d6506be8e458a022f7679a4f486067815a531482312434486f9ae49aaa9be5 +size 69208315 diff --git a/global_step42881/layer_3_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_3_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b862f0a417606c642640a3637e864bdefbca018 --- /dev/null +++ b/global_step42881/layer_3_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:694b2d777a5b1276a49f936524834c0d5b5f011cfa1d045a7643f38e98140d0a +size 69208315 diff --git a/global_step42881/layer_3_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_3_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a61b49125e8696c7b0ad3ccebcc1455cff65a6e3 --- /dev/null +++ b/global_step42881/layer_3_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f637c1178b28674ea0509a795c746c6434216ebef69daee90c92b1823a4e01f +size 69208315 diff --git a/global_step42881/layer_3_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_3_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bcaedd27f908cf39a836213581a7a719a520ea57 --- /dev/null +++ b/global_step42881/layer_3_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad6e51b5c8489ec6f5a82fc8d1966059bfcc0a8f27b39c6360da87fe4a560f8e +size 69208315 diff --git a/global_step42881/layer_3_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_3_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..489ee182cf1a68510e0b344413e6f48b1cace1dd --- /dev/null +++ b/global_step42881/layer_3_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35ac1108631b2a7a8612b50740432f8c81e9c1edaa3992158a72557b7ecedc76 +size 69208315 diff --git a/global_step42881/layer_4_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_4_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1495f099614fa57fa9ce8538b0e862c835a362c1 --- /dev/null +++ b/global_step42881/layer_4_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:355cecb07b3a06dd53dde1cbb024ce5036ef56218c730cba22bc736eb8610ae9 +size 69208315 diff --git a/global_step42881/layer_4_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_4_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..df5dd5cf3086e81e0270ba32b6490c9470f1b885 --- /dev/null +++ b/global_step42881/layer_4_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a16b2afb6fc127d21a5e9bcafd3d32bb1c42f526640355be1b0a169fb32168c7 +size 69208315 diff --git a/global_step42881/layer_4_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_4_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..27f113f1bd55696d8955419314f402adbe02ee51 --- /dev/null +++ b/global_step42881/layer_4_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4f8aaa9e90bcfb7e0a4b04b555da7b72449f2f22a49f8ae4be47f11aa3d9655 +size 69208315 diff --git a/global_step42881/layer_4_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_4_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5aa8173990777f02baeb4229b421baeec1464e39 --- /dev/null +++ b/global_step42881/layer_4_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29985f4b891c47e4ba4014e1170964201c6e5a3aef859d1f22dfc9e00300a2b1 +size 69208315 diff --git a/global_step42881/layer_5_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_5_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5791d4629b8bb473f72da4761cf093efb7512729 --- /dev/null +++ b/global_step42881/layer_5_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bb720d3b0cde2bb9f53bef2282f25c735f9af47a4a3527bb8528e0e6c06df0b +size 69208315 diff --git a/global_step42881/layer_5_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_5_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d882e869b85b0ee058161277494a5faa84ec7e7 --- /dev/null +++ b/global_step42881/layer_5_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dca491a43f7f7454c9efd199abe671dbcb44814ff452f2df3ab4e72e363b5094 +size 69208315 diff --git a/global_step42881/layer_5_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_5_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3e24fc78bd0f7757fe0c0ba550e726553a3909c --- /dev/null +++ b/global_step42881/layer_5_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47eca3287c9e353a77d567b2f09583918d4eb263dec8c79a4d1987925b8e188c +size 69208315 diff --git a/global_step42881/layer_5_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_5_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3afefbcbcc0ea9b732015e7d081b7f1efda3322b --- /dev/null +++ b/global_step42881/layer_5_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5903a8be9c1016bdf128c1019b7c871ebf65b242a9d67268644a9f22caeba84 +size 69208315 diff --git a/global_step42881/layer_6_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_6_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..726f11aaedbf73e00a4754115dc62a981c668e7b --- /dev/null +++ b/global_step42881/layer_6_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37ee8122e21d8cac7e04edd15829ccd81240322a5cf720f43b34528f33dcb18a +size 69208315 diff --git a/global_step42881/layer_6_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_6_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2bec5af15bd9634304746d9ca3cb60c61c0c438d --- /dev/null +++ b/global_step42881/layer_6_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99e7cb286792383a471e087448e34c2f1f5f0cfad35002eaeda64f14cd3cba59 +size 69208315 diff --git a/global_step42881/layer_6_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_6_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d5c74e954ee37067f403cb718489e418818640e5 --- /dev/null +++ b/global_step42881/layer_6_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75386298388aba453f8f1061017fc0d7b44b0f800d3d124bb12f5e6f9fbf39c6 +size 69208315 diff --git a/global_step42881/layer_6_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_6_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..aace0297057f147fb6402f76e03a784dd869cae5 --- /dev/null +++ b/global_step42881/layer_6_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dda9c04021db5f09710deff9bb4c40dbb8f9c2f730e674b8f5406c30dcec564b +size 69208315 diff --git a/global_step42881/layer_7_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_7_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ebdc14dd090043d9e2f6a0e116bae5664c7caaeb --- /dev/null +++ b/global_step42881/layer_7_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:648268239f5e617627f0b171f7c556f43a1baddf282c1480f69b88e1d735d6c7 +size 69208315 diff --git a/global_step42881/layer_7_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_7_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2cff02cf7d3fa651d68ba1002dcb5d909c118a5 --- /dev/null +++ b/global_step42881/layer_7_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4017198c444a858a1782228dfe05c6a094e87913e5745b10c0b623008eb72635 +size 69208315 diff --git a/global_step42881/layer_7_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_7_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bfbb5f88e900facab328bb79fea25a96040057c5 --- /dev/null +++ b/global_step42881/layer_7_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0775eabecdb7d8647d1c807525bb2a2cb98d9702708211606ce17dac080ee236 +size 69208315 diff --git a/global_step42881/layer_7_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_7_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d691d7e2f1de99648800ee1684e44de79ffb2799 --- /dev/null +++ b/global_step42881/layer_7_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68e9c452c04e1c7522903b6a2ef00e1235e020bd44791ed43580b674cbcdec7e +size 69208315 diff --git a/global_step42881/layer_8_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_8_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..daf9e41f85ead3a60f07458d0bff008eb2d5fcfd --- /dev/null +++ b/global_step42881/layer_8_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72ff7bff466ba7b2ae5c6eef8c42f1d4a1ca7dc3a5745d468ca0c6d1b7a97057 +size 69208315 diff --git a/global_step42881/layer_8_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_8_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5dac5103828e44c28d57fabd3ec501975f4b514 --- /dev/null +++ b/global_step42881/layer_8_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13224df327b92a60d80b28e3994d601bea3573d424bb2fef5af8301e4259b7f0 +size 69208315 diff --git a/global_step42881/layer_8_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_8_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bdc38933b833f6b304f6275533778575c822074c --- /dev/null +++ b/global_step42881/layer_8_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f36e0e166d790781fcd37ec8ed363f34b109aef256501f28177eab9a60d3522f +size 69208315 diff --git a/global_step42881/layer_8_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_8_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..901efb29428bdf5bff51118e64ffbaeafc8e4cd5 --- /dev/null +++ b/global_step42881/layer_8_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:288926a52f2ca47cbaee52b1d8ef0e98cb82fc4c1874c7575519bd6787f607ce +size 69208315 diff --git a/global_step42881/layer_9_expert_0_mp_rank_00_model_states.pt b/global_step42881/layer_9_expert_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..379584a4a74bc04cc7987dbf98ce011a52ec4661 --- /dev/null +++ b/global_step42881/layer_9_expert_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb3f63a913b5e0a5d10af2a20af3ea53a7bf560b369043f79fb57b60a10e0d0d +size 69208315 diff --git a/global_step42881/layer_9_expert_1_mp_rank_00_model_states.pt b/global_step42881/layer_9_expert_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c83ad8f518d273424650476f634749ece158d89b --- /dev/null +++ b/global_step42881/layer_9_expert_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05a9ef53581b61af0aed25e04de8cbc95baa1bec65243d1908d29ff54a15d38b +size 69208315 diff --git a/global_step42881/layer_9_expert_2_mp_rank_00_model_states.pt b/global_step42881/layer_9_expert_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4eb732fa900a7ac0e08d19796c21ce5662ac49a --- /dev/null +++ b/global_step42881/layer_9_expert_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cad1d4c3dd6061c5a99a778b192c3893372fd4a3ab0b0f86d6cbad4ac92e6aa +size 69208315 diff --git a/global_step42881/layer_9_expert_3_mp_rank_00_model_states.pt b/global_step42881/layer_9_expert_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e8af5f917bde717b8e07b02b2a5e06ae9c277e4 --- /dev/null +++ b/global_step42881/layer_9_expert_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d56f5cd773bcf58be8e4bb8597659e7f3ad0774b572e97e675a9419a28508d2f +size 69208315 diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4b130b9cc73ebc43e884a418c3ba1df191d5e27 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c57b5f36e756d5a0dcb44c1c4fcdcf2cc9b714977bef8e2b100e16496483b4fa +size 3519346410 diff --git a/protein_tokenizer/special_tokens_map.json b/protein_tokenizer/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ef5f0f7d7baf4947564eafcf79972d272cd80a15 --- /dev/null +++ b/protein_tokenizer/special_tokens_map.json @@ -0,0 +1,37 @@ +{ + "cls_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "mask_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/protein_tokenizer/tokenizer_config.json b/protein_tokenizer/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..dbcdd9fb2e742627ee310713615e0d7aeed0c34e --- /dev/null +++ b/protein_tokenizer/tokenizer_config.json @@ -0,0 +1,52 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "clean_up_tokenization_spaces": true, + "cls_token": "", + "eos_token": "", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "tokenizer_class": "EsmTokenizer", + "unk_token": "" +} diff --git a/protein_tokenizer/vocab.txt b/protein_tokenizer/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..6b946952cc35537226f07fd70957ee2f848880d2 --- /dev/null +++ b/protein_tokenizer/vocab.txt @@ -0,0 +1,33 @@ + + + + +L +A +G +V +S +E +R +T +I +D +P +K +Q +N +F +Y +M +H +W +C +X +B +U +Z +O +. +- + + \ No newline at end of file diff --git a/text_tokenizer/added_tokens.json b/text_tokenizer/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..acf048e5b3c342caee15461a9d118a3e262bbdab --- /dev/null +++ b/text_tokenizer/added_tokens.json @@ -0,0 +1,9 @@ +{ + "<>": 32006, + "<>": 32005, + "": 32001, + "": 32002, + "": 32000, + "[/INST]": 32004, + "[INST]": 32003 +} diff --git a/text_tokenizer/special_tokens_map.json b/text_tokenizer/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..efb62e2f5af43f40e63adffc9310020b3b63ace3 --- /dev/null +++ b/text_tokenizer/special_tokens_map.json @@ -0,0 +1,74 @@ +{ + "additional_special_tokens": [ + { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[INST]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[/INST]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "<>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "<>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/text_tokenizer/tokenizer.model b/text_tokenizer/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899 --- /dev/null +++ b/text_tokenizer/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/text_tokenizer/tokenizer_config.json b/text_tokenizer/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..afa220db15d4bd1a8cfc2126b3ab4c522c2a7e13 --- /dev/null +++ b/text_tokenizer/tokenizer_config.json @@ -0,0 +1,108 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32001": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32002": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32003": { + "content": "[INST]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32004": { + "content": "[/INST]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32005": { + "content": "<>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32006": { + "content": "<>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "[INST]", + "[/INST]", + "<>", + "<>" + ], + "bos_token": "", + "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": false, + "model_max_length": 2048, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +}