diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-14964/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-14964/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-14964/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-14964/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-14964/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b12ff2c6ececcb9b89dbe0da9c488a5430abfed4 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-14964/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ec7b17b24be98bb5061bee26e48fcb0342f48a66630e356ef3575d4d7cba6d3 +size 1418281093 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-14964/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-14964/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..3207cbe50f67eb450bd17242d94f51e5eb6255f3 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-14964/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45fd50b290bd60448e72ce386cbd24817b01c712099ea20c4888d14be5d08d06 +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-14964/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-14964/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9bc213505a7eba1a415a19969e5e1a5f7659bcfd --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-14964/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e6c4fae2dbc4e09dcb1916b3513181a62899bf27dcfddffe76449df6be937a2 +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-14964/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-14964/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..16e63c4ef691d7e5b73179fae99bb1de9d66cd6d --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-14964/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45c6f90078a2450ac8516906119b963c86e82b14a8e8f9482b75b9f2e26282b4 +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-14964/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-14964/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f1bf17b122516c66359795d62cf5c7d5cf52b427 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-14964/trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 14964, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07, + "learning_rate": 2e-05, + "loss": 0.1264, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 1.9865446716899895e-05, + "loss": 0.0227, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 1.9730893433799788e-05, + "loss": 0.0203, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 1.959634015069968e-05, + "loss": 0.0197, + "step": 2000 + }, + { + "epoch": 0.33, + "learning_rate": 1.946178686759957e-05, + "loss": 0.0185, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 1.9327233584499463e-05, + "loss": 0.0177, + "step": 3000 + }, + { + "epoch": 0.47, + "learning_rate": 1.9192680301399356e-05, + "loss": 0.0181, + "step": 3500 + }, + { + "epoch": 0.53, + "learning_rate": 1.905812701829925e-05, + "loss": 0.0171, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 1.8923573735199142e-05, + "loss": 0.0174, + "step": 4500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8789020452099035e-05, + "loss": 0.017, + "step": 5000 + }, + { + "epoch": 0.74, + "learning_rate": 1.8654467168998925e-05, + "loss": 0.0168, + "step": 5500 + }, + { + "epoch": 0.8, + "learning_rate": 1.8519913885898818e-05, + "loss": 0.0161, + "step": 6000 + }, + { + "epoch": 0.87, + "learning_rate": 1.838536060279871e-05, + "loss": 0.0156, + "step": 6500 + }, + { + "epoch": 0.94, + "learning_rate": 1.8250807319698604e-05, + "loss": 0.0167, + "step": 7000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9943580499255906, + "eval_f1": 0.0, + "eval_loss": 0.015657523646950722, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.099, + "eval_samples_per_second": 708.913, + "eval_steps_per_second": 5.542, + "step": 7482 + }, + { + "epoch": 1.0, + "learning_rate": 1.8116254036598497e-05, + "loss": 0.0152, + "step": 7500 + }, + { + "epoch": 1.07, + "learning_rate": 1.7981700753498386e-05, + "loss": 0.0119, + "step": 8000 + }, + { + "epoch": 1.14, + "learning_rate": 1.784714747039828e-05, + "loss": 0.0116, + "step": 8500 + }, + { + "epoch": 1.2, + "learning_rate": 1.7712594187298172e-05, + "loss": 0.0116, + "step": 9000 + }, + { + "epoch": 1.27, + "learning_rate": 1.7578040904198065e-05, + "loss": 0.0114, + "step": 9500 + }, + { + "epoch": 1.34, + "learning_rate": 1.7443487621097955e-05, + "loss": 0.0114, + "step": 10000 + }, + { + "epoch": 1.4, + "learning_rate": 1.7308934337997848e-05, + "loss": 0.0114, + "step": 10500 + }, + { + "epoch": 1.47, + "learning_rate": 1.717438105489774e-05, + "loss": 0.0118, + "step": 11000 + }, + { + "epoch": 1.54, + "learning_rate": 1.7039827771797634e-05, + "loss": 0.0119, + "step": 11500 + }, + { + "epoch": 1.6, + "learning_rate": 1.6905274488697524e-05, + "loss": 0.0111, + "step": 12000 + }, + { + "epoch": 1.67, + "learning_rate": 1.6770721205597417e-05, + "loss": 0.0118, + "step": 12500 + }, + { + "epoch": 1.74, + "learning_rate": 1.663616792249731e-05, + "loss": 0.0116, + "step": 13000 + }, + { + "epoch": 1.8, + "learning_rate": 1.6501614639397203e-05, + "loss": 0.0112, + "step": 13500 + }, + { + "epoch": 1.87, + "learning_rate": 1.6367061356297096e-05, + "loss": 0.0114, + "step": 14000 + }, + { + "epoch": 1.94, + "learning_rate": 1.6232508073196985e-05, + "loss": 0.0115, + "step": 14500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9940472490465726, + "eval_f1": 0.0, + "eval_loss": 0.016565397381782532, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 82.6741, + "eval_samples_per_second": 609.659, + "eval_steps_per_second": 4.766, + "step": 14964 + } + ], + "logging_steps": 500, + "max_steps": 74820, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1.7008214784542912e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-14964/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-14964/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..56f659dc10654a15dedcf717815956383dd4fef5 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-14964/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f713488e20afd5141b39f9aaa67feba7da0a360f752dbaa308013d8d5212f449 +size 4091 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-22446/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-22446/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-22446/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-22446/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-22446/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..745b5704ac488978c1236c910834540059be0d88 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-22446/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fd2e501a8385c447d794fe6ee9b83ef4c2e239115db3ba191ebc165a10e527f +size 1418281093 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-22446/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-22446/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..0ec3c72d93a003850bec98b92770c6c7a34f0ca5 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-22446/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c233ab490e4a16384978560e9ce243b94e4d0edde66e9c7736ac72bc328cbed +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-22446/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-22446/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..311b9836dd1cc5b5aba89aa163537198e6a69a3b --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-22446/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26c94452baca72f151e8fd71f227f974658fc15f84f8df12aa691d214f2d3bb1 +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-22446/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-22446/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f8d56bccb96e583d7d642b90a40bf2e1ac09fcb --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-22446/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95aa11d132c66709e53ab1861363db19e03cdd43abe90abb88ecfb1d00f88ed7 +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-22446/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-22446/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a8261f71491b6917951c1663b72d2ee1fbdbcbf2 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-22446/trainer_state.json @@ -0,0 +1,319 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 22446, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07, + "learning_rate": 2e-05, + "loss": 0.1264, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 1.9865446716899895e-05, + "loss": 0.0227, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 1.9730893433799788e-05, + "loss": 0.0203, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 1.959634015069968e-05, + "loss": 0.0197, + "step": 2000 + }, + { + "epoch": 0.33, + "learning_rate": 1.946178686759957e-05, + "loss": 0.0185, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 1.9327233584499463e-05, + "loss": 0.0177, + "step": 3000 + }, + { + "epoch": 0.47, + "learning_rate": 1.9192680301399356e-05, + "loss": 0.0181, + "step": 3500 + }, + { + "epoch": 0.53, + "learning_rate": 1.905812701829925e-05, + "loss": 0.0171, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 1.8923573735199142e-05, + "loss": 0.0174, + "step": 4500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8789020452099035e-05, + "loss": 0.017, + "step": 5000 + }, + { + "epoch": 0.74, + "learning_rate": 1.8654467168998925e-05, + "loss": 0.0168, + "step": 5500 + }, + { + "epoch": 0.8, + "learning_rate": 1.8519913885898818e-05, + "loss": 0.0161, + "step": 6000 + }, + { + "epoch": 0.87, + "learning_rate": 1.838536060279871e-05, + "loss": 0.0156, + "step": 6500 + }, + { + "epoch": 0.94, + "learning_rate": 1.8250807319698604e-05, + "loss": 0.0167, + "step": 7000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9943580499255906, + "eval_f1": 0.0, + "eval_loss": 0.015657523646950722, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.099, + "eval_samples_per_second": 708.913, + "eval_steps_per_second": 5.542, + "step": 7482 + }, + { + "epoch": 1.0, + "learning_rate": 1.8116254036598497e-05, + "loss": 0.0152, + "step": 7500 + }, + { + "epoch": 1.07, + "learning_rate": 1.7981700753498386e-05, + "loss": 0.0119, + "step": 8000 + }, + { + "epoch": 1.14, + "learning_rate": 1.784714747039828e-05, + "loss": 0.0116, + "step": 8500 + }, + { + "epoch": 1.2, + "learning_rate": 1.7712594187298172e-05, + "loss": 0.0116, + "step": 9000 + }, + { + "epoch": 1.27, + "learning_rate": 1.7578040904198065e-05, + "loss": 0.0114, + "step": 9500 + }, + { + "epoch": 1.34, + "learning_rate": 1.7443487621097955e-05, + "loss": 0.0114, + "step": 10000 + }, + { + "epoch": 1.4, + "learning_rate": 1.7308934337997848e-05, + "loss": 0.0114, + "step": 10500 + }, + { + "epoch": 1.47, + "learning_rate": 1.717438105489774e-05, + "loss": 0.0118, + "step": 11000 + }, + { + "epoch": 1.54, + "learning_rate": 1.7039827771797634e-05, + "loss": 0.0119, + "step": 11500 + }, + { + "epoch": 1.6, + "learning_rate": 1.6905274488697524e-05, + "loss": 0.0111, + "step": 12000 + }, + { + "epoch": 1.67, + "learning_rate": 1.6770721205597417e-05, + "loss": 0.0118, + "step": 12500 + }, + { + "epoch": 1.74, + "learning_rate": 1.663616792249731e-05, + "loss": 0.0116, + "step": 13000 + }, + { + "epoch": 1.8, + "learning_rate": 1.6501614639397203e-05, + "loss": 0.0112, + "step": 13500 + }, + { + "epoch": 1.87, + "learning_rate": 1.6367061356297096e-05, + "loss": 0.0114, + "step": 14000 + }, + { + "epoch": 1.94, + "learning_rate": 1.6232508073196985e-05, + "loss": 0.0115, + "step": 14500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9940472490465726, + "eval_f1": 0.0, + "eval_loss": 0.016565397381782532, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 82.6741, + "eval_samples_per_second": 609.659, + "eval_steps_per_second": 4.766, + "step": 14964 + }, + { + "epoch": 2.0, + "learning_rate": 1.6097954790096878e-05, + "loss": 0.0109, + "step": 15000 + }, + { + "epoch": 2.07, + "learning_rate": 1.596340150699677e-05, + "loss": 0.0077, + "step": 15500 + }, + { + "epoch": 2.14, + "learning_rate": 1.5828848223896664e-05, + "loss": 0.0075, + "step": 16000 + }, + { + "epoch": 2.21, + "learning_rate": 1.5694294940796557e-05, + "loss": 0.0076, + "step": 16500 + }, + { + "epoch": 2.27, + "learning_rate": 1.555974165769645e-05, + "loss": 0.0076, + "step": 17000 + }, + { + "epoch": 2.34, + "learning_rate": 1.542518837459634e-05, + "loss": 0.0077, + "step": 17500 + }, + { + "epoch": 2.41, + "learning_rate": 1.5290635091496233e-05, + "loss": 0.0077, + "step": 18000 + }, + { + "epoch": 2.47, + "learning_rate": 1.5156081808396126e-05, + "loss": 0.0076, + "step": 18500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5021528525296019e-05, + "loss": 0.0078, + "step": 19000 + }, + { + "epoch": 2.61, + "learning_rate": 1.4886975242195912e-05, + "loss": 0.008, + "step": 19500 + }, + { + "epoch": 2.67, + "learning_rate": 1.4752421959095805e-05, + "loss": 0.0077, + "step": 20000 + }, + { + "epoch": 2.74, + "learning_rate": 1.4617868675995694e-05, + "loss": 0.0078, + "step": 20500 + }, + { + "epoch": 2.81, + "learning_rate": 1.4483315392895587e-05, + "loss": 0.0081, + "step": 21000 + }, + { + "epoch": 2.87, + "learning_rate": 1.434876210979548e-05, + "loss": 0.0081, + "step": 21500 + }, + { + "epoch": 2.94, + "learning_rate": 1.4214208826695373e-05, + "loss": 0.0078, + "step": 22000 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9940838138558689, + "eval_f1": 0.0, + "eval_loss": 0.020097261294722557, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.581, + "eval_samples_per_second": 704.139, + "eval_steps_per_second": 5.504, + "step": 22446 + } + ], + "logging_steps": 500, + "max_steps": 74820, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 2.5512322176814368e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-22446/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-22446/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..56f659dc10654a15dedcf717815956383dd4fef5 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-22446/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f713488e20afd5141b39f9aaa67feba7da0a360f752dbaa308013d8d5212f449 +size 4091 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-29928/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-29928/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-29928/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-29928/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-29928/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..385d46c66558c9c8c642f93e6ceb452a0d0609db --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-29928/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5a688150a58135e72dadfc5d4565bc907692c77456484bef730d739ff3a3399 +size 1418281093 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-29928/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-29928/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..7333604f7499c7fc8b30f770d03e843ae4a6cbc8 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-29928/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad549c3c9a145d0ba0d3601a1b8871ffef6ec50f3964391cf383c6aeb10a44da +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-29928/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-29928/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..343f98423333b776804dea639a3eb228322e7f2b --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-29928/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9bec1569f2b2b19949906563cbd6e2d8a69b35607a8366b6695d838b750331b +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-29928/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-29928/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2538ea1bc1d89ce561d63cfb0b8a012031c753d3 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-29928/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5c5e1d1870ca9edfabe43cd9960ac818f48f088d52b8f2b3a331c9e576d215a +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-29928/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-29928/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..402a8c310d73c692a30be7afb56c9986f7d3e139 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-29928/trainer_state.json @@ -0,0 +1,421 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.0, + "eval_steps": 500, + "global_step": 29928, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07, + "learning_rate": 2e-05, + "loss": 0.1264, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 1.9865446716899895e-05, + "loss": 0.0227, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 1.9730893433799788e-05, + "loss": 0.0203, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 1.959634015069968e-05, + "loss": 0.0197, + "step": 2000 + }, + { + "epoch": 0.33, + "learning_rate": 1.946178686759957e-05, + "loss": 0.0185, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 1.9327233584499463e-05, + "loss": 0.0177, + "step": 3000 + }, + { + "epoch": 0.47, + "learning_rate": 1.9192680301399356e-05, + "loss": 0.0181, + "step": 3500 + }, + { + "epoch": 0.53, + "learning_rate": 1.905812701829925e-05, + "loss": 0.0171, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 1.8923573735199142e-05, + "loss": 0.0174, + "step": 4500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8789020452099035e-05, + "loss": 0.017, + "step": 5000 + }, + { + "epoch": 0.74, + "learning_rate": 1.8654467168998925e-05, + "loss": 0.0168, + "step": 5500 + }, + { + "epoch": 0.8, + "learning_rate": 1.8519913885898818e-05, + "loss": 0.0161, + "step": 6000 + }, + { + "epoch": 0.87, + "learning_rate": 1.838536060279871e-05, + "loss": 0.0156, + "step": 6500 + }, + { + "epoch": 0.94, + "learning_rate": 1.8250807319698604e-05, + "loss": 0.0167, + "step": 7000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9943580499255906, + "eval_f1": 0.0, + "eval_loss": 0.015657523646950722, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.099, + "eval_samples_per_second": 708.913, + "eval_steps_per_second": 5.542, + "step": 7482 + }, + { + "epoch": 1.0, + "learning_rate": 1.8116254036598497e-05, + "loss": 0.0152, + "step": 7500 + }, + { + "epoch": 1.07, + "learning_rate": 1.7981700753498386e-05, + "loss": 0.0119, + "step": 8000 + }, + { + "epoch": 1.14, + "learning_rate": 1.784714747039828e-05, + "loss": 0.0116, + "step": 8500 + }, + { + "epoch": 1.2, + "learning_rate": 1.7712594187298172e-05, + "loss": 0.0116, + "step": 9000 + }, + { + "epoch": 1.27, + "learning_rate": 1.7578040904198065e-05, + "loss": 0.0114, + "step": 9500 + }, + { + "epoch": 1.34, + "learning_rate": 1.7443487621097955e-05, + "loss": 0.0114, + "step": 10000 + }, + { + "epoch": 1.4, + "learning_rate": 1.7308934337997848e-05, + "loss": 0.0114, + "step": 10500 + }, + { + "epoch": 1.47, + "learning_rate": 1.717438105489774e-05, + "loss": 0.0118, + "step": 11000 + }, + { + "epoch": 1.54, + "learning_rate": 1.7039827771797634e-05, + "loss": 0.0119, + "step": 11500 + }, + { + "epoch": 1.6, + "learning_rate": 1.6905274488697524e-05, + "loss": 0.0111, + "step": 12000 + }, + { + "epoch": 1.67, + "learning_rate": 1.6770721205597417e-05, + "loss": 0.0118, + "step": 12500 + }, + { + "epoch": 1.74, + "learning_rate": 1.663616792249731e-05, + "loss": 0.0116, + "step": 13000 + }, + { + "epoch": 1.8, + "learning_rate": 1.6501614639397203e-05, + "loss": 0.0112, + "step": 13500 + }, + { + "epoch": 1.87, + "learning_rate": 1.6367061356297096e-05, + "loss": 0.0114, + "step": 14000 + }, + { + "epoch": 1.94, + "learning_rate": 1.6232508073196985e-05, + "loss": 0.0115, + "step": 14500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9940472490465726, + "eval_f1": 0.0, + "eval_loss": 0.016565397381782532, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 82.6741, + "eval_samples_per_second": 609.659, + "eval_steps_per_second": 4.766, + "step": 14964 + }, + { + "epoch": 2.0, + "learning_rate": 1.6097954790096878e-05, + "loss": 0.0109, + "step": 15000 + }, + { + "epoch": 2.07, + "learning_rate": 1.596340150699677e-05, + "loss": 0.0077, + "step": 15500 + }, + { + "epoch": 2.14, + "learning_rate": 1.5828848223896664e-05, + "loss": 0.0075, + "step": 16000 + }, + { + "epoch": 2.21, + "learning_rate": 1.5694294940796557e-05, + "loss": 0.0076, + "step": 16500 + }, + { + "epoch": 2.27, + "learning_rate": 1.555974165769645e-05, + "loss": 0.0076, + "step": 17000 + }, + { + "epoch": 2.34, + "learning_rate": 1.542518837459634e-05, + "loss": 0.0077, + "step": 17500 + }, + { + "epoch": 2.41, + "learning_rate": 1.5290635091496233e-05, + "loss": 0.0077, + "step": 18000 + }, + { + "epoch": 2.47, + "learning_rate": 1.5156081808396126e-05, + "loss": 0.0076, + "step": 18500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5021528525296019e-05, + "loss": 0.0078, + "step": 19000 + }, + { + "epoch": 2.61, + "learning_rate": 1.4886975242195912e-05, + "loss": 0.008, + "step": 19500 + }, + { + "epoch": 2.67, + "learning_rate": 1.4752421959095805e-05, + "loss": 0.0077, + "step": 20000 + }, + { + "epoch": 2.74, + "learning_rate": 1.4617868675995694e-05, + "loss": 0.0078, + "step": 20500 + }, + { + "epoch": 2.81, + "learning_rate": 1.4483315392895587e-05, + "loss": 0.0081, + "step": 21000 + }, + { + "epoch": 2.87, + "learning_rate": 1.434876210979548e-05, + "loss": 0.0081, + "step": 21500 + }, + { + "epoch": 2.94, + "learning_rate": 1.4214208826695373e-05, + "loss": 0.0078, + "step": 22000 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9940838138558689, + "eval_f1": 0.0, + "eval_loss": 0.020097261294722557, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.581, + "eval_samples_per_second": 704.139, + "eval_steps_per_second": 5.504, + "step": 22446 + }, + { + "epoch": 3.01, + "learning_rate": 1.4079655543595265e-05, + "loss": 0.0074, + "step": 22500 + }, + { + "epoch": 3.07, + "learning_rate": 1.3945102260495158e-05, + "loss": 0.0051, + "step": 23000 + }, + { + "epoch": 3.14, + "learning_rate": 1.3810548977395049e-05, + "loss": 0.0052, + "step": 23500 + }, + { + "epoch": 3.21, + "learning_rate": 1.3675995694294942e-05, + "loss": 0.005, + "step": 24000 + }, + { + "epoch": 3.27, + "learning_rate": 1.3541442411194833e-05, + "loss": 0.0056, + "step": 24500 + }, + { + "epoch": 3.34, + "learning_rate": 1.3406889128094726e-05, + "loss": 0.0049, + "step": 25000 + }, + { + "epoch": 3.41, + "learning_rate": 1.3272335844994619e-05, + "loss": 0.0054, + "step": 25500 + }, + { + "epoch": 3.48, + "learning_rate": 1.3137782561894512e-05, + "loss": 0.0052, + "step": 26000 + }, + { + "epoch": 3.54, + "learning_rate": 1.3003229278794403e-05, + "loss": 0.0055, + "step": 26500 + }, + { + "epoch": 3.61, + "learning_rate": 1.2868675995694295e-05, + "loss": 0.0053, + "step": 27000 + }, + { + "epoch": 3.68, + "learning_rate": 1.2734122712594188e-05, + "loss": 0.0055, + "step": 27500 + }, + { + "epoch": 3.74, + "learning_rate": 1.259956942949408e-05, + "loss": 0.0056, + "step": 28000 + }, + { + "epoch": 3.81, + "learning_rate": 1.2465016146393974e-05, + "loss": 0.0056, + "step": 28500 + }, + { + "epoch": 3.88, + "learning_rate": 1.2330462863293867e-05, + "loss": 0.0056, + "step": 29000 + }, + { + "epoch": 3.94, + "learning_rate": 1.219590958019376e-05, + "loss": 0.0056, + "step": 29500 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9940216536800652, + "eval_f1": 0.0, + "eval_loss": 0.023068198934197426, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 81.8028, + "eval_samples_per_second": 616.153, + "eval_steps_per_second": 4.816, + "step": 29928 + } + ], + "logging_steps": 500, + "max_steps": 74820, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 3.4016429569085824e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-29928/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-29928/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..56f659dc10654a15dedcf717815956383dd4fef5 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-29928/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f713488e20afd5141b39f9aaa67feba7da0a360f752dbaa308013d8d5212f449 +size 4091 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-37410/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-37410/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-37410/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-37410/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-37410/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae586d0c3b5c7a6508a1ef63c7ff583a68711b77 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-37410/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d46bdaf7365344d25ab56a273686b443074ac5df5b3da18af552adfb8bb0159 +size 1418281093 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-37410/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-37410/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..b5ead2a69113488c6d6a776f0e0a3e4125c0ee76 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-37410/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:420b30ad532637403259d1699e64db702f87677d6643105e5e5d9d86ebb4f92e +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-37410/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-37410/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bc2fca1e39dd641207dfab41891dd19312dc5c5a --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-37410/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21ea456c1c79bc9fb04e070c6d6156fad4a9b07018a34e6d964fb53c806593e2 +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-37410/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-37410/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..38ebb6b9c3f84bdc09fa6f8ee000c78b1d5c81fc --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-37410/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dcee0eaa0686cccc9bf89fbdc18b90fe7a8add00f6fc71d0c5923474a451192 +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-37410/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-37410/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3c250a8dee8064f6d350915fdce64548e8efd07b --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-37410/trainer_state.json @@ -0,0 +1,523 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 37410, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07, + "learning_rate": 2e-05, + "loss": 0.1264, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 1.9865446716899895e-05, + "loss": 0.0227, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 1.9730893433799788e-05, + "loss": 0.0203, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 1.959634015069968e-05, + "loss": 0.0197, + "step": 2000 + }, + { + "epoch": 0.33, + "learning_rate": 1.946178686759957e-05, + "loss": 0.0185, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 1.9327233584499463e-05, + "loss": 0.0177, + "step": 3000 + }, + { + "epoch": 0.47, + "learning_rate": 1.9192680301399356e-05, + "loss": 0.0181, + "step": 3500 + }, + { + "epoch": 0.53, + "learning_rate": 1.905812701829925e-05, + "loss": 0.0171, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 1.8923573735199142e-05, + "loss": 0.0174, + "step": 4500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8789020452099035e-05, + "loss": 0.017, + "step": 5000 + }, + { + "epoch": 0.74, + "learning_rate": 1.8654467168998925e-05, + "loss": 0.0168, + "step": 5500 + }, + { + "epoch": 0.8, + "learning_rate": 1.8519913885898818e-05, + "loss": 0.0161, + "step": 6000 + }, + { + "epoch": 0.87, + "learning_rate": 1.838536060279871e-05, + "loss": 0.0156, + "step": 6500 + }, + { + "epoch": 0.94, + "learning_rate": 1.8250807319698604e-05, + "loss": 0.0167, + "step": 7000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9943580499255906, + "eval_f1": 0.0, + "eval_loss": 0.015657523646950722, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.099, + "eval_samples_per_second": 708.913, + "eval_steps_per_second": 5.542, + "step": 7482 + }, + { + "epoch": 1.0, + "learning_rate": 1.8116254036598497e-05, + "loss": 0.0152, + "step": 7500 + }, + { + "epoch": 1.07, + "learning_rate": 1.7981700753498386e-05, + "loss": 0.0119, + "step": 8000 + }, + { + "epoch": 1.14, + "learning_rate": 1.784714747039828e-05, + "loss": 0.0116, + "step": 8500 + }, + { + "epoch": 1.2, + "learning_rate": 1.7712594187298172e-05, + "loss": 0.0116, + "step": 9000 + }, + { + "epoch": 1.27, + "learning_rate": 1.7578040904198065e-05, + "loss": 0.0114, + "step": 9500 + }, + { + "epoch": 1.34, + "learning_rate": 1.7443487621097955e-05, + "loss": 0.0114, + "step": 10000 + }, + { + "epoch": 1.4, + "learning_rate": 1.7308934337997848e-05, + "loss": 0.0114, + "step": 10500 + }, + { + "epoch": 1.47, + "learning_rate": 1.717438105489774e-05, + "loss": 0.0118, + "step": 11000 + }, + { + "epoch": 1.54, + "learning_rate": 1.7039827771797634e-05, + "loss": 0.0119, + "step": 11500 + }, + { + "epoch": 1.6, + "learning_rate": 1.6905274488697524e-05, + "loss": 0.0111, + "step": 12000 + }, + { + "epoch": 1.67, + "learning_rate": 1.6770721205597417e-05, + "loss": 0.0118, + "step": 12500 + }, + { + "epoch": 1.74, + "learning_rate": 1.663616792249731e-05, + "loss": 0.0116, + "step": 13000 + }, + { + "epoch": 1.8, + "learning_rate": 1.6501614639397203e-05, + "loss": 0.0112, + "step": 13500 + }, + { + "epoch": 1.87, + "learning_rate": 1.6367061356297096e-05, + "loss": 0.0114, + "step": 14000 + }, + { + "epoch": 1.94, + "learning_rate": 1.6232508073196985e-05, + "loss": 0.0115, + "step": 14500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9940472490465726, + "eval_f1": 0.0, + "eval_loss": 0.016565397381782532, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 82.6741, + "eval_samples_per_second": 609.659, + "eval_steps_per_second": 4.766, + "step": 14964 + }, + { + "epoch": 2.0, + "learning_rate": 1.6097954790096878e-05, + "loss": 0.0109, + "step": 15000 + }, + { + "epoch": 2.07, + "learning_rate": 1.596340150699677e-05, + "loss": 0.0077, + "step": 15500 + }, + { + "epoch": 2.14, + "learning_rate": 1.5828848223896664e-05, + "loss": 0.0075, + "step": 16000 + }, + { + "epoch": 2.21, + "learning_rate": 1.5694294940796557e-05, + "loss": 0.0076, + "step": 16500 + }, + { + "epoch": 2.27, + "learning_rate": 1.555974165769645e-05, + "loss": 0.0076, + "step": 17000 + }, + { + "epoch": 2.34, + "learning_rate": 1.542518837459634e-05, + "loss": 0.0077, + "step": 17500 + }, + { + "epoch": 2.41, + "learning_rate": 1.5290635091496233e-05, + "loss": 0.0077, + "step": 18000 + }, + { + "epoch": 2.47, + "learning_rate": 1.5156081808396126e-05, + "loss": 0.0076, + "step": 18500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5021528525296019e-05, + "loss": 0.0078, + "step": 19000 + }, + { + "epoch": 2.61, + "learning_rate": 1.4886975242195912e-05, + "loss": 0.008, + "step": 19500 + }, + { + "epoch": 2.67, + "learning_rate": 1.4752421959095805e-05, + "loss": 0.0077, + "step": 20000 + }, + { + "epoch": 2.74, + "learning_rate": 1.4617868675995694e-05, + "loss": 0.0078, + "step": 20500 + }, + { + "epoch": 2.81, + "learning_rate": 1.4483315392895587e-05, + "loss": 0.0081, + "step": 21000 + }, + { + "epoch": 2.87, + "learning_rate": 1.434876210979548e-05, + "loss": 0.0081, + "step": 21500 + }, + { + "epoch": 2.94, + "learning_rate": 1.4214208826695373e-05, + "loss": 0.0078, + "step": 22000 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9940838138558689, + "eval_f1": 0.0, + "eval_loss": 0.020097261294722557, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.581, + "eval_samples_per_second": 704.139, + "eval_steps_per_second": 5.504, + "step": 22446 + }, + { + "epoch": 3.01, + "learning_rate": 1.4079655543595265e-05, + "loss": 0.0074, + "step": 22500 + }, + { + "epoch": 3.07, + "learning_rate": 1.3945102260495158e-05, + "loss": 0.0051, + "step": 23000 + }, + { + "epoch": 3.14, + "learning_rate": 1.3810548977395049e-05, + "loss": 0.0052, + "step": 23500 + }, + { + "epoch": 3.21, + "learning_rate": 1.3675995694294942e-05, + "loss": 0.005, + "step": 24000 + }, + { + "epoch": 3.27, + "learning_rate": 1.3541442411194833e-05, + "loss": 0.0056, + "step": 24500 + }, + { + "epoch": 3.34, + "learning_rate": 1.3406889128094726e-05, + "loss": 0.0049, + "step": 25000 + }, + { + "epoch": 3.41, + "learning_rate": 1.3272335844994619e-05, + "loss": 0.0054, + "step": 25500 + }, + { + "epoch": 3.48, + "learning_rate": 1.3137782561894512e-05, + "loss": 0.0052, + "step": 26000 + }, + { + "epoch": 3.54, + "learning_rate": 1.3003229278794403e-05, + "loss": 0.0055, + "step": 26500 + }, + { + "epoch": 3.61, + "learning_rate": 1.2868675995694295e-05, + "loss": 0.0053, + "step": 27000 + }, + { + "epoch": 3.68, + "learning_rate": 1.2734122712594188e-05, + "loss": 0.0055, + "step": 27500 + }, + { + "epoch": 3.74, + "learning_rate": 1.259956942949408e-05, + "loss": 0.0056, + "step": 28000 + }, + { + "epoch": 3.81, + "learning_rate": 1.2465016146393974e-05, + "loss": 0.0056, + "step": 28500 + }, + { + "epoch": 3.88, + "learning_rate": 1.2330462863293867e-05, + "loss": 0.0056, + "step": 29000 + }, + { + "epoch": 3.94, + "learning_rate": 1.219590958019376e-05, + "loss": 0.0056, + "step": 29500 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9940216536800652, + "eval_f1": 0.0, + "eval_loss": 0.023068198934197426, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 81.8028, + "eval_samples_per_second": 616.153, + "eval_steps_per_second": 4.816, + "step": 29928 + }, + { + "epoch": 4.01, + "learning_rate": 1.206135629709365e-05, + "loss": 0.0053, + "step": 30000 + }, + { + "epoch": 4.08, + "learning_rate": 1.1926803013993542e-05, + "loss": 0.0035, + "step": 30500 + }, + { + "epoch": 4.14, + "learning_rate": 1.1792249730893435e-05, + "loss": 0.0037, + "step": 31000 + }, + { + "epoch": 4.21, + "learning_rate": 1.1657696447793328e-05, + "loss": 0.0039, + "step": 31500 + }, + { + "epoch": 4.28, + "learning_rate": 1.152314316469322e-05, + "loss": 0.0037, + "step": 32000 + }, + { + "epoch": 4.34, + "learning_rate": 1.1388589881593113e-05, + "loss": 0.0038, + "step": 32500 + }, + { + "epoch": 4.41, + "learning_rate": 1.1254036598493004e-05, + "loss": 0.004, + "step": 33000 + }, + { + "epoch": 4.48, + "learning_rate": 1.1119483315392897e-05, + "loss": 0.0039, + "step": 33500 + }, + { + "epoch": 4.54, + "learning_rate": 1.0984930032292788e-05, + "loss": 0.004, + "step": 34000 + }, + { + "epoch": 4.61, + "learning_rate": 1.0850376749192681e-05, + "loss": 0.0041, + "step": 34500 + }, + { + "epoch": 4.68, + "learning_rate": 1.0715823466092574e-05, + "loss": 0.0042, + "step": 35000 + }, + { + "epoch": 4.74, + "learning_rate": 1.0581270182992467e-05, + "loss": 0.004, + "step": 35500 + }, + { + "epoch": 4.81, + "learning_rate": 1.0446716899892357e-05, + "loss": 0.0042, + "step": 36000 + }, + { + "epoch": 4.88, + "learning_rate": 1.031216361679225e-05, + "loss": 0.0041, + "step": 36500 + }, + { + "epoch": 4.95, + "learning_rate": 1.0177610333692143e-05, + "loss": 0.0042, + "step": 37000 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.9940509055275022, + "eval_f1": 0.0, + "eval_loss": 0.025542501360177994, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.0096, + "eval_samples_per_second": 709.806, + "eval_steps_per_second": 5.549, + "step": 37410 + } + ], + "logging_steps": 500, + "max_steps": 74820, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 4.252053696135728e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-37410/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-37410/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..56f659dc10654a15dedcf717815956383dd4fef5 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-37410/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f713488e20afd5141b39f9aaa67feba7da0a360f752dbaa308013d8d5212f449 +size 4091 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-44892/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-44892/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-44892/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-44892/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-44892/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7082468d84cf29ed508ce25c9cf2493ae871fa54 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-44892/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9de640f5776860cd313a47097c975e864bb42ca47fc51730f89698d9d2857cc4 +size 1418281093 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-44892/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-44892/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..09ba9b8c85f85eb93de5d07e7f3043412d914255 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-44892/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bab6353e25df2b251f3b7462ad687d29b3d68929b15b559c28369de6db587918 +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-44892/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-44892/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6a820e15a354daf3ec520984b23e6fe786afca04 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-44892/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2ee5b3486670d33998adce168a823ad85a897e28c706d6b3b4c7d317ac3008b +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-44892/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-44892/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8b6bc6e9e5f35377926f5fa9c0647a5c5f13a1e --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-44892/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04b8e9929c5c026cdb0866b798ad37fddb86cbbceb958e03b72203a2e6c7d4dd +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-44892/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-44892/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a726bb7cd511466985aadac66a0cb1881ca7120c --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-44892/trainer_state.json @@ -0,0 +1,625 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.0, + "eval_steps": 500, + "global_step": 44892, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07, + "learning_rate": 2e-05, + "loss": 0.1264, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 1.9865446716899895e-05, + "loss": 0.0227, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 1.9730893433799788e-05, + "loss": 0.0203, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 1.959634015069968e-05, + "loss": 0.0197, + "step": 2000 + }, + { + "epoch": 0.33, + "learning_rate": 1.946178686759957e-05, + "loss": 0.0185, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 1.9327233584499463e-05, + "loss": 0.0177, + "step": 3000 + }, + { + "epoch": 0.47, + "learning_rate": 1.9192680301399356e-05, + "loss": 0.0181, + "step": 3500 + }, + { + "epoch": 0.53, + "learning_rate": 1.905812701829925e-05, + "loss": 0.0171, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 1.8923573735199142e-05, + "loss": 0.0174, + "step": 4500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8789020452099035e-05, + "loss": 0.017, + "step": 5000 + }, + { + "epoch": 0.74, + "learning_rate": 1.8654467168998925e-05, + "loss": 0.0168, + "step": 5500 + }, + { + "epoch": 0.8, + "learning_rate": 1.8519913885898818e-05, + "loss": 0.0161, + "step": 6000 + }, + { + "epoch": 0.87, + "learning_rate": 1.838536060279871e-05, + "loss": 0.0156, + "step": 6500 + }, + { + "epoch": 0.94, + "learning_rate": 1.8250807319698604e-05, + "loss": 0.0167, + "step": 7000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9943580499255906, + "eval_f1": 0.0, + "eval_loss": 0.015657523646950722, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.099, + "eval_samples_per_second": 708.913, + "eval_steps_per_second": 5.542, + "step": 7482 + }, + { + "epoch": 1.0, + "learning_rate": 1.8116254036598497e-05, + "loss": 0.0152, + "step": 7500 + }, + { + "epoch": 1.07, + "learning_rate": 1.7981700753498386e-05, + "loss": 0.0119, + "step": 8000 + }, + { + "epoch": 1.14, + "learning_rate": 1.784714747039828e-05, + "loss": 0.0116, + "step": 8500 + }, + { + "epoch": 1.2, + "learning_rate": 1.7712594187298172e-05, + "loss": 0.0116, + "step": 9000 + }, + { + "epoch": 1.27, + "learning_rate": 1.7578040904198065e-05, + "loss": 0.0114, + "step": 9500 + }, + { + "epoch": 1.34, + "learning_rate": 1.7443487621097955e-05, + "loss": 0.0114, + "step": 10000 + }, + { + "epoch": 1.4, + "learning_rate": 1.7308934337997848e-05, + "loss": 0.0114, + "step": 10500 + }, + { + "epoch": 1.47, + "learning_rate": 1.717438105489774e-05, + "loss": 0.0118, + "step": 11000 + }, + { + "epoch": 1.54, + "learning_rate": 1.7039827771797634e-05, + "loss": 0.0119, + "step": 11500 + }, + { + "epoch": 1.6, + "learning_rate": 1.6905274488697524e-05, + "loss": 0.0111, + "step": 12000 + }, + { + "epoch": 1.67, + "learning_rate": 1.6770721205597417e-05, + "loss": 0.0118, + "step": 12500 + }, + { + "epoch": 1.74, + "learning_rate": 1.663616792249731e-05, + "loss": 0.0116, + "step": 13000 + }, + { + "epoch": 1.8, + "learning_rate": 1.6501614639397203e-05, + "loss": 0.0112, + "step": 13500 + }, + { + "epoch": 1.87, + "learning_rate": 1.6367061356297096e-05, + "loss": 0.0114, + "step": 14000 + }, + { + "epoch": 1.94, + "learning_rate": 1.6232508073196985e-05, + "loss": 0.0115, + "step": 14500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9940472490465726, + "eval_f1": 0.0, + "eval_loss": 0.016565397381782532, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 82.6741, + "eval_samples_per_second": 609.659, + "eval_steps_per_second": 4.766, + "step": 14964 + }, + { + "epoch": 2.0, + "learning_rate": 1.6097954790096878e-05, + "loss": 0.0109, + "step": 15000 + }, + { + "epoch": 2.07, + "learning_rate": 1.596340150699677e-05, + "loss": 0.0077, + "step": 15500 + }, + { + "epoch": 2.14, + "learning_rate": 1.5828848223896664e-05, + "loss": 0.0075, + "step": 16000 + }, + { + "epoch": 2.21, + "learning_rate": 1.5694294940796557e-05, + "loss": 0.0076, + "step": 16500 + }, + { + "epoch": 2.27, + "learning_rate": 1.555974165769645e-05, + "loss": 0.0076, + "step": 17000 + }, + { + "epoch": 2.34, + "learning_rate": 1.542518837459634e-05, + "loss": 0.0077, + "step": 17500 + }, + { + "epoch": 2.41, + "learning_rate": 1.5290635091496233e-05, + "loss": 0.0077, + "step": 18000 + }, + { + "epoch": 2.47, + "learning_rate": 1.5156081808396126e-05, + "loss": 0.0076, + "step": 18500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5021528525296019e-05, + "loss": 0.0078, + "step": 19000 + }, + { + "epoch": 2.61, + "learning_rate": 1.4886975242195912e-05, + "loss": 0.008, + "step": 19500 + }, + { + "epoch": 2.67, + "learning_rate": 1.4752421959095805e-05, + "loss": 0.0077, + "step": 20000 + }, + { + "epoch": 2.74, + "learning_rate": 1.4617868675995694e-05, + "loss": 0.0078, + "step": 20500 + }, + { + "epoch": 2.81, + "learning_rate": 1.4483315392895587e-05, + "loss": 0.0081, + "step": 21000 + }, + { + "epoch": 2.87, + "learning_rate": 1.434876210979548e-05, + "loss": 0.0081, + "step": 21500 + }, + { + "epoch": 2.94, + "learning_rate": 1.4214208826695373e-05, + "loss": 0.0078, + "step": 22000 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9940838138558689, + "eval_f1": 0.0, + "eval_loss": 0.020097261294722557, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.581, + "eval_samples_per_second": 704.139, + "eval_steps_per_second": 5.504, + "step": 22446 + }, + { + "epoch": 3.01, + "learning_rate": 1.4079655543595265e-05, + "loss": 0.0074, + "step": 22500 + }, + { + "epoch": 3.07, + "learning_rate": 1.3945102260495158e-05, + "loss": 0.0051, + "step": 23000 + }, + { + "epoch": 3.14, + "learning_rate": 1.3810548977395049e-05, + "loss": 0.0052, + "step": 23500 + }, + { + "epoch": 3.21, + "learning_rate": 1.3675995694294942e-05, + "loss": 0.005, + "step": 24000 + }, + { + "epoch": 3.27, + "learning_rate": 1.3541442411194833e-05, + "loss": 0.0056, + "step": 24500 + }, + { + "epoch": 3.34, + "learning_rate": 1.3406889128094726e-05, + "loss": 0.0049, + "step": 25000 + }, + { + "epoch": 3.41, + "learning_rate": 1.3272335844994619e-05, + "loss": 0.0054, + "step": 25500 + }, + { + "epoch": 3.48, + "learning_rate": 1.3137782561894512e-05, + "loss": 0.0052, + "step": 26000 + }, + { + "epoch": 3.54, + "learning_rate": 1.3003229278794403e-05, + "loss": 0.0055, + "step": 26500 + }, + { + "epoch": 3.61, + "learning_rate": 1.2868675995694295e-05, + "loss": 0.0053, + "step": 27000 + }, + { + "epoch": 3.68, + "learning_rate": 1.2734122712594188e-05, + "loss": 0.0055, + "step": 27500 + }, + { + "epoch": 3.74, + "learning_rate": 1.259956942949408e-05, + "loss": 0.0056, + "step": 28000 + }, + { + "epoch": 3.81, + "learning_rate": 1.2465016146393974e-05, + "loss": 0.0056, + "step": 28500 + }, + { + "epoch": 3.88, + "learning_rate": 1.2330462863293867e-05, + "loss": 0.0056, + "step": 29000 + }, + { + "epoch": 3.94, + "learning_rate": 1.219590958019376e-05, + "loss": 0.0056, + "step": 29500 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9940216536800652, + "eval_f1": 0.0, + "eval_loss": 0.023068198934197426, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 81.8028, + "eval_samples_per_second": 616.153, + "eval_steps_per_second": 4.816, + "step": 29928 + }, + { + "epoch": 4.01, + "learning_rate": 1.206135629709365e-05, + "loss": 0.0053, + "step": 30000 + }, + { + "epoch": 4.08, + "learning_rate": 1.1926803013993542e-05, + "loss": 0.0035, + "step": 30500 + }, + { + "epoch": 4.14, + "learning_rate": 1.1792249730893435e-05, + "loss": 0.0037, + "step": 31000 + }, + { + "epoch": 4.21, + "learning_rate": 1.1657696447793328e-05, + "loss": 0.0039, + "step": 31500 + }, + { + "epoch": 4.28, + "learning_rate": 1.152314316469322e-05, + "loss": 0.0037, + "step": 32000 + }, + { + "epoch": 4.34, + "learning_rate": 1.1388589881593113e-05, + "loss": 0.0038, + "step": 32500 + }, + { + "epoch": 4.41, + "learning_rate": 1.1254036598493004e-05, + "loss": 0.004, + "step": 33000 + }, + { + "epoch": 4.48, + "learning_rate": 1.1119483315392897e-05, + "loss": 0.0039, + "step": 33500 + }, + { + "epoch": 4.54, + "learning_rate": 1.0984930032292788e-05, + "loss": 0.004, + "step": 34000 + }, + { + "epoch": 4.61, + "learning_rate": 1.0850376749192681e-05, + "loss": 0.0041, + "step": 34500 + }, + { + "epoch": 4.68, + "learning_rate": 1.0715823466092574e-05, + "loss": 0.0042, + "step": 35000 + }, + { + "epoch": 4.74, + "learning_rate": 1.0581270182992467e-05, + "loss": 0.004, + "step": 35500 + }, + { + "epoch": 4.81, + "learning_rate": 1.0446716899892357e-05, + "loss": 0.0042, + "step": 36000 + }, + { + "epoch": 4.88, + "learning_rate": 1.031216361679225e-05, + "loss": 0.0041, + "step": 36500 + }, + { + "epoch": 4.95, + "learning_rate": 1.0177610333692143e-05, + "loss": 0.0042, + "step": 37000 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.9940509055275022, + "eval_f1": 0.0, + "eval_loss": 0.025542501360177994, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.0096, + "eval_samples_per_second": 709.806, + "eval_steps_per_second": 5.549, + "step": 37410 + }, + { + "epoch": 5.01, + "learning_rate": 1.0043057050592036e-05, + "loss": 0.0038, + "step": 37500 + }, + { + "epoch": 5.08, + "learning_rate": 9.908503767491927e-06, + "loss": 0.0029, + "step": 38000 + }, + { + "epoch": 5.15, + "learning_rate": 9.77395048439182e-06, + "loss": 0.003, + "step": 38500 + }, + { + "epoch": 5.21, + "learning_rate": 9.639397201291713e-06, + "loss": 0.003, + "step": 39000 + }, + { + "epoch": 5.28, + "learning_rate": 9.504843918191604e-06, + "loss": 0.003, + "step": 39500 + }, + { + "epoch": 5.35, + "learning_rate": 9.370290635091497e-06, + "loss": 0.0029, + "step": 40000 + }, + { + "epoch": 5.41, + "learning_rate": 9.23573735199139e-06, + "loss": 0.0031, + "step": 40500 + }, + { + "epoch": 5.48, + "learning_rate": 9.101184068891282e-06, + "loss": 0.0031, + "step": 41000 + }, + { + "epoch": 5.55, + "learning_rate": 8.966630785791175e-06, + "loss": 0.0031, + "step": 41500 + }, + { + "epoch": 5.61, + "learning_rate": 8.832077502691066e-06, + "loss": 0.0032, + "step": 42000 + }, + { + "epoch": 5.68, + "learning_rate": 8.697524219590959e-06, + "loss": 0.003, + "step": 42500 + }, + { + "epoch": 5.75, + "learning_rate": 8.56297093649085e-06, + "loss": 0.0032, + "step": 43000 + }, + { + "epoch": 5.81, + "learning_rate": 8.428417653390743e-06, + "loss": 0.0033, + "step": 43500 + }, + { + "epoch": 5.88, + "learning_rate": 8.293864370290636e-06, + "loss": 0.0032, + "step": 44000 + }, + { + "epoch": 5.95, + "learning_rate": 8.159311087190527e-06, + "loss": 0.0032, + "step": 44500 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9940655314512207, + "eval_f1": 0.0, + "eval_loss": 0.031192485243082047, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 81.6971, + "eval_samples_per_second": 616.95, + "eval_steps_per_second": 4.823, + "step": 44892 + } + ], + "logging_steps": 500, + "max_steps": 74820, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 5.1024644353628736e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-44892/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-44892/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..56f659dc10654a15dedcf717815956383dd4fef5 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-44892/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f713488e20afd5141b39f9aaa67feba7da0a360f752dbaa308013d8d5212f449 +size 4091 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-52374/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-52374/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-52374/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-52374/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-52374/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9616928826767f7a7dcd4a9ac65f95bca65a9384 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-52374/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a2c94b3fa4105f0848e039f53ebc926d5c50bab3ec3ef6ee702b2cac182fc88 +size 1418281093 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-52374/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-52374/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..36b7ca6047dd98f753ea459624218ea9abb26e64 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-52374/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e93ea760e69150887aa70367fc7c023cde354ea5bbdc7677ff27694947e6896b +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-52374/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-52374/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..76c728d1ef1afec8421615601d92566d1203beae --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-52374/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff9f9ba597613c9b6de2929cd52809617964b278efcc8d2d7f05547c143c6c43 +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-52374/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-52374/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..11237e6d609494de06c80c17efded37d2f3818b1 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-52374/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58cdca43612bcf5318fcbf9957a0b062eac9129103ad04c51d2784c587eaceab +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-52374/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-52374/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bcd18aa9202e0f3c348be223bc14fc6067e991a3 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-52374/trainer_state.json @@ -0,0 +1,727 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 52374, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07, + "learning_rate": 2e-05, + "loss": 0.1264, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 1.9865446716899895e-05, + "loss": 0.0227, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 1.9730893433799788e-05, + "loss": 0.0203, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 1.959634015069968e-05, + "loss": 0.0197, + "step": 2000 + }, + { + "epoch": 0.33, + "learning_rate": 1.946178686759957e-05, + "loss": 0.0185, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 1.9327233584499463e-05, + "loss": 0.0177, + "step": 3000 + }, + { + "epoch": 0.47, + "learning_rate": 1.9192680301399356e-05, + "loss": 0.0181, + "step": 3500 + }, + { + "epoch": 0.53, + "learning_rate": 1.905812701829925e-05, + "loss": 0.0171, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 1.8923573735199142e-05, + "loss": 0.0174, + "step": 4500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8789020452099035e-05, + "loss": 0.017, + "step": 5000 + }, + { + "epoch": 0.74, + "learning_rate": 1.8654467168998925e-05, + "loss": 0.0168, + "step": 5500 + }, + { + "epoch": 0.8, + "learning_rate": 1.8519913885898818e-05, + "loss": 0.0161, + "step": 6000 + }, + { + "epoch": 0.87, + "learning_rate": 1.838536060279871e-05, + "loss": 0.0156, + "step": 6500 + }, + { + "epoch": 0.94, + "learning_rate": 1.8250807319698604e-05, + "loss": 0.0167, + "step": 7000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9943580499255906, + "eval_f1": 0.0, + "eval_loss": 0.015657523646950722, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.099, + "eval_samples_per_second": 708.913, + "eval_steps_per_second": 5.542, + "step": 7482 + }, + { + "epoch": 1.0, + "learning_rate": 1.8116254036598497e-05, + "loss": 0.0152, + "step": 7500 + }, + { + "epoch": 1.07, + "learning_rate": 1.7981700753498386e-05, + "loss": 0.0119, + "step": 8000 + }, + { + "epoch": 1.14, + "learning_rate": 1.784714747039828e-05, + "loss": 0.0116, + "step": 8500 + }, + { + "epoch": 1.2, + "learning_rate": 1.7712594187298172e-05, + "loss": 0.0116, + "step": 9000 + }, + { + "epoch": 1.27, + "learning_rate": 1.7578040904198065e-05, + "loss": 0.0114, + "step": 9500 + }, + { + "epoch": 1.34, + "learning_rate": 1.7443487621097955e-05, + "loss": 0.0114, + "step": 10000 + }, + { + "epoch": 1.4, + "learning_rate": 1.7308934337997848e-05, + "loss": 0.0114, + "step": 10500 + }, + { + "epoch": 1.47, + "learning_rate": 1.717438105489774e-05, + "loss": 0.0118, + "step": 11000 + }, + { + "epoch": 1.54, + "learning_rate": 1.7039827771797634e-05, + "loss": 0.0119, + "step": 11500 + }, + { + "epoch": 1.6, + "learning_rate": 1.6905274488697524e-05, + "loss": 0.0111, + "step": 12000 + }, + { + "epoch": 1.67, + "learning_rate": 1.6770721205597417e-05, + "loss": 0.0118, + "step": 12500 + }, + { + "epoch": 1.74, + "learning_rate": 1.663616792249731e-05, + "loss": 0.0116, + "step": 13000 + }, + { + "epoch": 1.8, + "learning_rate": 1.6501614639397203e-05, + "loss": 0.0112, + "step": 13500 + }, + { + "epoch": 1.87, + "learning_rate": 1.6367061356297096e-05, + "loss": 0.0114, + "step": 14000 + }, + { + "epoch": 1.94, + "learning_rate": 1.6232508073196985e-05, + "loss": 0.0115, + "step": 14500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9940472490465726, + "eval_f1": 0.0, + "eval_loss": 0.016565397381782532, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 82.6741, + "eval_samples_per_second": 609.659, + "eval_steps_per_second": 4.766, + "step": 14964 + }, + { + "epoch": 2.0, + "learning_rate": 1.6097954790096878e-05, + "loss": 0.0109, + "step": 15000 + }, + { + "epoch": 2.07, + "learning_rate": 1.596340150699677e-05, + "loss": 0.0077, + "step": 15500 + }, + { + "epoch": 2.14, + "learning_rate": 1.5828848223896664e-05, + "loss": 0.0075, + "step": 16000 + }, + { + "epoch": 2.21, + "learning_rate": 1.5694294940796557e-05, + "loss": 0.0076, + "step": 16500 + }, + { + "epoch": 2.27, + "learning_rate": 1.555974165769645e-05, + "loss": 0.0076, + "step": 17000 + }, + { + "epoch": 2.34, + "learning_rate": 1.542518837459634e-05, + "loss": 0.0077, + "step": 17500 + }, + { + "epoch": 2.41, + "learning_rate": 1.5290635091496233e-05, + "loss": 0.0077, + "step": 18000 + }, + { + "epoch": 2.47, + "learning_rate": 1.5156081808396126e-05, + "loss": 0.0076, + "step": 18500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5021528525296019e-05, + "loss": 0.0078, + "step": 19000 + }, + { + "epoch": 2.61, + "learning_rate": 1.4886975242195912e-05, + "loss": 0.008, + "step": 19500 + }, + { + "epoch": 2.67, + "learning_rate": 1.4752421959095805e-05, + "loss": 0.0077, + "step": 20000 + }, + { + "epoch": 2.74, + "learning_rate": 1.4617868675995694e-05, + "loss": 0.0078, + "step": 20500 + }, + { + "epoch": 2.81, + "learning_rate": 1.4483315392895587e-05, + "loss": 0.0081, + "step": 21000 + }, + { + "epoch": 2.87, + "learning_rate": 1.434876210979548e-05, + "loss": 0.0081, + "step": 21500 + }, + { + "epoch": 2.94, + "learning_rate": 1.4214208826695373e-05, + "loss": 0.0078, + "step": 22000 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9940838138558689, + "eval_f1": 0.0, + "eval_loss": 0.020097261294722557, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.581, + "eval_samples_per_second": 704.139, + "eval_steps_per_second": 5.504, + "step": 22446 + }, + { + "epoch": 3.01, + "learning_rate": 1.4079655543595265e-05, + "loss": 0.0074, + "step": 22500 + }, + { + "epoch": 3.07, + "learning_rate": 1.3945102260495158e-05, + "loss": 0.0051, + "step": 23000 + }, + { + "epoch": 3.14, + "learning_rate": 1.3810548977395049e-05, + "loss": 0.0052, + "step": 23500 + }, + { + "epoch": 3.21, + "learning_rate": 1.3675995694294942e-05, + "loss": 0.005, + "step": 24000 + }, + { + "epoch": 3.27, + "learning_rate": 1.3541442411194833e-05, + "loss": 0.0056, + "step": 24500 + }, + { + "epoch": 3.34, + "learning_rate": 1.3406889128094726e-05, + "loss": 0.0049, + "step": 25000 + }, + { + "epoch": 3.41, + "learning_rate": 1.3272335844994619e-05, + "loss": 0.0054, + "step": 25500 + }, + { + "epoch": 3.48, + "learning_rate": 1.3137782561894512e-05, + "loss": 0.0052, + "step": 26000 + }, + { + "epoch": 3.54, + "learning_rate": 1.3003229278794403e-05, + "loss": 0.0055, + "step": 26500 + }, + { + "epoch": 3.61, + "learning_rate": 1.2868675995694295e-05, + "loss": 0.0053, + "step": 27000 + }, + { + "epoch": 3.68, + "learning_rate": 1.2734122712594188e-05, + "loss": 0.0055, + "step": 27500 + }, + { + "epoch": 3.74, + "learning_rate": 1.259956942949408e-05, + "loss": 0.0056, + "step": 28000 + }, + { + "epoch": 3.81, + "learning_rate": 1.2465016146393974e-05, + "loss": 0.0056, + "step": 28500 + }, + { + "epoch": 3.88, + "learning_rate": 1.2330462863293867e-05, + "loss": 0.0056, + "step": 29000 + }, + { + "epoch": 3.94, + "learning_rate": 1.219590958019376e-05, + "loss": 0.0056, + "step": 29500 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9940216536800652, + "eval_f1": 0.0, + "eval_loss": 0.023068198934197426, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 81.8028, + "eval_samples_per_second": 616.153, + "eval_steps_per_second": 4.816, + "step": 29928 + }, + { + "epoch": 4.01, + "learning_rate": 1.206135629709365e-05, + "loss": 0.0053, + "step": 30000 + }, + { + "epoch": 4.08, + "learning_rate": 1.1926803013993542e-05, + "loss": 0.0035, + "step": 30500 + }, + { + "epoch": 4.14, + "learning_rate": 1.1792249730893435e-05, + "loss": 0.0037, + "step": 31000 + }, + { + "epoch": 4.21, + "learning_rate": 1.1657696447793328e-05, + "loss": 0.0039, + "step": 31500 + }, + { + "epoch": 4.28, + "learning_rate": 1.152314316469322e-05, + "loss": 0.0037, + "step": 32000 + }, + { + "epoch": 4.34, + "learning_rate": 1.1388589881593113e-05, + "loss": 0.0038, + "step": 32500 + }, + { + "epoch": 4.41, + "learning_rate": 1.1254036598493004e-05, + "loss": 0.004, + "step": 33000 + }, + { + "epoch": 4.48, + "learning_rate": 1.1119483315392897e-05, + "loss": 0.0039, + "step": 33500 + }, + { + "epoch": 4.54, + "learning_rate": 1.0984930032292788e-05, + "loss": 0.004, + "step": 34000 + }, + { + "epoch": 4.61, + "learning_rate": 1.0850376749192681e-05, + "loss": 0.0041, + "step": 34500 + }, + { + "epoch": 4.68, + "learning_rate": 1.0715823466092574e-05, + "loss": 0.0042, + "step": 35000 + }, + { + "epoch": 4.74, + "learning_rate": 1.0581270182992467e-05, + "loss": 0.004, + "step": 35500 + }, + { + "epoch": 4.81, + "learning_rate": 1.0446716899892357e-05, + "loss": 0.0042, + "step": 36000 + }, + { + "epoch": 4.88, + "learning_rate": 1.031216361679225e-05, + "loss": 0.0041, + "step": 36500 + }, + { + "epoch": 4.95, + "learning_rate": 1.0177610333692143e-05, + "loss": 0.0042, + "step": 37000 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.9940509055275022, + "eval_f1": 0.0, + "eval_loss": 0.025542501360177994, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.0096, + "eval_samples_per_second": 709.806, + "eval_steps_per_second": 5.549, + "step": 37410 + }, + { + "epoch": 5.01, + "learning_rate": 1.0043057050592036e-05, + "loss": 0.0038, + "step": 37500 + }, + { + "epoch": 5.08, + "learning_rate": 9.908503767491927e-06, + "loss": 0.0029, + "step": 38000 + }, + { + "epoch": 5.15, + "learning_rate": 9.77395048439182e-06, + "loss": 0.003, + "step": 38500 + }, + { + "epoch": 5.21, + "learning_rate": 9.639397201291713e-06, + "loss": 0.003, + "step": 39000 + }, + { + "epoch": 5.28, + "learning_rate": 9.504843918191604e-06, + "loss": 0.003, + "step": 39500 + }, + { + "epoch": 5.35, + "learning_rate": 9.370290635091497e-06, + "loss": 0.0029, + "step": 40000 + }, + { + "epoch": 5.41, + "learning_rate": 9.23573735199139e-06, + "loss": 0.0031, + "step": 40500 + }, + { + "epoch": 5.48, + "learning_rate": 9.101184068891282e-06, + "loss": 0.0031, + "step": 41000 + }, + { + "epoch": 5.55, + "learning_rate": 8.966630785791175e-06, + "loss": 0.0031, + "step": 41500 + }, + { + "epoch": 5.61, + "learning_rate": 8.832077502691066e-06, + "loss": 0.0032, + "step": 42000 + }, + { + "epoch": 5.68, + "learning_rate": 8.697524219590959e-06, + "loss": 0.003, + "step": 42500 + }, + { + "epoch": 5.75, + "learning_rate": 8.56297093649085e-06, + "loss": 0.0032, + "step": 43000 + }, + { + "epoch": 5.81, + "learning_rate": 8.428417653390743e-06, + "loss": 0.0033, + "step": 43500 + }, + { + "epoch": 5.88, + "learning_rate": 8.293864370290636e-06, + "loss": 0.0032, + "step": 44000 + }, + { + "epoch": 5.95, + "learning_rate": 8.159311087190527e-06, + "loss": 0.0032, + "step": 44500 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9940655314512207, + "eval_f1": 0.0, + "eval_loss": 0.031192485243082047, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 81.6971, + "eval_samples_per_second": 616.95, + "eval_steps_per_second": 4.823, + "step": 44892 + }, + { + "epoch": 6.01, + "learning_rate": 8.02475780409042e-06, + "loss": 0.003, + "step": 45000 + }, + { + "epoch": 6.08, + "learning_rate": 7.890204520990313e-06, + "loss": 0.0022, + "step": 45500 + }, + { + "epoch": 6.15, + "learning_rate": 7.755651237890205e-06, + "loss": 0.0024, + "step": 46000 + }, + { + "epoch": 6.21, + "learning_rate": 7.621097954790098e-06, + "loss": 0.0022, + "step": 46500 + }, + { + "epoch": 6.28, + "learning_rate": 7.486544671689991e-06, + "loss": 0.0024, + "step": 47000 + }, + { + "epoch": 6.35, + "learning_rate": 7.351991388589882e-06, + "loss": 0.0025, + "step": 47500 + }, + { + "epoch": 6.42, + "learning_rate": 7.217438105489775e-06, + "loss": 0.0025, + "step": 48000 + }, + { + "epoch": 6.48, + "learning_rate": 7.082884822389667e-06, + "loss": 0.0025, + "step": 48500 + }, + { + "epoch": 6.55, + "learning_rate": 6.948331539289559e-06, + "loss": 0.0026, + "step": 49000 + }, + { + "epoch": 6.62, + "learning_rate": 6.813778256189451e-06, + "loss": 0.0025, + "step": 49500 + }, + { + "epoch": 6.68, + "learning_rate": 6.679224973089344e-06, + "loss": 0.0026, + "step": 50000 + }, + { + "epoch": 6.75, + "learning_rate": 6.544671689989236e-06, + "loss": 0.0025, + "step": 50500 + }, + { + "epoch": 6.82, + "learning_rate": 6.410118406889129e-06, + "loss": 0.0025, + "step": 51000 + }, + { + "epoch": 6.88, + "learning_rate": 6.275565123789022e-06, + "loss": 0.0025, + "step": 51500 + }, + { + "epoch": 6.95, + "learning_rate": 6.141011840688913e-06, + "loss": 0.0025, + "step": 52000 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.9939119592521766, + "eval_f1": 0.0, + "eval_loss": 0.03196028992533684, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.5312, + "eval_samples_per_second": 704.63, + "eval_steps_per_second": 5.508, + "step": 52374 + } + ], + "logging_steps": 500, + "max_steps": 74820, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 5.95287517459002e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-52374/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-52374/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..56f659dc10654a15dedcf717815956383dd4fef5 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-52374/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f713488e20afd5141b39f9aaa67feba7da0a360f752dbaa308013d8d5212f449 +size 4091 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-59856/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-59856/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-59856/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-59856/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-59856/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..967545a26f7636cd9792d104fa6656c42d940ede --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-59856/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d99c525306a2f5415aced53b4f26a4f948a39d19ab91beee79f05a3c0d2dc91 +size 1418281093 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-59856/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-59856/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e919c92688bd016c3e3d34b6fd8dc0ddf91285ca --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-59856/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4584cb25d54c7bc100b750d5185dab318806f87dc469c72235b959bd7ea4852c +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-59856/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-59856/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9d8549a42235aa1fecfa8ee8a9f73f4e3fcb8bd9 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-59856/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d630713282a7e689b0e08e8ab7654d39f94f47b8f456bac345c21dba19706091 +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-59856/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-59856/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dbe6871c21d025b139c73307681661ec9c501d60 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-59856/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67fdbe2ad084cae31bd619a47ba1b70ba128ff67dd25af4b47b1bd149313fe24 +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-59856/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-59856/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f88f4c7e10b16689faa5d961efd80d7a76e2e014 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-59856/trainer_state.json @@ -0,0 +1,829 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 8.0, + "eval_steps": 500, + "global_step": 59856, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07, + "learning_rate": 2e-05, + "loss": 0.1264, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 1.9865446716899895e-05, + "loss": 0.0227, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 1.9730893433799788e-05, + "loss": 0.0203, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 1.959634015069968e-05, + "loss": 0.0197, + "step": 2000 + }, + { + "epoch": 0.33, + "learning_rate": 1.946178686759957e-05, + "loss": 0.0185, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 1.9327233584499463e-05, + "loss": 0.0177, + "step": 3000 + }, + { + "epoch": 0.47, + "learning_rate": 1.9192680301399356e-05, + "loss": 0.0181, + "step": 3500 + }, + { + "epoch": 0.53, + "learning_rate": 1.905812701829925e-05, + "loss": 0.0171, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 1.8923573735199142e-05, + "loss": 0.0174, + "step": 4500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8789020452099035e-05, + "loss": 0.017, + "step": 5000 + }, + { + "epoch": 0.74, + "learning_rate": 1.8654467168998925e-05, + "loss": 0.0168, + "step": 5500 + }, + { + "epoch": 0.8, + "learning_rate": 1.8519913885898818e-05, + "loss": 0.0161, + "step": 6000 + }, + { + "epoch": 0.87, + "learning_rate": 1.838536060279871e-05, + "loss": 0.0156, + "step": 6500 + }, + { + "epoch": 0.94, + "learning_rate": 1.8250807319698604e-05, + "loss": 0.0167, + "step": 7000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9943580499255906, + "eval_f1": 0.0, + "eval_loss": 0.015657523646950722, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.099, + "eval_samples_per_second": 708.913, + "eval_steps_per_second": 5.542, + "step": 7482 + }, + { + "epoch": 1.0, + "learning_rate": 1.8116254036598497e-05, + "loss": 0.0152, + "step": 7500 + }, + { + "epoch": 1.07, + "learning_rate": 1.7981700753498386e-05, + "loss": 0.0119, + "step": 8000 + }, + { + "epoch": 1.14, + "learning_rate": 1.784714747039828e-05, + "loss": 0.0116, + "step": 8500 + }, + { + "epoch": 1.2, + "learning_rate": 1.7712594187298172e-05, + "loss": 0.0116, + "step": 9000 + }, + { + "epoch": 1.27, + "learning_rate": 1.7578040904198065e-05, + "loss": 0.0114, + "step": 9500 + }, + { + "epoch": 1.34, + "learning_rate": 1.7443487621097955e-05, + "loss": 0.0114, + "step": 10000 + }, + { + "epoch": 1.4, + "learning_rate": 1.7308934337997848e-05, + "loss": 0.0114, + "step": 10500 + }, + { + "epoch": 1.47, + "learning_rate": 1.717438105489774e-05, + "loss": 0.0118, + "step": 11000 + }, + { + "epoch": 1.54, + "learning_rate": 1.7039827771797634e-05, + "loss": 0.0119, + "step": 11500 + }, + { + "epoch": 1.6, + "learning_rate": 1.6905274488697524e-05, + "loss": 0.0111, + "step": 12000 + }, + { + "epoch": 1.67, + "learning_rate": 1.6770721205597417e-05, + "loss": 0.0118, + "step": 12500 + }, + { + "epoch": 1.74, + "learning_rate": 1.663616792249731e-05, + "loss": 0.0116, + "step": 13000 + }, + { + "epoch": 1.8, + "learning_rate": 1.6501614639397203e-05, + "loss": 0.0112, + "step": 13500 + }, + { + "epoch": 1.87, + "learning_rate": 1.6367061356297096e-05, + "loss": 0.0114, + "step": 14000 + }, + { + "epoch": 1.94, + "learning_rate": 1.6232508073196985e-05, + "loss": 0.0115, + "step": 14500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9940472490465726, + "eval_f1": 0.0, + "eval_loss": 0.016565397381782532, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 82.6741, + "eval_samples_per_second": 609.659, + "eval_steps_per_second": 4.766, + "step": 14964 + }, + { + "epoch": 2.0, + "learning_rate": 1.6097954790096878e-05, + "loss": 0.0109, + "step": 15000 + }, + { + "epoch": 2.07, + "learning_rate": 1.596340150699677e-05, + "loss": 0.0077, + "step": 15500 + }, + { + "epoch": 2.14, + "learning_rate": 1.5828848223896664e-05, + "loss": 0.0075, + "step": 16000 + }, + { + "epoch": 2.21, + "learning_rate": 1.5694294940796557e-05, + "loss": 0.0076, + "step": 16500 + }, + { + "epoch": 2.27, + "learning_rate": 1.555974165769645e-05, + "loss": 0.0076, + "step": 17000 + }, + { + "epoch": 2.34, + "learning_rate": 1.542518837459634e-05, + "loss": 0.0077, + "step": 17500 + }, + { + "epoch": 2.41, + "learning_rate": 1.5290635091496233e-05, + "loss": 0.0077, + "step": 18000 + }, + { + "epoch": 2.47, + "learning_rate": 1.5156081808396126e-05, + "loss": 0.0076, + "step": 18500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5021528525296019e-05, + "loss": 0.0078, + "step": 19000 + }, + { + "epoch": 2.61, + "learning_rate": 1.4886975242195912e-05, + "loss": 0.008, + "step": 19500 + }, + { + "epoch": 2.67, + "learning_rate": 1.4752421959095805e-05, + "loss": 0.0077, + "step": 20000 + }, + { + "epoch": 2.74, + "learning_rate": 1.4617868675995694e-05, + "loss": 0.0078, + "step": 20500 + }, + { + "epoch": 2.81, + "learning_rate": 1.4483315392895587e-05, + "loss": 0.0081, + "step": 21000 + }, + { + "epoch": 2.87, + "learning_rate": 1.434876210979548e-05, + "loss": 0.0081, + "step": 21500 + }, + { + "epoch": 2.94, + "learning_rate": 1.4214208826695373e-05, + "loss": 0.0078, + "step": 22000 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9940838138558689, + "eval_f1": 0.0, + "eval_loss": 0.020097261294722557, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.581, + "eval_samples_per_second": 704.139, + "eval_steps_per_second": 5.504, + "step": 22446 + }, + { + "epoch": 3.01, + "learning_rate": 1.4079655543595265e-05, + "loss": 0.0074, + "step": 22500 + }, + { + "epoch": 3.07, + "learning_rate": 1.3945102260495158e-05, + "loss": 0.0051, + "step": 23000 + }, + { + "epoch": 3.14, + "learning_rate": 1.3810548977395049e-05, + "loss": 0.0052, + "step": 23500 + }, + { + "epoch": 3.21, + "learning_rate": 1.3675995694294942e-05, + "loss": 0.005, + "step": 24000 + }, + { + "epoch": 3.27, + "learning_rate": 1.3541442411194833e-05, + "loss": 0.0056, + "step": 24500 + }, + { + "epoch": 3.34, + "learning_rate": 1.3406889128094726e-05, + "loss": 0.0049, + "step": 25000 + }, + { + "epoch": 3.41, + "learning_rate": 1.3272335844994619e-05, + "loss": 0.0054, + "step": 25500 + }, + { + "epoch": 3.48, + "learning_rate": 1.3137782561894512e-05, + "loss": 0.0052, + "step": 26000 + }, + { + "epoch": 3.54, + "learning_rate": 1.3003229278794403e-05, + "loss": 0.0055, + "step": 26500 + }, + { + "epoch": 3.61, + "learning_rate": 1.2868675995694295e-05, + "loss": 0.0053, + "step": 27000 + }, + { + "epoch": 3.68, + "learning_rate": 1.2734122712594188e-05, + "loss": 0.0055, + "step": 27500 + }, + { + "epoch": 3.74, + "learning_rate": 1.259956942949408e-05, + "loss": 0.0056, + "step": 28000 + }, + { + "epoch": 3.81, + "learning_rate": 1.2465016146393974e-05, + "loss": 0.0056, + "step": 28500 + }, + { + "epoch": 3.88, + "learning_rate": 1.2330462863293867e-05, + "loss": 0.0056, + "step": 29000 + }, + { + "epoch": 3.94, + "learning_rate": 1.219590958019376e-05, + "loss": 0.0056, + "step": 29500 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9940216536800652, + "eval_f1": 0.0, + "eval_loss": 0.023068198934197426, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 81.8028, + "eval_samples_per_second": 616.153, + "eval_steps_per_second": 4.816, + "step": 29928 + }, + { + "epoch": 4.01, + "learning_rate": 1.206135629709365e-05, + "loss": 0.0053, + "step": 30000 + }, + { + "epoch": 4.08, + "learning_rate": 1.1926803013993542e-05, + "loss": 0.0035, + "step": 30500 + }, + { + "epoch": 4.14, + "learning_rate": 1.1792249730893435e-05, + "loss": 0.0037, + "step": 31000 + }, + { + "epoch": 4.21, + "learning_rate": 1.1657696447793328e-05, + "loss": 0.0039, + "step": 31500 + }, + { + "epoch": 4.28, + "learning_rate": 1.152314316469322e-05, + "loss": 0.0037, + "step": 32000 + }, + { + "epoch": 4.34, + "learning_rate": 1.1388589881593113e-05, + "loss": 0.0038, + "step": 32500 + }, + { + "epoch": 4.41, + "learning_rate": 1.1254036598493004e-05, + "loss": 0.004, + "step": 33000 + }, + { + "epoch": 4.48, + "learning_rate": 1.1119483315392897e-05, + "loss": 0.0039, + "step": 33500 + }, + { + "epoch": 4.54, + "learning_rate": 1.0984930032292788e-05, + "loss": 0.004, + "step": 34000 + }, + { + "epoch": 4.61, + "learning_rate": 1.0850376749192681e-05, + "loss": 0.0041, + "step": 34500 + }, + { + "epoch": 4.68, + "learning_rate": 1.0715823466092574e-05, + "loss": 0.0042, + "step": 35000 + }, + { + "epoch": 4.74, + "learning_rate": 1.0581270182992467e-05, + "loss": 0.004, + "step": 35500 + }, + { + "epoch": 4.81, + "learning_rate": 1.0446716899892357e-05, + "loss": 0.0042, + "step": 36000 + }, + { + "epoch": 4.88, + "learning_rate": 1.031216361679225e-05, + "loss": 0.0041, + "step": 36500 + }, + { + "epoch": 4.95, + "learning_rate": 1.0177610333692143e-05, + "loss": 0.0042, + "step": 37000 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.9940509055275022, + "eval_f1": 0.0, + "eval_loss": 0.025542501360177994, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.0096, + "eval_samples_per_second": 709.806, + "eval_steps_per_second": 5.549, + "step": 37410 + }, + { + "epoch": 5.01, + "learning_rate": 1.0043057050592036e-05, + "loss": 0.0038, + "step": 37500 + }, + { + "epoch": 5.08, + "learning_rate": 9.908503767491927e-06, + "loss": 0.0029, + "step": 38000 + }, + { + "epoch": 5.15, + "learning_rate": 9.77395048439182e-06, + "loss": 0.003, + "step": 38500 + }, + { + "epoch": 5.21, + "learning_rate": 9.639397201291713e-06, + "loss": 0.003, + "step": 39000 + }, + { + "epoch": 5.28, + "learning_rate": 9.504843918191604e-06, + "loss": 0.003, + "step": 39500 + }, + { + "epoch": 5.35, + "learning_rate": 9.370290635091497e-06, + "loss": 0.0029, + "step": 40000 + }, + { + "epoch": 5.41, + "learning_rate": 9.23573735199139e-06, + "loss": 0.0031, + "step": 40500 + }, + { + "epoch": 5.48, + "learning_rate": 9.101184068891282e-06, + "loss": 0.0031, + "step": 41000 + }, + { + "epoch": 5.55, + "learning_rate": 8.966630785791175e-06, + "loss": 0.0031, + "step": 41500 + }, + { + "epoch": 5.61, + "learning_rate": 8.832077502691066e-06, + "loss": 0.0032, + "step": 42000 + }, + { + "epoch": 5.68, + "learning_rate": 8.697524219590959e-06, + "loss": 0.003, + "step": 42500 + }, + { + "epoch": 5.75, + "learning_rate": 8.56297093649085e-06, + "loss": 0.0032, + "step": 43000 + }, + { + "epoch": 5.81, + "learning_rate": 8.428417653390743e-06, + "loss": 0.0033, + "step": 43500 + }, + { + "epoch": 5.88, + "learning_rate": 8.293864370290636e-06, + "loss": 0.0032, + "step": 44000 + }, + { + "epoch": 5.95, + "learning_rate": 8.159311087190527e-06, + "loss": 0.0032, + "step": 44500 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9940655314512207, + "eval_f1": 0.0, + "eval_loss": 0.031192485243082047, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 81.6971, + "eval_samples_per_second": 616.95, + "eval_steps_per_second": 4.823, + "step": 44892 + }, + { + "epoch": 6.01, + "learning_rate": 8.02475780409042e-06, + "loss": 0.003, + "step": 45000 + }, + { + "epoch": 6.08, + "learning_rate": 7.890204520990313e-06, + "loss": 0.0022, + "step": 45500 + }, + { + "epoch": 6.15, + "learning_rate": 7.755651237890205e-06, + "loss": 0.0024, + "step": 46000 + }, + { + "epoch": 6.21, + "learning_rate": 7.621097954790098e-06, + "loss": 0.0022, + "step": 46500 + }, + { + "epoch": 6.28, + "learning_rate": 7.486544671689991e-06, + "loss": 0.0024, + "step": 47000 + }, + { + "epoch": 6.35, + "learning_rate": 7.351991388589882e-06, + "loss": 0.0025, + "step": 47500 + }, + { + "epoch": 6.42, + "learning_rate": 7.217438105489775e-06, + "loss": 0.0025, + "step": 48000 + }, + { + "epoch": 6.48, + "learning_rate": 7.082884822389667e-06, + "loss": 0.0025, + "step": 48500 + }, + { + "epoch": 6.55, + "learning_rate": 6.948331539289559e-06, + "loss": 0.0026, + "step": 49000 + }, + { + "epoch": 6.62, + "learning_rate": 6.813778256189451e-06, + "loss": 0.0025, + "step": 49500 + }, + { + "epoch": 6.68, + "learning_rate": 6.679224973089344e-06, + "loss": 0.0026, + "step": 50000 + }, + { + "epoch": 6.75, + "learning_rate": 6.544671689989236e-06, + "loss": 0.0025, + "step": 50500 + }, + { + "epoch": 6.82, + "learning_rate": 6.410118406889129e-06, + "loss": 0.0025, + "step": 51000 + }, + { + "epoch": 6.88, + "learning_rate": 6.275565123789022e-06, + "loss": 0.0025, + "step": 51500 + }, + { + "epoch": 6.95, + "learning_rate": 6.141011840688913e-06, + "loss": 0.0025, + "step": 52000 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.9939119592521766, + "eval_f1": 0.0, + "eval_loss": 0.03196028992533684, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.5312, + "eval_samples_per_second": 704.63, + "eval_steps_per_second": 5.508, + "step": 52374 + }, + { + "epoch": 7.02, + "learning_rate": 6.006458557588806e-06, + "loss": 0.0023, + "step": 52500 + }, + { + "epoch": 7.08, + "learning_rate": 5.871905274488698e-06, + "loss": 0.002, + "step": 53000 + }, + { + "epoch": 7.15, + "learning_rate": 5.73735199138859e-06, + "loss": 0.0019, + "step": 53500 + }, + { + "epoch": 7.22, + "learning_rate": 5.602798708288482e-06, + "loss": 0.002, + "step": 54000 + }, + { + "epoch": 7.28, + "learning_rate": 5.468245425188375e-06, + "loss": 0.002, + "step": 54500 + }, + { + "epoch": 7.35, + "learning_rate": 5.333692142088267e-06, + "loss": 0.0019, + "step": 55000 + }, + { + "epoch": 7.42, + "learning_rate": 5.19913885898816e-06, + "loss": 0.002, + "step": 55500 + }, + { + "epoch": 7.48, + "learning_rate": 5.064585575888053e-06, + "loss": 0.0018, + "step": 56000 + }, + { + "epoch": 7.55, + "learning_rate": 4.930032292787945e-06, + "loss": 0.0022, + "step": 56500 + }, + { + "epoch": 7.62, + "learning_rate": 4.795479009687837e-06, + "loss": 0.002, + "step": 57000 + }, + { + "epoch": 7.69, + "learning_rate": 4.660925726587729e-06, + "loss": 0.002, + "step": 57500 + }, + { + "epoch": 7.75, + "learning_rate": 4.526372443487621e-06, + "loss": 0.0021, + "step": 58000 + }, + { + "epoch": 7.82, + "learning_rate": 4.391819160387513e-06, + "loss": 0.0021, + "step": 58500 + }, + { + "epoch": 7.89, + "learning_rate": 4.2572658772874056e-06, + "loss": 0.0021, + "step": 59000 + }, + { + "epoch": 7.95, + "learning_rate": 4.1227125941872986e-06, + "loss": 0.0023, + "step": 59500 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9939704629470505, + "eval_f1": 0.0, + "eval_loss": 0.03371906280517578, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 81.4913, + "eval_samples_per_second": 618.508, + "eval_steps_per_second": 4.835, + "step": 59856 + } + ], + "logging_steps": 500, + "max_steps": 74820, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 6.803285913817166e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-59856/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-59856/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..56f659dc10654a15dedcf717815956383dd4fef5 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-59856/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f713488e20afd5141b39f9aaa67feba7da0a360f752dbaa308013d8d5212f449 +size 4091 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-67338/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-67338/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-67338/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-67338/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-67338/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..87a657aecb48f10af6648aaa303997aae15247e0 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-67338/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c60368198d3305b282dba7f5f63a75903466d8d1772e5d80cb5993ca6175c322 +size 1418281093 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-67338/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-67338/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..322f812a746468a373a1e792c634f27338070c72 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-67338/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ac7ca30357071f61d7c97766d213348cca9ec260183f5c5a6a989fd113832c1 +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-67338/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-67338/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..510d704138bdbd221cb2a6a38b6e20cbc15a2b2a --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-67338/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:726c213864c69585ab039740132fe5109f5fd7eb0a333991d8b4d428600251ae +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-67338/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-67338/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0d40091971afa771f4b0483ece4cd9b998497c8 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-67338/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a25b477c2e1b5ff3ae52a94fc0fcd7f416cb8c0476386b2ccd1b446e460332b +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-67338/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-67338/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c0854e588547159432057dba5306be8b64fea656 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-67338/trainer_state.json @@ -0,0 +1,931 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.0, + "eval_steps": 500, + "global_step": 67338, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07, + "learning_rate": 2e-05, + "loss": 0.1264, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 1.9865446716899895e-05, + "loss": 0.0227, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 1.9730893433799788e-05, + "loss": 0.0203, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 1.959634015069968e-05, + "loss": 0.0197, + "step": 2000 + }, + { + "epoch": 0.33, + "learning_rate": 1.946178686759957e-05, + "loss": 0.0185, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 1.9327233584499463e-05, + "loss": 0.0177, + "step": 3000 + }, + { + "epoch": 0.47, + "learning_rate": 1.9192680301399356e-05, + "loss": 0.0181, + "step": 3500 + }, + { + "epoch": 0.53, + "learning_rate": 1.905812701829925e-05, + "loss": 0.0171, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 1.8923573735199142e-05, + "loss": 0.0174, + "step": 4500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8789020452099035e-05, + "loss": 0.017, + "step": 5000 + }, + { + "epoch": 0.74, + "learning_rate": 1.8654467168998925e-05, + "loss": 0.0168, + "step": 5500 + }, + { + "epoch": 0.8, + "learning_rate": 1.8519913885898818e-05, + "loss": 0.0161, + "step": 6000 + }, + { + "epoch": 0.87, + "learning_rate": 1.838536060279871e-05, + "loss": 0.0156, + "step": 6500 + }, + { + "epoch": 0.94, + "learning_rate": 1.8250807319698604e-05, + "loss": 0.0167, + "step": 7000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9943580499255906, + "eval_f1": 0.0, + "eval_loss": 0.015657523646950722, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.099, + "eval_samples_per_second": 708.913, + "eval_steps_per_second": 5.542, + "step": 7482 + }, + { + "epoch": 1.0, + "learning_rate": 1.8116254036598497e-05, + "loss": 0.0152, + "step": 7500 + }, + { + "epoch": 1.07, + "learning_rate": 1.7981700753498386e-05, + "loss": 0.0119, + "step": 8000 + }, + { + "epoch": 1.14, + "learning_rate": 1.784714747039828e-05, + "loss": 0.0116, + "step": 8500 + }, + { + "epoch": 1.2, + "learning_rate": 1.7712594187298172e-05, + "loss": 0.0116, + "step": 9000 + }, + { + "epoch": 1.27, + "learning_rate": 1.7578040904198065e-05, + "loss": 0.0114, + "step": 9500 + }, + { + "epoch": 1.34, + "learning_rate": 1.7443487621097955e-05, + "loss": 0.0114, + "step": 10000 + }, + { + "epoch": 1.4, + "learning_rate": 1.7308934337997848e-05, + "loss": 0.0114, + "step": 10500 + }, + { + "epoch": 1.47, + "learning_rate": 1.717438105489774e-05, + "loss": 0.0118, + "step": 11000 + }, + { + "epoch": 1.54, + "learning_rate": 1.7039827771797634e-05, + "loss": 0.0119, + "step": 11500 + }, + { + "epoch": 1.6, + "learning_rate": 1.6905274488697524e-05, + "loss": 0.0111, + "step": 12000 + }, + { + "epoch": 1.67, + "learning_rate": 1.6770721205597417e-05, + "loss": 0.0118, + "step": 12500 + }, + { + "epoch": 1.74, + "learning_rate": 1.663616792249731e-05, + "loss": 0.0116, + "step": 13000 + }, + { + "epoch": 1.8, + "learning_rate": 1.6501614639397203e-05, + "loss": 0.0112, + "step": 13500 + }, + { + "epoch": 1.87, + "learning_rate": 1.6367061356297096e-05, + "loss": 0.0114, + "step": 14000 + }, + { + "epoch": 1.94, + "learning_rate": 1.6232508073196985e-05, + "loss": 0.0115, + "step": 14500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9940472490465726, + "eval_f1": 0.0, + "eval_loss": 0.016565397381782532, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 82.6741, + "eval_samples_per_second": 609.659, + "eval_steps_per_second": 4.766, + "step": 14964 + }, + { + "epoch": 2.0, + "learning_rate": 1.6097954790096878e-05, + "loss": 0.0109, + "step": 15000 + }, + { + "epoch": 2.07, + "learning_rate": 1.596340150699677e-05, + "loss": 0.0077, + "step": 15500 + }, + { + "epoch": 2.14, + "learning_rate": 1.5828848223896664e-05, + "loss": 0.0075, + "step": 16000 + }, + { + "epoch": 2.21, + "learning_rate": 1.5694294940796557e-05, + "loss": 0.0076, + "step": 16500 + }, + { + "epoch": 2.27, + "learning_rate": 1.555974165769645e-05, + "loss": 0.0076, + "step": 17000 + }, + { + "epoch": 2.34, + "learning_rate": 1.542518837459634e-05, + "loss": 0.0077, + "step": 17500 + }, + { + "epoch": 2.41, + "learning_rate": 1.5290635091496233e-05, + "loss": 0.0077, + "step": 18000 + }, + { + "epoch": 2.47, + "learning_rate": 1.5156081808396126e-05, + "loss": 0.0076, + "step": 18500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5021528525296019e-05, + "loss": 0.0078, + "step": 19000 + }, + { + "epoch": 2.61, + "learning_rate": 1.4886975242195912e-05, + "loss": 0.008, + "step": 19500 + }, + { + "epoch": 2.67, + "learning_rate": 1.4752421959095805e-05, + "loss": 0.0077, + "step": 20000 + }, + { + "epoch": 2.74, + "learning_rate": 1.4617868675995694e-05, + "loss": 0.0078, + "step": 20500 + }, + { + "epoch": 2.81, + "learning_rate": 1.4483315392895587e-05, + "loss": 0.0081, + "step": 21000 + }, + { + "epoch": 2.87, + "learning_rate": 1.434876210979548e-05, + "loss": 0.0081, + "step": 21500 + }, + { + "epoch": 2.94, + "learning_rate": 1.4214208826695373e-05, + "loss": 0.0078, + "step": 22000 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9940838138558689, + "eval_f1": 0.0, + "eval_loss": 0.020097261294722557, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.581, + "eval_samples_per_second": 704.139, + "eval_steps_per_second": 5.504, + "step": 22446 + }, + { + "epoch": 3.01, + "learning_rate": 1.4079655543595265e-05, + "loss": 0.0074, + "step": 22500 + }, + { + "epoch": 3.07, + "learning_rate": 1.3945102260495158e-05, + "loss": 0.0051, + "step": 23000 + }, + { + "epoch": 3.14, + "learning_rate": 1.3810548977395049e-05, + "loss": 0.0052, + "step": 23500 + }, + { + "epoch": 3.21, + "learning_rate": 1.3675995694294942e-05, + "loss": 0.005, + "step": 24000 + }, + { + "epoch": 3.27, + "learning_rate": 1.3541442411194833e-05, + "loss": 0.0056, + "step": 24500 + }, + { + "epoch": 3.34, + "learning_rate": 1.3406889128094726e-05, + "loss": 0.0049, + "step": 25000 + }, + { + "epoch": 3.41, + "learning_rate": 1.3272335844994619e-05, + "loss": 0.0054, + "step": 25500 + }, + { + "epoch": 3.48, + "learning_rate": 1.3137782561894512e-05, + "loss": 0.0052, + "step": 26000 + }, + { + "epoch": 3.54, + "learning_rate": 1.3003229278794403e-05, + "loss": 0.0055, + "step": 26500 + }, + { + "epoch": 3.61, + "learning_rate": 1.2868675995694295e-05, + "loss": 0.0053, + "step": 27000 + }, + { + "epoch": 3.68, + "learning_rate": 1.2734122712594188e-05, + "loss": 0.0055, + "step": 27500 + }, + { + "epoch": 3.74, + "learning_rate": 1.259956942949408e-05, + "loss": 0.0056, + "step": 28000 + }, + { + "epoch": 3.81, + "learning_rate": 1.2465016146393974e-05, + "loss": 0.0056, + "step": 28500 + }, + { + "epoch": 3.88, + "learning_rate": 1.2330462863293867e-05, + "loss": 0.0056, + "step": 29000 + }, + { + "epoch": 3.94, + "learning_rate": 1.219590958019376e-05, + "loss": 0.0056, + "step": 29500 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9940216536800652, + "eval_f1": 0.0, + "eval_loss": 0.023068198934197426, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 81.8028, + "eval_samples_per_second": 616.153, + "eval_steps_per_second": 4.816, + "step": 29928 + }, + { + "epoch": 4.01, + "learning_rate": 1.206135629709365e-05, + "loss": 0.0053, + "step": 30000 + }, + { + "epoch": 4.08, + "learning_rate": 1.1926803013993542e-05, + "loss": 0.0035, + "step": 30500 + }, + { + "epoch": 4.14, + "learning_rate": 1.1792249730893435e-05, + "loss": 0.0037, + "step": 31000 + }, + { + "epoch": 4.21, + "learning_rate": 1.1657696447793328e-05, + "loss": 0.0039, + "step": 31500 + }, + { + "epoch": 4.28, + "learning_rate": 1.152314316469322e-05, + "loss": 0.0037, + "step": 32000 + }, + { + "epoch": 4.34, + "learning_rate": 1.1388589881593113e-05, + "loss": 0.0038, + "step": 32500 + }, + { + "epoch": 4.41, + "learning_rate": 1.1254036598493004e-05, + "loss": 0.004, + "step": 33000 + }, + { + "epoch": 4.48, + "learning_rate": 1.1119483315392897e-05, + "loss": 0.0039, + "step": 33500 + }, + { + "epoch": 4.54, + "learning_rate": 1.0984930032292788e-05, + "loss": 0.004, + "step": 34000 + }, + { + "epoch": 4.61, + "learning_rate": 1.0850376749192681e-05, + "loss": 0.0041, + "step": 34500 + }, + { + "epoch": 4.68, + "learning_rate": 1.0715823466092574e-05, + "loss": 0.0042, + "step": 35000 + }, + { + "epoch": 4.74, + "learning_rate": 1.0581270182992467e-05, + "loss": 0.004, + "step": 35500 + }, + { + "epoch": 4.81, + "learning_rate": 1.0446716899892357e-05, + "loss": 0.0042, + "step": 36000 + }, + { + "epoch": 4.88, + "learning_rate": 1.031216361679225e-05, + "loss": 0.0041, + "step": 36500 + }, + { + "epoch": 4.95, + "learning_rate": 1.0177610333692143e-05, + "loss": 0.0042, + "step": 37000 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.9940509055275022, + "eval_f1": 0.0, + "eval_loss": 0.025542501360177994, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.0096, + "eval_samples_per_second": 709.806, + "eval_steps_per_second": 5.549, + "step": 37410 + }, + { + "epoch": 5.01, + "learning_rate": 1.0043057050592036e-05, + "loss": 0.0038, + "step": 37500 + }, + { + "epoch": 5.08, + "learning_rate": 9.908503767491927e-06, + "loss": 0.0029, + "step": 38000 + }, + { + "epoch": 5.15, + "learning_rate": 9.77395048439182e-06, + "loss": 0.003, + "step": 38500 + }, + { + "epoch": 5.21, + "learning_rate": 9.639397201291713e-06, + "loss": 0.003, + "step": 39000 + }, + { + "epoch": 5.28, + "learning_rate": 9.504843918191604e-06, + "loss": 0.003, + "step": 39500 + }, + { + "epoch": 5.35, + "learning_rate": 9.370290635091497e-06, + "loss": 0.0029, + "step": 40000 + }, + { + "epoch": 5.41, + "learning_rate": 9.23573735199139e-06, + "loss": 0.0031, + "step": 40500 + }, + { + "epoch": 5.48, + "learning_rate": 9.101184068891282e-06, + "loss": 0.0031, + "step": 41000 + }, + { + "epoch": 5.55, + "learning_rate": 8.966630785791175e-06, + "loss": 0.0031, + "step": 41500 + }, + { + "epoch": 5.61, + "learning_rate": 8.832077502691066e-06, + "loss": 0.0032, + "step": 42000 + }, + { + "epoch": 5.68, + "learning_rate": 8.697524219590959e-06, + "loss": 0.003, + "step": 42500 + }, + { + "epoch": 5.75, + "learning_rate": 8.56297093649085e-06, + "loss": 0.0032, + "step": 43000 + }, + { + "epoch": 5.81, + "learning_rate": 8.428417653390743e-06, + "loss": 0.0033, + "step": 43500 + }, + { + "epoch": 5.88, + "learning_rate": 8.293864370290636e-06, + "loss": 0.0032, + "step": 44000 + }, + { + "epoch": 5.95, + "learning_rate": 8.159311087190527e-06, + "loss": 0.0032, + "step": 44500 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9940655314512207, + "eval_f1": 0.0, + "eval_loss": 0.031192485243082047, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 81.6971, + "eval_samples_per_second": 616.95, + "eval_steps_per_second": 4.823, + "step": 44892 + }, + { + "epoch": 6.01, + "learning_rate": 8.02475780409042e-06, + "loss": 0.003, + "step": 45000 + }, + { + "epoch": 6.08, + "learning_rate": 7.890204520990313e-06, + "loss": 0.0022, + "step": 45500 + }, + { + "epoch": 6.15, + "learning_rate": 7.755651237890205e-06, + "loss": 0.0024, + "step": 46000 + }, + { + "epoch": 6.21, + "learning_rate": 7.621097954790098e-06, + "loss": 0.0022, + "step": 46500 + }, + { + "epoch": 6.28, + "learning_rate": 7.486544671689991e-06, + "loss": 0.0024, + "step": 47000 + }, + { + "epoch": 6.35, + "learning_rate": 7.351991388589882e-06, + "loss": 0.0025, + "step": 47500 + }, + { + "epoch": 6.42, + "learning_rate": 7.217438105489775e-06, + "loss": 0.0025, + "step": 48000 + }, + { + "epoch": 6.48, + "learning_rate": 7.082884822389667e-06, + "loss": 0.0025, + "step": 48500 + }, + { + "epoch": 6.55, + "learning_rate": 6.948331539289559e-06, + "loss": 0.0026, + "step": 49000 + }, + { + "epoch": 6.62, + "learning_rate": 6.813778256189451e-06, + "loss": 0.0025, + "step": 49500 + }, + { + "epoch": 6.68, + "learning_rate": 6.679224973089344e-06, + "loss": 0.0026, + "step": 50000 + }, + { + "epoch": 6.75, + "learning_rate": 6.544671689989236e-06, + "loss": 0.0025, + "step": 50500 + }, + { + "epoch": 6.82, + "learning_rate": 6.410118406889129e-06, + "loss": 0.0025, + "step": 51000 + }, + { + "epoch": 6.88, + "learning_rate": 6.275565123789022e-06, + "loss": 0.0025, + "step": 51500 + }, + { + "epoch": 6.95, + "learning_rate": 6.141011840688913e-06, + "loss": 0.0025, + "step": 52000 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.9939119592521766, + "eval_f1": 0.0, + "eval_loss": 0.03196028992533684, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.5312, + "eval_samples_per_second": 704.63, + "eval_steps_per_second": 5.508, + "step": 52374 + }, + { + "epoch": 7.02, + "learning_rate": 6.006458557588806e-06, + "loss": 0.0023, + "step": 52500 + }, + { + "epoch": 7.08, + "learning_rate": 5.871905274488698e-06, + "loss": 0.002, + "step": 53000 + }, + { + "epoch": 7.15, + "learning_rate": 5.73735199138859e-06, + "loss": 0.0019, + "step": 53500 + }, + { + "epoch": 7.22, + "learning_rate": 5.602798708288482e-06, + "loss": 0.002, + "step": 54000 + }, + { + "epoch": 7.28, + "learning_rate": 5.468245425188375e-06, + "loss": 0.002, + "step": 54500 + }, + { + "epoch": 7.35, + "learning_rate": 5.333692142088267e-06, + "loss": 0.0019, + "step": 55000 + }, + { + "epoch": 7.42, + "learning_rate": 5.19913885898816e-06, + "loss": 0.002, + "step": 55500 + }, + { + "epoch": 7.48, + "learning_rate": 5.064585575888053e-06, + "loss": 0.0018, + "step": 56000 + }, + { + "epoch": 7.55, + "learning_rate": 4.930032292787945e-06, + "loss": 0.0022, + "step": 56500 + }, + { + "epoch": 7.62, + "learning_rate": 4.795479009687837e-06, + "loss": 0.002, + "step": 57000 + }, + { + "epoch": 7.69, + "learning_rate": 4.660925726587729e-06, + "loss": 0.002, + "step": 57500 + }, + { + "epoch": 7.75, + "learning_rate": 4.526372443487621e-06, + "loss": 0.0021, + "step": 58000 + }, + { + "epoch": 7.82, + "learning_rate": 4.391819160387513e-06, + "loss": 0.0021, + "step": 58500 + }, + { + "epoch": 7.89, + "learning_rate": 4.2572658772874056e-06, + "loss": 0.0021, + "step": 59000 + }, + { + "epoch": 7.95, + "learning_rate": 4.1227125941872986e-06, + "loss": 0.0023, + "step": 59500 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9939704629470505, + "eval_f1": 0.0, + "eval_loss": 0.03371906280517578, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 81.4913, + "eval_samples_per_second": 618.508, + "eval_steps_per_second": 4.835, + "step": 59856 + }, + { + "epoch": 8.02, + "learning_rate": 3.988159311087191e-06, + "loss": 0.0019, + "step": 60000 + }, + { + "epoch": 8.09, + "learning_rate": 3.853606027987083e-06, + "loss": 0.0017, + "step": 60500 + }, + { + "epoch": 8.15, + "learning_rate": 3.719052744886976e-06, + "loss": 0.0015, + "step": 61000 + }, + { + "epoch": 8.22, + "learning_rate": 3.584499461786868e-06, + "loss": 0.0018, + "step": 61500 + }, + { + "epoch": 8.29, + "learning_rate": 3.4499461786867606e-06, + "loss": 0.0016, + "step": 62000 + }, + { + "epoch": 8.35, + "learning_rate": 3.3153928955866527e-06, + "loss": 0.0017, + "step": 62500 + }, + { + "epoch": 8.42, + "learning_rate": 3.180839612486545e-06, + "loss": 0.0017, + "step": 63000 + }, + { + "epoch": 8.49, + "learning_rate": 3.0462863293864374e-06, + "loss": 0.0017, + "step": 63500 + }, + { + "epoch": 8.55, + "learning_rate": 2.9117330462863296e-06, + "loss": 0.0016, + "step": 64000 + }, + { + "epoch": 8.62, + "learning_rate": 2.7771797631862217e-06, + "loss": 0.0016, + "step": 64500 + }, + { + "epoch": 8.69, + "learning_rate": 2.6426264800861147e-06, + "loss": 0.0017, + "step": 65000 + }, + { + "epoch": 8.75, + "learning_rate": 2.508073196986007e-06, + "loss": 0.0016, + "step": 65500 + }, + { + "epoch": 8.82, + "learning_rate": 2.373519913885899e-06, + "loss": 0.0017, + "step": 66000 + }, + { + "epoch": 8.89, + "learning_rate": 2.238966630785791e-06, + "loss": 0.0017, + "step": 66500 + }, + { + "epoch": 8.95, + "learning_rate": 2.1044133476856837e-06, + "loss": 0.0017, + "step": 67000 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.9939777759089098, + "eval_f1": 0.0, + "eval_loss": 0.038361821323633194, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.4125, + "eval_samples_per_second": 705.801, + "eval_steps_per_second": 5.517, + "step": 67338 + } + ], + "logging_steps": 500, + "max_steps": 74820, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 7.653696653044312e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-67338/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-67338/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..56f659dc10654a15dedcf717815956383dd4fef5 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-67338/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f713488e20afd5141b39f9aaa67feba7da0a360f752dbaa308013d8d5212f449 +size 4091 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-7482/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-7482/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-7482/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-7482/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-7482/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..de64455183576a9f373df2e83ac91b10d54dd4af --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-7482/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04b2f66afddb265c3d144a2de13e7b8fa3fd5b088a094ea1f73c2949f242fa26 +size 1418281093 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-7482/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-7482/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..d2946215eb786ae8013cad9478a4d807e4c03fdd --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-7482/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c44c203843a5f8a8af3fff7f2e0546fd812bf7966d2cb09e689b08ad7ea9d56c +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-7482/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-7482/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..49d6418971dd5dcd6284a0da3b1d852a159bf106 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-7482/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1388a98e01b7e131b2e58a1114b5578d7145cc0d6a711cf13cc01a4ce2dd84b0 +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-7482/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-7482/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9ab6af8f47f64e0df294be433998adc6a9e01f3 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-7482/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65c64b6b9c7ec564f996e449e03708919251f7be9813519a982c8e66d01dab32 +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-7482/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-7482/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..eeaa033ea9d0e4f91cf540f512b1b28757b2ca86 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-7482/trainer_state.json @@ -0,0 +1,115 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 7482, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07, + "learning_rate": 2e-05, + "loss": 0.1264, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 1.9865446716899895e-05, + "loss": 0.0227, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 1.9730893433799788e-05, + "loss": 0.0203, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 1.959634015069968e-05, + "loss": 0.0197, + "step": 2000 + }, + { + "epoch": 0.33, + "learning_rate": 1.946178686759957e-05, + "loss": 0.0185, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 1.9327233584499463e-05, + "loss": 0.0177, + "step": 3000 + }, + { + "epoch": 0.47, + "learning_rate": 1.9192680301399356e-05, + "loss": 0.0181, + "step": 3500 + }, + { + "epoch": 0.53, + "learning_rate": 1.905812701829925e-05, + "loss": 0.0171, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 1.8923573735199142e-05, + "loss": 0.0174, + "step": 4500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8789020452099035e-05, + "loss": 0.017, + "step": 5000 + }, + { + "epoch": 0.74, + "learning_rate": 1.8654467168998925e-05, + "loss": 0.0168, + "step": 5500 + }, + { + "epoch": 0.8, + "learning_rate": 1.8519913885898818e-05, + "loss": 0.0161, + "step": 6000 + }, + { + "epoch": 0.87, + "learning_rate": 1.838536060279871e-05, + "loss": 0.0156, + "step": 6500 + }, + { + "epoch": 0.94, + "learning_rate": 1.8250807319698604e-05, + "loss": 0.0167, + "step": 7000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9943580499255906, + "eval_f1": 0.0, + "eval_loss": 0.015657523646950722, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.099, + "eval_samples_per_second": 708.913, + "eval_steps_per_second": 5.542, + "step": 7482 + } + ], + "logging_steps": 500, + "max_steps": 74820, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 8.504107392271456e+16, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-7482/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-7482/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..56f659dc10654a15dedcf717815956383dd4fef5 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-7482/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f713488e20afd5141b39f9aaa67feba7da0a360f752dbaa308013d8d5212f449 +size 4091 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-74820/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-74820/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-74820/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-74820/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-74820/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b697f7d6f50fde59d13fe645aa20461c1c4942b1 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-74820/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a90920c741905062d9dd144111885047f015046402654e3678b8dd92c0a3a8fe +size 1418281093 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-74820/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-74820/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..05eb2b0c8faa2655acd26de11e8ecddd2c0eff2b --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-74820/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:793b15cdfdc169a9f16f1c8bc61f6a9989841b02496834177c339ec0b6860449 +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-74820/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-74820/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..38a6aaebe3c0a4f1ab13b78bc597e970d99e8055 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-74820/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0038f7fb1e6f2d035bd6e3e72538a930a715bd6413c48f2dab86b4e277cd244 +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-74820/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-74820/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..360529da53e5faed39eb0ed24aa4d99bba89e3eb --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-74820/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1ca8cf0fe54e209d00bab22ea0d306420f0b8f7d1531efda28b4c3aa15a2ac9 +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-74820/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-74820/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..15a75b5c1b251b2b86593dab016117dea3991d5f --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-74820/trainer_state.json @@ -0,0 +1,1033 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 10.0, + "eval_steps": 500, + "global_step": 74820, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07, + "learning_rate": 2e-05, + "loss": 0.1264, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 1.9865446716899895e-05, + "loss": 0.0227, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 1.9730893433799788e-05, + "loss": 0.0203, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 1.959634015069968e-05, + "loss": 0.0197, + "step": 2000 + }, + { + "epoch": 0.33, + "learning_rate": 1.946178686759957e-05, + "loss": 0.0185, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 1.9327233584499463e-05, + "loss": 0.0177, + "step": 3000 + }, + { + "epoch": 0.47, + "learning_rate": 1.9192680301399356e-05, + "loss": 0.0181, + "step": 3500 + }, + { + "epoch": 0.53, + "learning_rate": 1.905812701829925e-05, + "loss": 0.0171, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 1.8923573735199142e-05, + "loss": 0.0174, + "step": 4500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8789020452099035e-05, + "loss": 0.017, + "step": 5000 + }, + { + "epoch": 0.74, + "learning_rate": 1.8654467168998925e-05, + "loss": 0.0168, + "step": 5500 + }, + { + "epoch": 0.8, + "learning_rate": 1.8519913885898818e-05, + "loss": 0.0161, + "step": 6000 + }, + { + "epoch": 0.87, + "learning_rate": 1.838536060279871e-05, + "loss": 0.0156, + "step": 6500 + }, + { + "epoch": 0.94, + "learning_rate": 1.8250807319698604e-05, + "loss": 0.0167, + "step": 7000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9943580499255906, + "eval_f1": 0.0, + "eval_loss": 0.015657523646950722, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.099, + "eval_samples_per_second": 708.913, + "eval_steps_per_second": 5.542, + "step": 7482 + }, + { + "epoch": 1.0, + "learning_rate": 1.8116254036598497e-05, + "loss": 0.0152, + "step": 7500 + }, + { + "epoch": 1.07, + "learning_rate": 1.7981700753498386e-05, + "loss": 0.0119, + "step": 8000 + }, + { + "epoch": 1.14, + "learning_rate": 1.784714747039828e-05, + "loss": 0.0116, + "step": 8500 + }, + { + "epoch": 1.2, + "learning_rate": 1.7712594187298172e-05, + "loss": 0.0116, + "step": 9000 + }, + { + "epoch": 1.27, + "learning_rate": 1.7578040904198065e-05, + "loss": 0.0114, + "step": 9500 + }, + { + "epoch": 1.34, + "learning_rate": 1.7443487621097955e-05, + "loss": 0.0114, + "step": 10000 + }, + { + "epoch": 1.4, + "learning_rate": 1.7308934337997848e-05, + "loss": 0.0114, + "step": 10500 + }, + { + "epoch": 1.47, + "learning_rate": 1.717438105489774e-05, + "loss": 0.0118, + "step": 11000 + }, + { + "epoch": 1.54, + "learning_rate": 1.7039827771797634e-05, + "loss": 0.0119, + "step": 11500 + }, + { + "epoch": 1.6, + "learning_rate": 1.6905274488697524e-05, + "loss": 0.0111, + "step": 12000 + }, + { + "epoch": 1.67, + "learning_rate": 1.6770721205597417e-05, + "loss": 0.0118, + "step": 12500 + }, + { + "epoch": 1.74, + "learning_rate": 1.663616792249731e-05, + "loss": 0.0116, + "step": 13000 + }, + { + "epoch": 1.8, + "learning_rate": 1.6501614639397203e-05, + "loss": 0.0112, + "step": 13500 + }, + { + "epoch": 1.87, + "learning_rate": 1.6367061356297096e-05, + "loss": 0.0114, + "step": 14000 + }, + { + "epoch": 1.94, + "learning_rate": 1.6232508073196985e-05, + "loss": 0.0115, + "step": 14500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9940472490465726, + "eval_f1": 0.0, + "eval_loss": 0.016565397381782532, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 82.6741, + "eval_samples_per_second": 609.659, + "eval_steps_per_second": 4.766, + "step": 14964 + }, + { + "epoch": 2.0, + "learning_rate": 1.6097954790096878e-05, + "loss": 0.0109, + "step": 15000 + }, + { + "epoch": 2.07, + "learning_rate": 1.596340150699677e-05, + "loss": 0.0077, + "step": 15500 + }, + { + "epoch": 2.14, + "learning_rate": 1.5828848223896664e-05, + "loss": 0.0075, + "step": 16000 + }, + { + "epoch": 2.21, + "learning_rate": 1.5694294940796557e-05, + "loss": 0.0076, + "step": 16500 + }, + { + "epoch": 2.27, + "learning_rate": 1.555974165769645e-05, + "loss": 0.0076, + "step": 17000 + }, + { + "epoch": 2.34, + "learning_rate": 1.542518837459634e-05, + "loss": 0.0077, + "step": 17500 + }, + { + "epoch": 2.41, + "learning_rate": 1.5290635091496233e-05, + "loss": 0.0077, + "step": 18000 + }, + { + "epoch": 2.47, + "learning_rate": 1.5156081808396126e-05, + "loss": 0.0076, + "step": 18500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5021528525296019e-05, + "loss": 0.0078, + "step": 19000 + }, + { + "epoch": 2.61, + "learning_rate": 1.4886975242195912e-05, + "loss": 0.008, + "step": 19500 + }, + { + "epoch": 2.67, + "learning_rate": 1.4752421959095805e-05, + "loss": 0.0077, + "step": 20000 + }, + { + "epoch": 2.74, + "learning_rate": 1.4617868675995694e-05, + "loss": 0.0078, + "step": 20500 + }, + { + "epoch": 2.81, + "learning_rate": 1.4483315392895587e-05, + "loss": 0.0081, + "step": 21000 + }, + { + "epoch": 2.87, + "learning_rate": 1.434876210979548e-05, + "loss": 0.0081, + "step": 21500 + }, + { + "epoch": 2.94, + "learning_rate": 1.4214208826695373e-05, + "loss": 0.0078, + "step": 22000 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9940838138558689, + "eval_f1": 0.0, + "eval_loss": 0.020097261294722557, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.581, + "eval_samples_per_second": 704.139, + "eval_steps_per_second": 5.504, + "step": 22446 + }, + { + "epoch": 3.01, + "learning_rate": 1.4079655543595265e-05, + "loss": 0.0074, + "step": 22500 + }, + { + "epoch": 3.07, + "learning_rate": 1.3945102260495158e-05, + "loss": 0.0051, + "step": 23000 + }, + { + "epoch": 3.14, + "learning_rate": 1.3810548977395049e-05, + "loss": 0.0052, + "step": 23500 + }, + { + "epoch": 3.21, + "learning_rate": 1.3675995694294942e-05, + "loss": 0.005, + "step": 24000 + }, + { + "epoch": 3.27, + "learning_rate": 1.3541442411194833e-05, + "loss": 0.0056, + "step": 24500 + }, + { + "epoch": 3.34, + "learning_rate": 1.3406889128094726e-05, + "loss": 0.0049, + "step": 25000 + }, + { + "epoch": 3.41, + "learning_rate": 1.3272335844994619e-05, + "loss": 0.0054, + "step": 25500 + }, + { + "epoch": 3.48, + "learning_rate": 1.3137782561894512e-05, + "loss": 0.0052, + "step": 26000 + }, + { + "epoch": 3.54, + "learning_rate": 1.3003229278794403e-05, + "loss": 0.0055, + "step": 26500 + }, + { + "epoch": 3.61, + "learning_rate": 1.2868675995694295e-05, + "loss": 0.0053, + "step": 27000 + }, + { + "epoch": 3.68, + "learning_rate": 1.2734122712594188e-05, + "loss": 0.0055, + "step": 27500 + }, + { + "epoch": 3.74, + "learning_rate": 1.259956942949408e-05, + "loss": 0.0056, + "step": 28000 + }, + { + "epoch": 3.81, + "learning_rate": 1.2465016146393974e-05, + "loss": 0.0056, + "step": 28500 + }, + { + "epoch": 3.88, + "learning_rate": 1.2330462863293867e-05, + "loss": 0.0056, + "step": 29000 + }, + { + "epoch": 3.94, + "learning_rate": 1.219590958019376e-05, + "loss": 0.0056, + "step": 29500 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9940216536800652, + "eval_f1": 0.0, + "eval_loss": 0.023068198934197426, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 81.8028, + "eval_samples_per_second": 616.153, + "eval_steps_per_second": 4.816, + "step": 29928 + }, + { + "epoch": 4.01, + "learning_rate": 1.206135629709365e-05, + "loss": 0.0053, + "step": 30000 + }, + { + "epoch": 4.08, + "learning_rate": 1.1926803013993542e-05, + "loss": 0.0035, + "step": 30500 + }, + { + "epoch": 4.14, + "learning_rate": 1.1792249730893435e-05, + "loss": 0.0037, + "step": 31000 + }, + { + "epoch": 4.21, + "learning_rate": 1.1657696447793328e-05, + "loss": 0.0039, + "step": 31500 + }, + { + "epoch": 4.28, + "learning_rate": 1.152314316469322e-05, + "loss": 0.0037, + "step": 32000 + }, + { + "epoch": 4.34, + "learning_rate": 1.1388589881593113e-05, + "loss": 0.0038, + "step": 32500 + }, + { + "epoch": 4.41, + "learning_rate": 1.1254036598493004e-05, + "loss": 0.004, + "step": 33000 + }, + { + "epoch": 4.48, + "learning_rate": 1.1119483315392897e-05, + "loss": 0.0039, + "step": 33500 + }, + { + "epoch": 4.54, + "learning_rate": 1.0984930032292788e-05, + "loss": 0.004, + "step": 34000 + }, + { + "epoch": 4.61, + "learning_rate": 1.0850376749192681e-05, + "loss": 0.0041, + "step": 34500 + }, + { + "epoch": 4.68, + "learning_rate": 1.0715823466092574e-05, + "loss": 0.0042, + "step": 35000 + }, + { + "epoch": 4.74, + "learning_rate": 1.0581270182992467e-05, + "loss": 0.004, + "step": 35500 + }, + { + "epoch": 4.81, + "learning_rate": 1.0446716899892357e-05, + "loss": 0.0042, + "step": 36000 + }, + { + "epoch": 4.88, + "learning_rate": 1.031216361679225e-05, + "loss": 0.0041, + "step": 36500 + }, + { + "epoch": 4.95, + "learning_rate": 1.0177610333692143e-05, + "loss": 0.0042, + "step": 37000 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.9940509055275022, + "eval_f1": 0.0, + "eval_loss": 0.025542501360177994, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.0096, + "eval_samples_per_second": 709.806, + "eval_steps_per_second": 5.549, + "step": 37410 + }, + { + "epoch": 5.01, + "learning_rate": 1.0043057050592036e-05, + "loss": 0.0038, + "step": 37500 + }, + { + "epoch": 5.08, + "learning_rate": 9.908503767491927e-06, + "loss": 0.0029, + "step": 38000 + }, + { + "epoch": 5.15, + "learning_rate": 9.77395048439182e-06, + "loss": 0.003, + "step": 38500 + }, + { + "epoch": 5.21, + "learning_rate": 9.639397201291713e-06, + "loss": 0.003, + "step": 39000 + }, + { + "epoch": 5.28, + "learning_rate": 9.504843918191604e-06, + "loss": 0.003, + "step": 39500 + }, + { + "epoch": 5.35, + "learning_rate": 9.370290635091497e-06, + "loss": 0.0029, + "step": 40000 + }, + { + "epoch": 5.41, + "learning_rate": 9.23573735199139e-06, + "loss": 0.0031, + "step": 40500 + }, + { + "epoch": 5.48, + "learning_rate": 9.101184068891282e-06, + "loss": 0.0031, + "step": 41000 + }, + { + "epoch": 5.55, + "learning_rate": 8.966630785791175e-06, + "loss": 0.0031, + "step": 41500 + }, + { + "epoch": 5.61, + "learning_rate": 8.832077502691066e-06, + "loss": 0.0032, + "step": 42000 + }, + { + "epoch": 5.68, + "learning_rate": 8.697524219590959e-06, + "loss": 0.003, + "step": 42500 + }, + { + "epoch": 5.75, + "learning_rate": 8.56297093649085e-06, + "loss": 0.0032, + "step": 43000 + }, + { + "epoch": 5.81, + "learning_rate": 8.428417653390743e-06, + "loss": 0.0033, + "step": 43500 + }, + { + "epoch": 5.88, + "learning_rate": 8.293864370290636e-06, + "loss": 0.0032, + "step": 44000 + }, + { + "epoch": 5.95, + "learning_rate": 8.159311087190527e-06, + "loss": 0.0032, + "step": 44500 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9940655314512207, + "eval_f1": 0.0, + "eval_loss": 0.031192485243082047, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 81.6971, + "eval_samples_per_second": 616.95, + "eval_steps_per_second": 4.823, + "step": 44892 + }, + { + "epoch": 6.01, + "learning_rate": 8.02475780409042e-06, + "loss": 0.003, + "step": 45000 + }, + { + "epoch": 6.08, + "learning_rate": 7.890204520990313e-06, + "loss": 0.0022, + "step": 45500 + }, + { + "epoch": 6.15, + "learning_rate": 7.755651237890205e-06, + "loss": 0.0024, + "step": 46000 + }, + { + "epoch": 6.21, + "learning_rate": 7.621097954790098e-06, + "loss": 0.0022, + "step": 46500 + }, + { + "epoch": 6.28, + "learning_rate": 7.486544671689991e-06, + "loss": 0.0024, + "step": 47000 + }, + { + "epoch": 6.35, + "learning_rate": 7.351991388589882e-06, + "loss": 0.0025, + "step": 47500 + }, + { + "epoch": 6.42, + "learning_rate": 7.217438105489775e-06, + "loss": 0.0025, + "step": 48000 + }, + { + "epoch": 6.48, + "learning_rate": 7.082884822389667e-06, + "loss": 0.0025, + "step": 48500 + }, + { + "epoch": 6.55, + "learning_rate": 6.948331539289559e-06, + "loss": 0.0026, + "step": 49000 + }, + { + "epoch": 6.62, + "learning_rate": 6.813778256189451e-06, + "loss": 0.0025, + "step": 49500 + }, + { + "epoch": 6.68, + "learning_rate": 6.679224973089344e-06, + "loss": 0.0026, + "step": 50000 + }, + { + "epoch": 6.75, + "learning_rate": 6.544671689989236e-06, + "loss": 0.0025, + "step": 50500 + }, + { + "epoch": 6.82, + "learning_rate": 6.410118406889129e-06, + "loss": 0.0025, + "step": 51000 + }, + { + "epoch": 6.88, + "learning_rate": 6.275565123789022e-06, + "loss": 0.0025, + "step": 51500 + }, + { + "epoch": 6.95, + "learning_rate": 6.141011840688913e-06, + "loss": 0.0025, + "step": 52000 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.9939119592521766, + "eval_f1": 0.0, + "eval_loss": 0.03196028992533684, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.5312, + "eval_samples_per_second": 704.63, + "eval_steps_per_second": 5.508, + "step": 52374 + }, + { + "epoch": 7.02, + "learning_rate": 6.006458557588806e-06, + "loss": 0.0023, + "step": 52500 + }, + { + "epoch": 7.08, + "learning_rate": 5.871905274488698e-06, + "loss": 0.002, + "step": 53000 + }, + { + "epoch": 7.15, + "learning_rate": 5.73735199138859e-06, + "loss": 0.0019, + "step": 53500 + }, + { + "epoch": 7.22, + "learning_rate": 5.602798708288482e-06, + "loss": 0.002, + "step": 54000 + }, + { + "epoch": 7.28, + "learning_rate": 5.468245425188375e-06, + "loss": 0.002, + "step": 54500 + }, + { + "epoch": 7.35, + "learning_rate": 5.333692142088267e-06, + "loss": 0.0019, + "step": 55000 + }, + { + "epoch": 7.42, + "learning_rate": 5.19913885898816e-06, + "loss": 0.002, + "step": 55500 + }, + { + "epoch": 7.48, + "learning_rate": 5.064585575888053e-06, + "loss": 0.0018, + "step": 56000 + }, + { + "epoch": 7.55, + "learning_rate": 4.930032292787945e-06, + "loss": 0.0022, + "step": 56500 + }, + { + "epoch": 7.62, + "learning_rate": 4.795479009687837e-06, + "loss": 0.002, + "step": 57000 + }, + { + "epoch": 7.69, + "learning_rate": 4.660925726587729e-06, + "loss": 0.002, + "step": 57500 + }, + { + "epoch": 7.75, + "learning_rate": 4.526372443487621e-06, + "loss": 0.0021, + "step": 58000 + }, + { + "epoch": 7.82, + "learning_rate": 4.391819160387513e-06, + "loss": 0.0021, + "step": 58500 + }, + { + "epoch": 7.89, + "learning_rate": 4.2572658772874056e-06, + "loss": 0.0021, + "step": 59000 + }, + { + "epoch": 7.95, + "learning_rate": 4.1227125941872986e-06, + "loss": 0.0023, + "step": 59500 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9939704629470505, + "eval_f1": 0.0, + "eval_loss": 0.03371906280517578, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 81.4913, + "eval_samples_per_second": 618.508, + "eval_steps_per_second": 4.835, + "step": 59856 + }, + { + "epoch": 8.02, + "learning_rate": 3.988159311087191e-06, + "loss": 0.0019, + "step": 60000 + }, + { + "epoch": 8.09, + "learning_rate": 3.853606027987083e-06, + "loss": 0.0017, + "step": 60500 + }, + { + "epoch": 8.15, + "learning_rate": 3.719052744886976e-06, + "loss": 0.0015, + "step": 61000 + }, + { + "epoch": 8.22, + "learning_rate": 3.584499461786868e-06, + "loss": 0.0018, + "step": 61500 + }, + { + "epoch": 8.29, + "learning_rate": 3.4499461786867606e-06, + "loss": 0.0016, + "step": 62000 + }, + { + "epoch": 8.35, + "learning_rate": 3.3153928955866527e-06, + "loss": 0.0017, + "step": 62500 + }, + { + "epoch": 8.42, + "learning_rate": 3.180839612486545e-06, + "loss": 0.0017, + "step": 63000 + }, + { + "epoch": 8.49, + "learning_rate": 3.0462863293864374e-06, + "loss": 0.0017, + "step": 63500 + }, + { + "epoch": 8.55, + "learning_rate": 2.9117330462863296e-06, + "loss": 0.0016, + "step": 64000 + }, + { + "epoch": 8.62, + "learning_rate": 2.7771797631862217e-06, + "loss": 0.0016, + "step": 64500 + }, + { + "epoch": 8.69, + "learning_rate": 2.6426264800861147e-06, + "loss": 0.0017, + "step": 65000 + }, + { + "epoch": 8.75, + "learning_rate": 2.508073196986007e-06, + "loss": 0.0016, + "step": 65500 + }, + { + "epoch": 8.82, + "learning_rate": 2.373519913885899e-06, + "loss": 0.0017, + "step": 66000 + }, + { + "epoch": 8.89, + "learning_rate": 2.238966630785791e-06, + "loss": 0.0017, + "step": 66500 + }, + { + "epoch": 8.95, + "learning_rate": 2.1044133476856837e-06, + "loss": 0.0017, + "step": 67000 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.9939777759089098, + "eval_f1": 0.0, + "eval_loss": 0.038361821323633194, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 71.4125, + "eval_samples_per_second": 705.801, + "eval_steps_per_second": 5.517, + "step": 67338 + }, + { + "epoch": 9.02, + "learning_rate": 1.9698600645855763e-06, + "loss": 0.0015, + "step": 67500 + }, + { + "epoch": 9.09, + "learning_rate": 1.8353067814854682e-06, + "loss": 0.0014, + "step": 68000 + }, + { + "epoch": 9.16, + "learning_rate": 1.7007534983853608e-06, + "loss": 0.0013, + "step": 68500 + }, + { + "epoch": 9.22, + "learning_rate": 1.5662002152852531e-06, + "loss": 0.0014, + "step": 69000 + }, + { + "epoch": 9.29, + "learning_rate": 1.4316469321851453e-06, + "loss": 0.0014, + "step": 69500 + }, + { + "epoch": 9.36, + "learning_rate": 1.2970936490850379e-06, + "loss": 0.0014, + "step": 70000 + }, + { + "epoch": 9.42, + "learning_rate": 1.16254036598493e-06, + "loss": 0.0015, + "step": 70500 + }, + { + "epoch": 9.49, + "learning_rate": 1.0279870828848226e-06, + "loss": 0.0014, + "step": 71000 + }, + { + "epoch": 9.56, + "learning_rate": 8.934337997847148e-07, + "loss": 0.0014, + "step": 71500 + }, + { + "epoch": 9.62, + "learning_rate": 7.588805166846072e-07, + "loss": 0.0014, + "step": 72000 + }, + { + "epoch": 9.69, + "learning_rate": 6.243272335844995e-07, + "loss": 0.0013, + "step": 72500 + }, + { + "epoch": 9.76, + "learning_rate": 4.897739504843919e-07, + "loss": 0.0014, + "step": 73000 + }, + { + "epoch": 9.82, + "learning_rate": 3.5522066738428425e-07, + "loss": 0.0014, + "step": 73500 + }, + { + "epoch": 9.89, + "learning_rate": 2.2066738428417655e-07, + "loss": 0.0013, + "step": 74000 + }, + { + "epoch": 9.96, + "learning_rate": 8.61141011840689e-08, + "loss": 0.0013, + "step": 74500 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.993955837023332, + "eval_f1": 0.0, + "eval_loss": 0.04195573925971985, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 81.6909, + "eval_samples_per_second": 616.996, + "eval_steps_per_second": 4.823, + "step": 74820 + } + ], + "logging_steps": 500, + "max_steps": 74820, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 8.504107392271459e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-74820/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-74820/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..56f659dc10654a15dedcf717815956383dd4fef5 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/02-09-2023-11-01-00/checkpoint-74820/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f713488e20afd5141b39f9aaa67feba7da0a360f752dbaa308013d8d5212f449 +size 4091 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-14964/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-14964/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-14964/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-14964/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-14964/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8080e80076e85149cb15efdb057c4f0a689fb89 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-14964/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3e34d0285009723be71a0191774dce57037cda35ed7d470b3dc12d0d920694f +size 14695 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-14964/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-14964/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..aa61c9d1d0198f265ff90489d96994537085c6e7 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-14964/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56205e3199cf8fd4dd452c74a1825dcb0324d36aecf577466ced7bbc1f272ff1 +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-14964/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-14964/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9bc213505a7eba1a415a19969e5e1a5f7659bcfd --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-14964/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e6c4fae2dbc4e09dcb1916b3513181a62899bf27dcfddffe76449df6be937a2 +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-14964/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-14964/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..16e63c4ef691d7e5b73179fae99bb1de9d66cd6d --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-14964/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45c6f90078a2450ac8516906119b963c86e82b14a8e8f9482b75b9f2e26282b4 +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-14964/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-14964/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b02180bdfc804f8dcd029d76183913735a49b395 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-14964/trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 14964, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07, + "learning_rate": 2e-05, + "loss": 0.603, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 1.9865446716899895e-05, + "loss": 0.4359, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 1.9730893433799788e-05, + "loss": 0.3531, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 1.959634015069968e-05, + "loss": 0.299, + "step": 2000 + }, + { + "epoch": 0.33, + "learning_rate": 1.946178686759957e-05, + "loss": 0.2636, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 1.9327233584499463e-05, + "loss": 0.237, + "step": 3000 + }, + { + "epoch": 0.47, + "learning_rate": 1.9192680301399356e-05, + "loss": 0.2162, + "step": 3500 + }, + { + "epoch": 0.53, + "learning_rate": 1.905812701829925e-05, + "loss": 0.1995, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 1.8923573735199142e-05, + "loss": 0.1867, + "step": 4500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8789020452099035e-05, + "loss": 0.1772, + "step": 5000 + }, + { + "epoch": 0.74, + "learning_rate": 1.8654467168998925e-05, + "loss": 0.168, + "step": 5500 + }, + { + "epoch": 0.8, + "learning_rate": 1.8519913885898818e-05, + "loss": 0.1611, + "step": 6000 + }, + { + "epoch": 0.87, + "learning_rate": 1.838536060279871e-05, + "loss": 0.1558, + "step": 6500 + }, + { + "epoch": 0.94, + "learning_rate": 1.8250807319698604e-05, + "loss": 0.1509, + "step": 7000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.972481324523652, + "eval_f1": 0.0, + "eval_loss": 0.10318954288959503, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.8238, + "eval_samples_per_second": 711.667, + "eval_steps_per_second": 5.563, + "step": 7482 + }, + { + "epoch": 1.0, + "learning_rate": 1.8116254036598497e-05, + "loss": 0.1458, + "step": 7500 + }, + { + "epoch": 1.07, + "learning_rate": 1.7981700753498386e-05, + "loss": 0.1438, + "step": 8000 + }, + { + "epoch": 1.14, + "learning_rate": 1.784714747039828e-05, + "loss": 0.1389, + "step": 8500 + }, + { + "epoch": 1.2, + "learning_rate": 1.7712594187298172e-05, + "loss": 0.1365, + "step": 9000 + }, + { + "epoch": 1.27, + "learning_rate": 1.7578040904198065e-05, + "loss": 0.1345, + "step": 9500 + }, + { + "epoch": 1.34, + "learning_rate": 1.7443487621097955e-05, + "loss": 0.1327, + "step": 10000 + }, + { + "epoch": 1.4, + "learning_rate": 1.7308934337997848e-05, + "loss": 0.1304, + "step": 10500 + }, + { + "epoch": 1.47, + "learning_rate": 1.717438105489774e-05, + "loss": 0.1297, + "step": 11000 + }, + { + "epoch": 1.54, + "learning_rate": 1.7039827771797634e-05, + "loss": 0.1268, + "step": 11500 + }, + { + "epoch": 1.6, + "learning_rate": 1.6905274488697524e-05, + "loss": 0.1246, + "step": 12000 + }, + { + "epoch": 1.67, + "learning_rate": 1.6770721205597417e-05, + "loss": 0.1243, + "step": 12500 + }, + { + "epoch": 1.74, + "learning_rate": 1.663616792249731e-05, + "loss": 0.1237, + "step": 13000 + }, + { + "epoch": 1.8, + "learning_rate": 1.6501614639397203e-05, + "loss": 0.1224, + "step": 13500 + }, + { + "epoch": 1.87, + "learning_rate": 1.6367061356297096e-05, + "loss": 0.1218, + "step": 14000 + }, + { + "epoch": 1.94, + "learning_rate": 1.6232508073196985e-05, + "loss": 0.1207, + "step": 14500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9781232745980614, + "eval_f1": 0.0, + "eval_loss": 0.07744112610816956, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.2255, + "eval_samples_per_second": 661.235, + "eval_steps_per_second": 5.169, + "step": 14964 + } + ], + "logging_steps": 500, + "max_steps": 74820, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1.7008214784542912e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-14964/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-14964/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f375a252b40f093376af46a59a95f708005d615a --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-14964/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:420ab2eaed25185be1889c315a3a56ec6f0e5cd67d7d2d1c5e5dce5d2bc3075b +size 4091 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-22446/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-22446/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-22446/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-22446/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-22446/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a988bce68c2058b808bff2a00be2c45d6b0063f4 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-22446/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41a37c34eff80b0c7cb5df8af03e56d3d503514dd20830348deeaa38d9c5e78c +size 14695 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-22446/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-22446/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..02ffc25cd656ef20161d93af4a598ea4d450a61f --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-22446/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:479d6cf992d96596d240fe96a06cc8854d2de0179bdb300e5bbc4cd7265f2a4c +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-22446/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-22446/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..311b9836dd1cc5b5aba89aa163537198e6a69a3b --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-22446/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26c94452baca72f151e8fd71f227f974658fc15f84f8df12aa691d214f2d3bb1 +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-22446/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-22446/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f8d56bccb96e583d7d642b90a40bf2e1ac09fcb --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-22446/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95aa11d132c66709e53ab1861363db19e03cdd43abe90abb88ecfb1d00f88ed7 +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-22446/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-22446/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..563c10cb09e8d531c33fa3a53bccdbb6492560c1 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-22446/trainer_state.json @@ -0,0 +1,319 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 22446, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07, + "learning_rate": 2e-05, + "loss": 0.603, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 1.9865446716899895e-05, + "loss": 0.4359, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 1.9730893433799788e-05, + "loss": 0.3531, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 1.959634015069968e-05, + "loss": 0.299, + "step": 2000 + }, + { + "epoch": 0.33, + "learning_rate": 1.946178686759957e-05, + "loss": 0.2636, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 1.9327233584499463e-05, + "loss": 0.237, + "step": 3000 + }, + { + "epoch": 0.47, + "learning_rate": 1.9192680301399356e-05, + "loss": 0.2162, + "step": 3500 + }, + { + "epoch": 0.53, + "learning_rate": 1.905812701829925e-05, + "loss": 0.1995, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 1.8923573735199142e-05, + "loss": 0.1867, + "step": 4500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8789020452099035e-05, + "loss": 0.1772, + "step": 5000 + }, + { + "epoch": 0.74, + "learning_rate": 1.8654467168998925e-05, + "loss": 0.168, + "step": 5500 + }, + { + "epoch": 0.8, + "learning_rate": 1.8519913885898818e-05, + "loss": 0.1611, + "step": 6000 + }, + { + "epoch": 0.87, + "learning_rate": 1.838536060279871e-05, + "loss": 0.1558, + "step": 6500 + }, + { + "epoch": 0.94, + "learning_rate": 1.8250807319698604e-05, + "loss": 0.1509, + "step": 7000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.972481324523652, + "eval_f1": 0.0, + "eval_loss": 0.10318954288959503, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.8238, + "eval_samples_per_second": 711.667, + "eval_steps_per_second": 5.563, + "step": 7482 + }, + { + "epoch": 1.0, + "learning_rate": 1.8116254036598497e-05, + "loss": 0.1458, + "step": 7500 + }, + { + "epoch": 1.07, + "learning_rate": 1.7981700753498386e-05, + "loss": 0.1438, + "step": 8000 + }, + { + "epoch": 1.14, + "learning_rate": 1.784714747039828e-05, + "loss": 0.1389, + "step": 8500 + }, + { + "epoch": 1.2, + "learning_rate": 1.7712594187298172e-05, + "loss": 0.1365, + "step": 9000 + }, + { + "epoch": 1.27, + "learning_rate": 1.7578040904198065e-05, + "loss": 0.1345, + "step": 9500 + }, + { + "epoch": 1.34, + "learning_rate": 1.7443487621097955e-05, + "loss": 0.1327, + "step": 10000 + }, + { + "epoch": 1.4, + "learning_rate": 1.7308934337997848e-05, + "loss": 0.1304, + "step": 10500 + }, + { + "epoch": 1.47, + "learning_rate": 1.717438105489774e-05, + "loss": 0.1297, + "step": 11000 + }, + { + "epoch": 1.54, + "learning_rate": 1.7039827771797634e-05, + "loss": 0.1268, + "step": 11500 + }, + { + "epoch": 1.6, + "learning_rate": 1.6905274488697524e-05, + "loss": 0.1246, + "step": 12000 + }, + { + "epoch": 1.67, + "learning_rate": 1.6770721205597417e-05, + "loss": 0.1243, + "step": 12500 + }, + { + "epoch": 1.74, + "learning_rate": 1.663616792249731e-05, + "loss": 0.1237, + "step": 13000 + }, + { + "epoch": 1.8, + "learning_rate": 1.6501614639397203e-05, + "loss": 0.1224, + "step": 13500 + }, + { + "epoch": 1.87, + "learning_rate": 1.6367061356297096e-05, + "loss": 0.1218, + "step": 14000 + }, + { + "epoch": 1.94, + "learning_rate": 1.6232508073196985e-05, + "loss": 0.1207, + "step": 14500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9781232745980614, + "eval_f1": 0.0, + "eval_loss": 0.07744112610816956, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.2255, + "eval_samples_per_second": 661.235, + "eval_steps_per_second": 5.169, + "step": 14964 + }, + { + "epoch": 2.0, + "learning_rate": 1.6097954790096878e-05, + "loss": 0.1192, + "step": 15000 + }, + { + "epoch": 2.07, + "learning_rate": 1.596340150699677e-05, + "loss": 0.1195, + "step": 15500 + }, + { + "epoch": 2.14, + "learning_rate": 1.5828848223896664e-05, + "loss": 0.1201, + "step": 16000 + }, + { + "epoch": 2.21, + "learning_rate": 1.5694294940796557e-05, + "loss": 0.1194, + "step": 16500 + }, + { + "epoch": 2.27, + "learning_rate": 1.555974165769645e-05, + "loss": 0.1175, + "step": 17000 + }, + { + "epoch": 2.34, + "learning_rate": 1.542518837459634e-05, + "loss": 0.1175, + "step": 17500 + }, + { + "epoch": 2.41, + "learning_rate": 1.5290635091496233e-05, + "loss": 0.1172, + "step": 18000 + }, + { + "epoch": 2.47, + "learning_rate": 1.5156081808396126e-05, + "loss": 0.1154, + "step": 18500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5021528525296019e-05, + "loss": 0.1152, + "step": 19000 + }, + { + "epoch": 2.61, + "learning_rate": 1.4886975242195912e-05, + "loss": 0.1146, + "step": 19500 + }, + { + "epoch": 2.67, + "learning_rate": 1.4752421959095805e-05, + "loss": 0.1156, + "step": 20000 + }, + { + "epoch": 2.74, + "learning_rate": 1.4617868675995694e-05, + "loss": 0.1152, + "step": 20500 + }, + { + "epoch": 2.81, + "learning_rate": 1.4483315392895587e-05, + "loss": 0.1155, + "step": 21000 + }, + { + "epoch": 2.87, + "learning_rate": 1.434876210979548e-05, + "loss": 0.1143, + "step": 21500 + }, + { + "epoch": 2.94, + "learning_rate": 1.4214208826695373e-05, + "loss": 0.114, + "step": 22000 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9796480271457144, + "eval_f1": 0.0, + "eval_loss": 0.07054685801267624, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.3686, + "eval_samples_per_second": 716.271, + "eval_steps_per_second": 5.599, + "step": 22446 + } + ], + "logging_steps": 500, + "max_steps": 74820, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 2.5512322176814368e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-22446/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-22446/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f375a252b40f093376af46a59a95f708005d615a --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-22446/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:420ab2eaed25185be1889c315a3a56ec6f0e5cd67d7d2d1c5e5dce5d2bc3075b +size 4091 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-29928/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-29928/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-29928/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-29928/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-29928/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..65aec819beb24185c822c23fcbd8d5d6dc92a1e3 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-29928/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9943b406bd7a0fa4e30d939b4fe087907c91929edc58acebbe9cef58b1ccb48c +size 14695 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-29928/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-29928/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..df23c9b075b65b491075a296c9ee800eff5681bc --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-29928/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc37629b6e8336e2f663bb0cb75826fac3f548c029e069bef6f8593161e8b80f +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-29928/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-29928/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..343f98423333b776804dea639a3eb228322e7f2b --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-29928/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9bec1569f2b2b19949906563cbd6e2d8a69b35607a8366b6695d838b750331b +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-29928/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-29928/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2538ea1bc1d89ce561d63cfb0b8a012031c753d3 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-29928/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5c5e1d1870ca9edfabe43cd9960ac818f48f088d52b8f2b3a331c9e576d215a +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-29928/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-29928/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0b02ebe898590389bf8212cd42454ef3e4b84288 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-29928/trainer_state.json @@ -0,0 +1,421 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.0, + "eval_steps": 500, + "global_step": 29928, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07, + "learning_rate": 2e-05, + "loss": 0.603, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 1.9865446716899895e-05, + "loss": 0.4359, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 1.9730893433799788e-05, + "loss": 0.3531, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 1.959634015069968e-05, + "loss": 0.299, + "step": 2000 + }, + { + "epoch": 0.33, + "learning_rate": 1.946178686759957e-05, + "loss": 0.2636, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 1.9327233584499463e-05, + "loss": 0.237, + "step": 3000 + }, + { + "epoch": 0.47, + "learning_rate": 1.9192680301399356e-05, + "loss": 0.2162, + "step": 3500 + }, + { + "epoch": 0.53, + "learning_rate": 1.905812701829925e-05, + "loss": 0.1995, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 1.8923573735199142e-05, + "loss": 0.1867, + "step": 4500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8789020452099035e-05, + "loss": 0.1772, + "step": 5000 + }, + { + "epoch": 0.74, + "learning_rate": 1.8654467168998925e-05, + "loss": 0.168, + "step": 5500 + }, + { + "epoch": 0.8, + "learning_rate": 1.8519913885898818e-05, + "loss": 0.1611, + "step": 6000 + }, + { + "epoch": 0.87, + "learning_rate": 1.838536060279871e-05, + "loss": 0.1558, + "step": 6500 + }, + { + "epoch": 0.94, + "learning_rate": 1.8250807319698604e-05, + "loss": 0.1509, + "step": 7000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.972481324523652, + "eval_f1": 0.0, + "eval_loss": 0.10318954288959503, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.8238, + "eval_samples_per_second": 711.667, + "eval_steps_per_second": 5.563, + "step": 7482 + }, + { + "epoch": 1.0, + "learning_rate": 1.8116254036598497e-05, + "loss": 0.1458, + "step": 7500 + }, + { + "epoch": 1.07, + "learning_rate": 1.7981700753498386e-05, + "loss": 0.1438, + "step": 8000 + }, + { + "epoch": 1.14, + "learning_rate": 1.784714747039828e-05, + "loss": 0.1389, + "step": 8500 + }, + { + "epoch": 1.2, + "learning_rate": 1.7712594187298172e-05, + "loss": 0.1365, + "step": 9000 + }, + { + "epoch": 1.27, + "learning_rate": 1.7578040904198065e-05, + "loss": 0.1345, + "step": 9500 + }, + { + "epoch": 1.34, + "learning_rate": 1.7443487621097955e-05, + "loss": 0.1327, + "step": 10000 + }, + { + "epoch": 1.4, + "learning_rate": 1.7308934337997848e-05, + "loss": 0.1304, + "step": 10500 + }, + { + "epoch": 1.47, + "learning_rate": 1.717438105489774e-05, + "loss": 0.1297, + "step": 11000 + }, + { + "epoch": 1.54, + "learning_rate": 1.7039827771797634e-05, + "loss": 0.1268, + "step": 11500 + }, + { + "epoch": 1.6, + "learning_rate": 1.6905274488697524e-05, + "loss": 0.1246, + "step": 12000 + }, + { + "epoch": 1.67, + "learning_rate": 1.6770721205597417e-05, + "loss": 0.1243, + "step": 12500 + }, + { + "epoch": 1.74, + "learning_rate": 1.663616792249731e-05, + "loss": 0.1237, + "step": 13000 + }, + { + "epoch": 1.8, + "learning_rate": 1.6501614639397203e-05, + "loss": 0.1224, + "step": 13500 + }, + { + "epoch": 1.87, + "learning_rate": 1.6367061356297096e-05, + "loss": 0.1218, + "step": 14000 + }, + { + "epoch": 1.94, + "learning_rate": 1.6232508073196985e-05, + "loss": 0.1207, + "step": 14500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9781232745980614, + "eval_f1": 0.0, + "eval_loss": 0.07744112610816956, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.2255, + "eval_samples_per_second": 661.235, + "eval_steps_per_second": 5.169, + "step": 14964 + }, + { + "epoch": 2.0, + "learning_rate": 1.6097954790096878e-05, + "loss": 0.1192, + "step": 15000 + }, + { + "epoch": 2.07, + "learning_rate": 1.596340150699677e-05, + "loss": 0.1195, + "step": 15500 + }, + { + "epoch": 2.14, + "learning_rate": 1.5828848223896664e-05, + "loss": 0.1201, + "step": 16000 + }, + { + "epoch": 2.21, + "learning_rate": 1.5694294940796557e-05, + "loss": 0.1194, + "step": 16500 + }, + { + "epoch": 2.27, + "learning_rate": 1.555974165769645e-05, + "loss": 0.1175, + "step": 17000 + }, + { + "epoch": 2.34, + "learning_rate": 1.542518837459634e-05, + "loss": 0.1175, + "step": 17500 + }, + { + "epoch": 2.41, + "learning_rate": 1.5290635091496233e-05, + "loss": 0.1172, + "step": 18000 + }, + { + "epoch": 2.47, + "learning_rate": 1.5156081808396126e-05, + "loss": 0.1154, + "step": 18500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5021528525296019e-05, + "loss": 0.1152, + "step": 19000 + }, + { + "epoch": 2.61, + "learning_rate": 1.4886975242195912e-05, + "loss": 0.1146, + "step": 19500 + }, + { + "epoch": 2.67, + "learning_rate": 1.4752421959095805e-05, + "loss": 0.1156, + "step": 20000 + }, + { + "epoch": 2.74, + "learning_rate": 1.4617868675995694e-05, + "loss": 0.1152, + "step": 20500 + }, + { + "epoch": 2.81, + "learning_rate": 1.4483315392895587e-05, + "loss": 0.1155, + "step": 21000 + }, + { + "epoch": 2.87, + "learning_rate": 1.434876210979548e-05, + "loss": 0.1143, + "step": 21500 + }, + { + "epoch": 2.94, + "learning_rate": 1.4214208826695373e-05, + "loss": 0.114, + "step": 22000 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9796480271457144, + "eval_f1": 0.0, + "eval_loss": 0.07054685801267624, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.3686, + "eval_samples_per_second": 716.271, + "eval_steps_per_second": 5.599, + "step": 22446 + }, + { + "epoch": 3.01, + "learning_rate": 1.4079655543595265e-05, + "loss": 0.1137, + "step": 22500 + }, + { + "epoch": 3.07, + "learning_rate": 1.3945102260495158e-05, + "loss": 0.1142, + "step": 23000 + }, + { + "epoch": 3.14, + "learning_rate": 1.3810548977395049e-05, + "loss": 0.1143, + "step": 23500 + }, + { + "epoch": 3.21, + "learning_rate": 1.3675995694294942e-05, + "loss": 0.1126, + "step": 24000 + }, + { + "epoch": 3.27, + "learning_rate": 1.3541442411194833e-05, + "loss": 0.1147, + "step": 24500 + }, + { + "epoch": 3.34, + "learning_rate": 1.3406889128094726e-05, + "loss": 0.1129, + "step": 25000 + }, + { + "epoch": 3.41, + "learning_rate": 1.3272335844994619e-05, + "loss": 0.1135, + "step": 25500 + }, + { + "epoch": 3.48, + "learning_rate": 1.3137782561894512e-05, + "loss": 0.112, + "step": 26000 + }, + { + "epoch": 3.54, + "learning_rate": 1.3003229278794403e-05, + "loss": 0.1116, + "step": 26500 + }, + { + "epoch": 3.61, + "learning_rate": 1.2868675995694295e-05, + "loss": 0.1109, + "step": 27000 + }, + { + "epoch": 3.68, + "learning_rate": 1.2734122712594188e-05, + "loss": 0.1119, + "step": 27500 + }, + { + "epoch": 3.74, + "learning_rate": 1.259956942949408e-05, + "loss": 0.112, + "step": 28000 + }, + { + "epoch": 3.81, + "learning_rate": 1.2465016146393974e-05, + "loss": 0.1127, + "step": 28500 + }, + { + "epoch": 3.88, + "learning_rate": 1.2330462863293867e-05, + "loss": 0.1113, + "step": 29000 + }, + { + "epoch": 3.94, + "learning_rate": 1.219590958019376e-05, + "loss": 0.112, + "step": 29500 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9803646974079206, + "eval_f1": 0.0, + "eval_loss": 0.06762129813432693, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.1754, + "eval_samples_per_second": 661.67, + "eval_steps_per_second": 5.172, + "step": 29928 + } + ], + "logging_steps": 500, + "max_steps": 74820, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 3.4016429569085824e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-29928/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-29928/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f375a252b40f093376af46a59a95f708005d615a --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-29928/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:420ab2eaed25185be1889c315a3a56ec6f0e5cd67d7d2d1c5e5dce5d2bc3075b +size 4091 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-37410/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-37410/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-37410/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-37410/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-37410/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..92d07b60aadb6bf3023820e4bf98fcc13b3a06b0 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-37410/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f775e0961987ba51e47887c3e6ded211a55c9cd7aa2f5b1db195f10fde57e6f +size 14695 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-37410/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-37410/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a2529a43716be7289c6252d63a664ff66f46eb57 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-37410/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef0aebd967a35b01c54bcd867578a2239f79da6aa99e4f41f2c1badfe81a8b4b +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-37410/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-37410/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bc2fca1e39dd641207dfab41891dd19312dc5c5a --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-37410/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21ea456c1c79bc9fb04e070c6d6156fad4a9b07018a34e6d964fb53c806593e2 +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-37410/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-37410/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..38ebb6b9c3f84bdc09fa6f8ee000c78b1d5c81fc --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-37410/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dcee0eaa0686cccc9bf89fbdc18b90fe7a8add00f6fc71d0c5923474a451192 +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-37410/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-37410/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..008c0648f46818ba5c74cc2df72f3c40550fe42e --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-37410/trainer_state.json @@ -0,0 +1,523 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 37410, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07, + "learning_rate": 2e-05, + "loss": 0.603, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 1.9865446716899895e-05, + "loss": 0.4359, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 1.9730893433799788e-05, + "loss": 0.3531, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 1.959634015069968e-05, + "loss": 0.299, + "step": 2000 + }, + { + "epoch": 0.33, + "learning_rate": 1.946178686759957e-05, + "loss": 0.2636, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 1.9327233584499463e-05, + "loss": 0.237, + "step": 3000 + }, + { + "epoch": 0.47, + "learning_rate": 1.9192680301399356e-05, + "loss": 0.2162, + "step": 3500 + }, + { + "epoch": 0.53, + "learning_rate": 1.905812701829925e-05, + "loss": 0.1995, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 1.8923573735199142e-05, + "loss": 0.1867, + "step": 4500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8789020452099035e-05, + "loss": 0.1772, + "step": 5000 + }, + { + "epoch": 0.74, + "learning_rate": 1.8654467168998925e-05, + "loss": 0.168, + "step": 5500 + }, + { + "epoch": 0.8, + "learning_rate": 1.8519913885898818e-05, + "loss": 0.1611, + "step": 6000 + }, + { + "epoch": 0.87, + "learning_rate": 1.838536060279871e-05, + "loss": 0.1558, + "step": 6500 + }, + { + "epoch": 0.94, + "learning_rate": 1.8250807319698604e-05, + "loss": 0.1509, + "step": 7000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.972481324523652, + "eval_f1": 0.0, + "eval_loss": 0.10318954288959503, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.8238, + "eval_samples_per_second": 711.667, + "eval_steps_per_second": 5.563, + "step": 7482 + }, + { + "epoch": 1.0, + "learning_rate": 1.8116254036598497e-05, + "loss": 0.1458, + "step": 7500 + }, + { + "epoch": 1.07, + "learning_rate": 1.7981700753498386e-05, + "loss": 0.1438, + "step": 8000 + }, + { + "epoch": 1.14, + "learning_rate": 1.784714747039828e-05, + "loss": 0.1389, + "step": 8500 + }, + { + "epoch": 1.2, + "learning_rate": 1.7712594187298172e-05, + "loss": 0.1365, + "step": 9000 + }, + { + "epoch": 1.27, + "learning_rate": 1.7578040904198065e-05, + "loss": 0.1345, + "step": 9500 + }, + { + "epoch": 1.34, + "learning_rate": 1.7443487621097955e-05, + "loss": 0.1327, + "step": 10000 + }, + { + "epoch": 1.4, + "learning_rate": 1.7308934337997848e-05, + "loss": 0.1304, + "step": 10500 + }, + { + "epoch": 1.47, + "learning_rate": 1.717438105489774e-05, + "loss": 0.1297, + "step": 11000 + }, + { + "epoch": 1.54, + "learning_rate": 1.7039827771797634e-05, + "loss": 0.1268, + "step": 11500 + }, + { + "epoch": 1.6, + "learning_rate": 1.6905274488697524e-05, + "loss": 0.1246, + "step": 12000 + }, + { + "epoch": 1.67, + "learning_rate": 1.6770721205597417e-05, + "loss": 0.1243, + "step": 12500 + }, + { + "epoch": 1.74, + "learning_rate": 1.663616792249731e-05, + "loss": 0.1237, + "step": 13000 + }, + { + "epoch": 1.8, + "learning_rate": 1.6501614639397203e-05, + "loss": 0.1224, + "step": 13500 + }, + { + "epoch": 1.87, + "learning_rate": 1.6367061356297096e-05, + "loss": 0.1218, + "step": 14000 + }, + { + "epoch": 1.94, + "learning_rate": 1.6232508073196985e-05, + "loss": 0.1207, + "step": 14500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9781232745980614, + "eval_f1": 0.0, + "eval_loss": 0.07744112610816956, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.2255, + "eval_samples_per_second": 661.235, + "eval_steps_per_second": 5.169, + "step": 14964 + }, + { + "epoch": 2.0, + "learning_rate": 1.6097954790096878e-05, + "loss": 0.1192, + "step": 15000 + }, + { + "epoch": 2.07, + "learning_rate": 1.596340150699677e-05, + "loss": 0.1195, + "step": 15500 + }, + { + "epoch": 2.14, + "learning_rate": 1.5828848223896664e-05, + "loss": 0.1201, + "step": 16000 + }, + { + "epoch": 2.21, + "learning_rate": 1.5694294940796557e-05, + "loss": 0.1194, + "step": 16500 + }, + { + "epoch": 2.27, + "learning_rate": 1.555974165769645e-05, + "loss": 0.1175, + "step": 17000 + }, + { + "epoch": 2.34, + "learning_rate": 1.542518837459634e-05, + "loss": 0.1175, + "step": 17500 + }, + { + "epoch": 2.41, + "learning_rate": 1.5290635091496233e-05, + "loss": 0.1172, + "step": 18000 + }, + { + "epoch": 2.47, + "learning_rate": 1.5156081808396126e-05, + "loss": 0.1154, + "step": 18500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5021528525296019e-05, + "loss": 0.1152, + "step": 19000 + }, + { + "epoch": 2.61, + "learning_rate": 1.4886975242195912e-05, + "loss": 0.1146, + "step": 19500 + }, + { + "epoch": 2.67, + "learning_rate": 1.4752421959095805e-05, + "loss": 0.1156, + "step": 20000 + }, + { + "epoch": 2.74, + "learning_rate": 1.4617868675995694e-05, + "loss": 0.1152, + "step": 20500 + }, + { + "epoch": 2.81, + "learning_rate": 1.4483315392895587e-05, + "loss": 0.1155, + "step": 21000 + }, + { + "epoch": 2.87, + "learning_rate": 1.434876210979548e-05, + "loss": 0.1143, + "step": 21500 + }, + { + "epoch": 2.94, + "learning_rate": 1.4214208826695373e-05, + "loss": 0.114, + "step": 22000 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9796480271457144, + "eval_f1": 0.0, + "eval_loss": 0.07054685801267624, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.3686, + "eval_samples_per_second": 716.271, + "eval_steps_per_second": 5.599, + "step": 22446 + }, + { + "epoch": 3.01, + "learning_rate": 1.4079655543595265e-05, + "loss": 0.1137, + "step": 22500 + }, + { + "epoch": 3.07, + "learning_rate": 1.3945102260495158e-05, + "loss": 0.1142, + "step": 23000 + }, + { + "epoch": 3.14, + "learning_rate": 1.3810548977395049e-05, + "loss": 0.1143, + "step": 23500 + }, + { + "epoch": 3.21, + "learning_rate": 1.3675995694294942e-05, + "loss": 0.1126, + "step": 24000 + }, + { + "epoch": 3.27, + "learning_rate": 1.3541442411194833e-05, + "loss": 0.1147, + "step": 24500 + }, + { + "epoch": 3.34, + "learning_rate": 1.3406889128094726e-05, + "loss": 0.1129, + "step": 25000 + }, + { + "epoch": 3.41, + "learning_rate": 1.3272335844994619e-05, + "loss": 0.1135, + "step": 25500 + }, + { + "epoch": 3.48, + "learning_rate": 1.3137782561894512e-05, + "loss": 0.112, + "step": 26000 + }, + { + "epoch": 3.54, + "learning_rate": 1.3003229278794403e-05, + "loss": 0.1116, + "step": 26500 + }, + { + "epoch": 3.61, + "learning_rate": 1.2868675995694295e-05, + "loss": 0.1109, + "step": 27000 + }, + { + "epoch": 3.68, + "learning_rate": 1.2734122712594188e-05, + "loss": 0.1119, + "step": 27500 + }, + { + "epoch": 3.74, + "learning_rate": 1.259956942949408e-05, + "loss": 0.112, + "step": 28000 + }, + { + "epoch": 3.81, + "learning_rate": 1.2465016146393974e-05, + "loss": 0.1127, + "step": 28500 + }, + { + "epoch": 3.88, + "learning_rate": 1.2330462863293867e-05, + "loss": 0.1113, + "step": 29000 + }, + { + "epoch": 3.94, + "learning_rate": 1.219590958019376e-05, + "loss": 0.112, + "step": 29500 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9803646974079206, + "eval_f1": 0.0, + "eval_loss": 0.06762129813432693, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.1754, + "eval_samples_per_second": 661.67, + "eval_steps_per_second": 5.172, + "step": 29928 + }, + { + "epoch": 4.01, + "learning_rate": 1.206135629709365e-05, + "loss": 0.1131, + "step": 30000 + }, + { + "epoch": 4.08, + "learning_rate": 1.1926803013993542e-05, + "loss": 0.111, + "step": 30500 + }, + { + "epoch": 4.14, + "learning_rate": 1.1792249730893435e-05, + "loss": 0.1113, + "step": 31000 + }, + { + "epoch": 4.21, + "learning_rate": 1.1657696447793328e-05, + "loss": 0.1127, + "step": 31500 + }, + { + "epoch": 4.28, + "learning_rate": 1.152314316469322e-05, + "loss": 0.1125, + "step": 32000 + }, + { + "epoch": 4.34, + "learning_rate": 1.1388589881593113e-05, + "loss": 0.1104, + "step": 32500 + }, + { + "epoch": 4.41, + "learning_rate": 1.1254036598493004e-05, + "loss": 0.1121, + "step": 33000 + }, + { + "epoch": 4.48, + "learning_rate": 1.1119483315392897e-05, + "loss": 0.1103, + "step": 33500 + }, + { + "epoch": 4.54, + "learning_rate": 1.0984930032292788e-05, + "loss": 0.1097, + "step": 34000 + }, + { + "epoch": 4.61, + "learning_rate": 1.0850376749192681e-05, + "loss": 0.1099, + "step": 34500 + }, + { + "epoch": 4.68, + "learning_rate": 1.0715823466092574e-05, + "loss": 0.1118, + "step": 35000 + }, + { + "epoch": 4.74, + "learning_rate": 1.0581270182992467e-05, + "loss": 0.1109, + "step": 35500 + }, + { + "epoch": 4.81, + "learning_rate": 1.0446716899892357e-05, + "loss": 0.1102, + "step": 36000 + }, + { + "epoch": 4.88, + "learning_rate": 1.031216361679225e-05, + "loss": 0.1097, + "step": 36500 + }, + { + "epoch": 4.95, + "learning_rate": 1.0177610333692143e-05, + "loss": 0.1103, + "step": 37000 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.9808290704859829, + "eval_f1": 0.0, + "eval_loss": 0.06593325734138489, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.3722, + "eval_samples_per_second": 716.234, + "eval_steps_per_second": 5.599, + "step": 37410 + } + ], + "logging_steps": 500, + "max_steps": 74820, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 4.252053696135728e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-37410/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-37410/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f375a252b40f093376af46a59a95f708005d615a --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-37410/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:420ab2eaed25185be1889c315a3a56ec6f0e5cd67d7d2d1c5e5dce5d2bc3075b +size 4091 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-44892/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-44892/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-44892/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-44892/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-44892/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c7a2d0c46eeb66e104f4b2d2060911169523dea --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-44892/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31b50a5ad8712dc7d532a40ff210423020942dd48023c1b050cfc8b17b23aa8a +size 14695 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-44892/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-44892/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..44ed934f7c8deeb18f50126331154b7f6a6c3912 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-44892/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a6fe547ee962873422d829e370379a83e75fc165e708ee536661a614f04018d +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-44892/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-44892/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6a820e15a354daf3ec520984b23e6fe786afca04 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-44892/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2ee5b3486670d33998adce168a823ad85a897e28c706d6b3b4c7d317ac3008b +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-44892/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-44892/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8b6bc6e9e5f35377926f5fa9c0647a5c5f13a1e --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-44892/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04b8e9929c5c026cdb0866b798ad37fddb86cbbceb958e03b72203a2e6c7d4dd +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-44892/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-44892/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cd7578a5aeb76cc3c8f7f379dbea473e23b6891e --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-44892/trainer_state.json @@ -0,0 +1,625 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.0, + "eval_steps": 500, + "global_step": 44892, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07, + "learning_rate": 2e-05, + "loss": 0.603, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 1.9865446716899895e-05, + "loss": 0.4359, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 1.9730893433799788e-05, + "loss": 0.3531, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 1.959634015069968e-05, + "loss": 0.299, + "step": 2000 + }, + { + "epoch": 0.33, + "learning_rate": 1.946178686759957e-05, + "loss": 0.2636, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 1.9327233584499463e-05, + "loss": 0.237, + "step": 3000 + }, + { + "epoch": 0.47, + "learning_rate": 1.9192680301399356e-05, + "loss": 0.2162, + "step": 3500 + }, + { + "epoch": 0.53, + "learning_rate": 1.905812701829925e-05, + "loss": 0.1995, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 1.8923573735199142e-05, + "loss": 0.1867, + "step": 4500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8789020452099035e-05, + "loss": 0.1772, + "step": 5000 + }, + { + "epoch": 0.74, + "learning_rate": 1.8654467168998925e-05, + "loss": 0.168, + "step": 5500 + }, + { + "epoch": 0.8, + "learning_rate": 1.8519913885898818e-05, + "loss": 0.1611, + "step": 6000 + }, + { + "epoch": 0.87, + "learning_rate": 1.838536060279871e-05, + "loss": 0.1558, + "step": 6500 + }, + { + "epoch": 0.94, + "learning_rate": 1.8250807319698604e-05, + "loss": 0.1509, + "step": 7000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.972481324523652, + "eval_f1": 0.0, + "eval_loss": 0.10318954288959503, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.8238, + "eval_samples_per_second": 711.667, + "eval_steps_per_second": 5.563, + "step": 7482 + }, + { + "epoch": 1.0, + "learning_rate": 1.8116254036598497e-05, + "loss": 0.1458, + "step": 7500 + }, + { + "epoch": 1.07, + "learning_rate": 1.7981700753498386e-05, + "loss": 0.1438, + "step": 8000 + }, + { + "epoch": 1.14, + "learning_rate": 1.784714747039828e-05, + "loss": 0.1389, + "step": 8500 + }, + { + "epoch": 1.2, + "learning_rate": 1.7712594187298172e-05, + "loss": 0.1365, + "step": 9000 + }, + { + "epoch": 1.27, + "learning_rate": 1.7578040904198065e-05, + "loss": 0.1345, + "step": 9500 + }, + { + "epoch": 1.34, + "learning_rate": 1.7443487621097955e-05, + "loss": 0.1327, + "step": 10000 + }, + { + "epoch": 1.4, + "learning_rate": 1.7308934337997848e-05, + "loss": 0.1304, + "step": 10500 + }, + { + "epoch": 1.47, + "learning_rate": 1.717438105489774e-05, + "loss": 0.1297, + "step": 11000 + }, + { + "epoch": 1.54, + "learning_rate": 1.7039827771797634e-05, + "loss": 0.1268, + "step": 11500 + }, + { + "epoch": 1.6, + "learning_rate": 1.6905274488697524e-05, + "loss": 0.1246, + "step": 12000 + }, + { + "epoch": 1.67, + "learning_rate": 1.6770721205597417e-05, + "loss": 0.1243, + "step": 12500 + }, + { + "epoch": 1.74, + "learning_rate": 1.663616792249731e-05, + "loss": 0.1237, + "step": 13000 + }, + { + "epoch": 1.8, + "learning_rate": 1.6501614639397203e-05, + "loss": 0.1224, + "step": 13500 + }, + { + "epoch": 1.87, + "learning_rate": 1.6367061356297096e-05, + "loss": 0.1218, + "step": 14000 + }, + { + "epoch": 1.94, + "learning_rate": 1.6232508073196985e-05, + "loss": 0.1207, + "step": 14500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9781232745980614, + "eval_f1": 0.0, + "eval_loss": 0.07744112610816956, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.2255, + "eval_samples_per_second": 661.235, + "eval_steps_per_second": 5.169, + "step": 14964 + }, + { + "epoch": 2.0, + "learning_rate": 1.6097954790096878e-05, + "loss": 0.1192, + "step": 15000 + }, + { + "epoch": 2.07, + "learning_rate": 1.596340150699677e-05, + "loss": 0.1195, + "step": 15500 + }, + { + "epoch": 2.14, + "learning_rate": 1.5828848223896664e-05, + "loss": 0.1201, + "step": 16000 + }, + { + "epoch": 2.21, + "learning_rate": 1.5694294940796557e-05, + "loss": 0.1194, + "step": 16500 + }, + { + "epoch": 2.27, + "learning_rate": 1.555974165769645e-05, + "loss": 0.1175, + "step": 17000 + }, + { + "epoch": 2.34, + "learning_rate": 1.542518837459634e-05, + "loss": 0.1175, + "step": 17500 + }, + { + "epoch": 2.41, + "learning_rate": 1.5290635091496233e-05, + "loss": 0.1172, + "step": 18000 + }, + { + "epoch": 2.47, + "learning_rate": 1.5156081808396126e-05, + "loss": 0.1154, + "step": 18500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5021528525296019e-05, + "loss": 0.1152, + "step": 19000 + }, + { + "epoch": 2.61, + "learning_rate": 1.4886975242195912e-05, + "loss": 0.1146, + "step": 19500 + }, + { + "epoch": 2.67, + "learning_rate": 1.4752421959095805e-05, + "loss": 0.1156, + "step": 20000 + }, + { + "epoch": 2.74, + "learning_rate": 1.4617868675995694e-05, + "loss": 0.1152, + "step": 20500 + }, + { + "epoch": 2.81, + "learning_rate": 1.4483315392895587e-05, + "loss": 0.1155, + "step": 21000 + }, + { + "epoch": 2.87, + "learning_rate": 1.434876210979548e-05, + "loss": 0.1143, + "step": 21500 + }, + { + "epoch": 2.94, + "learning_rate": 1.4214208826695373e-05, + "loss": 0.114, + "step": 22000 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9796480271457144, + "eval_f1": 0.0, + "eval_loss": 0.07054685801267624, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.3686, + "eval_samples_per_second": 716.271, + "eval_steps_per_second": 5.599, + "step": 22446 + }, + { + "epoch": 3.01, + "learning_rate": 1.4079655543595265e-05, + "loss": 0.1137, + "step": 22500 + }, + { + "epoch": 3.07, + "learning_rate": 1.3945102260495158e-05, + "loss": 0.1142, + "step": 23000 + }, + { + "epoch": 3.14, + "learning_rate": 1.3810548977395049e-05, + "loss": 0.1143, + "step": 23500 + }, + { + "epoch": 3.21, + "learning_rate": 1.3675995694294942e-05, + "loss": 0.1126, + "step": 24000 + }, + { + "epoch": 3.27, + "learning_rate": 1.3541442411194833e-05, + "loss": 0.1147, + "step": 24500 + }, + { + "epoch": 3.34, + "learning_rate": 1.3406889128094726e-05, + "loss": 0.1129, + "step": 25000 + }, + { + "epoch": 3.41, + "learning_rate": 1.3272335844994619e-05, + "loss": 0.1135, + "step": 25500 + }, + { + "epoch": 3.48, + "learning_rate": 1.3137782561894512e-05, + "loss": 0.112, + "step": 26000 + }, + { + "epoch": 3.54, + "learning_rate": 1.3003229278794403e-05, + "loss": 0.1116, + "step": 26500 + }, + { + "epoch": 3.61, + "learning_rate": 1.2868675995694295e-05, + "loss": 0.1109, + "step": 27000 + }, + { + "epoch": 3.68, + "learning_rate": 1.2734122712594188e-05, + "loss": 0.1119, + "step": 27500 + }, + { + "epoch": 3.74, + "learning_rate": 1.259956942949408e-05, + "loss": 0.112, + "step": 28000 + }, + { + "epoch": 3.81, + "learning_rate": 1.2465016146393974e-05, + "loss": 0.1127, + "step": 28500 + }, + { + "epoch": 3.88, + "learning_rate": 1.2330462863293867e-05, + "loss": 0.1113, + "step": 29000 + }, + { + "epoch": 3.94, + "learning_rate": 1.219590958019376e-05, + "loss": 0.112, + "step": 29500 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9803646974079206, + "eval_f1": 0.0, + "eval_loss": 0.06762129813432693, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.1754, + "eval_samples_per_second": 661.67, + "eval_steps_per_second": 5.172, + "step": 29928 + }, + { + "epoch": 4.01, + "learning_rate": 1.206135629709365e-05, + "loss": 0.1131, + "step": 30000 + }, + { + "epoch": 4.08, + "learning_rate": 1.1926803013993542e-05, + "loss": 0.111, + "step": 30500 + }, + { + "epoch": 4.14, + "learning_rate": 1.1792249730893435e-05, + "loss": 0.1113, + "step": 31000 + }, + { + "epoch": 4.21, + "learning_rate": 1.1657696447793328e-05, + "loss": 0.1127, + "step": 31500 + }, + { + "epoch": 4.28, + "learning_rate": 1.152314316469322e-05, + "loss": 0.1125, + "step": 32000 + }, + { + "epoch": 4.34, + "learning_rate": 1.1388589881593113e-05, + "loss": 0.1104, + "step": 32500 + }, + { + "epoch": 4.41, + "learning_rate": 1.1254036598493004e-05, + "loss": 0.1121, + "step": 33000 + }, + { + "epoch": 4.48, + "learning_rate": 1.1119483315392897e-05, + "loss": 0.1103, + "step": 33500 + }, + { + "epoch": 4.54, + "learning_rate": 1.0984930032292788e-05, + "loss": 0.1097, + "step": 34000 + }, + { + "epoch": 4.61, + "learning_rate": 1.0850376749192681e-05, + "loss": 0.1099, + "step": 34500 + }, + { + "epoch": 4.68, + "learning_rate": 1.0715823466092574e-05, + "loss": 0.1118, + "step": 35000 + }, + { + "epoch": 4.74, + "learning_rate": 1.0581270182992467e-05, + "loss": 0.1109, + "step": 35500 + }, + { + "epoch": 4.81, + "learning_rate": 1.0446716899892357e-05, + "loss": 0.1102, + "step": 36000 + }, + { + "epoch": 4.88, + "learning_rate": 1.031216361679225e-05, + "loss": 0.1097, + "step": 36500 + }, + { + "epoch": 4.95, + "learning_rate": 1.0177610333692143e-05, + "loss": 0.1103, + "step": 37000 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.9808290704859829, + "eval_f1": 0.0, + "eval_loss": 0.06593325734138489, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.3722, + "eval_samples_per_second": 716.234, + "eval_steps_per_second": 5.599, + "step": 37410 + }, + { + "epoch": 5.01, + "learning_rate": 1.0043057050592036e-05, + "loss": 0.1099, + "step": 37500 + }, + { + "epoch": 5.08, + "learning_rate": 9.908503767491927e-06, + "loss": 0.1108, + "step": 38000 + }, + { + "epoch": 5.15, + "learning_rate": 9.77395048439182e-06, + "loss": 0.1106, + "step": 38500 + }, + { + "epoch": 5.21, + "learning_rate": 9.639397201291713e-06, + "loss": 0.1112, + "step": 39000 + }, + { + "epoch": 5.28, + "learning_rate": 9.504843918191604e-06, + "loss": 0.1106, + "step": 39500 + }, + { + "epoch": 5.35, + "learning_rate": 9.370290635091497e-06, + "loss": 0.1088, + "step": 40000 + }, + { + "epoch": 5.41, + "learning_rate": 9.23573735199139e-06, + "loss": 0.1085, + "step": 40500 + }, + { + "epoch": 5.48, + "learning_rate": 9.101184068891282e-06, + "loss": 0.1106, + "step": 41000 + }, + { + "epoch": 5.55, + "learning_rate": 8.966630785791175e-06, + "loss": 0.1087, + "step": 41500 + }, + { + "epoch": 5.61, + "learning_rate": 8.832077502691066e-06, + "loss": 0.1095, + "step": 42000 + }, + { + "epoch": 5.68, + "learning_rate": 8.697524219590959e-06, + "loss": 0.1093, + "step": 42500 + }, + { + "epoch": 5.75, + "learning_rate": 8.56297093649085e-06, + "loss": 0.1105, + "step": 43000 + }, + { + "epoch": 5.81, + "learning_rate": 8.428417653390743e-06, + "loss": 0.1101, + "step": 43500 + }, + { + "epoch": 5.88, + "learning_rate": 8.293864370290636e-06, + "loss": 0.1098, + "step": 44000 + }, + { + "epoch": 5.95, + "learning_rate": 8.159311087190527e-06, + "loss": 0.1087, + "step": 44500 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9811471843268601, + "eval_f1": 0.0, + "eval_loss": 0.064879409968853, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.6101, + "eval_samples_per_second": 657.916, + "eval_steps_per_second": 5.143, + "step": 44892 + } + ], + "logging_steps": 500, + "max_steps": 74820, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 5.1024644353628736e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-44892/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-44892/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f375a252b40f093376af46a59a95f708005d615a --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-44892/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:420ab2eaed25185be1889c315a3a56ec6f0e5cd67d7d2d1c5e5dce5d2bc3075b +size 4091 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-52374/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-52374/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-52374/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-52374/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-52374/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d1ffe7d8d6fba72e8fc9806a0cf047c425d40ca6 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-52374/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76b46bce30c0a8f233031a0817ff7c7fb07fb9fdfea9f66490c44ea38c56d423 +size 14695 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-52374/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-52374/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..9c2a7143b6f28154968cb5c2d7bedc639df74402 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-52374/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:789f9c03ac0a28f8150756f0211c2c665fe28f44af4736b6fa857b7d34c49882 +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-52374/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-52374/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..76c728d1ef1afec8421615601d92566d1203beae --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-52374/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff9f9ba597613c9b6de2929cd52809617964b278efcc8d2d7f05547c143c6c43 +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-52374/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-52374/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..11237e6d609494de06c80c17efded37d2f3818b1 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-52374/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58cdca43612bcf5318fcbf9957a0b062eac9129103ad04c51d2784c587eaceab +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-52374/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-52374/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9e404340e3fc4d24e3b39fa6ca8e13adb33bdb99 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-52374/trainer_state.json @@ -0,0 +1,727 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 52374, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07, + "learning_rate": 2e-05, + "loss": 0.603, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 1.9865446716899895e-05, + "loss": 0.4359, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 1.9730893433799788e-05, + "loss": 0.3531, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 1.959634015069968e-05, + "loss": 0.299, + "step": 2000 + }, + { + "epoch": 0.33, + "learning_rate": 1.946178686759957e-05, + "loss": 0.2636, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 1.9327233584499463e-05, + "loss": 0.237, + "step": 3000 + }, + { + "epoch": 0.47, + "learning_rate": 1.9192680301399356e-05, + "loss": 0.2162, + "step": 3500 + }, + { + "epoch": 0.53, + "learning_rate": 1.905812701829925e-05, + "loss": 0.1995, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 1.8923573735199142e-05, + "loss": 0.1867, + "step": 4500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8789020452099035e-05, + "loss": 0.1772, + "step": 5000 + }, + { + "epoch": 0.74, + "learning_rate": 1.8654467168998925e-05, + "loss": 0.168, + "step": 5500 + }, + { + "epoch": 0.8, + "learning_rate": 1.8519913885898818e-05, + "loss": 0.1611, + "step": 6000 + }, + { + "epoch": 0.87, + "learning_rate": 1.838536060279871e-05, + "loss": 0.1558, + "step": 6500 + }, + { + "epoch": 0.94, + "learning_rate": 1.8250807319698604e-05, + "loss": 0.1509, + "step": 7000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.972481324523652, + "eval_f1": 0.0, + "eval_loss": 0.10318954288959503, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.8238, + "eval_samples_per_second": 711.667, + "eval_steps_per_second": 5.563, + "step": 7482 + }, + { + "epoch": 1.0, + "learning_rate": 1.8116254036598497e-05, + "loss": 0.1458, + "step": 7500 + }, + { + "epoch": 1.07, + "learning_rate": 1.7981700753498386e-05, + "loss": 0.1438, + "step": 8000 + }, + { + "epoch": 1.14, + "learning_rate": 1.784714747039828e-05, + "loss": 0.1389, + "step": 8500 + }, + { + "epoch": 1.2, + "learning_rate": 1.7712594187298172e-05, + "loss": 0.1365, + "step": 9000 + }, + { + "epoch": 1.27, + "learning_rate": 1.7578040904198065e-05, + "loss": 0.1345, + "step": 9500 + }, + { + "epoch": 1.34, + "learning_rate": 1.7443487621097955e-05, + "loss": 0.1327, + "step": 10000 + }, + { + "epoch": 1.4, + "learning_rate": 1.7308934337997848e-05, + "loss": 0.1304, + "step": 10500 + }, + { + "epoch": 1.47, + "learning_rate": 1.717438105489774e-05, + "loss": 0.1297, + "step": 11000 + }, + { + "epoch": 1.54, + "learning_rate": 1.7039827771797634e-05, + "loss": 0.1268, + "step": 11500 + }, + { + "epoch": 1.6, + "learning_rate": 1.6905274488697524e-05, + "loss": 0.1246, + "step": 12000 + }, + { + "epoch": 1.67, + "learning_rate": 1.6770721205597417e-05, + "loss": 0.1243, + "step": 12500 + }, + { + "epoch": 1.74, + "learning_rate": 1.663616792249731e-05, + "loss": 0.1237, + "step": 13000 + }, + { + "epoch": 1.8, + "learning_rate": 1.6501614639397203e-05, + "loss": 0.1224, + "step": 13500 + }, + { + "epoch": 1.87, + "learning_rate": 1.6367061356297096e-05, + "loss": 0.1218, + "step": 14000 + }, + { + "epoch": 1.94, + "learning_rate": 1.6232508073196985e-05, + "loss": 0.1207, + "step": 14500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9781232745980614, + "eval_f1": 0.0, + "eval_loss": 0.07744112610816956, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.2255, + "eval_samples_per_second": 661.235, + "eval_steps_per_second": 5.169, + "step": 14964 + }, + { + "epoch": 2.0, + "learning_rate": 1.6097954790096878e-05, + "loss": 0.1192, + "step": 15000 + }, + { + "epoch": 2.07, + "learning_rate": 1.596340150699677e-05, + "loss": 0.1195, + "step": 15500 + }, + { + "epoch": 2.14, + "learning_rate": 1.5828848223896664e-05, + "loss": 0.1201, + "step": 16000 + }, + { + "epoch": 2.21, + "learning_rate": 1.5694294940796557e-05, + "loss": 0.1194, + "step": 16500 + }, + { + "epoch": 2.27, + "learning_rate": 1.555974165769645e-05, + "loss": 0.1175, + "step": 17000 + }, + { + "epoch": 2.34, + "learning_rate": 1.542518837459634e-05, + "loss": 0.1175, + "step": 17500 + }, + { + "epoch": 2.41, + "learning_rate": 1.5290635091496233e-05, + "loss": 0.1172, + "step": 18000 + }, + { + "epoch": 2.47, + "learning_rate": 1.5156081808396126e-05, + "loss": 0.1154, + "step": 18500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5021528525296019e-05, + "loss": 0.1152, + "step": 19000 + }, + { + "epoch": 2.61, + "learning_rate": 1.4886975242195912e-05, + "loss": 0.1146, + "step": 19500 + }, + { + "epoch": 2.67, + "learning_rate": 1.4752421959095805e-05, + "loss": 0.1156, + "step": 20000 + }, + { + "epoch": 2.74, + "learning_rate": 1.4617868675995694e-05, + "loss": 0.1152, + "step": 20500 + }, + { + "epoch": 2.81, + "learning_rate": 1.4483315392895587e-05, + "loss": 0.1155, + "step": 21000 + }, + { + "epoch": 2.87, + "learning_rate": 1.434876210979548e-05, + "loss": 0.1143, + "step": 21500 + }, + { + "epoch": 2.94, + "learning_rate": 1.4214208826695373e-05, + "loss": 0.114, + "step": 22000 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9796480271457144, + "eval_f1": 0.0, + "eval_loss": 0.07054685801267624, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.3686, + "eval_samples_per_second": 716.271, + "eval_steps_per_second": 5.599, + "step": 22446 + }, + { + "epoch": 3.01, + "learning_rate": 1.4079655543595265e-05, + "loss": 0.1137, + "step": 22500 + }, + { + "epoch": 3.07, + "learning_rate": 1.3945102260495158e-05, + "loss": 0.1142, + "step": 23000 + }, + { + "epoch": 3.14, + "learning_rate": 1.3810548977395049e-05, + "loss": 0.1143, + "step": 23500 + }, + { + "epoch": 3.21, + "learning_rate": 1.3675995694294942e-05, + "loss": 0.1126, + "step": 24000 + }, + { + "epoch": 3.27, + "learning_rate": 1.3541442411194833e-05, + "loss": 0.1147, + "step": 24500 + }, + { + "epoch": 3.34, + "learning_rate": 1.3406889128094726e-05, + "loss": 0.1129, + "step": 25000 + }, + { + "epoch": 3.41, + "learning_rate": 1.3272335844994619e-05, + "loss": 0.1135, + "step": 25500 + }, + { + "epoch": 3.48, + "learning_rate": 1.3137782561894512e-05, + "loss": 0.112, + "step": 26000 + }, + { + "epoch": 3.54, + "learning_rate": 1.3003229278794403e-05, + "loss": 0.1116, + "step": 26500 + }, + { + "epoch": 3.61, + "learning_rate": 1.2868675995694295e-05, + "loss": 0.1109, + "step": 27000 + }, + { + "epoch": 3.68, + "learning_rate": 1.2734122712594188e-05, + "loss": 0.1119, + "step": 27500 + }, + { + "epoch": 3.74, + "learning_rate": 1.259956942949408e-05, + "loss": 0.112, + "step": 28000 + }, + { + "epoch": 3.81, + "learning_rate": 1.2465016146393974e-05, + "loss": 0.1127, + "step": 28500 + }, + { + "epoch": 3.88, + "learning_rate": 1.2330462863293867e-05, + "loss": 0.1113, + "step": 29000 + }, + { + "epoch": 3.94, + "learning_rate": 1.219590958019376e-05, + "loss": 0.112, + "step": 29500 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9803646974079206, + "eval_f1": 0.0, + "eval_loss": 0.06762129813432693, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.1754, + "eval_samples_per_second": 661.67, + "eval_steps_per_second": 5.172, + "step": 29928 + }, + { + "epoch": 4.01, + "learning_rate": 1.206135629709365e-05, + "loss": 0.1131, + "step": 30000 + }, + { + "epoch": 4.08, + "learning_rate": 1.1926803013993542e-05, + "loss": 0.111, + "step": 30500 + }, + { + "epoch": 4.14, + "learning_rate": 1.1792249730893435e-05, + "loss": 0.1113, + "step": 31000 + }, + { + "epoch": 4.21, + "learning_rate": 1.1657696447793328e-05, + "loss": 0.1127, + "step": 31500 + }, + { + "epoch": 4.28, + "learning_rate": 1.152314316469322e-05, + "loss": 0.1125, + "step": 32000 + }, + { + "epoch": 4.34, + "learning_rate": 1.1388589881593113e-05, + "loss": 0.1104, + "step": 32500 + }, + { + "epoch": 4.41, + "learning_rate": 1.1254036598493004e-05, + "loss": 0.1121, + "step": 33000 + }, + { + "epoch": 4.48, + "learning_rate": 1.1119483315392897e-05, + "loss": 0.1103, + "step": 33500 + }, + { + "epoch": 4.54, + "learning_rate": 1.0984930032292788e-05, + "loss": 0.1097, + "step": 34000 + }, + { + "epoch": 4.61, + "learning_rate": 1.0850376749192681e-05, + "loss": 0.1099, + "step": 34500 + }, + { + "epoch": 4.68, + "learning_rate": 1.0715823466092574e-05, + "loss": 0.1118, + "step": 35000 + }, + { + "epoch": 4.74, + "learning_rate": 1.0581270182992467e-05, + "loss": 0.1109, + "step": 35500 + }, + { + "epoch": 4.81, + "learning_rate": 1.0446716899892357e-05, + "loss": 0.1102, + "step": 36000 + }, + { + "epoch": 4.88, + "learning_rate": 1.031216361679225e-05, + "loss": 0.1097, + "step": 36500 + }, + { + "epoch": 4.95, + "learning_rate": 1.0177610333692143e-05, + "loss": 0.1103, + "step": 37000 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.9808290704859829, + "eval_f1": 0.0, + "eval_loss": 0.06593325734138489, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.3722, + "eval_samples_per_second": 716.234, + "eval_steps_per_second": 5.599, + "step": 37410 + }, + { + "epoch": 5.01, + "learning_rate": 1.0043057050592036e-05, + "loss": 0.1099, + "step": 37500 + }, + { + "epoch": 5.08, + "learning_rate": 9.908503767491927e-06, + "loss": 0.1108, + "step": 38000 + }, + { + "epoch": 5.15, + "learning_rate": 9.77395048439182e-06, + "loss": 0.1106, + "step": 38500 + }, + { + "epoch": 5.21, + "learning_rate": 9.639397201291713e-06, + "loss": 0.1112, + "step": 39000 + }, + { + "epoch": 5.28, + "learning_rate": 9.504843918191604e-06, + "loss": 0.1106, + "step": 39500 + }, + { + "epoch": 5.35, + "learning_rate": 9.370290635091497e-06, + "loss": 0.1088, + "step": 40000 + }, + { + "epoch": 5.41, + "learning_rate": 9.23573735199139e-06, + "loss": 0.1085, + "step": 40500 + }, + { + "epoch": 5.48, + "learning_rate": 9.101184068891282e-06, + "loss": 0.1106, + "step": 41000 + }, + { + "epoch": 5.55, + "learning_rate": 8.966630785791175e-06, + "loss": 0.1087, + "step": 41500 + }, + { + "epoch": 5.61, + "learning_rate": 8.832077502691066e-06, + "loss": 0.1095, + "step": 42000 + }, + { + "epoch": 5.68, + "learning_rate": 8.697524219590959e-06, + "loss": 0.1093, + "step": 42500 + }, + { + "epoch": 5.75, + "learning_rate": 8.56297093649085e-06, + "loss": 0.1105, + "step": 43000 + }, + { + "epoch": 5.81, + "learning_rate": 8.428417653390743e-06, + "loss": 0.1101, + "step": 43500 + }, + { + "epoch": 5.88, + "learning_rate": 8.293864370290636e-06, + "loss": 0.1098, + "step": 44000 + }, + { + "epoch": 5.95, + "learning_rate": 8.159311087190527e-06, + "loss": 0.1087, + "step": 44500 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9811471843268601, + "eval_f1": 0.0, + "eval_loss": 0.064879409968853, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.6101, + "eval_samples_per_second": 657.916, + "eval_steps_per_second": 5.143, + "step": 44892 + }, + { + "epoch": 6.01, + "learning_rate": 8.02475780409042e-06, + "loss": 0.1114, + "step": 45000 + }, + { + "epoch": 6.08, + "learning_rate": 7.890204520990313e-06, + "loss": 0.1105, + "step": 45500 + }, + { + "epoch": 6.15, + "learning_rate": 7.755651237890205e-06, + "loss": 0.1095, + "step": 46000 + }, + { + "epoch": 6.21, + "learning_rate": 7.621097954790098e-06, + "loss": 0.11, + "step": 46500 + }, + { + "epoch": 6.28, + "learning_rate": 7.486544671689991e-06, + "loss": 0.1093, + "step": 47000 + }, + { + "epoch": 6.35, + "learning_rate": 7.351991388589882e-06, + "loss": 0.1089, + "step": 47500 + }, + { + "epoch": 6.42, + "learning_rate": 7.217438105489775e-06, + "loss": 0.1104, + "step": 48000 + }, + { + "epoch": 6.48, + "learning_rate": 7.082884822389667e-06, + "loss": 0.1093, + "step": 48500 + }, + { + "epoch": 6.55, + "learning_rate": 6.948331539289559e-06, + "loss": 0.1097, + "step": 49000 + }, + { + "epoch": 6.62, + "learning_rate": 6.813778256189451e-06, + "loss": 0.1097, + "step": 49500 + }, + { + "epoch": 6.68, + "learning_rate": 6.679224973089344e-06, + "loss": 0.1093, + "step": 50000 + }, + { + "epoch": 6.75, + "learning_rate": 6.544671689989236e-06, + "loss": 0.1087, + "step": 50500 + }, + { + "epoch": 6.82, + "learning_rate": 6.410118406889129e-06, + "loss": 0.1096, + "step": 51000 + }, + { + "epoch": 6.88, + "learning_rate": 6.275565123789022e-06, + "loss": 0.1089, + "step": 51500 + }, + { + "epoch": 6.95, + "learning_rate": 6.141011840688913e-06, + "loss": 0.1077, + "step": 52000 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.9813263518924117, + "eval_f1": 0.0, + "eval_loss": 0.06425908952951431, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.4697, + "eval_samples_per_second": 715.243, + "eval_steps_per_second": 5.591, + "step": 52374 + } + ], + "logging_steps": 500, + "max_steps": 74820, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 5.95287517459002e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-52374/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-52374/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f375a252b40f093376af46a59a95f708005d615a --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-52374/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:420ab2eaed25185be1889c315a3a56ec6f0e5cd67d7d2d1c5e5dce5d2bc3075b +size 4091 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-59856/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-59856/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-59856/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-59856/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-59856/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c8b5da4aecd17712b2a05558acd4b58562b8484e --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-59856/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2234bccc1c0ae28c1a9afeb50ea0efd8f9209aa972f3a7c26a12ece7901118f9 +size 14695 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-59856/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-59856/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..3a450d56c98edb8ff10737aac872a4863821bbdd --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-59856/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c362221eb35a9c1ad1f86502fea56cb785ecd4acbc4f5d3799ab80eab0faff9 +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-59856/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-59856/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9d8549a42235aa1fecfa8ee8a9f73f4e3fcb8bd9 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-59856/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d630713282a7e689b0e08e8ab7654d39f94f47b8f456bac345c21dba19706091 +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-59856/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-59856/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dbe6871c21d025b139c73307681661ec9c501d60 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-59856/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67fdbe2ad084cae31bd619a47ba1b70ba128ff67dd25af4b47b1bd149313fe24 +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-59856/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-59856/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..56ee8d4130ed7d4881a77b7b8c9925143f96c3f7 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-59856/trainer_state.json @@ -0,0 +1,829 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 8.0, + "eval_steps": 500, + "global_step": 59856, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07, + "learning_rate": 2e-05, + "loss": 0.603, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 1.9865446716899895e-05, + "loss": 0.4359, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 1.9730893433799788e-05, + "loss": 0.3531, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 1.959634015069968e-05, + "loss": 0.299, + "step": 2000 + }, + { + "epoch": 0.33, + "learning_rate": 1.946178686759957e-05, + "loss": 0.2636, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 1.9327233584499463e-05, + "loss": 0.237, + "step": 3000 + }, + { + "epoch": 0.47, + "learning_rate": 1.9192680301399356e-05, + "loss": 0.2162, + "step": 3500 + }, + { + "epoch": 0.53, + "learning_rate": 1.905812701829925e-05, + "loss": 0.1995, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 1.8923573735199142e-05, + "loss": 0.1867, + "step": 4500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8789020452099035e-05, + "loss": 0.1772, + "step": 5000 + }, + { + "epoch": 0.74, + "learning_rate": 1.8654467168998925e-05, + "loss": 0.168, + "step": 5500 + }, + { + "epoch": 0.8, + "learning_rate": 1.8519913885898818e-05, + "loss": 0.1611, + "step": 6000 + }, + { + "epoch": 0.87, + "learning_rate": 1.838536060279871e-05, + "loss": 0.1558, + "step": 6500 + }, + { + "epoch": 0.94, + "learning_rate": 1.8250807319698604e-05, + "loss": 0.1509, + "step": 7000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.972481324523652, + "eval_f1": 0.0, + "eval_loss": 0.10318954288959503, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.8238, + "eval_samples_per_second": 711.667, + "eval_steps_per_second": 5.563, + "step": 7482 + }, + { + "epoch": 1.0, + "learning_rate": 1.8116254036598497e-05, + "loss": 0.1458, + "step": 7500 + }, + { + "epoch": 1.07, + "learning_rate": 1.7981700753498386e-05, + "loss": 0.1438, + "step": 8000 + }, + { + "epoch": 1.14, + "learning_rate": 1.784714747039828e-05, + "loss": 0.1389, + "step": 8500 + }, + { + "epoch": 1.2, + "learning_rate": 1.7712594187298172e-05, + "loss": 0.1365, + "step": 9000 + }, + { + "epoch": 1.27, + "learning_rate": 1.7578040904198065e-05, + "loss": 0.1345, + "step": 9500 + }, + { + "epoch": 1.34, + "learning_rate": 1.7443487621097955e-05, + "loss": 0.1327, + "step": 10000 + }, + { + "epoch": 1.4, + "learning_rate": 1.7308934337997848e-05, + "loss": 0.1304, + "step": 10500 + }, + { + "epoch": 1.47, + "learning_rate": 1.717438105489774e-05, + "loss": 0.1297, + "step": 11000 + }, + { + "epoch": 1.54, + "learning_rate": 1.7039827771797634e-05, + "loss": 0.1268, + "step": 11500 + }, + { + "epoch": 1.6, + "learning_rate": 1.6905274488697524e-05, + "loss": 0.1246, + "step": 12000 + }, + { + "epoch": 1.67, + "learning_rate": 1.6770721205597417e-05, + "loss": 0.1243, + "step": 12500 + }, + { + "epoch": 1.74, + "learning_rate": 1.663616792249731e-05, + "loss": 0.1237, + "step": 13000 + }, + { + "epoch": 1.8, + "learning_rate": 1.6501614639397203e-05, + "loss": 0.1224, + "step": 13500 + }, + { + "epoch": 1.87, + "learning_rate": 1.6367061356297096e-05, + "loss": 0.1218, + "step": 14000 + }, + { + "epoch": 1.94, + "learning_rate": 1.6232508073196985e-05, + "loss": 0.1207, + "step": 14500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9781232745980614, + "eval_f1": 0.0, + "eval_loss": 0.07744112610816956, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.2255, + "eval_samples_per_second": 661.235, + "eval_steps_per_second": 5.169, + "step": 14964 + }, + { + "epoch": 2.0, + "learning_rate": 1.6097954790096878e-05, + "loss": 0.1192, + "step": 15000 + }, + { + "epoch": 2.07, + "learning_rate": 1.596340150699677e-05, + "loss": 0.1195, + "step": 15500 + }, + { + "epoch": 2.14, + "learning_rate": 1.5828848223896664e-05, + "loss": 0.1201, + "step": 16000 + }, + { + "epoch": 2.21, + "learning_rate": 1.5694294940796557e-05, + "loss": 0.1194, + "step": 16500 + }, + { + "epoch": 2.27, + "learning_rate": 1.555974165769645e-05, + "loss": 0.1175, + "step": 17000 + }, + { + "epoch": 2.34, + "learning_rate": 1.542518837459634e-05, + "loss": 0.1175, + "step": 17500 + }, + { + "epoch": 2.41, + "learning_rate": 1.5290635091496233e-05, + "loss": 0.1172, + "step": 18000 + }, + { + "epoch": 2.47, + "learning_rate": 1.5156081808396126e-05, + "loss": 0.1154, + "step": 18500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5021528525296019e-05, + "loss": 0.1152, + "step": 19000 + }, + { + "epoch": 2.61, + "learning_rate": 1.4886975242195912e-05, + "loss": 0.1146, + "step": 19500 + }, + { + "epoch": 2.67, + "learning_rate": 1.4752421959095805e-05, + "loss": 0.1156, + "step": 20000 + }, + { + "epoch": 2.74, + "learning_rate": 1.4617868675995694e-05, + "loss": 0.1152, + "step": 20500 + }, + { + "epoch": 2.81, + "learning_rate": 1.4483315392895587e-05, + "loss": 0.1155, + "step": 21000 + }, + { + "epoch": 2.87, + "learning_rate": 1.434876210979548e-05, + "loss": 0.1143, + "step": 21500 + }, + { + "epoch": 2.94, + "learning_rate": 1.4214208826695373e-05, + "loss": 0.114, + "step": 22000 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9796480271457144, + "eval_f1": 0.0, + "eval_loss": 0.07054685801267624, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.3686, + "eval_samples_per_second": 716.271, + "eval_steps_per_second": 5.599, + "step": 22446 + }, + { + "epoch": 3.01, + "learning_rate": 1.4079655543595265e-05, + "loss": 0.1137, + "step": 22500 + }, + { + "epoch": 3.07, + "learning_rate": 1.3945102260495158e-05, + "loss": 0.1142, + "step": 23000 + }, + { + "epoch": 3.14, + "learning_rate": 1.3810548977395049e-05, + "loss": 0.1143, + "step": 23500 + }, + { + "epoch": 3.21, + "learning_rate": 1.3675995694294942e-05, + "loss": 0.1126, + "step": 24000 + }, + { + "epoch": 3.27, + "learning_rate": 1.3541442411194833e-05, + "loss": 0.1147, + "step": 24500 + }, + { + "epoch": 3.34, + "learning_rate": 1.3406889128094726e-05, + "loss": 0.1129, + "step": 25000 + }, + { + "epoch": 3.41, + "learning_rate": 1.3272335844994619e-05, + "loss": 0.1135, + "step": 25500 + }, + { + "epoch": 3.48, + "learning_rate": 1.3137782561894512e-05, + "loss": 0.112, + "step": 26000 + }, + { + "epoch": 3.54, + "learning_rate": 1.3003229278794403e-05, + "loss": 0.1116, + "step": 26500 + }, + { + "epoch": 3.61, + "learning_rate": 1.2868675995694295e-05, + "loss": 0.1109, + "step": 27000 + }, + { + "epoch": 3.68, + "learning_rate": 1.2734122712594188e-05, + "loss": 0.1119, + "step": 27500 + }, + { + "epoch": 3.74, + "learning_rate": 1.259956942949408e-05, + "loss": 0.112, + "step": 28000 + }, + { + "epoch": 3.81, + "learning_rate": 1.2465016146393974e-05, + "loss": 0.1127, + "step": 28500 + }, + { + "epoch": 3.88, + "learning_rate": 1.2330462863293867e-05, + "loss": 0.1113, + "step": 29000 + }, + { + "epoch": 3.94, + "learning_rate": 1.219590958019376e-05, + "loss": 0.112, + "step": 29500 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9803646974079206, + "eval_f1": 0.0, + "eval_loss": 0.06762129813432693, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.1754, + "eval_samples_per_second": 661.67, + "eval_steps_per_second": 5.172, + "step": 29928 + }, + { + "epoch": 4.01, + "learning_rate": 1.206135629709365e-05, + "loss": 0.1131, + "step": 30000 + }, + { + "epoch": 4.08, + "learning_rate": 1.1926803013993542e-05, + "loss": 0.111, + "step": 30500 + }, + { + "epoch": 4.14, + "learning_rate": 1.1792249730893435e-05, + "loss": 0.1113, + "step": 31000 + }, + { + "epoch": 4.21, + "learning_rate": 1.1657696447793328e-05, + "loss": 0.1127, + "step": 31500 + }, + { + "epoch": 4.28, + "learning_rate": 1.152314316469322e-05, + "loss": 0.1125, + "step": 32000 + }, + { + "epoch": 4.34, + "learning_rate": 1.1388589881593113e-05, + "loss": 0.1104, + "step": 32500 + }, + { + "epoch": 4.41, + "learning_rate": 1.1254036598493004e-05, + "loss": 0.1121, + "step": 33000 + }, + { + "epoch": 4.48, + "learning_rate": 1.1119483315392897e-05, + "loss": 0.1103, + "step": 33500 + }, + { + "epoch": 4.54, + "learning_rate": 1.0984930032292788e-05, + "loss": 0.1097, + "step": 34000 + }, + { + "epoch": 4.61, + "learning_rate": 1.0850376749192681e-05, + "loss": 0.1099, + "step": 34500 + }, + { + "epoch": 4.68, + "learning_rate": 1.0715823466092574e-05, + "loss": 0.1118, + "step": 35000 + }, + { + "epoch": 4.74, + "learning_rate": 1.0581270182992467e-05, + "loss": 0.1109, + "step": 35500 + }, + { + "epoch": 4.81, + "learning_rate": 1.0446716899892357e-05, + "loss": 0.1102, + "step": 36000 + }, + { + "epoch": 4.88, + "learning_rate": 1.031216361679225e-05, + "loss": 0.1097, + "step": 36500 + }, + { + "epoch": 4.95, + "learning_rate": 1.0177610333692143e-05, + "loss": 0.1103, + "step": 37000 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.9808290704859829, + "eval_f1": 0.0, + "eval_loss": 0.06593325734138489, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.3722, + "eval_samples_per_second": 716.234, + "eval_steps_per_second": 5.599, + "step": 37410 + }, + { + "epoch": 5.01, + "learning_rate": 1.0043057050592036e-05, + "loss": 0.1099, + "step": 37500 + }, + { + "epoch": 5.08, + "learning_rate": 9.908503767491927e-06, + "loss": 0.1108, + "step": 38000 + }, + { + "epoch": 5.15, + "learning_rate": 9.77395048439182e-06, + "loss": 0.1106, + "step": 38500 + }, + { + "epoch": 5.21, + "learning_rate": 9.639397201291713e-06, + "loss": 0.1112, + "step": 39000 + }, + { + "epoch": 5.28, + "learning_rate": 9.504843918191604e-06, + "loss": 0.1106, + "step": 39500 + }, + { + "epoch": 5.35, + "learning_rate": 9.370290635091497e-06, + "loss": 0.1088, + "step": 40000 + }, + { + "epoch": 5.41, + "learning_rate": 9.23573735199139e-06, + "loss": 0.1085, + "step": 40500 + }, + { + "epoch": 5.48, + "learning_rate": 9.101184068891282e-06, + "loss": 0.1106, + "step": 41000 + }, + { + "epoch": 5.55, + "learning_rate": 8.966630785791175e-06, + "loss": 0.1087, + "step": 41500 + }, + { + "epoch": 5.61, + "learning_rate": 8.832077502691066e-06, + "loss": 0.1095, + "step": 42000 + }, + { + "epoch": 5.68, + "learning_rate": 8.697524219590959e-06, + "loss": 0.1093, + "step": 42500 + }, + { + "epoch": 5.75, + "learning_rate": 8.56297093649085e-06, + "loss": 0.1105, + "step": 43000 + }, + { + "epoch": 5.81, + "learning_rate": 8.428417653390743e-06, + "loss": 0.1101, + "step": 43500 + }, + { + "epoch": 5.88, + "learning_rate": 8.293864370290636e-06, + "loss": 0.1098, + "step": 44000 + }, + { + "epoch": 5.95, + "learning_rate": 8.159311087190527e-06, + "loss": 0.1087, + "step": 44500 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9811471843268601, + "eval_f1": 0.0, + "eval_loss": 0.064879409968853, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.6101, + "eval_samples_per_second": 657.916, + "eval_steps_per_second": 5.143, + "step": 44892 + }, + { + "epoch": 6.01, + "learning_rate": 8.02475780409042e-06, + "loss": 0.1114, + "step": 45000 + }, + { + "epoch": 6.08, + "learning_rate": 7.890204520990313e-06, + "loss": 0.1105, + "step": 45500 + }, + { + "epoch": 6.15, + "learning_rate": 7.755651237890205e-06, + "loss": 0.1095, + "step": 46000 + }, + { + "epoch": 6.21, + "learning_rate": 7.621097954790098e-06, + "loss": 0.11, + "step": 46500 + }, + { + "epoch": 6.28, + "learning_rate": 7.486544671689991e-06, + "loss": 0.1093, + "step": 47000 + }, + { + "epoch": 6.35, + "learning_rate": 7.351991388589882e-06, + "loss": 0.1089, + "step": 47500 + }, + { + "epoch": 6.42, + "learning_rate": 7.217438105489775e-06, + "loss": 0.1104, + "step": 48000 + }, + { + "epoch": 6.48, + "learning_rate": 7.082884822389667e-06, + "loss": 0.1093, + "step": 48500 + }, + { + "epoch": 6.55, + "learning_rate": 6.948331539289559e-06, + "loss": 0.1097, + "step": 49000 + }, + { + "epoch": 6.62, + "learning_rate": 6.813778256189451e-06, + "loss": 0.1097, + "step": 49500 + }, + { + "epoch": 6.68, + "learning_rate": 6.679224973089344e-06, + "loss": 0.1093, + "step": 50000 + }, + { + "epoch": 6.75, + "learning_rate": 6.544671689989236e-06, + "loss": 0.1087, + "step": 50500 + }, + { + "epoch": 6.82, + "learning_rate": 6.410118406889129e-06, + "loss": 0.1096, + "step": 51000 + }, + { + "epoch": 6.88, + "learning_rate": 6.275565123789022e-06, + "loss": 0.1089, + "step": 51500 + }, + { + "epoch": 6.95, + "learning_rate": 6.141011840688913e-06, + "loss": 0.1077, + "step": 52000 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.9813263518924117, + "eval_f1": 0.0, + "eval_loss": 0.06425908952951431, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.4697, + "eval_samples_per_second": 715.243, + "eval_steps_per_second": 5.591, + "step": 52374 + }, + { + "epoch": 7.02, + "learning_rate": 6.006458557588806e-06, + "loss": 0.1095, + "step": 52500 + }, + { + "epoch": 7.08, + "learning_rate": 5.871905274488698e-06, + "loss": 0.1094, + "step": 53000 + }, + { + "epoch": 7.15, + "learning_rate": 5.73735199138859e-06, + "loss": 0.1086, + "step": 53500 + }, + { + "epoch": 7.22, + "learning_rate": 5.602798708288482e-06, + "loss": 0.1091, + "step": 54000 + }, + { + "epoch": 7.28, + "learning_rate": 5.468245425188375e-06, + "loss": 0.1085, + "step": 54500 + }, + { + "epoch": 7.35, + "learning_rate": 5.333692142088267e-06, + "loss": 0.1097, + "step": 55000 + }, + { + "epoch": 7.42, + "learning_rate": 5.19913885898816e-06, + "loss": 0.1086, + "step": 55500 + }, + { + "epoch": 7.48, + "learning_rate": 5.064585575888053e-06, + "loss": 0.1082, + "step": 56000 + }, + { + "epoch": 7.55, + "learning_rate": 4.930032292787945e-06, + "loss": 0.1098, + "step": 56500 + }, + { + "epoch": 7.62, + "learning_rate": 4.795479009687837e-06, + "loss": 0.1095, + "step": 57000 + }, + { + "epoch": 7.69, + "learning_rate": 4.660925726587729e-06, + "loss": 0.1086, + "step": 57500 + }, + { + "epoch": 7.75, + "learning_rate": 4.526372443487621e-06, + "loss": 0.109, + "step": 58000 + }, + { + "epoch": 7.82, + "learning_rate": 4.391819160387513e-06, + "loss": 0.1093, + "step": 58500 + }, + { + "epoch": 7.89, + "learning_rate": 4.2572658772874056e-06, + "loss": 0.1085, + "step": 59000 + }, + { + "epoch": 7.95, + "learning_rate": 4.1227125941872986e-06, + "loss": 0.1084, + "step": 59500 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9814470157630892, + "eval_f1": 0.0, + "eval_loss": 0.06387381255626678, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.9011, + "eval_samples_per_second": 655.426, + "eval_steps_per_second": 5.123, + "step": 59856 + } + ], + "logging_steps": 500, + "max_steps": 74820, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 6.803285913817166e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-59856/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-59856/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f375a252b40f093376af46a59a95f708005d615a --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-59856/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:420ab2eaed25185be1889c315a3a56ec6f0e5cd67d7d2d1c5e5dce5d2bc3075b +size 4091 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-67338/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-67338/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-67338/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-67338/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-67338/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..74e76c23e0d37d89c94ac8e609655d2327ca1298 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-67338/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daa14b7ad138a7da0f4e3c83408bccfe369d5772a033e58fce7853f773411b16 +size 14695 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-67338/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-67338/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..348248a94d0867253808a3f99e94cf5e00a8dee6 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-67338/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7528def1b526f2d9a1508c5fbfd3dc0e6917ae4fb5b12427ce069551a6141c2 +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-67338/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-67338/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..510d704138bdbd221cb2a6a38b6e20cbc15a2b2a --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-67338/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:726c213864c69585ab039740132fe5109f5fd7eb0a333991d8b4d428600251ae +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-67338/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-67338/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0d40091971afa771f4b0483ece4cd9b998497c8 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-67338/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a25b477c2e1b5ff3ae52a94fc0fcd7f416cb8c0476386b2ccd1b446e460332b +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-67338/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-67338/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..03c776b18e9ccd38791a44e435a3e943473ac7e5 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-67338/trainer_state.json @@ -0,0 +1,931 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.0, + "eval_steps": 500, + "global_step": 67338, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07, + "learning_rate": 2e-05, + "loss": 0.603, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 1.9865446716899895e-05, + "loss": 0.4359, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 1.9730893433799788e-05, + "loss": 0.3531, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 1.959634015069968e-05, + "loss": 0.299, + "step": 2000 + }, + { + "epoch": 0.33, + "learning_rate": 1.946178686759957e-05, + "loss": 0.2636, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 1.9327233584499463e-05, + "loss": 0.237, + "step": 3000 + }, + { + "epoch": 0.47, + "learning_rate": 1.9192680301399356e-05, + "loss": 0.2162, + "step": 3500 + }, + { + "epoch": 0.53, + "learning_rate": 1.905812701829925e-05, + "loss": 0.1995, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 1.8923573735199142e-05, + "loss": 0.1867, + "step": 4500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8789020452099035e-05, + "loss": 0.1772, + "step": 5000 + }, + { + "epoch": 0.74, + "learning_rate": 1.8654467168998925e-05, + "loss": 0.168, + "step": 5500 + }, + { + "epoch": 0.8, + "learning_rate": 1.8519913885898818e-05, + "loss": 0.1611, + "step": 6000 + }, + { + "epoch": 0.87, + "learning_rate": 1.838536060279871e-05, + "loss": 0.1558, + "step": 6500 + }, + { + "epoch": 0.94, + "learning_rate": 1.8250807319698604e-05, + "loss": 0.1509, + "step": 7000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.972481324523652, + "eval_f1": 0.0, + "eval_loss": 0.10318954288959503, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.8238, + "eval_samples_per_second": 711.667, + "eval_steps_per_second": 5.563, + "step": 7482 + }, + { + "epoch": 1.0, + "learning_rate": 1.8116254036598497e-05, + "loss": 0.1458, + "step": 7500 + }, + { + "epoch": 1.07, + "learning_rate": 1.7981700753498386e-05, + "loss": 0.1438, + "step": 8000 + }, + { + "epoch": 1.14, + "learning_rate": 1.784714747039828e-05, + "loss": 0.1389, + "step": 8500 + }, + { + "epoch": 1.2, + "learning_rate": 1.7712594187298172e-05, + "loss": 0.1365, + "step": 9000 + }, + { + "epoch": 1.27, + "learning_rate": 1.7578040904198065e-05, + "loss": 0.1345, + "step": 9500 + }, + { + "epoch": 1.34, + "learning_rate": 1.7443487621097955e-05, + "loss": 0.1327, + "step": 10000 + }, + { + "epoch": 1.4, + "learning_rate": 1.7308934337997848e-05, + "loss": 0.1304, + "step": 10500 + }, + { + "epoch": 1.47, + "learning_rate": 1.717438105489774e-05, + "loss": 0.1297, + "step": 11000 + }, + { + "epoch": 1.54, + "learning_rate": 1.7039827771797634e-05, + "loss": 0.1268, + "step": 11500 + }, + { + "epoch": 1.6, + "learning_rate": 1.6905274488697524e-05, + "loss": 0.1246, + "step": 12000 + }, + { + "epoch": 1.67, + "learning_rate": 1.6770721205597417e-05, + "loss": 0.1243, + "step": 12500 + }, + { + "epoch": 1.74, + "learning_rate": 1.663616792249731e-05, + "loss": 0.1237, + "step": 13000 + }, + { + "epoch": 1.8, + "learning_rate": 1.6501614639397203e-05, + "loss": 0.1224, + "step": 13500 + }, + { + "epoch": 1.87, + "learning_rate": 1.6367061356297096e-05, + "loss": 0.1218, + "step": 14000 + }, + { + "epoch": 1.94, + "learning_rate": 1.6232508073196985e-05, + "loss": 0.1207, + "step": 14500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9781232745980614, + "eval_f1": 0.0, + "eval_loss": 0.07744112610816956, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.2255, + "eval_samples_per_second": 661.235, + "eval_steps_per_second": 5.169, + "step": 14964 + }, + { + "epoch": 2.0, + "learning_rate": 1.6097954790096878e-05, + "loss": 0.1192, + "step": 15000 + }, + { + "epoch": 2.07, + "learning_rate": 1.596340150699677e-05, + "loss": 0.1195, + "step": 15500 + }, + { + "epoch": 2.14, + "learning_rate": 1.5828848223896664e-05, + "loss": 0.1201, + "step": 16000 + }, + { + "epoch": 2.21, + "learning_rate": 1.5694294940796557e-05, + "loss": 0.1194, + "step": 16500 + }, + { + "epoch": 2.27, + "learning_rate": 1.555974165769645e-05, + "loss": 0.1175, + "step": 17000 + }, + { + "epoch": 2.34, + "learning_rate": 1.542518837459634e-05, + "loss": 0.1175, + "step": 17500 + }, + { + "epoch": 2.41, + "learning_rate": 1.5290635091496233e-05, + "loss": 0.1172, + "step": 18000 + }, + { + "epoch": 2.47, + "learning_rate": 1.5156081808396126e-05, + "loss": 0.1154, + "step": 18500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5021528525296019e-05, + "loss": 0.1152, + "step": 19000 + }, + { + "epoch": 2.61, + "learning_rate": 1.4886975242195912e-05, + "loss": 0.1146, + "step": 19500 + }, + { + "epoch": 2.67, + "learning_rate": 1.4752421959095805e-05, + "loss": 0.1156, + "step": 20000 + }, + { + "epoch": 2.74, + "learning_rate": 1.4617868675995694e-05, + "loss": 0.1152, + "step": 20500 + }, + { + "epoch": 2.81, + "learning_rate": 1.4483315392895587e-05, + "loss": 0.1155, + "step": 21000 + }, + { + "epoch": 2.87, + "learning_rate": 1.434876210979548e-05, + "loss": 0.1143, + "step": 21500 + }, + { + "epoch": 2.94, + "learning_rate": 1.4214208826695373e-05, + "loss": 0.114, + "step": 22000 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9796480271457144, + "eval_f1": 0.0, + "eval_loss": 0.07054685801267624, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.3686, + "eval_samples_per_second": 716.271, + "eval_steps_per_second": 5.599, + "step": 22446 + }, + { + "epoch": 3.01, + "learning_rate": 1.4079655543595265e-05, + "loss": 0.1137, + "step": 22500 + }, + { + "epoch": 3.07, + "learning_rate": 1.3945102260495158e-05, + "loss": 0.1142, + "step": 23000 + }, + { + "epoch": 3.14, + "learning_rate": 1.3810548977395049e-05, + "loss": 0.1143, + "step": 23500 + }, + { + "epoch": 3.21, + "learning_rate": 1.3675995694294942e-05, + "loss": 0.1126, + "step": 24000 + }, + { + "epoch": 3.27, + "learning_rate": 1.3541442411194833e-05, + "loss": 0.1147, + "step": 24500 + }, + { + "epoch": 3.34, + "learning_rate": 1.3406889128094726e-05, + "loss": 0.1129, + "step": 25000 + }, + { + "epoch": 3.41, + "learning_rate": 1.3272335844994619e-05, + "loss": 0.1135, + "step": 25500 + }, + { + "epoch": 3.48, + "learning_rate": 1.3137782561894512e-05, + "loss": 0.112, + "step": 26000 + }, + { + "epoch": 3.54, + "learning_rate": 1.3003229278794403e-05, + "loss": 0.1116, + "step": 26500 + }, + { + "epoch": 3.61, + "learning_rate": 1.2868675995694295e-05, + "loss": 0.1109, + "step": 27000 + }, + { + "epoch": 3.68, + "learning_rate": 1.2734122712594188e-05, + "loss": 0.1119, + "step": 27500 + }, + { + "epoch": 3.74, + "learning_rate": 1.259956942949408e-05, + "loss": 0.112, + "step": 28000 + }, + { + "epoch": 3.81, + "learning_rate": 1.2465016146393974e-05, + "loss": 0.1127, + "step": 28500 + }, + { + "epoch": 3.88, + "learning_rate": 1.2330462863293867e-05, + "loss": 0.1113, + "step": 29000 + }, + { + "epoch": 3.94, + "learning_rate": 1.219590958019376e-05, + "loss": 0.112, + "step": 29500 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9803646974079206, + "eval_f1": 0.0, + "eval_loss": 0.06762129813432693, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.1754, + "eval_samples_per_second": 661.67, + "eval_steps_per_second": 5.172, + "step": 29928 + }, + { + "epoch": 4.01, + "learning_rate": 1.206135629709365e-05, + "loss": 0.1131, + "step": 30000 + }, + { + "epoch": 4.08, + "learning_rate": 1.1926803013993542e-05, + "loss": 0.111, + "step": 30500 + }, + { + "epoch": 4.14, + "learning_rate": 1.1792249730893435e-05, + "loss": 0.1113, + "step": 31000 + }, + { + "epoch": 4.21, + "learning_rate": 1.1657696447793328e-05, + "loss": 0.1127, + "step": 31500 + }, + { + "epoch": 4.28, + "learning_rate": 1.152314316469322e-05, + "loss": 0.1125, + "step": 32000 + }, + { + "epoch": 4.34, + "learning_rate": 1.1388589881593113e-05, + "loss": 0.1104, + "step": 32500 + }, + { + "epoch": 4.41, + "learning_rate": 1.1254036598493004e-05, + "loss": 0.1121, + "step": 33000 + }, + { + "epoch": 4.48, + "learning_rate": 1.1119483315392897e-05, + "loss": 0.1103, + "step": 33500 + }, + { + "epoch": 4.54, + "learning_rate": 1.0984930032292788e-05, + "loss": 0.1097, + "step": 34000 + }, + { + "epoch": 4.61, + "learning_rate": 1.0850376749192681e-05, + "loss": 0.1099, + "step": 34500 + }, + { + "epoch": 4.68, + "learning_rate": 1.0715823466092574e-05, + "loss": 0.1118, + "step": 35000 + }, + { + "epoch": 4.74, + "learning_rate": 1.0581270182992467e-05, + "loss": 0.1109, + "step": 35500 + }, + { + "epoch": 4.81, + "learning_rate": 1.0446716899892357e-05, + "loss": 0.1102, + "step": 36000 + }, + { + "epoch": 4.88, + "learning_rate": 1.031216361679225e-05, + "loss": 0.1097, + "step": 36500 + }, + { + "epoch": 4.95, + "learning_rate": 1.0177610333692143e-05, + "loss": 0.1103, + "step": 37000 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.9808290704859829, + "eval_f1": 0.0, + "eval_loss": 0.06593325734138489, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.3722, + "eval_samples_per_second": 716.234, + "eval_steps_per_second": 5.599, + "step": 37410 + }, + { + "epoch": 5.01, + "learning_rate": 1.0043057050592036e-05, + "loss": 0.1099, + "step": 37500 + }, + { + "epoch": 5.08, + "learning_rate": 9.908503767491927e-06, + "loss": 0.1108, + "step": 38000 + }, + { + "epoch": 5.15, + "learning_rate": 9.77395048439182e-06, + "loss": 0.1106, + "step": 38500 + }, + { + "epoch": 5.21, + "learning_rate": 9.639397201291713e-06, + "loss": 0.1112, + "step": 39000 + }, + { + "epoch": 5.28, + "learning_rate": 9.504843918191604e-06, + "loss": 0.1106, + "step": 39500 + }, + { + "epoch": 5.35, + "learning_rate": 9.370290635091497e-06, + "loss": 0.1088, + "step": 40000 + }, + { + "epoch": 5.41, + "learning_rate": 9.23573735199139e-06, + "loss": 0.1085, + "step": 40500 + }, + { + "epoch": 5.48, + "learning_rate": 9.101184068891282e-06, + "loss": 0.1106, + "step": 41000 + }, + { + "epoch": 5.55, + "learning_rate": 8.966630785791175e-06, + "loss": 0.1087, + "step": 41500 + }, + { + "epoch": 5.61, + "learning_rate": 8.832077502691066e-06, + "loss": 0.1095, + "step": 42000 + }, + { + "epoch": 5.68, + "learning_rate": 8.697524219590959e-06, + "loss": 0.1093, + "step": 42500 + }, + { + "epoch": 5.75, + "learning_rate": 8.56297093649085e-06, + "loss": 0.1105, + "step": 43000 + }, + { + "epoch": 5.81, + "learning_rate": 8.428417653390743e-06, + "loss": 0.1101, + "step": 43500 + }, + { + "epoch": 5.88, + "learning_rate": 8.293864370290636e-06, + "loss": 0.1098, + "step": 44000 + }, + { + "epoch": 5.95, + "learning_rate": 8.159311087190527e-06, + "loss": 0.1087, + "step": 44500 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9811471843268601, + "eval_f1": 0.0, + "eval_loss": 0.064879409968853, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.6101, + "eval_samples_per_second": 657.916, + "eval_steps_per_second": 5.143, + "step": 44892 + }, + { + "epoch": 6.01, + "learning_rate": 8.02475780409042e-06, + "loss": 0.1114, + "step": 45000 + }, + { + "epoch": 6.08, + "learning_rate": 7.890204520990313e-06, + "loss": 0.1105, + "step": 45500 + }, + { + "epoch": 6.15, + "learning_rate": 7.755651237890205e-06, + "loss": 0.1095, + "step": 46000 + }, + { + "epoch": 6.21, + "learning_rate": 7.621097954790098e-06, + "loss": 0.11, + "step": 46500 + }, + { + "epoch": 6.28, + "learning_rate": 7.486544671689991e-06, + "loss": 0.1093, + "step": 47000 + }, + { + "epoch": 6.35, + "learning_rate": 7.351991388589882e-06, + "loss": 0.1089, + "step": 47500 + }, + { + "epoch": 6.42, + "learning_rate": 7.217438105489775e-06, + "loss": 0.1104, + "step": 48000 + }, + { + "epoch": 6.48, + "learning_rate": 7.082884822389667e-06, + "loss": 0.1093, + "step": 48500 + }, + { + "epoch": 6.55, + "learning_rate": 6.948331539289559e-06, + "loss": 0.1097, + "step": 49000 + }, + { + "epoch": 6.62, + "learning_rate": 6.813778256189451e-06, + "loss": 0.1097, + "step": 49500 + }, + { + "epoch": 6.68, + "learning_rate": 6.679224973089344e-06, + "loss": 0.1093, + "step": 50000 + }, + { + "epoch": 6.75, + "learning_rate": 6.544671689989236e-06, + "loss": 0.1087, + "step": 50500 + }, + { + "epoch": 6.82, + "learning_rate": 6.410118406889129e-06, + "loss": 0.1096, + "step": 51000 + }, + { + "epoch": 6.88, + "learning_rate": 6.275565123789022e-06, + "loss": 0.1089, + "step": 51500 + }, + { + "epoch": 6.95, + "learning_rate": 6.141011840688913e-06, + "loss": 0.1077, + "step": 52000 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.9813263518924117, + "eval_f1": 0.0, + "eval_loss": 0.06425908952951431, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.4697, + "eval_samples_per_second": 715.243, + "eval_steps_per_second": 5.591, + "step": 52374 + }, + { + "epoch": 7.02, + "learning_rate": 6.006458557588806e-06, + "loss": 0.1095, + "step": 52500 + }, + { + "epoch": 7.08, + "learning_rate": 5.871905274488698e-06, + "loss": 0.1094, + "step": 53000 + }, + { + "epoch": 7.15, + "learning_rate": 5.73735199138859e-06, + "loss": 0.1086, + "step": 53500 + }, + { + "epoch": 7.22, + "learning_rate": 5.602798708288482e-06, + "loss": 0.1091, + "step": 54000 + }, + { + "epoch": 7.28, + "learning_rate": 5.468245425188375e-06, + "loss": 0.1085, + "step": 54500 + }, + { + "epoch": 7.35, + "learning_rate": 5.333692142088267e-06, + "loss": 0.1097, + "step": 55000 + }, + { + "epoch": 7.42, + "learning_rate": 5.19913885898816e-06, + "loss": 0.1086, + "step": 55500 + }, + { + "epoch": 7.48, + "learning_rate": 5.064585575888053e-06, + "loss": 0.1082, + "step": 56000 + }, + { + "epoch": 7.55, + "learning_rate": 4.930032292787945e-06, + "loss": 0.1098, + "step": 56500 + }, + { + "epoch": 7.62, + "learning_rate": 4.795479009687837e-06, + "loss": 0.1095, + "step": 57000 + }, + { + "epoch": 7.69, + "learning_rate": 4.660925726587729e-06, + "loss": 0.1086, + "step": 57500 + }, + { + "epoch": 7.75, + "learning_rate": 4.526372443487621e-06, + "loss": 0.109, + "step": 58000 + }, + { + "epoch": 7.82, + "learning_rate": 4.391819160387513e-06, + "loss": 0.1093, + "step": 58500 + }, + { + "epoch": 7.89, + "learning_rate": 4.2572658772874056e-06, + "loss": 0.1085, + "step": 59000 + }, + { + "epoch": 7.95, + "learning_rate": 4.1227125941872986e-06, + "loss": 0.1084, + "step": 59500 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9814470157630892, + "eval_f1": 0.0, + "eval_loss": 0.06387381255626678, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.9011, + "eval_samples_per_second": 655.426, + "eval_steps_per_second": 5.123, + "step": 59856 + }, + { + "epoch": 8.02, + "learning_rate": 3.988159311087191e-06, + "loss": 0.1087, + "step": 60000 + }, + { + "epoch": 8.09, + "learning_rate": 3.853606027987083e-06, + "loss": 0.1084, + "step": 60500 + }, + { + "epoch": 8.15, + "learning_rate": 3.719052744886976e-06, + "loss": 0.1091, + "step": 61000 + }, + { + "epoch": 8.22, + "learning_rate": 3.584499461786868e-06, + "loss": 0.1086, + "step": 61500 + }, + { + "epoch": 8.29, + "learning_rate": 3.4499461786867606e-06, + "loss": 0.1091, + "step": 62000 + }, + { + "epoch": 8.35, + "learning_rate": 3.3153928955866527e-06, + "loss": 0.1089, + "step": 62500 + }, + { + "epoch": 8.42, + "learning_rate": 3.180839612486545e-06, + "loss": 0.1103, + "step": 63000 + }, + { + "epoch": 8.49, + "learning_rate": 3.0462863293864374e-06, + "loss": 0.1093, + "step": 63500 + }, + { + "epoch": 8.55, + "learning_rate": 2.9117330462863296e-06, + "loss": 0.1088, + "step": 64000 + }, + { + "epoch": 8.62, + "learning_rate": 2.7771797631862217e-06, + "loss": 0.1093, + "step": 64500 + }, + { + "epoch": 8.69, + "learning_rate": 2.6426264800861147e-06, + "loss": 0.1081, + "step": 65000 + }, + { + "epoch": 8.75, + "learning_rate": 2.508073196986007e-06, + "loss": 0.1085, + "step": 65500 + }, + { + "epoch": 8.82, + "learning_rate": 2.373519913885899e-06, + "loss": 0.1095, + "step": 66000 + }, + { + "epoch": 8.89, + "learning_rate": 2.238966630785791e-06, + "loss": 0.108, + "step": 66500 + }, + { + "epoch": 8.95, + "learning_rate": 2.1044133476856837e-06, + "loss": 0.1097, + "step": 67000 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.9814506722440189, + "eval_f1": 0.0, + "eval_loss": 0.06374968588352203, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.468, + "eval_samples_per_second": 715.261, + "eval_steps_per_second": 5.591, + "step": 67338 + } + ], + "logging_steps": 500, + "max_steps": 74820, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 7.653696653044312e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-67338/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-67338/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f375a252b40f093376af46a59a95f708005d615a --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-67338/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:420ab2eaed25185be1889c315a3a56ec6f0e5cd67d7d2d1c5e5dce5d2bc3075b +size 4091 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-7482/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-7482/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-7482/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-7482/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-7482/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d8639ad95e0046cf38a8d93ba14cc5b27c76581 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-7482/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b944267af674e907f363a7ccb8630c8f1b86b613140e34d19e5c33fd92a38809 +size 14695 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-7482/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-7482/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..21059d8b185a42dbc3fd098dcc32019e89534fe5 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-7482/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:527a5079ccedde33dc303dc200cc2eabb4c173be18955560e4e3975db3c996b0 +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-7482/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-7482/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..49d6418971dd5dcd6284a0da3b1d852a159bf106 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-7482/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1388a98e01b7e131b2e58a1114b5578d7145cc0d6a711cf13cc01a4ce2dd84b0 +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-7482/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-7482/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9ab6af8f47f64e0df294be433998adc6a9e01f3 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-7482/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65c64b6b9c7ec564f996e449e03708919251f7be9813519a982c8e66d01dab32 +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-7482/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-7482/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ed349889b4cefd2dcded77a8dcc66ee7c3c2ce4c --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-7482/trainer_state.json @@ -0,0 +1,115 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 7482, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07, + "learning_rate": 2e-05, + "loss": 0.603, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 1.9865446716899895e-05, + "loss": 0.4359, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 1.9730893433799788e-05, + "loss": 0.3531, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 1.959634015069968e-05, + "loss": 0.299, + "step": 2000 + }, + { + "epoch": 0.33, + "learning_rate": 1.946178686759957e-05, + "loss": 0.2636, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 1.9327233584499463e-05, + "loss": 0.237, + "step": 3000 + }, + { + "epoch": 0.47, + "learning_rate": 1.9192680301399356e-05, + "loss": 0.2162, + "step": 3500 + }, + { + "epoch": 0.53, + "learning_rate": 1.905812701829925e-05, + "loss": 0.1995, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 1.8923573735199142e-05, + "loss": 0.1867, + "step": 4500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8789020452099035e-05, + "loss": 0.1772, + "step": 5000 + }, + { + "epoch": 0.74, + "learning_rate": 1.8654467168998925e-05, + "loss": 0.168, + "step": 5500 + }, + { + "epoch": 0.8, + "learning_rate": 1.8519913885898818e-05, + "loss": 0.1611, + "step": 6000 + }, + { + "epoch": 0.87, + "learning_rate": 1.838536060279871e-05, + "loss": 0.1558, + "step": 6500 + }, + { + "epoch": 0.94, + "learning_rate": 1.8250807319698604e-05, + "loss": 0.1509, + "step": 7000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.972481324523652, + "eval_f1": 0.0, + "eval_loss": 0.10318954288959503, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.8238, + "eval_samples_per_second": 711.667, + "eval_steps_per_second": 5.563, + "step": 7482 + } + ], + "logging_steps": 500, + "max_steps": 74820, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 8.504107392271456e+16, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-7482/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-7482/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f375a252b40f093376af46a59a95f708005d615a --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-7482/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:420ab2eaed25185be1889c315a3a56ec6f0e5cd67d7d2d1c5e5dce5d2bc3075b +size 4091 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-74820/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-74820/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-74820/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-74820/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-74820/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1bce6d2cc527f4089dad00b0aeea2ff2968f9f4 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-74820/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40c4a8c73f833420737ffad10b07eb18dedfa43d14fb3b00f1356371f9adfd54 +size 14695 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-74820/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-74820/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..5c3f7e00e306561fe66751e504564a3aae29b3df --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-74820/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:424410db07194449ec65ed41859d7f1a433cde7eedfc05a9bbff8d8799b5d3d3 +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-74820/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-74820/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..38a6aaebe3c0a4f1ab13b78bc597e970d99e8055 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-74820/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0038f7fb1e6f2d035bd6e3e72538a930a715bd6413c48f2dab86b4e277cd244 +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-74820/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-74820/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..360529da53e5faed39eb0ed24aa4d99bba89e3eb --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-74820/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1ca8cf0fe54e209d00bab22ea0d306420f0b8f7d1531efda28b4c3aa15a2ac9 +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-74820/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-74820/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..abe636ae2d0274f7a43b2ea4219249b45c6bff17 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-74820/trainer_state.json @@ -0,0 +1,1033 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 10.0, + "eval_steps": 500, + "global_step": 74820, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07, + "learning_rate": 2e-05, + "loss": 0.603, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 1.9865446716899895e-05, + "loss": 0.4359, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 1.9730893433799788e-05, + "loss": 0.3531, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 1.959634015069968e-05, + "loss": 0.299, + "step": 2000 + }, + { + "epoch": 0.33, + "learning_rate": 1.946178686759957e-05, + "loss": 0.2636, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 1.9327233584499463e-05, + "loss": 0.237, + "step": 3000 + }, + { + "epoch": 0.47, + "learning_rate": 1.9192680301399356e-05, + "loss": 0.2162, + "step": 3500 + }, + { + "epoch": 0.53, + "learning_rate": 1.905812701829925e-05, + "loss": 0.1995, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 1.8923573735199142e-05, + "loss": 0.1867, + "step": 4500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8789020452099035e-05, + "loss": 0.1772, + "step": 5000 + }, + { + "epoch": 0.74, + "learning_rate": 1.8654467168998925e-05, + "loss": 0.168, + "step": 5500 + }, + { + "epoch": 0.8, + "learning_rate": 1.8519913885898818e-05, + "loss": 0.1611, + "step": 6000 + }, + { + "epoch": 0.87, + "learning_rate": 1.838536060279871e-05, + "loss": 0.1558, + "step": 6500 + }, + { + "epoch": 0.94, + "learning_rate": 1.8250807319698604e-05, + "loss": 0.1509, + "step": 7000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.972481324523652, + "eval_f1": 0.0, + "eval_loss": 0.10318954288959503, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.8238, + "eval_samples_per_second": 711.667, + "eval_steps_per_second": 5.563, + "step": 7482 + }, + { + "epoch": 1.0, + "learning_rate": 1.8116254036598497e-05, + "loss": 0.1458, + "step": 7500 + }, + { + "epoch": 1.07, + "learning_rate": 1.7981700753498386e-05, + "loss": 0.1438, + "step": 8000 + }, + { + "epoch": 1.14, + "learning_rate": 1.784714747039828e-05, + "loss": 0.1389, + "step": 8500 + }, + { + "epoch": 1.2, + "learning_rate": 1.7712594187298172e-05, + "loss": 0.1365, + "step": 9000 + }, + { + "epoch": 1.27, + "learning_rate": 1.7578040904198065e-05, + "loss": 0.1345, + "step": 9500 + }, + { + "epoch": 1.34, + "learning_rate": 1.7443487621097955e-05, + "loss": 0.1327, + "step": 10000 + }, + { + "epoch": 1.4, + "learning_rate": 1.7308934337997848e-05, + "loss": 0.1304, + "step": 10500 + }, + { + "epoch": 1.47, + "learning_rate": 1.717438105489774e-05, + "loss": 0.1297, + "step": 11000 + }, + { + "epoch": 1.54, + "learning_rate": 1.7039827771797634e-05, + "loss": 0.1268, + "step": 11500 + }, + { + "epoch": 1.6, + "learning_rate": 1.6905274488697524e-05, + "loss": 0.1246, + "step": 12000 + }, + { + "epoch": 1.67, + "learning_rate": 1.6770721205597417e-05, + "loss": 0.1243, + "step": 12500 + }, + { + "epoch": 1.74, + "learning_rate": 1.663616792249731e-05, + "loss": 0.1237, + "step": 13000 + }, + { + "epoch": 1.8, + "learning_rate": 1.6501614639397203e-05, + "loss": 0.1224, + "step": 13500 + }, + { + "epoch": 1.87, + "learning_rate": 1.6367061356297096e-05, + "loss": 0.1218, + "step": 14000 + }, + { + "epoch": 1.94, + "learning_rate": 1.6232508073196985e-05, + "loss": 0.1207, + "step": 14500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9781232745980614, + "eval_f1": 0.0, + "eval_loss": 0.07744112610816956, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.2255, + "eval_samples_per_second": 661.235, + "eval_steps_per_second": 5.169, + "step": 14964 + }, + { + "epoch": 2.0, + "learning_rate": 1.6097954790096878e-05, + "loss": 0.1192, + "step": 15000 + }, + { + "epoch": 2.07, + "learning_rate": 1.596340150699677e-05, + "loss": 0.1195, + "step": 15500 + }, + { + "epoch": 2.14, + "learning_rate": 1.5828848223896664e-05, + "loss": 0.1201, + "step": 16000 + }, + { + "epoch": 2.21, + "learning_rate": 1.5694294940796557e-05, + "loss": 0.1194, + "step": 16500 + }, + { + "epoch": 2.27, + "learning_rate": 1.555974165769645e-05, + "loss": 0.1175, + "step": 17000 + }, + { + "epoch": 2.34, + "learning_rate": 1.542518837459634e-05, + "loss": 0.1175, + "step": 17500 + }, + { + "epoch": 2.41, + "learning_rate": 1.5290635091496233e-05, + "loss": 0.1172, + "step": 18000 + }, + { + "epoch": 2.47, + "learning_rate": 1.5156081808396126e-05, + "loss": 0.1154, + "step": 18500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5021528525296019e-05, + "loss": 0.1152, + "step": 19000 + }, + { + "epoch": 2.61, + "learning_rate": 1.4886975242195912e-05, + "loss": 0.1146, + "step": 19500 + }, + { + "epoch": 2.67, + "learning_rate": 1.4752421959095805e-05, + "loss": 0.1156, + "step": 20000 + }, + { + "epoch": 2.74, + "learning_rate": 1.4617868675995694e-05, + "loss": 0.1152, + "step": 20500 + }, + { + "epoch": 2.81, + "learning_rate": 1.4483315392895587e-05, + "loss": 0.1155, + "step": 21000 + }, + { + "epoch": 2.87, + "learning_rate": 1.434876210979548e-05, + "loss": 0.1143, + "step": 21500 + }, + { + "epoch": 2.94, + "learning_rate": 1.4214208826695373e-05, + "loss": 0.114, + "step": 22000 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9796480271457144, + "eval_f1": 0.0, + "eval_loss": 0.07054685801267624, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.3686, + "eval_samples_per_second": 716.271, + "eval_steps_per_second": 5.599, + "step": 22446 + }, + { + "epoch": 3.01, + "learning_rate": 1.4079655543595265e-05, + "loss": 0.1137, + "step": 22500 + }, + { + "epoch": 3.07, + "learning_rate": 1.3945102260495158e-05, + "loss": 0.1142, + "step": 23000 + }, + { + "epoch": 3.14, + "learning_rate": 1.3810548977395049e-05, + "loss": 0.1143, + "step": 23500 + }, + { + "epoch": 3.21, + "learning_rate": 1.3675995694294942e-05, + "loss": 0.1126, + "step": 24000 + }, + { + "epoch": 3.27, + "learning_rate": 1.3541442411194833e-05, + "loss": 0.1147, + "step": 24500 + }, + { + "epoch": 3.34, + "learning_rate": 1.3406889128094726e-05, + "loss": 0.1129, + "step": 25000 + }, + { + "epoch": 3.41, + "learning_rate": 1.3272335844994619e-05, + "loss": 0.1135, + "step": 25500 + }, + { + "epoch": 3.48, + "learning_rate": 1.3137782561894512e-05, + "loss": 0.112, + "step": 26000 + }, + { + "epoch": 3.54, + "learning_rate": 1.3003229278794403e-05, + "loss": 0.1116, + "step": 26500 + }, + { + "epoch": 3.61, + "learning_rate": 1.2868675995694295e-05, + "loss": 0.1109, + "step": 27000 + }, + { + "epoch": 3.68, + "learning_rate": 1.2734122712594188e-05, + "loss": 0.1119, + "step": 27500 + }, + { + "epoch": 3.74, + "learning_rate": 1.259956942949408e-05, + "loss": 0.112, + "step": 28000 + }, + { + "epoch": 3.81, + "learning_rate": 1.2465016146393974e-05, + "loss": 0.1127, + "step": 28500 + }, + { + "epoch": 3.88, + "learning_rate": 1.2330462863293867e-05, + "loss": 0.1113, + "step": 29000 + }, + { + "epoch": 3.94, + "learning_rate": 1.219590958019376e-05, + "loss": 0.112, + "step": 29500 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9803646974079206, + "eval_f1": 0.0, + "eval_loss": 0.06762129813432693, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.1754, + "eval_samples_per_second": 661.67, + "eval_steps_per_second": 5.172, + "step": 29928 + }, + { + "epoch": 4.01, + "learning_rate": 1.206135629709365e-05, + "loss": 0.1131, + "step": 30000 + }, + { + "epoch": 4.08, + "learning_rate": 1.1926803013993542e-05, + "loss": 0.111, + "step": 30500 + }, + { + "epoch": 4.14, + "learning_rate": 1.1792249730893435e-05, + "loss": 0.1113, + "step": 31000 + }, + { + "epoch": 4.21, + "learning_rate": 1.1657696447793328e-05, + "loss": 0.1127, + "step": 31500 + }, + { + "epoch": 4.28, + "learning_rate": 1.152314316469322e-05, + "loss": 0.1125, + "step": 32000 + }, + { + "epoch": 4.34, + "learning_rate": 1.1388589881593113e-05, + "loss": 0.1104, + "step": 32500 + }, + { + "epoch": 4.41, + "learning_rate": 1.1254036598493004e-05, + "loss": 0.1121, + "step": 33000 + }, + { + "epoch": 4.48, + "learning_rate": 1.1119483315392897e-05, + "loss": 0.1103, + "step": 33500 + }, + { + "epoch": 4.54, + "learning_rate": 1.0984930032292788e-05, + "loss": 0.1097, + "step": 34000 + }, + { + "epoch": 4.61, + "learning_rate": 1.0850376749192681e-05, + "loss": 0.1099, + "step": 34500 + }, + { + "epoch": 4.68, + "learning_rate": 1.0715823466092574e-05, + "loss": 0.1118, + "step": 35000 + }, + { + "epoch": 4.74, + "learning_rate": 1.0581270182992467e-05, + "loss": 0.1109, + "step": 35500 + }, + { + "epoch": 4.81, + "learning_rate": 1.0446716899892357e-05, + "loss": 0.1102, + "step": 36000 + }, + { + "epoch": 4.88, + "learning_rate": 1.031216361679225e-05, + "loss": 0.1097, + "step": 36500 + }, + { + "epoch": 4.95, + "learning_rate": 1.0177610333692143e-05, + "loss": 0.1103, + "step": 37000 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.9808290704859829, + "eval_f1": 0.0, + "eval_loss": 0.06593325734138489, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.3722, + "eval_samples_per_second": 716.234, + "eval_steps_per_second": 5.599, + "step": 37410 + }, + { + "epoch": 5.01, + "learning_rate": 1.0043057050592036e-05, + "loss": 0.1099, + "step": 37500 + }, + { + "epoch": 5.08, + "learning_rate": 9.908503767491927e-06, + "loss": 0.1108, + "step": 38000 + }, + { + "epoch": 5.15, + "learning_rate": 9.77395048439182e-06, + "loss": 0.1106, + "step": 38500 + }, + { + "epoch": 5.21, + "learning_rate": 9.639397201291713e-06, + "loss": 0.1112, + "step": 39000 + }, + { + "epoch": 5.28, + "learning_rate": 9.504843918191604e-06, + "loss": 0.1106, + "step": 39500 + }, + { + "epoch": 5.35, + "learning_rate": 9.370290635091497e-06, + "loss": 0.1088, + "step": 40000 + }, + { + "epoch": 5.41, + "learning_rate": 9.23573735199139e-06, + "loss": 0.1085, + "step": 40500 + }, + { + "epoch": 5.48, + "learning_rate": 9.101184068891282e-06, + "loss": 0.1106, + "step": 41000 + }, + { + "epoch": 5.55, + "learning_rate": 8.966630785791175e-06, + "loss": 0.1087, + "step": 41500 + }, + { + "epoch": 5.61, + "learning_rate": 8.832077502691066e-06, + "loss": 0.1095, + "step": 42000 + }, + { + "epoch": 5.68, + "learning_rate": 8.697524219590959e-06, + "loss": 0.1093, + "step": 42500 + }, + { + "epoch": 5.75, + "learning_rate": 8.56297093649085e-06, + "loss": 0.1105, + "step": 43000 + }, + { + "epoch": 5.81, + "learning_rate": 8.428417653390743e-06, + "loss": 0.1101, + "step": 43500 + }, + { + "epoch": 5.88, + "learning_rate": 8.293864370290636e-06, + "loss": 0.1098, + "step": 44000 + }, + { + "epoch": 5.95, + "learning_rate": 8.159311087190527e-06, + "loss": 0.1087, + "step": 44500 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9811471843268601, + "eval_f1": 0.0, + "eval_loss": 0.064879409968853, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.6101, + "eval_samples_per_second": 657.916, + "eval_steps_per_second": 5.143, + "step": 44892 + }, + { + "epoch": 6.01, + "learning_rate": 8.02475780409042e-06, + "loss": 0.1114, + "step": 45000 + }, + { + "epoch": 6.08, + "learning_rate": 7.890204520990313e-06, + "loss": 0.1105, + "step": 45500 + }, + { + "epoch": 6.15, + "learning_rate": 7.755651237890205e-06, + "loss": 0.1095, + "step": 46000 + }, + { + "epoch": 6.21, + "learning_rate": 7.621097954790098e-06, + "loss": 0.11, + "step": 46500 + }, + { + "epoch": 6.28, + "learning_rate": 7.486544671689991e-06, + "loss": 0.1093, + "step": 47000 + }, + { + "epoch": 6.35, + "learning_rate": 7.351991388589882e-06, + "loss": 0.1089, + "step": 47500 + }, + { + "epoch": 6.42, + "learning_rate": 7.217438105489775e-06, + "loss": 0.1104, + "step": 48000 + }, + { + "epoch": 6.48, + "learning_rate": 7.082884822389667e-06, + "loss": 0.1093, + "step": 48500 + }, + { + "epoch": 6.55, + "learning_rate": 6.948331539289559e-06, + "loss": 0.1097, + "step": 49000 + }, + { + "epoch": 6.62, + "learning_rate": 6.813778256189451e-06, + "loss": 0.1097, + "step": 49500 + }, + { + "epoch": 6.68, + "learning_rate": 6.679224973089344e-06, + "loss": 0.1093, + "step": 50000 + }, + { + "epoch": 6.75, + "learning_rate": 6.544671689989236e-06, + "loss": 0.1087, + "step": 50500 + }, + { + "epoch": 6.82, + "learning_rate": 6.410118406889129e-06, + "loss": 0.1096, + "step": 51000 + }, + { + "epoch": 6.88, + "learning_rate": 6.275565123789022e-06, + "loss": 0.1089, + "step": 51500 + }, + { + "epoch": 6.95, + "learning_rate": 6.141011840688913e-06, + "loss": 0.1077, + "step": 52000 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.9813263518924117, + "eval_f1": 0.0, + "eval_loss": 0.06425908952951431, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.4697, + "eval_samples_per_second": 715.243, + "eval_steps_per_second": 5.591, + "step": 52374 + }, + { + "epoch": 7.02, + "learning_rate": 6.006458557588806e-06, + "loss": 0.1095, + "step": 52500 + }, + { + "epoch": 7.08, + "learning_rate": 5.871905274488698e-06, + "loss": 0.1094, + "step": 53000 + }, + { + "epoch": 7.15, + "learning_rate": 5.73735199138859e-06, + "loss": 0.1086, + "step": 53500 + }, + { + "epoch": 7.22, + "learning_rate": 5.602798708288482e-06, + "loss": 0.1091, + "step": 54000 + }, + { + "epoch": 7.28, + "learning_rate": 5.468245425188375e-06, + "loss": 0.1085, + "step": 54500 + }, + { + "epoch": 7.35, + "learning_rate": 5.333692142088267e-06, + "loss": 0.1097, + "step": 55000 + }, + { + "epoch": 7.42, + "learning_rate": 5.19913885898816e-06, + "loss": 0.1086, + "step": 55500 + }, + { + "epoch": 7.48, + "learning_rate": 5.064585575888053e-06, + "loss": 0.1082, + "step": 56000 + }, + { + "epoch": 7.55, + "learning_rate": 4.930032292787945e-06, + "loss": 0.1098, + "step": 56500 + }, + { + "epoch": 7.62, + "learning_rate": 4.795479009687837e-06, + "loss": 0.1095, + "step": 57000 + }, + { + "epoch": 7.69, + "learning_rate": 4.660925726587729e-06, + "loss": 0.1086, + "step": 57500 + }, + { + "epoch": 7.75, + "learning_rate": 4.526372443487621e-06, + "loss": 0.109, + "step": 58000 + }, + { + "epoch": 7.82, + "learning_rate": 4.391819160387513e-06, + "loss": 0.1093, + "step": 58500 + }, + { + "epoch": 7.89, + "learning_rate": 4.2572658772874056e-06, + "loss": 0.1085, + "step": 59000 + }, + { + "epoch": 7.95, + "learning_rate": 4.1227125941872986e-06, + "loss": 0.1084, + "step": 59500 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9814470157630892, + "eval_f1": 0.0, + "eval_loss": 0.06387381255626678, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 76.9011, + "eval_samples_per_second": 655.426, + "eval_steps_per_second": 5.123, + "step": 59856 + }, + { + "epoch": 8.02, + "learning_rate": 3.988159311087191e-06, + "loss": 0.1087, + "step": 60000 + }, + { + "epoch": 8.09, + "learning_rate": 3.853606027987083e-06, + "loss": 0.1084, + "step": 60500 + }, + { + "epoch": 8.15, + "learning_rate": 3.719052744886976e-06, + "loss": 0.1091, + "step": 61000 + }, + { + "epoch": 8.22, + "learning_rate": 3.584499461786868e-06, + "loss": 0.1086, + "step": 61500 + }, + { + "epoch": 8.29, + "learning_rate": 3.4499461786867606e-06, + "loss": 0.1091, + "step": 62000 + }, + { + "epoch": 8.35, + "learning_rate": 3.3153928955866527e-06, + "loss": 0.1089, + "step": 62500 + }, + { + "epoch": 8.42, + "learning_rate": 3.180839612486545e-06, + "loss": 0.1103, + "step": 63000 + }, + { + "epoch": 8.49, + "learning_rate": 3.0462863293864374e-06, + "loss": 0.1093, + "step": 63500 + }, + { + "epoch": 8.55, + "learning_rate": 2.9117330462863296e-06, + "loss": 0.1088, + "step": 64000 + }, + { + "epoch": 8.62, + "learning_rate": 2.7771797631862217e-06, + "loss": 0.1093, + "step": 64500 + }, + { + "epoch": 8.69, + "learning_rate": 2.6426264800861147e-06, + "loss": 0.1081, + "step": 65000 + }, + { + "epoch": 8.75, + "learning_rate": 2.508073196986007e-06, + "loss": 0.1085, + "step": 65500 + }, + { + "epoch": 8.82, + "learning_rate": 2.373519913885899e-06, + "loss": 0.1095, + "step": 66000 + }, + { + "epoch": 8.89, + "learning_rate": 2.238966630785791e-06, + "loss": 0.108, + "step": 66500 + }, + { + "epoch": 8.95, + "learning_rate": 2.1044133476856837e-06, + "loss": 0.1097, + "step": 67000 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.9814506722440189, + "eval_f1": 0.0, + "eval_loss": 0.06374968588352203, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 70.468, + "eval_samples_per_second": 715.261, + "eval_steps_per_second": 5.591, + "step": 67338 + }, + { + "epoch": 9.02, + "learning_rate": 1.9698600645855763e-06, + "loss": 0.1084, + "step": 67500 + }, + { + "epoch": 9.09, + "learning_rate": 1.8353067814854682e-06, + "loss": 0.1091, + "step": 68000 + }, + { + "epoch": 9.16, + "learning_rate": 1.7007534983853608e-06, + "loss": 0.1095, + "step": 68500 + }, + { + "epoch": 9.22, + "learning_rate": 1.5662002152852531e-06, + "loss": 0.1088, + "step": 69000 + }, + { + "epoch": 9.29, + "learning_rate": 1.4316469321851453e-06, + "loss": 0.1084, + "step": 69500 + }, + { + "epoch": 9.36, + "learning_rate": 1.2970936490850379e-06, + "loss": 0.1089, + "step": 70000 + }, + { + "epoch": 9.42, + "learning_rate": 1.16254036598493e-06, + "loss": 0.1088, + "step": 70500 + }, + { + "epoch": 9.49, + "learning_rate": 1.0279870828848226e-06, + "loss": 0.1085, + "step": 71000 + }, + { + "epoch": 9.56, + "learning_rate": 8.934337997847148e-07, + "loss": 0.1073, + "step": 71500 + }, + { + "epoch": 9.62, + "learning_rate": 7.588805166846072e-07, + "loss": 0.1099, + "step": 72000 + }, + { + "epoch": 9.69, + "learning_rate": 6.243272335844995e-07, + "loss": 0.1089, + "step": 72500 + }, + { + "epoch": 9.76, + "learning_rate": 4.897739504843919e-07, + "loss": 0.1079, + "step": 73000 + }, + { + "epoch": 9.82, + "learning_rate": 3.5522066738428425e-07, + "loss": 0.1091, + "step": 73500 + }, + { + "epoch": 9.89, + "learning_rate": 2.2066738428417655e-07, + "loss": 0.1083, + "step": 74000 + }, + { + "epoch": 9.96, + "learning_rate": 8.61141011840689e-08, + "loss": 0.1088, + "step": 74500 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9814762676105263, + "eval_f1": 0.0, + "eval_loss": 0.0636528730392456, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 75.942, + "eval_samples_per_second": 663.704, + "eval_steps_per_second": 5.188, + "step": 74820 + } + ], + "logging_steps": 500, + "max_steps": 74820, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 8.504107392271459e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-74820/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-74820/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f375a252b40f093376af46a59a95f708005d615a --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/03-09-2023-12-52-25/checkpoint-74820/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:420ab2eaed25185be1889c315a3a56ec6f0e5cd67d7d2d1c5e5dce5d2bc3075b +size 4091 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-12602/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-12602/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-12602/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-12602/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-12602/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b266e5d4cdcb4f6bd90ef673ff2c8d89dfd9a5e --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-12602/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8ae297044d13f1080bd8e779052c78bd52d4c9ba4d4275b250d2f6a5066a160 +size 1418281093 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-12602/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-12602/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..67aab434dbf4904b21a51b9ef06c10cd697566a3 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-12602/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4f2b68c6b2750e83edab03e6643f2655507f3cfeea6966558ee3d8c8c4cf1c5 +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-12602/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-12602/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7b67e3a39c9684706c8fa1e64d43261002907446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-12602/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac8b3ef798f32a1049f8cc0c9575b86edad5aca1c0796f58d3274f6c170172a0 +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-12602/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-12602/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..76fbce2d9a44af159a778647982605d1fefe9be5 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-12602/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf29771d2367e4da5710932440eb7426e7d4f870a7c4ec4135af0f5d15afb101 +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-12602/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-12602/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a82f81a05b013ff58ce81eaafcd8974d621ab2e8 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-12602/trainer_state.json @@ -0,0 +1,193 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 12602, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2e-05, + "loss": 0.131, + "step": 500 + }, + { + "epoch": 0.16, + "learning_rate": 1.9840025595904658e-05, + "loss": 0.0217, + "step": 1000 + }, + { + "epoch": 0.24, + "learning_rate": 1.9680051191809314e-05, + "loss": 0.0205, + "step": 1500 + }, + { + "epoch": 0.32, + "learning_rate": 1.9520076787713967e-05, + "loss": 0.0187, + "step": 2000 + }, + { + "epoch": 0.4, + "learning_rate": 1.9360102383618623e-05, + "loss": 0.0177, + "step": 2500 + }, + { + "epoch": 0.48, + "learning_rate": 1.9200127979523276e-05, + "loss": 0.0172, + "step": 3000 + }, + { + "epoch": 0.56, + "learning_rate": 1.9040153575427932e-05, + "loss": 0.0178, + "step": 3500 + }, + { + "epoch": 0.63, + "learning_rate": 1.888017917133259e-05, + "loss": 0.0167, + "step": 4000 + }, + { + "epoch": 0.71, + "learning_rate": 1.8720204767237245e-05, + "loss": 0.0166, + "step": 4500 + }, + { + "epoch": 0.79, + "learning_rate": 1.8560230363141897e-05, + "loss": 0.0163, + "step": 5000 + }, + { + "epoch": 0.87, + "learning_rate": 1.8400255959046554e-05, + "loss": 0.0162, + "step": 5500 + }, + { + "epoch": 0.95, + "learning_rate": 1.824028155495121e-05, + "loss": 0.0158, + "step": 6000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9927355758345564, + "eval_f1": 0.0, + "eval_loss": 0.018518663942813873, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.4322, + "eval_samples_per_second": 424.065, + "eval_steps_per_second": 3.315, + "step": 6301 + }, + { + "epoch": 1.03, + "learning_rate": 1.8080307150855866e-05, + "loss": 0.0142, + "step": 6500 + }, + { + "epoch": 1.11, + "learning_rate": 1.792033274676052e-05, + "loss": 0.0113, + "step": 7000 + }, + { + "epoch": 1.19, + "learning_rate": 1.7760358342665175e-05, + "loss": 0.0118, + "step": 7500 + }, + { + "epoch": 1.27, + "learning_rate": 1.7600383938569828e-05, + "loss": 0.0115, + "step": 8000 + }, + { + "epoch": 1.35, + "learning_rate": 1.7440409534474484e-05, + "loss": 0.0118, + "step": 8500 + }, + { + "epoch": 1.43, + "learning_rate": 1.728043513037914e-05, + "loss": 0.0112, + "step": 9000 + }, + { + "epoch": 1.51, + "learning_rate": 1.7120460726283797e-05, + "loss": 0.0112, + "step": 9500 + }, + { + "epoch": 1.59, + "learning_rate": 1.6960486322188453e-05, + "loss": 0.0115, + "step": 10000 + }, + { + "epoch": 1.67, + "learning_rate": 1.680051191809311e-05, + "loss": 0.0119, + "step": 10500 + }, + { + "epoch": 1.75, + "learning_rate": 1.6640537513997762e-05, + "loss": 0.0111, + "step": 11000 + }, + { + "epoch": 1.83, + "learning_rate": 1.6480563109902418e-05, + "loss": 0.0115, + "step": 11500 + }, + { + "epoch": 1.9, + "learning_rate": 1.632058870580707e-05, + "loss": 0.0117, + "step": 12000 + }, + { + "epoch": 1.98, + "learning_rate": 1.6160614301711727e-05, + "loss": 0.0111, + "step": 12500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9928343580163489, + "eval_f1": 0.0, + "eval_loss": 0.020694203674793243, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.3755, + "eval_samples_per_second": 424.115, + "eval_steps_per_second": 3.315, + "step": 12602 + } + ], + "logging_steps": 500, + "max_steps": 63010, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1.3087989341861082e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-12602/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-12602/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..353304a1247e3b1c2c9c128593afbb80600a515b --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-12602/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ae7a0bcc8e5c0968aa51ccef92422d7d995099a8b793f3c4819a8985402b6f +size 4027 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-18903/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-18903/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-18903/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-18903/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-18903/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f26379aac6cab32e824dc7f4fbed2732c45596ab --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-18903/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ab4e9ea5999213b2cdae8740558b981060fe1c665f8a086f564984c6b81cee4 +size 1418281093 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-18903/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-18903/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..6dd9f19935763ebec3f88f5c64e77478e711d103 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-18903/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18c459b66a29bf292e2e96e860ad24ca4cf19ee1192dea54675e48879fc6f5f4 +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-18903/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-18903/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7311710ad854555a0ae57c8fb398722a0238d17f --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-18903/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55e273c8f90634c75a0416ed28960bd0560feaa56cf15d49078e46d6f8b05b5a +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-18903/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-18903/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb9bce02dd0ded0cd63610052d58358cf06d0bb7 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-18903/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6c8d2f3c3b8fcf819b9cd0a150ed334262247532cf396a4698ddcd87645c592 +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-18903/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-18903/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6ba4cb3330cdba97d5093b72202c286e6c768e7f --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-18903/trainer_state.json @@ -0,0 +1,277 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 18903, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2e-05, + "loss": 0.131, + "step": 500 + }, + { + "epoch": 0.16, + "learning_rate": 1.9840025595904658e-05, + "loss": 0.0217, + "step": 1000 + }, + { + "epoch": 0.24, + "learning_rate": 1.9680051191809314e-05, + "loss": 0.0205, + "step": 1500 + }, + { + "epoch": 0.32, + "learning_rate": 1.9520076787713967e-05, + "loss": 0.0187, + "step": 2000 + }, + { + "epoch": 0.4, + "learning_rate": 1.9360102383618623e-05, + "loss": 0.0177, + "step": 2500 + }, + { + "epoch": 0.48, + "learning_rate": 1.9200127979523276e-05, + "loss": 0.0172, + "step": 3000 + }, + { + "epoch": 0.56, + "learning_rate": 1.9040153575427932e-05, + "loss": 0.0178, + "step": 3500 + }, + { + "epoch": 0.63, + "learning_rate": 1.888017917133259e-05, + "loss": 0.0167, + "step": 4000 + }, + { + "epoch": 0.71, + "learning_rate": 1.8720204767237245e-05, + "loss": 0.0166, + "step": 4500 + }, + { + "epoch": 0.79, + "learning_rate": 1.8560230363141897e-05, + "loss": 0.0163, + "step": 5000 + }, + { + "epoch": 0.87, + "learning_rate": 1.8400255959046554e-05, + "loss": 0.0162, + "step": 5500 + }, + { + "epoch": 0.95, + "learning_rate": 1.824028155495121e-05, + "loss": 0.0158, + "step": 6000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9927355758345564, + "eval_f1": 0.0, + "eval_loss": 0.018518663942813873, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.4322, + "eval_samples_per_second": 424.065, + "eval_steps_per_second": 3.315, + "step": 6301 + }, + { + "epoch": 1.03, + "learning_rate": 1.8080307150855866e-05, + "loss": 0.0142, + "step": 6500 + }, + { + "epoch": 1.11, + "learning_rate": 1.792033274676052e-05, + "loss": 0.0113, + "step": 7000 + }, + { + "epoch": 1.19, + "learning_rate": 1.7760358342665175e-05, + "loss": 0.0118, + "step": 7500 + }, + { + "epoch": 1.27, + "learning_rate": 1.7600383938569828e-05, + "loss": 0.0115, + "step": 8000 + }, + { + "epoch": 1.35, + "learning_rate": 1.7440409534474484e-05, + "loss": 0.0118, + "step": 8500 + }, + { + "epoch": 1.43, + "learning_rate": 1.728043513037914e-05, + "loss": 0.0112, + "step": 9000 + }, + { + "epoch": 1.51, + "learning_rate": 1.7120460726283797e-05, + "loss": 0.0112, + "step": 9500 + }, + { + "epoch": 1.59, + "learning_rate": 1.6960486322188453e-05, + "loss": 0.0115, + "step": 10000 + }, + { + "epoch": 1.67, + "learning_rate": 1.680051191809311e-05, + "loss": 0.0119, + "step": 10500 + }, + { + "epoch": 1.75, + "learning_rate": 1.6640537513997762e-05, + "loss": 0.0111, + "step": 11000 + }, + { + "epoch": 1.83, + "learning_rate": 1.6480563109902418e-05, + "loss": 0.0115, + "step": 11500 + }, + { + "epoch": 1.9, + "learning_rate": 1.632058870580707e-05, + "loss": 0.0117, + "step": 12000 + }, + { + "epoch": 1.98, + "learning_rate": 1.6160614301711727e-05, + "loss": 0.0111, + "step": 12500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9928343580163489, + "eval_f1": 0.0, + "eval_loss": 0.020694203674793243, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.3755, + "eval_samples_per_second": 424.115, + "eval_steps_per_second": 3.315, + "step": 12602 + }, + { + "epoch": 2.06, + "learning_rate": 1.6000639897616383e-05, + "loss": 0.008, + "step": 13000 + }, + { + "epoch": 2.14, + "learning_rate": 1.5840665493521036e-05, + "loss": 0.0079, + "step": 13500 + }, + { + "epoch": 2.22, + "learning_rate": 1.5680691089425692e-05, + "loss": 0.0076, + "step": 14000 + }, + { + "epoch": 2.3, + "learning_rate": 1.552071668533035e-05, + "loss": 0.0078, + "step": 14500 + }, + { + "epoch": 2.38, + "learning_rate": 1.5360742281235005e-05, + "loss": 0.0079, + "step": 15000 + }, + { + "epoch": 2.46, + "learning_rate": 1.520076787713966e-05, + "loss": 0.0078, + "step": 15500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5040793473044315e-05, + "loss": 0.0078, + "step": 16000 + }, + { + "epoch": 2.62, + "learning_rate": 1.4880819068948968e-05, + "loss": 0.0079, + "step": 16500 + }, + { + "epoch": 2.7, + "learning_rate": 1.4720844664853625e-05, + "loss": 0.0078, + "step": 17000 + }, + { + "epoch": 2.78, + "learning_rate": 1.4560870260758279e-05, + "loss": 0.0081, + "step": 17500 + }, + { + "epoch": 2.86, + "learning_rate": 1.4400895856662935e-05, + "loss": 0.0079, + "step": 18000 + }, + { + "epoch": 2.94, + "learning_rate": 1.4240921452567591e-05, + "loss": 0.0077, + "step": 18500 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9924637062899773, + "eval_f1": 0.0, + "eval_loss": 0.02258981019258499, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 477.2433, + "eval_samples_per_second": 422.455, + "eval_steps_per_second": 3.302, + "step": 18903 + } + ], + "logging_steps": 500, + "max_steps": 63010, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1.9631984012791622e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-18903/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-18903/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..353304a1247e3b1c2c9c128593afbb80600a515b --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-18903/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ae7a0bcc8e5c0968aa51ccef92422d7d995099a8b793f3c4819a8985402b6f +size 4027 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-25204/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-25204/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-25204/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-25204/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-25204/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4428355c7a66da5f4e0a0957ad45774fc951d4c --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-25204/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaa9da567e4481ad2a9ac4ab0efa227c12192e109a52205daa4ca70ed256401d +size 1418281093 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-25204/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-25204/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..c95956da30ae9e07293c0e75ec3553d5b7959a86 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-25204/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea1fe40fc817b5015d20fc8475ee5c02fcc954290e81972d98fcb82d17552242 +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-25204/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-25204/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..46a93cb244786851c3cd8a799034df53daf982ae --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-25204/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa7bd82c6e6d40abe0bb83ebf73bce399f19dd58250ac11a989db8dbfe4756e5 +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-25204/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-25204/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5787a5e2ab2125f7f22112d5d1383f716927ce0 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-25204/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfb4ae61df08c930c8aa6086a801e4702c0a8f41977b04dede6ff36458d069c9 +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-25204/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-25204/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9cbd50d58f443f0b6de677b4cf085fa1d6d89de4 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-25204/trainer_state.json @@ -0,0 +1,367 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.0, + "eval_steps": 500, + "global_step": 25204, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2e-05, + "loss": 0.131, + "step": 500 + }, + { + "epoch": 0.16, + "learning_rate": 1.9840025595904658e-05, + "loss": 0.0217, + "step": 1000 + }, + { + "epoch": 0.24, + "learning_rate": 1.9680051191809314e-05, + "loss": 0.0205, + "step": 1500 + }, + { + "epoch": 0.32, + "learning_rate": 1.9520076787713967e-05, + "loss": 0.0187, + "step": 2000 + }, + { + "epoch": 0.4, + "learning_rate": 1.9360102383618623e-05, + "loss": 0.0177, + "step": 2500 + }, + { + "epoch": 0.48, + "learning_rate": 1.9200127979523276e-05, + "loss": 0.0172, + "step": 3000 + }, + { + "epoch": 0.56, + "learning_rate": 1.9040153575427932e-05, + "loss": 0.0178, + "step": 3500 + }, + { + "epoch": 0.63, + "learning_rate": 1.888017917133259e-05, + "loss": 0.0167, + "step": 4000 + }, + { + "epoch": 0.71, + "learning_rate": 1.8720204767237245e-05, + "loss": 0.0166, + "step": 4500 + }, + { + "epoch": 0.79, + "learning_rate": 1.8560230363141897e-05, + "loss": 0.0163, + "step": 5000 + }, + { + "epoch": 0.87, + "learning_rate": 1.8400255959046554e-05, + "loss": 0.0162, + "step": 5500 + }, + { + "epoch": 0.95, + "learning_rate": 1.824028155495121e-05, + "loss": 0.0158, + "step": 6000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9927355758345564, + "eval_f1": 0.0, + "eval_loss": 0.018518663942813873, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.4322, + "eval_samples_per_second": 424.065, + "eval_steps_per_second": 3.315, + "step": 6301 + }, + { + "epoch": 1.03, + "learning_rate": 1.8080307150855866e-05, + "loss": 0.0142, + "step": 6500 + }, + { + "epoch": 1.11, + "learning_rate": 1.792033274676052e-05, + "loss": 0.0113, + "step": 7000 + }, + { + "epoch": 1.19, + "learning_rate": 1.7760358342665175e-05, + "loss": 0.0118, + "step": 7500 + }, + { + "epoch": 1.27, + "learning_rate": 1.7600383938569828e-05, + "loss": 0.0115, + "step": 8000 + }, + { + "epoch": 1.35, + "learning_rate": 1.7440409534474484e-05, + "loss": 0.0118, + "step": 8500 + }, + { + "epoch": 1.43, + "learning_rate": 1.728043513037914e-05, + "loss": 0.0112, + "step": 9000 + }, + { + "epoch": 1.51, + "learning_rate": 1.7120460726283797e-05, + "loss": 0.0112, + "step": 9500 + }, + { + "epoch": 1.59, + "learning_rate": 1.6960486322188453e-05, + "loss": 0.0115, + "step": 10000 + }, + { + "epoch": 1.67, + "learning_rate": 1.680051191809311e-05, + "loss": 0.0119, + "step": 10500 + }, + { + "epoch": 1.75, + "learning_rate": 1.6640537513997762e-05, + "loss": 0.0111, + "step": 11000 + }, + { + "epoch": 1.83, + "learning_rate": 1.6480563109902418e-05, + "loss": 0.0115, + "step": 11500 + }, + { + "epoch": 1.9, + "learning_rate": 1.632058870580707e-05, + "loss": 0.0117, + "step": 12000 + }, + { + "epoch": 1.98, + "learning_rate": 1.6160614301711727e-05, + "loss": 0.0111, + "step": 12500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9928343580163489, + "eval_f1": 0.0, + "eval_loss": 0.020694203674793243, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.3755, + "eval_samples_per_second": 424.115, + "eval_steps_per_second": 3.315, + "step": 12602 + }, + { + "epoch": 2.06, + "learning_rate": 1.6000639897616383e-05, + "loss": 0.008, + "step": 13000 + }, + { + "epoch": 2.14, + "learning_rate": 1.5840665493521036e-05, + "loss": 0.0079, + "step": 13500 + }, + { + "epoch": 2.22, + "learning_rate": 1.5680691089425692e-05, + "loss": 0.0076, + "step": 14000 + }, + { + "epoch": 2.3, + "learning_rate": 1.552071668533035e-05, + "loss": 0.0078, + "step": 14500 + }, + { + "epoch": 2.38, + "learning_rate": 1.5360742281235005e-05, + "loss": 0.0079, + "step": 15000 + }, + { + "epoch": 2.46, + "learning_rate": 1.520076787713966e-05, + "loss": 0.0078, + "step": 15500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5040793473044315e-05, + "loss": 0.0078, + "step": 16000 + }, + { + "epoch": 2.62, + "learning_rate": 1.4880819068948968e-05, + "loss": 0.0079, + "step": 16500 + }, + { + "epoch": 2.7, + "learning_rate": 1.4720844664853625e-05, + "loss": 0.0078, + "step": 17000 + }, + { + "epoch": 2.78, + "learning_rate": 1.4560870260758279e-05, + "loss": 0.0081, + "step": 17500 + }, + { + "epoch": 2.86, + "learning_rate": 1.4400895856662935e-05, + "loss": 0.0079, + "step": 18000 + }, + { + "epoch": 2.94, + "learning_rate": 1.4240921452567591e-05, + "loss": 0.0077, + "step": 18500 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9924637062899773, + "eval_f1": 0.0, + "eval_loss": 0.02258981019258499, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 477.2433, + "eval_samples_per_second": 422.455, + "eval_steps_per_second": 3.302, + "step": 18903 + }, + { + "epoch": 3.02, + "learning_rate": 1.4080947048472246e-05, + "loss": 0.0072, + "step": 19000 + }, + { + "epoch": 3.09, + "learning_rate": 1.39209726443769e-05, + "loss": 0.0053, + "step": 19500 + }, + { + "epoch": 3.17, + "learning_rate": 1.3760998240281555e-05, + "loss": 0.0054, + "step": 20000 + }, + { + "epoch": 3.25, + "learning_rate": 1.3601023836186211e-05, + "loss": 0.0053, + "step": 20500 + }, + { + "epoch": 3.33, + "learning_rate": 1.3441049432090867e-05, + "loss": 0.005, + "step": 21000 + }, + { + "epoch": 3.41, + "learning_rate": 1.3281075027995522e-05, + "loss": 0.0058, + "step": 21500 + }, + { + "epoch": 3.49, + "learning_rate": 1.3121100623900178e-05, + "loss": 0.0054, + "step": 22000 + }, + { + "epoch": 3.57, + "learning_rate": 1.2961126219804831e-05, + "loss": 0.0055, + "step": 22500 + }, + { + "epoch": 3.65, + "learning_rate": 1.2801151815709487e-05, + "loss": 0.0056, + "step": 23000 + }, + { + "epoch": 3.73, + "learning_rate": 1.2641177411614143e-05, + "loss": 0.0056, + "step": 23500 + }, + { + "epoch": 3.81, + "learning_rate": 1.2481203007518798e-05, + "loss": 0.0058, + "step": 24000 + }, + { + "epoch": 3.89, + "learning_rate": 1.2321228603423454e-05, + "loss": 0.0052, + "step": 24500 + }, + { + "epoch": 3.97, + "learning_rate": 1.2161254199328107e-05, + "loss": 0.0052, + "step": 25000 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9926866218329602, + "eval_f1": 0.0, + "eval_loss": 0.02687273919582367, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.5012, + "eval_samples_per_second": 424.003, + "eval_steps_per_second": 3.314, + "step": 25204 + } + ], + "logging_steps": 500, + "max_steps": 63010, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 2.6175978683722163e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-25204/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-25204/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..353304a1247e3b1c2c9c128593afbb80600a515b --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-25204/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ae7a0bcc8e5c0968aa51ccef92422d7d995099a8b793f3c4819a8985402b6f +size 4027 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-31505/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-31505/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-31505/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-31505/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-31505/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d025715d9f23510d6b30f4effcf9295f21747409 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-31505/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a9b2b4ee73c9a5a39bcc5e5a3e20c84b7edc90c9e69e896fe8d91187cef7894 +size 1418281093 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-31505/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-31505/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e321c456cb159604094c329e22663285a21a70d2 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-31505/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:312ad0fe8a398cc1fa1eb34b921f99b1f938e82fb7258a66a0a708926580ece2 +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-31505/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-31505/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..cc0469c10f89f7674482c863b35c54d589d404d1 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-31505/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a73321d880555412c4843c0c3d9c4e7adc8dd7269d257201dd971ea72f0f1ec6 +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-31505/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-31505/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..01f7beb0453d79dcf82f2c1cb32b33d4a3c7b40c --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-31505/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f416e43bba766ba0f670f142fbd3de7ba714b84ae497c0a6c40dd08918cafa +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-31505/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-31505/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..319e2d73510c82bd624bb5bc7c42ee7609573684 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-31505/trainer_state.json @@ -0,0 +1,457 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 31505, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2e-05, + "loss": 0.131, + "step": 500 + }, + { + "epoch": 0.16, + "learning_rate": 1.9840025595904658e-05, + "loss": 0.0217, + "step": 1000 + }, + { + "epoch": 0.24, + "learning_rate": 1.9680051191809314e-05, + "loss": 0.0205, + "step": 1500 + }, + { + "epoch": 0.32, + "learning_rate": 1.9520076787713967e-05, + "loss": 0.0187, + "step": 2000 + }, + { + "epoch": 0.4, + "learning_rate": 1.9360102383618623e-05, + "loss": 0.0177, + "step": 2500 + }, + { + "epoch": 0.48, + "learning_rate": 1.9200127979523276e-05, + "loss": 0.0172, + "step": 3000 + }, + { + "epoch": 0.56, + "learning_rate": 1.9040153575427932e-05, + "loss": 0.0178, + "step": 3500 + }, + { + "epoch": 0.63, + "learning_rate": 1.888017917133259e-05, + "loss": 0.0167, + "step": 4000 + }, + { + "epoch": 0.71, + "learning_rate": 1.8720204767237245e-05, + "loss": 0.0166, + "step": 4500 + }, + { + "epoch": 0.79, + "learning_rate": 1.8560230363141897e-05, + "loss": 0.0163, + "step": 5000 + }, + { + "epoch": 0.87, + "learning_rate": 1.8400255959046554e-05, + "loss": 0.0162, + "step": 5500 + }, + { + "epoch": 0.95, + "learning_rate": 1.824028155495121e-05, + "loss": 0.0158, + "step": 6000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9927355758345564, + "eval_f1": 0.0, + "eval_loss": 0.018518663942813873, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.4322, + "eval_samples_per_second": 424.065, + "eval_steps_per_second": 3.315, + "step": 6301 + }, + { + "epoch": 1.03, + "learning_rate": 1.8080307150855866e-05, + "loss": 0.0142, + "step": 6500 + }, + { + "epoch": 1.11, + "learning_rate": 1.792033274676052e-05, + "loss": 0.0113, + "step": 7000 + }, + { + "epoch": 1.19, + "learning_rate": 1.7760358342665175e-05, + "loss": 0.0118, + "step": 7500 + }, + { + "epoch": 1.27, + "learning_rate": 1.7600383938569828e-05, + "loss": 0.0115, + "step": 8000 + }, + { + "epoch": 1.35, + "learning_rate": 1.7440409534474484e-05, + "loss": 0.0118, + "step": 8500 + }, + { + "epoch": 1.43, + "learning_rate": 1.728043513037914e-05, + "loss": 0.0112, + "step": 9000 + }, + { + "epoch": 1.51, + "learning_rate": 1.7120460726283797e-05, + "loss": 0.0112, + "step": 9500 + }, + { + "epoch": 1.59, + "learning_rate": 1.6960486322188453e-05, + "loss": 0.0115, + "step": 10000 + }, + { + "epoch": 1.67, + "learning_rate": 1.680051191809311e-05, + "loss": 0.0119, + "step": 10500 + }, + { + "epoch": 1.75, + "learning_rate": 1.6640537513997762e-05, + "loss": 0.0111, + "step": 11000 + }, + { + "epoch": 1.83, + "learning_rate": 1.6480563109902418e-05, + "loss": 0.0115, + "step": 11500 + }, + { + "epoch": 1.9, + "learning_rate": 1.632058870580707e-05, + "loss": 0.0117, + "step": 12000 + }, + { + "epoch": 1.98, + "learning_rate": 1.6160614301711727e-05, + "loss": 0.0111, + "step": 12500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9928343580163489, + "eval_f1": 0.0, + "eval_loss": 0.020694203674793243, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.3755, + "eval_samples_per_second": 424.115, + "eval_steps_per_second": 3.315, + "step": 12602 + }, + { + "epoch": 2.06, + "learning_rate": 1.6000639897616383e-05, + "loss": 0.008, + "step": 13000 + }, + { + "epoch": 2.14, + "learning_rate": 1.5840665493521036e-05, + "loss": 0.0079, + "step": 13500 + }, + { + "epoch": 2.22, + "learning_rate": 1.5680691089425692e-05, + "loss": 0.0076, + "step": 14000 + }, + { + "epoch": 2.3, + "learning_rate": 1.552071668533035e-05, + "loss": 0.0078, + "step": 14500 + }, + { + "epoch": 2.38, + "learning_rate": 1.5360742281235005e-05, + "loss": 0.0079, + "step": 15000 + }, + { + "epoch": 2.46, + "learning_rate": 1.520076787713966e-05, + "loss": 0.0078, + "step": 15500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5040793473044315e-05, + "loss": 0.0078, + "step": 16000 + }, + { + "epoch": 2.62, + "learning_rate": 1.4880819068948968e-05, + "loss": 0.0079, + "step": 16500 + }, + { + "epoch": 2.7, + "learning_rate": 1.4720844664853625e-05, + "loss": 0.0078, + "step": 17000 + }, + { + "epoch": 2.78, + "learning_rate": 1.4560870260758279e-05, + "loss": 0.0081, + "step": 17500 + }, + { + "epoch": 2.86, + "learning_rate": 1.4400895856662935e-05, + "loss": 0.0079, + "step": 18000 + }, + { + "epoch": 2.94, + "learning_rate": 1.4240921452567591e-05, + "loss": 0.0077, + "step": 18500 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9924637062899773, + "eval_f1": 0.0, + "eval_loss": 0.02258981019258499, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 477.2433, + "eval_samples_per_second": 422.455, + "eval_steps_per_second": 3.302, + "step": 18903 + }, + { + "epoch": 3.02, + "learning_rate": 1.4080947048472246e-05, + "loss": 0.0072, + "step": 19000 + }, + { + "epoch": 3.09, + "learning_rate": 1.39209726443769e-05, + "loss": 0.0053, + "step": 19500 + }, + { + "epoch": 3.17, + "learning_rate": 1.3760998240281555e-05, + "loss": 0.0054, + "step": 20000 + }, + { + "epoch": 3.25, + "learning_rate": 1.3601023836186211e-05, + "loss": 0.0053, + "step": 20500 + }, + { + "epoch": 3.33, + "learning_rate": 1.3441049432090867e-05, + "loss": 0.005, + "step": 21000 + }, + { + "epoch": 3.41, + "learning_rate": 1.3281075027995522e-05, + "loss": 0.0058, + "step": 21500 + }, + { + "epoch": 3.49, + "learning_rate": 1.3121100623900178e-05, + "loss": 0.0054, + "step": 22000 + }, + { + "epoch": 3.57, + "learning_rate": 1.2961126219804831e-05, + "loss": 0.0055, + "step": 22500 + }, + { + "epoch": 3.65, + "learning_rate": 1.2801151815709487e-05, + "loss": 0.0056, + "step": 23000 + }, + { + "epoch": 3.73, + "learning_rate": 1.2641177411614143e-05, + "loss": 0.0056, + "step": 23500 + }, + { + "epoch": 3.81, + "learning_rate": 1.2481203007518798e-05, + "loss": 0.0058, + "step": 24000 + }, + { + "epoch": 3.89, + "learning_rate": 1.2321228603423454e-05, + "loss": 0.0052, + "step": 24500 + }, + { + "epoch": 3.97, + "learning_rate": 1.2161254199328107e-05, + "loss": 0.0052, + "step": 25000 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9926866218329602, + "eval_f1": 0.0, + "eval_loss": 0.02687273919582367, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.5012, + "eval_samples_per_second": 424.003, + "eval_steps_per_second": 3.314, + "step": 25204 + }, + { + "epoch": 4.05, + "learning_rate": 1.2001279795232763e-05, + "loss": 0.0042, + "step": 25500 + }, + { + "epoch": 4.13, + "learning_rate": 1.184130539113742e-05, + "loss": 0.0037, + "step": 26000 + }, + { + "epoch": 4.21, + "learning_rate": 1.1681330987042074e-05, + "loss": 0.0039, + "step": 26500 + }, + { + "epoch": 4.29, + "learning_rate": 1.152135658294673e-05, + "loss": 0.0038, + "step": 27000 + }, + { + "epoch": 4.36, + "learning_rate": 1.1361382178851386e-05, + "loss": 0.0041, + "step": 27500 + }, + { + "epoch": 4.44, + "learning_rate": 1.120140777475604e-05, + "loss": 0.0041, + "step": 28000 + }, + { + "epoch": 4.52, + "learning_rate": 1.1041433370660695e-05, + "loss": 0.0042, + "step": 28500 + }, + { + "epoch": 4.6, + "learning_rate": 1.088145896656535e-05, + "loss": 0.0039, + "step": 29000 + }, + { + "epoch": 4.68, + "learning_rate": 1.0721484562470006e-05, + "loss": 0.0044, + "step": 29500 + }, + { + "epoch": 4.76, + "learning_rate": 1.056151015837466e-05, + "loss": 0.0043, + "step": 30000 + }, + { + "epoch": 4.84, + "learning_rate": 1.0401535754279317e-05, + "loss": 0.0042, + "step": 30500 + }, + { + "epoch": 4.92, + "learning_rate": 1.024156135018397e-05, + "loss": 0.0045, + "step": 31000 + }, + { + "epoch": 5.0, + "learning_rate": 1.0081586946088626e-05, + "loss": 0.0041, + "step": 31500 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.9926359194741641, + "eval_f1": 0.0, + "eval_loss": 0.029035158455371857, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 469.3362, + "eval_samples_per_second": 429.573, + "eval_steps_per_second": 3.358, + "step": 31505 + } + ], + "logging_steps": 500, + "max_steps": 63010, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 3.2719973354652704e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-31505/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-31505/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..353304a1247e3b1c2c9c128593afbb80600a515b --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-31505/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ae7a0bcc8e5c0968aa51ccef92422d7d995099a8b793f3c4819a8985402b6f +size 4027 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-37806/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-37806/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-37806/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-37806/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-37806/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..676864d73ea4dabd15af755396560f8c5c510fc9 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-37806/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ec216336e94e991e99a47a7636d3c7c453ab5e774000b29841de7e0e5f3a1f7 +size 1418281093 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-37806/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-37806/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..000e3a9393d59ddf20ee8b4f9d2a1cc32c6684b6 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-37806/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f0f8294a4195e88df43ab8ddedf390c17578fbafef4c5454fd22a8dc0b31617 +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-37806/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-37806/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..42272c2f38e5a781913457ff5cb9135ca72bc205 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-37806/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fefb80a712edd524abb5b3a6998fe0c4f26b5abeafaa3d8e2c4cfac4a4f24479 +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-37806/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-37806/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..92cf19e844828bf8d1896c5a0258b2ee7990569e --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-37806/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89c231a2e0faab70586d65071b01c57c0f8f3b5890453e1c7704a83f97a2bb44 +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-37806/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-37806/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f1f4cb2693dad130723dd2e67ff9b216a1b04a0d --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-37806/trainer_state.json @@ -0,0 +1,541 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.0, + "eval_steps": 500, + "global_step": 37806, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2e-05, + "loss": 0.131, + "step": 500 + }, + { + "epoch": 0.16, + "learning_rate": 1.9840025595904658e-05, + "loss": 0.0217, + "step": 1000 + }, + { + "epoch": 0.24, + "learning_rate": 1.9680051191809314e-05, + "loss": 0.0205, + "step": 1500 + }, + { + "epoch": 0.32, + "learning_rate": 1.9520076787713967e-05, + "loss": 0.0187, + "step": 2000 + }, + { + "epoch": 0.4, + "learning_rate": 1.9360102383618623e-05, + "loss": 0.0177, + "step": 2500 + }, + { + "epoch": 0.48, + "learning_rate": 1.9200127979523276e-05, + "loss": 0.0172, + "step": 3000 + }, + { + "epoch": 0.56, + "learning_rate": 1.9040153575427932e-05, + "loss": 0.0178, + "step": 3500 + }, + { + "epoch": 0.63, + "learning_rate": 1.888017917133259e-05, + "loss": 0.0167, + "step": 4000 + }, + { + "epoch": 0.71, + "learning_rate": 1.8720204767237245e-05, + "loss": 0.0166, + "step": 4500 + }, + { + "epoch": 0.79, + "learning_rate": 1.8560230363141897e-05, + "loss": 0.0163, + "step": 5000 + }, + { + "epoch": 0.87, + "learning_rate": 1.8400255959046554e-05, + "loss": 0.0162, + "step": 5500 + }, + { + "epoch": 0.95, + "learning_rate": 1.824028155495121e-05, + "loss": 0.0158, + "step": 6000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9927355758345564, + "eval_f1": 0.0, + "eval_loss": 0.018518663942813873, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.4322, + "eval_samples_per_second": 424.065, + "eval_steps_per_second": 3.315, + "step": 6301 + }, + { + "epoch": 1.03, + "learning_rate": 1.8080307150855866e-05, + "loss": 0.0142, + "step": 6500 + }, + { + "epoch": 1.11, + "learning_rate": 1.792033274676052e-05, + "loss": 0.0113, + "step": 7000 + }, + { + "epoch": 1.19, + "learning_rate": 1.7760358342665175e-05, + "loss": 0.0118, + "step": 7500 + }, + { + "epoch": 1.27, + "learning_rate": 1.7600383938569828e-05, + "loss": 0.0115, + "step": 8000 + }, + { + "epoch": 1.35, + "learning_rate": 1.7440409534474484e-05, + "loss": 0.0118, + "step": 8500 + }, + { + "epoch": 1.43, + "learning_rate": 1.728043513037914e-05, + "loss": 0.0112, + "step": 9000 + }, + { + "epoch": 1.51, + "learning_rate": 1.7120460726283797e-05, + "loss": 0.0112, + "step": 9500 + }, + { + "epoch": 1.59, + "learning_rate": 1.6960486322188453e-05, + "loss": 0.0115, + "step": 10000 + }, + { + "epoch": 1.67, + "learning_rate": 1.680051191809311e-05, + "loss": 0.0119, + "step": 10500 + }, + { + "epoch": 1.75, + "learning_rate": 1.6640537513997762e-05, + "loss": 0.0111, + "step": 11000 + }, + { + "epoch": 1.83, + "learning_rate": 1.6480563109902418e-05, + "loss": 0.0115, + "step": 11500 + }, + { + "epoch": 1.9, + "learning_rate": 1.632058870580707e-05, + "loss": 0.0117, + "step": 12000 + }, + { + "epoch": 1.98, + "learning_rate": 1.6160614301711727e-05, + "loss": 0.0111, + "step": 12500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9928343580163489, + "eval_f1": 0.0, + "eval_loss": 0.020694203674793243, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.3755, + "eval_samples_per_second": 424.115, + "eval_steps_per_second": 3.315, + "step": 12602 + }, + { + "epoch": 2.06, + "learning_rate": 1.6000639897616383e-05, + "loss": 0.008, + "step": 13000 + }, + { + "epoch": 2.14, + "learning_rate": 1.5840665493521036e-05, + "loss": 0.0079, + "step": 13500 + }, + { + "epoch": 2.22, + "learning_rate": 1.5680691089425692e-05, + "loss": 0.0076, + "step": 14000 + }, + { + "epoch": 2.3, + "learning_rate": 1.552071668533035e-05, + "loss": 0.0078, + "step": 14500 + }, + { + "epoch": 2.38, + "learning_rate": 1.5360742281235005e-05, + "loss": 0.0079, + "step": 15000 + }, + { + "epoch": 2.46, + "learning_rate": 1.520076787713966e-05, + "loss": 0.0078, + "step": 15500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5040793473044315e-05, + "loss": 0.0078, + "step": 16000 + }, + { + "epoch": 2.62, + "learning_rate": 1.4880819068948968e-05, + "loss": 0.0079, + "step": 16500 + }, + { + "epoch": 2.7, + "learning_rate": 1.4720844664853625e-05, + "loss": 0.0078, + "step": 17000 + }, + { + "epoch": 2.78, + "learning_rate": 1.4560870260758279e-05, + "loss": 0.0081, + "step": 17500 + }, + { + "epoch": 2.86, + "learning_rate": 1.4400895856662935e-05, + "loss": 0.0079, + "step": 18000 + }, + { + "epoch": 2.94, + "learning_rate": 1.4240921452567591e-05, + "loss": 0.0077, + "step": 18500 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9924637062899773, + "eval_f1": 0.0, + "eval_loss": 0.02258981019258499, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 477.2433, + "eval_samples_per_second": 422.455, + "eval_steps_per_second": 3.302, + "step": 18903 + }, + { + "epoch": 3.02, + "learning_rate": 1.4080947048472246e-05, + "loss": 0.0072, + "step": 19000 + }, + { + "epoch": 3.09, + "learning_rate": 1.39209726443769e-05, + "loss": 0.0053, + "step": 19500 + }, + { + "epoch": 3.17, + "learning_rate": 1.3760998240281555e-05, + "loss": 0.0054, + "step": 20000 + }, + { + "epoch": 3.25, + "learning_rate": 1.3601023836186211e-05, + "loss": 0.0053, + "step": 20500 + }, + { + "epoch": 3.33, + "learning_rate": 1.3441049432090867e-05, + "loss": 0.005, + "step": 21000 + }, + { + "epoch": 3.41, + "learning_rate": 1.3281075027995522e-05, + "loss": 0.0058, + "step": 21500 + }, + { + "epoch": 3.49, + "learning_rate": 1.3121100623900178e-05, + "loss": 0.0054, + "step": 22000 + }, + { + "epoch": 3.57, + "learning_rate": 1.2961126219804831e-05, + "loss": 0.0055, + "step": 22500 + }, + { + "epoch": 3.65, + "learning_rate": 1.2801151815709487e-05, + "loss": 0.0056, + "step": 23000 + }, + { + "epoch": 3.73, + "learning_rate": 1.2641177411614143e-05, + "loss": 0.0056, + "step": 23500 + }, + { + "epoch": 3.81, + "learning_rate": 1.2481203007518798e-05, + "loss": 0.0058, + "step": 24000 + }, + { + "epoch": 3.89, + "learning_rate": 1.2321228603423454e-05, + "loss": 0.0052, + "step": 24500 + }, + { + "epoch": 3.97, + "learning_rate": 1.2161254199328107e-05, + "loss": 0.0052, + "step": 25000 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9926866218329602, + "eval_f1": 0.0, + "eval_loss": 0.02687273919582367, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.5012, + "eval_samples_per_second": 424.003, + "eval_steps_per_second": 3.314, + "step": 25204 + }, + { + "epoch": 4.05, + "learning_rate": 1.2001279795232763e-05, + "loss": 0.0042, + "step": 25500 + }, + { + "epoch": 4.13, + "learning_rate": 1.184130539113742e-05, + "loss": 0.0037, + "step": 26000 + }, + { + "epoch": 4.21, + "learning_rate": 1.1681330987042074e-05, + "loss": 0.0039, + "step": 26500 + }, + { + "epoch": 4.29, + "learning_rate": 1.152135658294673e-05, + "loss": 0.0038, + "step": 27000 + }, + { + "epoch": 4.36, + "learning_rate": 1.1361382178851386e-05, + "loss": 0.0041, + "step": 27500 + }, + { + "epoch": 4.44, + "learning_rate": 1.120140777475604e-05, + "loss": 0.0041, + "step": 28000 + }, + { + "epoch": 4.52, + "learning_rate": 1.1041433370660695e-05, + "loss": 0.0042, + "step": 28500 + }, + { + "epoch": 4.6, + "learning_rate": 1.088145896656535e-05, + "loss": 0.0039, + "step": 29000 + }, + { + "epoch": 4.68, + "learning_rate": 1.0721484562470006e-05, + "loss": 0.0044, + "step": 29500 + }, + { + "epoch": 4.76, + "learning_rate": 1.056151015837466e-05, + "loss": 0.0043, + "step": 30000 + }, + { + "epoch": 4.84, + "learning_rate": 1.0401535754279317e-05, + "loss": 0.0042, + "step": 30500 + }, + { + "epoch": 4.92, + "learning_rate": 1.024156135018397e-05, + "loss": 0.0045, + "step": 31000 + }, + { + "epoch": 5.0, + "learning_rate": 1.0081586946088626e-05, + "loss": 0.0041, + "step": 31500 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.9926359194741641, + "eval_f1": 0.0, + "eval_loss": 0.029035158455371857, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 469.3362, + "eval_samples_per_second": 429.573, + "eval_steps_per_second": 3.358, + "step": 31505 + }, + { + "epoch": 5.08, + "learning_rate": 9.921612541993282e-06, + "loss": 0.0032, + "step": 32000 + }, + { + "epoch": 5.16, + "learning_rate": 9.761638137897937e-06, + "loss": 0.0028, + "step": 32500 + }, + { + "epoch": 5.24, + "learning_rate": 9.601663733802591e-06, + "loss": 0.003, + "step": 33000 + }, + { + "epoch": 5.32, + "learning_rate": 9.441689329707247e-06, + "loss": 0.0031, + "step": 33500 + }, + { + "epoch": 5.4, + "learning_rate": 9.281714925611904e-06, + "loss": 0.0031, + "step": 34000 + }, + { + "epoch": 5.48, + "learning_rate": 9.121740521516558e-06, + "loss": 0.0032, + "step": 34500 + }, + { + "epoch": 5.55, + "learning_rate": 8.961766117421213e-06, + "loss": 0.0031, + "step": 35000 + }, + { + "epoch": 5.63, + "learning_rate": 8.801791713325869e-06, + "loss": 0.0031, + "step": 35500 + }, + { + "epoch": 5.71, + "learning_rate": 8.641817309230523e-06, + "loss": 0.0032, + "step": 36000 + }, + { + "epoch": 5.79, + "learning_rate": 8.48184290513518e-06, + "loss": 0.0032, + "step": 36500 + }, + { + "epoch": 5.87, + "learning_rate": 8.321868501039834e-06, + "loss": 0.0031, + "step": 37000 + }, + { + "epoch": 5.95, + "learning_rate": 8.161894096944489e-06, + "loss": 0.0034, + "step": 37500 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9926551514033626, + "eval_f1": 0.0, + "eval_loss": 0.03292802721261978, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.0131, + "eval_samples_per_second": 424.439, + "eval_steps_per_second": 3.318, + "step": 37806 + } + ], + "logging_steps": 500, + "max_steps": 63010, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 3.9263968025583245e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-37806/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-37806/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..353304a1247e3b1c2c9c128593afbb80600a515b --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-37806/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ae7a0bcc8e5c0968aa51ccef92422d7d995099a8b793f3c4819a8985402b6f +size 4027 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-44107/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-44107/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-44107/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-44107/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-44107/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3fe6772606bedeb31377570a78ef250fac06721c --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-44107/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e8dd783c40a966ddaac72eb42609691d327fde1320e83b9079c4f1828254e2c +size 1418281093 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-44107/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-44107/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..dddb17b21436acdf493751597b7d3b570149dcd5 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-44107/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff6170c3cdd4c6db74fea4d1d89c1aec87f02232cb202ae6e21c0554f404e4a8 +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-44107/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-44107/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..404abc39816d755abc1c24f9167c1d7e11d19a93 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-44107/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d483d1ca9c5436c1e996b71b6534065a8a45bb91a3be473ed10642f6f216a7d6 +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-44107/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-44107/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b25a0d73a7131904115b8da50af48fab82cbfaf3 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-44107/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5ab9de274ef20f4bc9c43e08da918ede8c34b0c532d8503cde79dd041011d5a +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-44107/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-44107/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cd3ad98b63a611dd135152a24d2cfa2e7f3eaa42 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-44107/trainer_state.json @@ -0,0 +1,631 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 44107, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2e-05, + "loss": 0.131, + "step": 500 + }, + { + "epoch": 0.16, + "learning_rate": 1.9840025595904658e-05, + "loss": 0.0217, + "step": 1000 + }, + { + "epoch": 0.24, + "learning_rate": 1.9680051191809314e-05, + "loss": 0.0205, + "step": 1500 + }, + { + "epoch": 0.32, + "learning_rate": 1.9520076787713967e-05, + "loss": 0.0187, + "step": 2000 + }, + { + "epoch": 0.4, + "learning_rate": 1.9360102383618623e-05, + "loss": 0.0177, + "step": 2500 + }, + { + "epoch": 0.48, + "learning_rate": 1.9200127979523276e-05, + "loss": 0.0172, + "step": 3000 + }, + { + "epoch": 0.56, + "learning_rate": 1.9040153575427932e-05, + "loss": 0.0178, + "step": 3500 + }, + { + "epoch": 0.63, + "learning_rate": 1.888017917133259e-05, + "loss": 0.0167, + "step": 4000 + }, + { + "epoch": 0.71, + "learning_rate": 1.8720204767237245e-05, + "loss": 0.0166, + "step": 4500 + }, + { + "epoch": 0.79, + "learning_rate": 1.8560230363141897e-05, + "loss": 0.0163, + "step": 5000 + }, + { + "epoch": 0.87, + "learning_rate": 1.8400255959046554e-05, + "loss": 0.0162, + "step": 5500 + }, + { + "epoch": 0.95, + "learning_rate": 1.824028155495121e-05, + "loss": 0.0158, + "step": 6000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9927355758345564, + "eval_f1": 0.0, + "eval_loss": 0.018518663942813873, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.4322, + "eval_samples_per_second": 424.065, + "eval_steps_per_second": 3.315, + "step": 6301 + }, + { + "epoch": 1.03, + "learning_rate": 1.8080307150855866e-05, + "loss": 0.0142, + "step": 6500 + }, + { + "epoch": 1.11, + "learning_rate": 1.792033274676052e-05, + "loss": 0.0113, + "step": 7000 + }, + { + "epoch": 1.19, + "learning_rate": 1.7760358342665175e-05, + "loss": 0.0118, + "step": 7500 + }, + { + "epoch": 1.27, + "learning_rate": 1.7600383938569828e-05, + "loss": 0.0115, + "step": 8000 + }, + { + "epoch": 1.35, + "learning_rate": 1.7440409534474484e-05, + "loss": 0.0118, + "step": 8500 + }, + { + "epoch": 1.43, + "learning_rate": 1.728043513037914e-05, + "loss": 0.0112, + "step": 9000 + }, + { + "epoch": 1.51, + "learning_rate": 1.7120460726283797e-05, + "loss": 0.0112, + "step": 9500 + }, + { + "epoch": 1.59, + "learning_rate": 1.6960486322188453e-05, + "loss": 0.0115, + "step": 10000 + }, + { + "epoch": 1.67, + "learning_rate": 1.680051191809311e-05, + "loss": 0.0119, + "step": 10500 + }, + { + "epoch": 1.75, + "learning_rate": 1.6640537513997762e-05, + "loss": 0.0111, + "step": 11000 + }, + { + "epoch": 1.83, + "learning_rate": 1.6480563109902418e-05, + "loss": 0.0115, + "step": 11500 + }, + { + "epoch": 1.9, + "learning_rate": 1.632058870580707e-05, + "loss": 0.0117, + "step": 12000 + }, + { + "epoch": 1.98, + "learning_rate": 1.6160614301711727e-05, + "loss": 0.0111, + "step": 12500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9928343580163489, + "eval_f1": 0.0, + "eval_loss": 0.020694203674793243, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.3755, + "eval_samples_per_second": 424.115, + "eval_steps_per_second": 3.315, + "step": 12602 + }, + { + "epoch": 2.06, + "learning_rate": 1.6000639897616383e-05, + "loss": 0.008, + "step": 13000 + }, + { + "epoch": 2.14, + "learning_rate": 1.5840665493521036e-05, + "loss": 0.0079, + "step": 13500 + }, + { + "epoch": 2.22, + "learning_rate": 1.5680691089425692e-05, + "loss": 0.0076, + "step": 14000 + }, + { + "epoch": 2.3, + "learning_rate": 1.552071668533035e-05, + "loss": 0.0078, + "step": 14500 + }, + { + "epoch": 2.38, + "learning_rate": 1.5360742281235005e-05, + "loss": 0.0079, + "step": 15000 + }, + { + "epoch": 2.46, + "learning_rate": 1.520076787713966e-05, + "loss": 0.0078, + "step": 15500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5040793473044315e-05, + "loss": 0.0078, + "step": 16000 + }, + { + "epoch": 2.62, + "learning_rate": 1.4880819068948968e-05, + "loss": 0.0079, + "step": 16500 + }, + { + "epoch": 2.7, + "learning_rate": 1.4720844664853625e-05, + "loss": 0.0078, + "step": 17000 + }, + { + "epoch": 2.78, + "learning_rate": 1.4560870260758279e-05, + "loss": 0.0081, + "step": 17500 + }, + { + "epoch": 2.86, + "learning_rate": 1.4400895856662935e-05, + "loss": 0.0079, + "step": 18000 + }, + { + "epoch": 2.94, + "learning_rate": 1.4240921452567591e-05, + "loss": 0.0077, + "step": 18500 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9924637062899773, + "eval_f1": 0.0, + "eval_loss": 0.02258981019258499, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 477.2433, + "eval_samples_per_second": 422.455, + "eval_steps_per_second": 3.302, + "step": 18903 + }, + { + "epoch": 3.02, + "learning_rate": 1.4080947048472246e-05, + "loss": 0.0072, + "step": 19000 + }, + { + "epoch": 3.09, + "learning_rate": 1.39209726443769e-05, + "loss": 0.0053, + "step": 19500 + }, + { + "epoch": 3.17, + "learning_rate": 1.3760998240281555e-05, + "loss": 0.0054, + "step": 20000 + }, + { + "epoch": 3.25, + "learning_rate": 1.3601023836186211e-05, + "loss": 0.0053, + "step": 20500 + }, + { + "epoch": 3.33, + "learning_rate": 1.3441049432090867e-05, + "loss": 0.005, + "step": 21000 + }, + { + "epoch": 3.41, + "learning_rate": 1.3281075027995522e-05, + "loss": 0.0058, + "step": 21500 + }, + { + "epoch": 3.49, + "learning_rate": 1.3121100623900178e-05, + "loss": 0.0054, + "step": 22000 + }, + { + "epoch": 3.57, + "learning_rate": 1.2961126219804831e-05, + "loss": 0.0055, + "step": 22500 + }, + { + "epoch": 3.65, + "learning_rate": 1.2801151815709487e-05, + "loss": 0.0056, + "step": 23000 + }, + { + "epoch": 3.73, + "learning_rate": 1.2641177411614143e-05, + "loss": 0.0056, + "step": 23500 + }, + { + "epoch": 3.81, + "learning_rate": 1.2481203007518798e-05, + "loss": 0.0058, + "step": 24000 + }, + { + "epoch": 3.89, + "learning_rate": 1.2321228603423454e-05, + "loss": 0.0052, + "step": 24500 + }, + { + "epoch": 3.97, + "learning_rate": 1.2161254199328107e-05, + "loss": 0.0052, + "step": 25000 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9926866218329602, + "eval_f1": 0.0, + "eval_loss": 0.02687273919582367, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.5012, + "eval_samples_per_second": 424.003, + "eval_steps_per_second": 3.314, + "step": 25204 + }, + { + "epoch": 4.05, + "learning_rate": 1.2001279795232763e-05, + "loss": 0.0042, + "step": 25500 + }, + { + "epoch": 4.13, + "learning_rate": 1.184130539113742e-05, + "loss": 0.0037, + "step": 26000 + }, + { + "epoch": 4.21, + "learning_rate": 1.1681330987042074e-05, + "loss": 0.0039, + "step": 26500 + }, + { + "epoch": 4.29, + "learning_rate": 1.152135658294673e-05, + "loss": 0.0038, + "step": 27000 + }, + { + "epoch": 4.36, + "learning_rate": 1.1361382178851386e-05, + "loss": 0.0041, + "step": 27500 + }, + { + "epoch": 4.44, + "learning_rate": 1.120140777475604e-05, + "loss": 0.0041, + "step": 28000 + }, + { + "epoch": 4.52, + "learning_rate": 1.1041433370660695e-05, + "loss": 0.0042, + "step": 28500 + }, + { + "epoch": 4.6, + "learning_rate": 1.088145896656535e-05, + "loss": 0.0039, + "step": 29000 + }, + { + "epoch": 4.68, + "learning_rate": 1.0721484562470006e-05, + "loss": 0.0044, + "step": 29500 + }, + { + "epoch": 4.76, + "learning_rate": 1.056151015837466e-05, + "loss": 0.0043, + "step": 30000 + }, + { + "epoch": 4.84, + "learning_rate": 1.0401535754279317e-05, + "loss": 0.0042, + "step": 30500 + }, + { + "epoch": 4.92, + "learning_rate": 1.024156135018397e-05, + "loss": 0.0045, + "step": 31000 + }, + { + "epoch": 5.0, + "learning_rate": 1.0081586946088626e-05, + "loss": 0.0041, + "step": 31500 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.9926359194741641, + "eval_f1": 0.0, + "eval_loss": 0.029035158455371857, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 469.3362, + "eval_samples_per_second": 429.573, + "eval_steps_per_second": 3.358, + "step": 31505 + }, + { + "epoch": 5.08, + "learning_rate": 9.921612541993282e-06, + "loss": 0.0032, + "step": 32000 + }, + { + "epoch": 5.16, + "learning_rate": 9.761638137897937e-06, + "loss": 0.0028, + "step": 32500 + }, + { + "epoch": 5.24, + "learning_rate": 9.601663733802591e-06, + "loss": 0.003, + "step": 33000 + }, + { + "epoch": 5.32, + "learning_rate": 9.441689329707247e-06, + "loss": 0.0031, + "step": 33500 + }, + { + "epoch": 5.4, + "learning_rate": 9.281714925611904e-06, + "loss": 0.0031, + "step": 34000 + }, + { + "epoch": 5.48, + "learning_rate": 9.121740521516558e-06, + "loss": 0.0032, + "step": 34500 + }, + { + "epoch": 5.55, + "learning_rate": 8.961766117421213e-06, + "loss": 0.0031, + "step": 35000 + }, + { + "epoch": 5.63, + "learning_rate": 8.801791713325869e-06, + "loss": 0.0031, + "step": 35500 + }, + { + "epoch": 5.71, + "learning_rate": 8.641817309230523e-06, + "loss": 0.0032, + "step": 36000 + }, + { + "epoch": 5.79, + "learning_rate": 8.48184290513518e-06, + "loss": 0.0032, + "step": 36500 + }, + { + "epoch": 5.87, + "learning_rate": 8.321868501039834e-06, + "loss": 0.0031, + "step": 37000 + }, + { + "epoch": 5.95, + "learning_rate": 8.161894096944489e-06, + "loss": 0.0034, + "step": 37500 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9926551514033626, + "eval_f1": 0.0, + "eval_loss": 0.03292802721261978, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.0131, + "eval_samples_per_second": 424.439, + "eval_steps_per_second": 3.318, + "step": 37806 + }, + { + "epoch": 6.03, + "learning_rate": 8.001919692849145e-06, + "loss": 0.003, + "step": 38000 + }, + { + "epoch": 6.11, + "learning_rate": 7.841945288753801e-06, + "loss": 0.0025, + "step": 38500 + }, + { + "epoch": 6.19, + "learning_rate": 7.681970884658456e-06, + "loss": 0.0023, + "step": 39000 + }, + { + "epoch": 6.27, + "learning_rate": 7.521996480563111e-06, + "loss": 0.0026, + "step": 39500 + }, + { + "epoch": 6.35, + "learning_rate": 7.3620220764677655e-06, + "loss": 0.0026, + "step": 40000 + }, + { + "epoch": 6.43, + "learning_rate": 7.202047672372421e-06, + "loss": 0.0027, + "step": 40500 + }, + { + "epoch": 6.51, + "learning_rate": 7.042073268277076e-06, + "loss": 0.0024, + "step": 41000 + }, + { + "epoch": 6.59, + "learning_rate": 6.882098864181731e-06, + "loss": 0.0026, + "step": 41500 + }, + { + "epoch": 6.67, + "learning_rate": 6.722124460086387e-06, + "loss": 0.0025, + "step": 42000 + }, + { + "epoch": 6.74, + "learning_rate": 6.562150055991042e-06, + "loss": 0.0026, + "step": 42500 + }, + { + "epoch": 6.82, + "learning_rate": 6.402175651895697e-06, + "loss": 0.0025, + "step": 43000 + }, + { + "epoch": 6.9, + "learning_rate": 6.242201247800352e-06, + "loss": 0.0024, + "step": 43500 + }, + { + "epoch": 6.98, + "learning_rate": 6.0822268437050084e-06, + "loss": 0.0025, + "step": 44000 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.9924776931475762, + "eval_f1": 0.0, + "eval_loss": 0.04146205633878708, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 477.72, + "eval_samples_per_second": 422.034, + "eval_steps_per_second": 3.299, + "step": 44107 + } + ], + "logging_steps": 500, + "max_steps": 63010, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 4.5807962696513786e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-44107/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-44107/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..353304a1247e3b1c2c9c128593afbb80600a515b --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-44107/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ae7a0bcc8e5c0968aa51ccef92422d7d995099a8b793f3c4819a8985402b6f +size 4027 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-50408/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-50408/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-50408/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-50408/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-50408/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..be33f99f630cc1f1632c9a5720e15cfe55191480 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-50408/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:813df7a6e91256a8f67012275d28daea3d98a49cd1b8eb11eb59f01a95b37262 +size 1418281093 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-50408/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-50408/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..c72248645adb37257b9179a43d23df79b63396a5 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-50408/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e752122620cd0634cca00f548103dbb3a91966911eb68cf4d7ccdaf6464cbb9 +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-50408/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-50408/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fd9c8c23bd66a1a2d22b350a99bc8a1850aa4d2d --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-50408/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:954b07eee7f22ef06dfae179bc946ddb52009fd29bd7fa38717b4e32c3c0fc8b +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-50408/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-50408/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bbeaabb1f4eb6fc57adbd68f9ba38edf4e4fdff4 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-50408/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e83eb1b2f384152b8debbf373d6516e29e5d47bd575011bebaeffbc9c23f9b30 +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-50408/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-50408/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..deb0170fc8bf7d4c6397d369fbb5fb71a24bfcc6 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-50408/trainer_state.json @@ -0,0 +1,715 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 8.0, + "eval_steps": 500, + "global_step": 50408, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2e-05, + "loss": 0.131, + "step": 500 + }, + { + "epoch": 0.16, + "learning_rate": 1.9840025595904658e-05, + "loss": 0.0217, + "step": 1000 + }, + { + "epoch": 0.24, + "learning_rate": 1.9680051191809314e-05, + "loss": 0.0205, + "step": 1500 + }, + { + "epoch": 0.32, + "learning_rate": 1.9520076787713967e-05, + "loss": 0.0187, + "step": 2000 + }, + { + "epoch": 0.4, + "learning_rate": 1.9360102383618623e-05, + "loss": 0.0177, + "step": 2500 + }, + { + "epoch": 0.48, + "learning_rate": 1.9200127979523276e-05, + "loss": 0.0172, + "step": 3000 + }, + { + "epoch": 0.56, + "learning_rate": 1.9040153575427932e-05, + "loss": 0.0178, + "step": 3500 + }, + { + "epoch": 0.63, + "learning_rate": 1.888017917133259e-05, + "loss": 0.0167, + "step": 4000 + }, + { + "epoch": 0.71, + "learning_rate": 1.8720204767237245e-05, + "loss": 0.0166, + "step": 4500 + }, + { + "epoch": 0.79, + "learning_rate": 1.8560230363141897e-05, + "loss": 0.0163, + "step": 5000 + }, + { + "epoch": 0.87, + "learning_rate": 1.8400255959046554e-05, + "loss": 0.0162, + "step": 5500 + }, + { + "epoch": 0.95, + "learning_rate": 1.824028155495121e-05, + "loss": 0.0158, + "step": 6000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9927355758345564, + "eval_f1": 0.0, + "eval_loss": 0.018518663942813873, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.4322, + "eval_samples_per_second": 424.065, + "eval_steps_per_second": 3.315, + "step": 6301 + }, + { + "epoch": 1.03, + "learning_rate": 1.8080307150855866e-05, + "loss": 0.0142, + "step": 6500 + }, + { + "epoch": 1.11, + "learning_rate": 1.792033274676052e-05, + "loss": 0.0113, + "step": 7000 + }, + { + "epoch": 1.19, + "learning_rate": 1.7760358342665175e-05, + "loss": 0.0118, + "step": 7500 + }, + { + "epoch": 1.27, + "learning_rate": 1.7600383938569828e-05, + "loss": 0.0115, + "step": 8000 + }, + { + "epoch": 1.35, + "learning_rate": 1.7440409534474484e-05, + "loss": 0.0118, + "step": 8500 + }, + { + "epoch": 1.43, + "learning_rate": 1.728043513037914e-05, + "loss": 0.0112, + "step": 9000 + }, + { + "epoch": 1.51, + "learning_rate": 1.7120460726283797e-05, + "loss": 0.0112, + "step": 9500 + }, + { + "epoch": 1.59, + "learning_rate": 1.6960486322188453e-05, + "loss": 0.0115, + "step": 10000 + }, + { + "epoch": 1.67, + "learning_rate": 1.680051191809311e-05, + "loss": 0.0119, + "step": 10500 + }, + { + "epoch": 1.75, + "learning_rate": 1.6640537513997762e-05, + "loss": 0.0111, + "step": 11000 + }, + { + "epoch": 1.83, + "learning_rate": 1.6480563109902418e-05, + "loss": 0.0115, + "step": 11500 + }, + { + "epoch": 1.9, + "learning_rate": 1.632058870580707e-05, + "loss": 0.0117, + "step": 12000 + }, + { + "epoch": 1.98, + "learning_rate": 1.6160614301711727e-05, + "loss": 0.0111, + "step": 12500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9928343580163489, + "eval_f1": 0.0, + "eval_loss": 0.020694203674793243, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.3755, + "eval_samples_per_second": 424.115, + "eval_steps_per_second": 3.315, + "step": 12602 + }, + { + "epoch": 2.06, + "learning_rate": 1.6000639897616383e-05, + "loss": 0.008, + "step": 13000 + }, + { + "epoch": 2.14, + "learning_rate": 1.5840665493521036e-05, + "loss": 0.0079, + "step": 13500 + }, + { + "epoch": 2.22, + "learning_rate": 1.5680691089425692e-05, + "loss": 0.0076, + "step": 14000 + }, + { + "epoch": 2.3, + "learning_rate": 1.552071668533035e-05, + "loss": 0.0078, + "step": 14500 + }, + { + "epoch": 2.38, + "learning_rate": 1.5360742281235005e-05, + "loss": 0.0079, + "step": 15000 + }, + { + "epoch": 2.46, + "learning_rate": 1.520076787713966e-05, + "loss": 0.0078, + "step": 15500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5040793473044315e-05, + "loss": 0.0078, + "step": 16000 + }, + { + "epoch": 2.62, + "learning_rate": 1.4880819068948968e-05, + "loss": 0.0079, + "step": 16500 + }, + { + "epoch": 2.7, + "learning_rate": 1.4720844664853625e-05, + "loss": 0.0078, + "step": 17000 + }, + { + "epoch": 2.78, + "learning_rate": 1.4560870260758279e-05, + "loss": 0.0081, + "step": 17500 + }, + { + "epoch": 2.86, + "learning_rate": 1.4400895856662935e-05, + "loss": 0.0079, + "step": 18000 + }, + { + "epoch": 2.94, + "learning_rate": 1.4240921452567591e-05, + "loss": 0.0077, + "step": 18500 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9924637062899773, + "eval_f1": 0.0, + "eval_loss": 0.02258981019258499, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 477.2433, + "eval_samples_per_second": 422.455, + "eval_steps_per_second": 3.302, + "step": 18903 + }, + { + "epoch": 3.02, + "learning_rate": 1.4080947048472246e-05, + "loss": 0.0072, + "step": 19000 + }, + { + "epoch": 3.09, + "learning_rate": 1.39209726443769e-05, + "loss": 0.0053, + "step": 19500 + }, + { + "epoch": 3.17, + "learning_rate": 1.3760998240281555e-05, + "loss": 0.0054, + "step": 20000 + }, + { + "epoch": 3.25, + "learning_rate": 1.3601023836186211e-05, + "loss": 0.0053, + "step": 20500 + }, + { + "epoch": 3.33, + "learning_rate": 1.3441049432090867e-05, + "loss": 0.005, + "step": 21000 + }, + { + "epoch": 3.41, + "learning_rate": 1.3281075027995522e-05, + "loss": 0.0058, + "step": 21500 + }, + { + "epoch": 3.49, + "learning_rate": 1.3121100623900178e-05, + "loss": 0.0054, + "step": 22000 + }, + { + "epoch": 3.57, + "learning_rate": 1.2961126219804831e-05, + "loss": 0.0055, + "step": 22500 + }, + { + "epoch": 3.65, + "learning_rate": 1.2801151815709487e-05, + "loss": 0.0056, + "step": 23000 + }, + { + "epoch": 3.73, + "learning_rate": 1.2641177411614143e-05, + "loss": 0.0056, + "step": 23500 + }, + { + "epoch": 3.81, + "learning_rate": 1.2481203007518798e-05, + "loss": 0.0058, + "step": 24000 + }, + { + "epoch": 3.89, + "learning_rate": 1.2321228603423454e-05, + "loss": 0.0052, + "step": 24500 + }, + { + "epoch": 3.97, + "learning_rate": 1.2161254199328107e-05, + "loss": 0.0052, + "step": 25000 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9926866218329602, + "eval_f1": 0.0, + "eval_loss": 0.02687273919582367, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.5012, + "eval_samples_per_second": 424.003, + "eval_steps_per_second": 3.314, + "step": 25204 + }, + { + "epoch": 4.05, + "learning_rate": 1.2001279795232763e-05, + "loss": 0.0042, + "step": 25500 + }, + { + "epoch": 4.13, + "learning_rate": 1.184130539113742e-05, + "loss": 0.0037, + "step": 26000 + }, + { + "epoch": 4.21, + "learning_rate": 1.1681330987042074e-05, + "loss": 0.0039, + "step": 26500 + }, + { + "epoch": 4.29, + "learning_rate": 1.152135658294673e-05, + "loss": 0.0038, + "step": 27000 + }, + { + "epoch": 4.36, + "learning_rate": 1.1361382178851386e-05, + "loss": 0.0041, + "step": 27500 + }, + { + "epoch": 4.44, + "learning_rate": 1.120140777475604e-05, + "loss": 0.0041, + "step": 28000 + }, + { + "epoch": 4.52, + "learning_rate": 1.1041433370660695e-05, + "loss": 0.0042, + "step": 28500 + }, + { + "epoch": 4.6, + "learning_rate": 1.088145896656535e-05, + "loss": 0.0039, + "step": 29000 + }, + { + "epoch": 4.68, + "learning_rate": 1.0721484562470006e-05, + "loss": 0.0044, + "step": 29500 + }, + { + "epoch": 4.76, + "learning_rate": 1.056151015837466e-05, + "loss": 0.0043, + "step": 30000 + }, + { + "epoch": 4.84, + "learning_rate": 1.0401535754279317e-05, + "loss": 0.0042, + "step": 30500 + }, + { + "epoch": 4.92, + "learning_rate": 1.024156135018397e-05, + "loss": 0.0045, + "step": 31000 + }, + { + "epoch": 5.0, + "learning_rate": 1.0081586946088626e-05, + "loss": 0.0041, + "step": 31500 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.9926359194741641, + "eval_f1": 0.0, + "eval_loss": 0.029035158455371857, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 469.3362, + "eval_samples_per_second": 429.573, + "eval_steps_per_second": 3.358, + "step": 31505 + }, + { + "epoch": 5.08, + "learning_rate": 9.921612541993282e-06, + "loss": 0.0032, + "step": 32000 + }, + { + "epoch": 5.16, + "learning_rate": 9.761638137897937e-06, + "loss": 0.0028, + "step": 32500 + }, + { + "epoch": 5.24, + "learning_rate": 9.601663733802591e-06, + "loss": 0.003, + "step": 33000 + }, + { + "epoch": 5.32, + "learning_rate": 9.441689329707247e-06, + "loss": 0.0031, + "step": 33500 + }, + { + "epoch": 5.4, + "learning_rate": 9.281714925611904e-06, + "loss": 0.0031, + "step": 34000 + }, + { + "epoch": 5.48, + "learning_rate": 9.121740521516558e-06, + "loss": 0.0032, + "step": 34500 + }, + { + "epoch": 5.55, + "learning_rate": 8.961766117421213e-06, + "loss": 0.0031, + "step": 35000 + }, + { + "epoch": 5.63, + "learning_rate": 8.801791713325869e-06, + "loss": 0.0031, + "step": 35500 + }, + { + "epoch": 5.71, + "learning_rate": 8.641817309230523e-06, + "loss": 0.0032, + "step": 36000 + }, + { + "epoch": 5.79, + "learning_rate": 8.48184290513518e-06, + "loss": 0.0032, + "step": 36500 + }, + { + "epoch": 5.87, + "learning_rate": 8.321868501039834e-06, + "loss": 0.0031, + "step": 37000 + }, + { + "epoch": 5.95, + "learning_rate": 8.161894096944489e-06, + "loss": 0.0034, + "step": 37500 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9926551514033626, + "eval_f1": 0.0, + "eval_loss": 0.03292802721261978, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.0131, + "eval_samples_per_second": 424.439, + "eval_steps_per_second": 3.318, + "step": 37806 + }, + { + "epoch": 6.03, + "learning_rate": 8.001919692849145e-06, + "loss": 0.003, + "step": 38000 + }, + { + "epoch": 6.11, + "learning_rate": 7.841945288753801e-06, + "loss": 0.0025, + "step": 38500 + }, + { + "epoch": 6.19, + "learning_rate": 7.681970884658456e-06, + "loss": 0.0023, + "step": 39000 + }, + { + "epoch": 6.27, + "learning_rate": 7.521996480563111e-06, + "loss": 0.0026, + "step": 39500 + }, + { + "epoch": 6.35, + "learning_rate": 7.3620220764677655e-06, + "loss": 0.0026, + "step": 40000 + }, + { + "epoch": 6.43, + "learning_rate": 7.202047672372421e-06, + "loss": 0.0027, + "step": 40500 + }, + { + "epoch": 6.51, + "learning_rate": 7.042073268277076e-06, + "loss": 0.0024, + "step": 41000 + }, + { + "epoch": 6.59, + "learning_rate": 6.882098864181731e-06, + "loss": 0.0026, + "step": 41500 + }, + { + "epoch": 6.67, + "learning_rate": 6.722124460086387e-06, + "loss": 0.0025, + "step": 42000 + }, + { + "epoch": 6.74, + "learning_rate": 6.562150055991042e-06, + "loss": 0.0026, + "step": 42500 + }, + { + "epoch": 6.82, + "learning_rate": 6.402175651895697e-06, + "loss": 0.0025, + "step": 43000 + }, + { + "epoch": 6.9, + "learning_rate": 6.242201247800352e-06, + "loss": 0.0024, + "step": 43500 + }, + { + "epoch": 6.98, + "learning_rate": 6.0822268437050084e-06, + "loss": 0.0025, + "step": 44000 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.9924776931475762, + "eval_f1": 0.0, + "eval_loss": 0.04146205633878708, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 477.72, + "eval_samples_per_second": 422.034, + "eval_steps_per_second": 3.299, + "step": 44107 + }, + { + "epoch": 7.06, + "learning_rate": 5.922252439609663e-06, + "loss": 0.002, + "step": 44500 + }, + { + "epoch": 7.14, + "learning_rate": 5.762278035514318e-06, + "loss": 0.0019, + "step": 45000 + }, + { + "epoch": 7.22, + "learning_rate": 5.602303631418974e-06, + "loss": 0.0021, + "step": 45500 + }, + { + "epoch": 7.3, + "learning_rate": 5.442329227323628e-06, + "loss": 0.0021, + "step": 46000 + }, + { + "epoch": 7.38, + "learning_rate": 5.282354823228284e-06, + "loss": 0.0021, + "step": 46500 + }, + { + "epoch": 7.46, + "learning_rate": 5.12238041913294e-06, + "loss": 0.002, + "step": 47000 + }, + { + "epoch": 7.54, + "learning_rate": 4.962406015037594e-06, + "loss": 0.0019, + "step": 47500 + }, + { + "epoch": 7.62, + "learning_rate": 4.80243161094225e-06, + "loss": 0.002, + "step": 48000 + }, + { + "epoch": 7.7, + "learning_rate": 4.642457206846905e-06, + "loss": 0.002, + "step": 48500 + }, + { + "epoch": 7.78, + "learning_rate": 4.4824828027515596e-06, + "loss": 0.0023, + "step": 49000 + }, + { + "epoch": 7.86, + "learning_rate": 4.322508398656216e-06, + "loss": 0.002, + "step": 49500 + }, + { + "epoch": 7.94, + "learning_rate": 4.16253399456087e-06, + "loss": 0.002, + "step": 50000 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9924226198957804, + "eval_f1": 0.0, + "eval_loss": 0.044451288878917694, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 469.8304, + "eval_samples_per_second": 429.121, + "eval_steps_per_second": 3.354, + "step": 50408 + } + ], + "logging_steps": 500, + "max_steps": 63010, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 5.2351957367444326e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-50408/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-50408/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..353304a1247e3b1c2c9c128593afbb80600a515b --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-50408/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ae7a0bcc8e5c0968aa51ccef92422d7d995099a8b793f3c4819a8985402b6f +size 4027 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-56709/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-56709/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-56709/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-56709/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-56709/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ca74bbd319dd630bba9e81941bdcc8ad9250400b --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-56709/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b43eda2884d6596099db3c81f20efdc0a188d7fc9bf0f904fee213e7ffdc2330 +size 1418281093 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-56709/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-56709/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef501fbde46906e76dfaeaf1f221c875155d5f56 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-56709/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f68ad33bf9df857783eeef635eab928c756cd6d57b4a1c110733c1abb73ec40 +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-56709/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-56709/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..692ce77be0cd77c1bb133f04175a3fbb04e9a647 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-56709/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6778a5c44db7da32e74e2ec88c13187257f0f50e7344210dfc5bb25aee9e2b71 +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-56709/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-56709/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c62c9b6aca78822a49e673ff78aa81ad1751f68 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-56709/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:652c12ceaef99ff11a4ad59a8228c95feabdac078d6a91bb072cd9fa03af2ce3 +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-56709/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-56709/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2c416e45a0b3afef8928c5d4768ea2589e174db8 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-56709/trainer_state.json @@ -0,0 +1,805 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.0, + "eval_steps": 500, + "global_step": 56709, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2e-05, + "loss": 0.131, + "step": 500 + }, + { + "epoch": 0.16, + "learning_rate": 1.9840025595904658e-05, + "loss": 0.0217, + "step": 1000 + }, + { + "epoch": 0.24, + "learning_rate": 1.9680051191809314e-05, + "loss": 0.0205, + "step": 1500 + }, + { + "epoch": 0.32, + "learning_rate": 1.9520076787713967e-05, + "loss": 0.0187, + "step": 2000 + }, + { + "epoch": 0.4, + "learning_rate": 1.9360102383618623e-05, + "loss": 0.0177, + "step": 2500 + }, + { + "epoch": 0.48, + "learning_rate": 1.9200127979523276e-05, + "loss": 0.0172, + "step": 3000 + }, + { + "epoch": 0.56, + "learning_rate": 1.9040153575427932e-05, + "loss": 0.0178, + "step": 3500 + }, + { + "epoch": 0.63, + "learning_rate": 1.888017917133259e-05, + "loss": 0.0167, + "step": 4000 + }, + { + "epoch": 0.71, + "learning_rate": 1.8720204767237245e-05, + "loss": 0.0166, + "step": 4500 + }, + { + "epoch": 0.79, + "learning_rate": 1.8560230363141897e-05, + "loss": 0.0163, + "step": 5000 + }, + { + "epoch": 0.87, + "learning_rate": 1.8400255959046554e-05, + "loss": 0.0162, + "step": 5500 + }, + { + "epoch": 0.95, + "learning_rate": 1.824028155495121e-05, + "loss": 0.0158, + "step": 6000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9927355758345564, + "eval_f1": 0.0, + "eval_loss": 0.018518663942813873, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.4322, + "eval_samples_per_second": 424.065, + "eval_steps_per_second": 3.315, + "step": 6301 + }, + { + "epoch": 1.03, + "learning_rate": 1.8080307150855866e-05, + "loss": 0.0142, + "step": 6500 + }, + { + "epoch": 1.11, + "learning_rate": 1.792033274676052e-05, + "loss": 0.0113, + "step": 7000 + }, + { + "epoch": 1.19, + "learning_rate": 1.7760358342665175e-05, + "loss": 0.0118, + "step": 7500 + }, + { + "epoch": 1.27, + "learning_rate": 1.7600383938569828e-05, + "loss": 0.0115, + "step": 8000 + }, + { + "epoch": 1.35, + "learning_rate": 1.7440409534474484e-05, + "loss": 0.0118, + "step": 8500 + }, + { + "epoch": 1.43, + "learning_rate": 1.728043513037914e-05, + "loss": 0.0112, + "step": 9000 + }, + { + "epoch": 1.51, + "learning_rate": 1.7120460726283797e-05, + "loss": 0.0112, + "step": 9500 + }, + { + "epoch": 1.59, + "learning_rate": 1.6960486322188453e-05, + "loss": 0.0115, + "step": 10000 + }, + { + "epoch": 1.67, + "learning_rate": 1.680051191809311e-05, + "loss": 0.0119, + "step": 10500 + }, + { + "epoch": 1.75, + "learning_rate": 1.6640537513997762e-05, + "loss": 0.0111, + "step": 11000 + }, + { + "epoch": 1.83, + "learning_rate": 1.6480563109902418e-05, + "loss": 0.0115, + "step": 11500 + }, + { + "epoch": 1.9, + "learning_rate": 1.632058870580707e-05, + "loss": 0.0117, + "step": 12000 + }, + { + "epoch": 1.98, + "learning_rate": 1.6160614301711727e-05, + "loss": 0.0111, + "step": 12500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9928343580163489, + "eval_f1": 0.0, + "eval_loss": 0.020694203674793243, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.3755, + "eval_samples_per_second": 424.115, + "eval_steps_per_second": 3.315, + "step": 12602 + }, + { + "epoch": 2.06, + "learning_rate": 1.6000639897616383e-05, + "loss": 0.008, + "step": 13000 + }, + { + "epoch": 2.14, + "learning_rate": 1.5840665493521036e-05, + "loss": 0.0079, + "step": 13500 + }, + { + "epoch": 2.22, + "learning_rate": 1.5680691089425692e-05, + "loss": 0.0076, + "step": 14000 + }, + { + "epoch": 2.3, + "learning_rate": 1.552071668533035e-05, + "loss": 0.0078, + "step": 14500 + }, + { + "epoch": 2.38, + "learning_rate": 1.5360742281235005e-05, + "loss": 0.0079, + "step": 15000 + }, + { + "epoch": 2.46, + "learning_rate": 1.520076787713966e-05, + "loss": 0.0078, + "step": 15500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5040793473044315e-05, + "loss": 0.0078, + "step": 16000 + }, + { + "epoch": 2.62, + "learning_rate": 1.4880819068948968e-05, + "loss": 0.0079, + "step": 16500 + }, + { + "epoch": 2.7, + "learning_rate": 1.4720844664853625e-05, + "loss": 0.0078, + "step": 17000 + }, + { + "epoch": 2.78, + "learning_rate": 1.4560870260758279e-05, + "loss": 0.0081, + "step": 17500 + }, + { + "epoch": 2.86, + "learning_rate": 1.4400895856662935e-05, + "loss": 0.0079, + "step": 18000 + }, + { + "epoch": 2.94, + "learning_rate": 1.4240921452567591e-05, + "loss": 0.0077, + "step": 18500 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9924637062899773, + "eval_f1": 0.0, + "eval_loss": 0.02258981019258499, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 477.2433, + "eval_samples_per_second": 422.455, + "eval_steps_per_second": 3.302, + "step": 18903 + }, + { + "epoch": 3.02, + "learning_rate": 1.4080947048472246e-05, + "loss": 0.0072, + "step": 19000 + }, + { + "epoch": 3.09, + "learning_rate": 1.39209726443769e-05, + "loss": 0.0053, + "step": 19500 + }, + { + "epoch": 3.17, + "learning_rate": 1.3760998240281555e-05, + "loss": 0.0054, + "step": 20000 + }, + { + "epoch": 3.25, + "learning_rate": 1.3601023836186211e-05, + "loss": 0.0053, + "step": 20500 + }, + { + "epoch": 3.33, + "learning_rate": 1.3441049432090867e-05, + "loss": 0.005, + "step": 21000 + }, + { + "epoch": 3.41, + "learning_rate": 1.3281075027995522e-05, + "loss": 0.0058, + "step": 21500 + }, + { + "epoch": 3.49, + "learning_rate": 1.3121100623900178e-05, + "loss": 0.0054, + "step": 22000 + }, + { + "epoch": 3.57, + "learning_rate": 1.2961126219804831e-05, + "loss": 0.0055, + "step": 22500 + }, + { + "epoch": 3.65, + "learning_rate": 1.2801151815709487e-05, + "loss": 0.0056, + "step": 23000 + }, + { + "epoch": 3.73, + "learning_rate": 1.2641177411614143e-05, + "loss": 0.0056, + "step": 23500 + }, + { + "epoch": 3.81, + "learning_rate": 1.2481203007518798e-05, + "loss": 0.0058, + "step": 24000 + }, + { + "epoch": 3.89, + "learning_rate": 1.2321228603423454e-05, + "loss": 0.0052, + "step": 24500 + }, + { + "epoch": 3.97, + "learning_rate": 1.2161254199328107e-05, + "loss": 0.0052, + "step": 25000 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9926866218329602, + "eval_f1": 0.0, + "eval_loss": 0.02687273919582367, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.5012, + "eval_samples_per_second": 424.003, + "eval_steps_per_second": 3.314, + "step": 25204 + }, + { + "epoch": 4.05, + "learning_rate": 1.2001279795232763e-05, + "loss": 0.0042, + "step": 25500 + }, + { + "epoch": 4.13, + "learning_rate": 1.184130539113742e-05, + "loss": 0.0037, + "step": 26000 + }, + { + "epoch": 4.21, + "learning_rate": 1.1681330987042074e-05, + "loss": 0.0039, + "step": 26500 + }, + { + "epoch": 4.29, + "learning_rate": 1.152135658294673e-05, + "loss": 0.0038, + "step": 27000 + }, + { + "epoch": 4.36, + "learning_rate": 1.1361382178851386e-05, + "loss": 0.0041, + "step": 27500 + }, + { + "epoch": 4.44, + "learning_rate": 1.120140777475604e-05, + "loss": 0.0041, + "step": 28000 + }, + { + "epoch": 4.52, + "learning_rate": 1.1041433370660695e-05, + "loss": 0.0042, + "step": 28500 + }, + { + "epoch": 4.6, + "learning_rate": 1.088145896656535e-05, + "loss": 0.0039, + "step": 29000 + }, + { + "epoch": 4.68, + "learning_rate": 1.0721484562470006e-05, + "loss": 0.0044, + "step": 29500 + }, + { + "epoch": 4.76, + "learning_rate": 1.056151015837466e-05, + "loss": 0.0043, + "step": 30000 + }, + { + "epoch": 4.84, + "learning_rate": 1.0401535754279317e-05, + "loss": 0.0042, + "step": 30500 + }, + { + "epoch": 4.92, + "learning_rate": 1.024156135018397e-05, + "loss": 0.0045, + "step": 31000 + }, + { + "epoch": 5.0, + "learning_rate": 1.0081586946088626e-05, + "loss": 0.0041, + "step": 31500 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.9926359194741641, + "eval_f1": 0.0, + "eval_loss": 0.029035158455371857, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 469.3362, + "eval_samples_per_second": 429.573, + "eval_steps_per_second": 3.358, + "step": 31505 + }, + { + "epoch": 5.08, + "learning_rate": 9.921612541993282e-06, + "loss": 0.0032, + "step": 32000 + }, + { + "epoch": 5.16, + "learning_rate": 9.761638137897937e-06, + "loss": 0.0028, + "step": 32500 + }, + { + "epoch": 5.24, + "learning_rate": 9.601663733802591e-06, + "loss": 0.003, + "step": 33000 + }, + { + "epoch": 5.32, + "learning_rate": 9.441689329707247e-06, + "loss": 0.0031, + "step": 33500 + }, + { + "epoch": 5.4, + "learning_rate": 9.281714925611904e-06, + "loss": 0.0031, + "step": 34000 + }, + { + "epoch": 5.48, + "learning_rate": 9.121740521516558e-06, + "loss": 0.0032, + "step": 34500 + }, + { + "epoch": 5.55, + "learning_rate": 8.961766117421213e-06, + "loss": 0.0031, + "step": 35000 + }, + { + "epoch": 5.63, + "learning_rate": 8.801791713325869e-06, + "loss": 0.0031, + "step": 35500 + }, + { + "epoch": 5.71, + "learning_rate": 8.641817309230523e-06, + "loss": 0.0032, + "step": 36000 + }, + { + "epoch": 5.79, + "learning_rate": 8.48184290513518e-06, + "loss": 0.0032, + "step": 36500 + }, + { + "epoch": 5.87, + "learning_rate": 8.321868501039834e-06, + "loss": 0.0031, + "step": 37000 + }, + { + "epoch": 5.95, + "learning_rate": 8.161894096944489e-06, + "loss": 0.0034, + "step": 37500 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9926551514033626, + "eval_f1": 0.0, + "eval_loss": 0.03292802721261978, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.0131, + "eval_samples_per_second": 424.439, + "eval_steps_per_second": 3.318, + "step": 37806 + }, + { + "epoch": 6.03, + "learning_rate": 8.001919692849145e-06, + "loss": 0.003, + "step": 38000 + }, + { + "epoch": 6.11, + "learning_rate": 7.841945288753801e-06, + "loss": 0.0025, + "step": 38500 + }, + { + "epoch": 6.19, + "learning_rate": 7.681970884658456e-06, + "loss": 0.0023, + "step": 39000 + }, + { + "epoch": 6.27, + "learning_rate": 7.521996480563111e-06, + "loss": 0.0026, + "step": 39500 + }, + { + "epoch": 6.35, + "learning_rate": 7.3620220764677655e-06, + "loss": 0.0026, + "step": 40000 + }, + { + "epoch": 6.43, + "learning_rate": 7.202047672372421e-06, + "loss": 0.0027, + "step": 40500 + }, + { + "epoch": 6.51, + "learning_rate": 7.042073268277076e-06, + "loss": 0.0024, + "step": 41000 + }, + { + "epoch": 6.59, + "learning_rate": 6.882098864181731e-06, + "loss": 0.0026, + "step": 41500 + }, + { + "epoch": 6.67, + "learning_rate": 6.722124460086387e-06, + "loss": 0.0025, + "step": 42000 + }, + { + "epoch": 6.74, + "learning_rate": 6.562150055991042e-06, + "loss": 0.0026, + "step": 42500 + }, + { + "epoch": 6.82, + "learning_rate": 6.402175651895697e-06, + "loss": 0.0025, + "step": 43000 + }, + { + "epoch": 6.9, + "learning_rate": 6.242201247800352e-06, + "loss": 0.0024, + "step": 43500 + }, + { + "epoch": 6.98, + "learning_rate": 6.0822268437050084e-06, + "loss": 0.0025, + "step": 44000 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.9924776931475762, + "eval_f1": 0.0, + "eval_loss": 0.04146205633878708, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 477.72, + "eval_samples_per_second": 422.034, + "eval_steps_per_second": 3.299, + "step": 44107 + }, + { + "epoch": 7.06, + "learning_rate": 5.922252439609663e-06, + "loss": 0.002, + "step": 44500 + }, + { + "epoch": 7.14, + "learning_rate": 5.762278035514318e-06, + "loss": 0.0019, + "step": 45000 + }, + { + "epoch": 7.22, + "learning_rate": 5.602303631418974e-06, + "loss": 0.0021, + "step": 45500 + }, + { + "epoch": 7.3, + "learning_rate": 5.442329227323628e-06, + "loss": 0.0021, + "step": 46000 + }, + { + "epoch": 7.38, + "learning_rate": 5.282354823228284e-06, + "loss": 0.0021, + "step": 46500 + }, + { + "epoch": 7.46, + "learning_rate": 5.12238041913294e-06, + "loss": 0.002, + "step": 47000 + }, + { + "epoch": 7.54, + "learning_rate": 4.962406015037594e-06, + "loss": 0.0019, + "step": 47500 + }, + { + "epoch": 7.62, + "learning_rate": 4.80243161094225e-06, + "loss": 0.002, + "step": 48000 + }, + { + "epoch": 7.7, + "learning_rate": 4.642457206846905e-06, + "loss": 0.002, + "step": 48500 + }, + { + "epoch": 7.78, + "learning_rate": 4.4824828027515596e-06, + "loss": 0.0023, + "step": 49000 + }, + { + "epoch": 7.86, + "learning_rate": 4.322508398656216e-06, + "loss": 0.002, + "step": 49500 + }, + { + "epoch": 7.94, + "learning_rate": 4.16253399456087e-06, + "loss": 0.002, + "step": 50000 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9924226198957804, + "eval_f1": 0.0, + "eval_loss": 0.044451288878917694, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 469.8304, + "eval_samples_per_second": 429.121, + "eval_steps_per_second": 3.354, + "step": 50408 + }, + { + "epoch": 8.01, + "learning_rate": 4.002559590465526e-06, + "loss": 0.002, + "step": 50500 + }, + { + "epoch": 8.09, + "learning_rate": 3.842585186370181e-06, + "loss": 0.0016, + "step": 51000 + }, + { + "epoch": 8.17, + "learning_rate": 3.6826107822748364e-06, + "loss": 0.0018, + "step": 51500 + }, + { + "epoch": 8.25, + "learning_rate": 3.5226363781794914e-06, + "loss": 0.0016, + "step": 52000 + }, + { + "epoch": 8.33, + "learning_rate": 3.362661974084147e-06, + "loss": 0.0017, + "step": 52500 + }, + { + "epoch": 8.41, + "learning_rate": 3.202687569988802e-06, + "loss": 0.0016, + "step": 53000 + }, + { + "epoch": 8.49, + "learning_rate": 3.042713165893457e-06, + "loss": 0.0019, + "step": 53500 + }, + { + "epoch": 8.57, + "learning_rate": 2.8827387617981124e-06, + "loss": 0.0017, + "step": 54000 + }, + { + "epoch": 8.65, + "learning_rate": 2.7227643577027678e-06, + "loss": 0.0016, + "step": 54500 + }, + { + "epoch": 8.73, + "learning_rate": 2.562789953607423e-06, + "loss": 0.0018, + "step": 55000 + }, + { + "epoch": 8.81, + "learning_rate": 2.4028155495120785e-06, + "loss": 0.0016, + "step": 55500 + }, + { + "epoch": 8.89, + "learning_rate": 2.2428411454167334e-06, + "loss": 0.0017, + "step": 56000 + }, + { + "epoch": 8.97, + "learning_rate": 2.082866741321389e-06, + "loss": 0.0016, + "step": 56500 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.9922128170317965, + "eval_f1": 0.0, + "eval_loss": 0.04637894406914711, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 473.5258, + "eval_samples_per_second": 425.772, + "eval_steps_per_second": 3.328, + "step": 56709 + } + ], + "logging_steps": 500, + "max_steps": 63010, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 5.889595203837487e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-56709/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-56709/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..353304a1247e3b1c2c9c128593afbb80600a515b --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-56709/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ae7a0bcc8e5c0968aa51ccef92422d7d995099a8b793f3c4819a8985402b6f +size 4027 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-6301/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-6301/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-6301/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-6301/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-6301/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..271f1d6ce82b80b47d4f4ee5cb9118bc96529a84 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-6301/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ff4821fb2ed994c08c4ccef288236ca9e240e4ad564244b2c7e2ac3d27fa6a4 +size 1418281093 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-6301/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-6301/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..88cbf942817c9d846c03cd4e5f8707604104db1d --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-6301/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8585220f023e363455a4da91c15574f2ea513dbeaee0285b19dc6816debe380a +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-6301/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-6301/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c02ec3aa160e16fb05167e422989753fd05e0a81 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-6301/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca0c7ac3730770272690c09b313a6b2aa60dd10d7ddc6e00dbd4c3589e0e1dc3 +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-6301/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-6301/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f7a88a8b3ec93f8c94f1cf80213af3e79e83825a --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-6301/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04b837532c6edce019d98248ec454eb0673d95df48435463e13280b2a0bd81ce +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-6301/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-6301/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..27c6e59255d06e234917920e892f0a9a89197f7a --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-6301/trainer_state.json @@ -0,0 +1,103 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 6301, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2e-05, + "loss": 0.131, + "step": 500 + }, + { + "epoch": 0.16, + "learning_rate": 1.9840025595904658e-05, + "loss": 0.0217, + "step": 1000 + }, + { + "epoch": 0.24, + "learning_rate": 1.9680051191809314e-05, + "loss": 0.0205, + "step": 1500 + }, + { + "epoch": 0.32, + "learning_rate": 1.9520076787713967e-05, + "loss": 0.0187, + "step": 2000 + }, + { + "epoch": 0.4, + "learning_rate": 1.9360102383618623e-05, + "loss": 0.0177, + "step": 2500 + }, + { + "epoch": 0.48, + "learning_rate": 1.9200127979523276e-05, + "loss": 0.0172, + "step": 3000 + }, + { + "epoch": 0.56, + "learning_rate": 1.9040153575427932e-05, + "loss": 0.0178, + "step": 3500 + }, + { + "epoch": 0.63, + "learning_rate": 1.888017917133259e-05, + "loss": 0.0167, + "step": 4000 + }, + { + "epoch": 0.71, + "learning_rate": 1.8720204767237245e-05, + "loss": 0.0166, + "step": 4500 + }, + { + "epoch": 0.79, + "learning_rate": 1.8560230363141897e-05, + "loss": 0.0163, + "step": 5000 + }, + { + "epoch": 0.87, + "learning_rate": 1.8400255959046554e-05, + "loss": 0.0162, + "step": 5500 + }, + { + "epoch": 0.95, + "learning_rate": 1.824028155495121e-05, + "loss": 0.0158, + "step": 6000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9927355758345564, + "eval_f1": 0.0, + "eval_loss": 0.018518663942813873, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.4322, + "eval_samples_per_second": 424.065, + "eval_steps_per_second": 3.315, + "step": 6301 + } + ], + "logging_steps": 500, + "max_steps": 63010, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 6.5439946709305416e+16, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-6301/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-6301/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..353304a1247e3b1c2c9c128593afbb80600a515b --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-6301/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ae7a0bcc8e5c0968aa51ccef92422d7d995099a8b793f3c4819a8985402b6f +size 4027 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-63010/config.json b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-63010/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d950686f1f6295fdc397a1769300519970cd446 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-63010/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "DeepPavlov/rubert-base-cased-conversational", + "architectures": [ + "BertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "U" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "O": 0, + "U": 1 + }, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 119547 +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-63010/optimizer.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-63010/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c6815970c16e0d4c6b0c0778c99cec5038bbae57 --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-63010/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca9ccd938c3a477b684b0afaac9b2711b53e3e36fb19c33b240753acc7c486e3 +size 1418281093 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-63010/pytorch_model.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-63010/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..f57a67d97268201f8d6337e405934dde9b3d5cdb --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-63010/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e60ed2c22b5e2f5746acd1be7c02f016aa87b189f7d7c7ba1a9281c9825ece7 +size 709125289 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-63010/rng_state.pth b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-63010/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..63dab72555e246feab76ec6fc49c45c9f337f42a --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-63010/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1e14930242e6522c83348ea98898428ec4baa299b70b4569c171e01a7bd967e +size 14575 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-63010/scheduler.pt b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-63010/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a19a775e5a8172d1e3cf2381ca027ee709f8e86a --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-63010/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d000deffbd2b6faf081f224b015b66e8a07a882bf20733ea59a82259036c4914 +size 627 diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-63010/trainer_state.json b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-63010/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6f5b7991d87ece1fd7b172d1bac1fdd8b808928d --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-63010/trainer_state.json @@ -0,0 +1,895 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 10.0, + "eval_steps": 500, + "global_step": 63010, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 2e-05, + "loss": 0.131, + "step": 500 + }, + { + "epoch": 0.16, + "learning_rate": 1.9840025595904658e-05, + "loss": 0.0217, + "step": 1000 + }, + { + "epoch": 0.24, + "learning_rate": 1.9680051191809314e-05, + "loss": 0.0205, + "step": 1500 + }, + { + "epoch": 0.32, + "learning_rate": 1.9520076787713967e-05, + "loss": 0.0187, + "step": 2000 + }, + { + "epoch": 0.4, + "learning_rate": 1.9360102383618623e-05, + "loss": 0.0177, + "step": 2500 + }, + { + "epoch": 0.48, + "learning_rate": 1.9200127979523276e-05, + "loss": 0.0172, + "step": 3000 + }, + { + "epoch": 0.56, + "learning_rate": 1.9040153575427932e-05, + "loss": 0.0178, + "step": 3500 + }, + { + "epoch": 0.63, + "learning_rate": 1.888017917133259e-05, + "loss": 0.0167, + "step": 4000 + }, + { + "epoch": 0.71, + "learning_rate": 1.8720204767237245e-05, + "loss": 0.0166, + "step": 4500 + }, + { + "epoch": 0.79, + "learning_rate": 1.8560230363141897e-05, + "loss": 0.0163, + "step": 5000 + }, + { + "epoch": 0.87, + "learning_rate": 1.8400255959046554e-05, + "loss": 0.0162, + "step": 5500 + }, + { + "epoch": 0.95, + "learning_rate": 1.824028155495121e-05, + "loss": 0.0158, + "step": 6000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9927355758345564, + "eval_f1": 0.0, + "eval_loss": 0.018518663942813873, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.4322, + "eval_samples_per_second": 424.065, + "eval_steps_per_second": 3.315, + "step": 6301 + }, + { + "epoch": 1.03, + "learning_rate": 1.8080307150855866e-05, + "loss": 0.0142, + "step": 6500 + }, + { + "epoch": 1.11, + "learning_rate": 1.792033274676052e-05, + "loss": 0.0113, + "step": 7000 + }, + { + "epoch": 1.19, + "learning_rate": 1.7760358342665175e-05, + "loss": 0.0118, + "step": 7500 + }, + { + "epoch": 1.27, + "learning_rate": 1.7600383938569828e-05, + "loss": 0.0115, + "step": 8000 + }, + { + "epoch": 1.35, + "learning_rate": 1.7440409534474484e-05, + "loss": 0.0118, + "step": 8500 + }, + { + "epoch": 1.43, + "learning_rate": 1.728043513037914e-05, + "loss": 0.0112, + "step": 9000 + }, + { + "epoch": 1.51, + "learning_rate": 1.7120460726283797e-05, + "loss": 0.0112, + "step": 9500 + }, + { + "epoch": 1.59, + "learning_rate": 1.6960486322188453e-05, + "loss": 0.0115, + "step": 10000 + }, + { + "epoch": 1.67, + "learning_rate": 1.680051191809311e-05, + "loss": 0.0119, + "step": 10500 + }, + { + "epoch": 1.75, + "learning_rate": 1.6640537513997762e-05, + "loss": 0.0111, + "step": 11000 + }, + { + "epoch": 1.83, + "learning_rate": 1.6480563109902418e-05, + "loss": 0.0115, + "step": 11500 + }, + { + "epoch": 1.9, + "learning_rate": 1.632058870580707e-05, + "loss": 0.0117, + "step": 12000 + }, + { + "epoch": 1.98, + "learning_rate": 1.6160614301711727e-05, + "loss": 0.0111, + "step": 12500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9928343580163489, + "eval_f1": 0.0, + "eval_loss": 0.020694203674793243, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.3755, + "eval_samples_per_second": 424.115, + "eval_steps_per_second": 3.315, + "step": 12602 + }, + { + "epoch": 2.06, + "learning_rate": 1.6000639897616383e-05, + "loss": 0.008, + "step": 13000 + }, + { + "epoch": 2.14, + "learning_rate": 1.5840665493521036e-05, + "loss": 0.0079, + "step": 13500 + }, + { + "epoch": 2.22, + "learning_rate": 1.5680691089425692e-05, + "loss": 0.0076, + "step": 14000 + }, + { + "epoch": 2.3, + "learning_rate": 1.552071668533035e-05, + "loss": 0.0078, + "step": 14500 + }, + { + "epoch": 2.38, + "learning_rate": 1.5360742281235005e-05, + "loss": 0.0079, + "step": 15000 + }, + { + "epoch": 2.46, + "learning_rate": 1.520076787713966e-05, + "loss": 0.0078, + "step": 15500 + }, + { + "epoch": 2.54, + "learning_rate": 1.5040793473044315e-05, + "loss": 0.0078, + "step": 16000 + }, + { + "epoch": 2.62, + "learning_rate": 1.4880819068948968e-05, + "loss": 0.0079, + "step": 16500 + }, + { + "epoch": 2.7, + "learning_rate": 1.4720844664853625e-05, + "loss": 0.0078, + "step": 17000 + }, + { + "epoch": 2.78, + "learning_rate": 1.4560870260758279e-05, + "loss": 0.0081, + "step": 17500 + }, + { + "epoch": 2.86, + "learning_rate": 1.4400895856662935e-05, + "loss": 0.0079, + "step": 18000 + }, + { + "epoch": 2.94, + "learning_rate": 1.4240921452567591e-05, + "loss": 0.0077, + "step": 18500 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9924637062899773, + "eval_f1": 0.0, + "eval_loss": 0.02258981019258499, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 477.2433, + "eval_samples_per_second": 422.455, + "eval_steps_per_second": 3.302, + "step": 18903 + }, + { + "epoch": 3.02, + "learning_rate": 1.4080947048472246e-05, + "loss": 0.0072, + "step": 19000 + }, + { + "epoch": 3.09, + "learning_rate": 1.39209726443769e-05, + "loss": 0.0053, + "step": 19500 + }, + { + "epoch": 3.17, + "learning_rate": 1.3760998240281555e-05, + "loss": 0.0054, + "step": 20000 + }, + { + "epoch": 3.25, + "learning_rate": 1.3601023836186211e-05, + "loss": 0.0053, + "step": 20500 + }, + { + "epoch": 3.33, + "learning_rate": 1.3441049432090867e-05, + "loss": 0.005, + "step": 21000 + }, + { + "epoch": 3.41, + "learning_rate": 1.3281075027995522e-05, + "loss": 0.0058, + "step": 21500 + }, + { + "epoch": 3.49, + "learning_rate": 1.3121100623900178e-05, + "loss": 0.0054, + "step": 22000 + }, + { + "epoch": 3.57, + "learning_rate": 1.2961126219804831e-05, + "loss": 0.0055, + "step": 22500 + }, + { + "epoch": 3.65, + "learning_rate": 1.2801151815709487e-05, + "loss": 0.0056, + "step": 23000 + }, + { + "epoch": 3.73, + "learning_rate": 1.2641177411614143e-05, + "loss": 0.0056, + "step": 23500 + }, + { + "epoch": 3.81, + "learning_rate": 1.2481203007518798e-05, + "loss": 0.0058, + "step": 24000 + }, + { + "epoch": 3.89, + "learning_rate": 1.2321228603423454e-05, + "loss": 0.0052, + "step": 24500 + }, + { + "epoch": 3.97, + "learning_rate": 1.2161254199328107e-05, + "loss": 0.0052, + "step": 25000 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9926866218329602, + "eval_f1": 0.0, + "eval_loss": 0.02687273919582367, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.5012, + "eval_samples_per_second": 424.003, + "eval_steps_per_second": 3.314, + "step": 25204 + }, + { + "epoch": 4.05, + "learning_rate": 1.2001279795232763e-05, + "loss": 0.0042, + "step": 25500 + }, + { + "epoch": 4.13, + "learning_rate": 1.184130539113742e-05, + "loss": 0.0037, + "step": 26000 + }, + { + "epoch": 4.21, + "learning_rate": 1.1681330987042074e-05, + "loss": 0.0039, + "step": 26500 + }, + { + "epoch": 4.29, + "learning_rate": 1.152135658294673e-05, + "loss": 0.0038, + "step": 27000 + }, + { + "epoch": 4.36, + "learning_rate": 1.1361382178851386e-05, + "loss": 0.0041, + "step": 27500 + }, + { + "epoch": 4.44, + "learning_rate": 1.120140777475604e-05, + "loss": 0.0041, + "step": 28000 + }, + { + "epoch": 4.52, + "learning_rate": 1.1041433370660695e-05, + "loss": 0.0042, + "step": 28500 + }, + { + "epoch": 4.6, + "learning_rate": 1.088145896656535e-05, + "loss": 0.0039, + "step": 29000 + }, + { + "epoch": 4.68, + "learning_rate": 1.0721484562470006e-05, + "loss": 0.0044, + "step": 29500 + }, + { + "epoch": 4.76, + "learning_rate": 1.056151015837466e-05, + "loss": 0.0043, + "step": 30000 + }, + { + "epoch": 4.84, + "learning_rate": 1.0401535754279317e-05, + "loss": 0.0042, + "step": 30500 + }, + { + "epoch": 4.92, + "learning_rate": 1.024156135018397e-05, + "loss": 0.0045, + "step": 31000 + }, + { + "epoch": 5.0, + "learning_rate": 1.0081586946088626e-05, + "loss": 0.0041, + "step": 31500 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.9926359194741641, + "eval_f1": 0.0, + "eval_loss": 0.029035158455371857, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 469.3362, + "eval_samples_per_second": 429.573, + "eval_steps_per_second": 3.358, + "step": 31505 + }, + { + "epoch": 5.08, + "learning_rate": 9.921612541993282e-06, + "loss": 0.0032, + "step": 32000 + }, + { + "epoch": 5.16, + "learning_rate": 9.761638137897937e-06, + "loss": 0.0028, + "step": 32500 + }, + { + "epoch": 5.24, + "learning_rate": 9.601663733802591e-06, + "loss": 0.003, + "step": 33000 + }, + { + "epoch": 5.32, + "learning_rate": 9.441689329707247e-06, + "loss": 0.0031, + "step": 33500 + }, + { + "epoch": 5.4, + "learning_rate": 9.281714925611904e-06, + "loss": 0.0031, + "step": 34000 + }, + { + "epoch": 5.48, + "learning_rate": 9.121740521516558e-06, + "loss": 0.0032, + "step": 34500 + }, + { + "epoch": 5.55, + "learning_rate": 8.961766117421213e-06, + "loss": 0.0031, + "step": 35000 + }, + { + "epoch": 5.63, + "learning_rate": 8.801791713325869e-06, + "loss": 0.0031, + "step": 35500 + }, + { + "epoch": 5.71, + "learning_rate": 8.641817309230523e-06, + "loss": 0.0032, + "step": 36000 + }, + { + "epoch": 5.79, + "learning_rate": 8.48184290513518e-06, + "loss": 0.0032, + "step": 36500 + }, + { + "epoch": 5.87, + "learning_rate": 8.321868501039834e-06, + "loss": 0.0031, + "step": 37000 + }, + { + "epoch": 5.95, + "learning_rate": 8.161894096944489e-06, + "loss": 0.0034, + "step": 37500 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9926551514033626, + "eval_f1": 0.0, + "eval_loss": 0.03292802721261978, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 475.0131, + "eval_samples_per_second": 424.439, + "eval_steps_per_second": 3.318, + "step": 37806 + }, + { + "epoch": 6.03, + "learning_rate": 8.001919692849145e-06, + "loss": 0.003, + "step": 38000 + }, + { + "epoch": 6.11, + "learning_rate": 7.841945288753801e-06, + "loss": 0.0025, + "step": 38500 + }, + { + "epoch": 6.19, + "learning_rate": 7.681970884658456e-06, + "loss": 0.0023, + "step": 39000 + }, + { + "epoch": 6.27, + "learning_rate": 7.521996480563111e-06, + "loss": 0.0026, + "step": 39500 + }, + { + "epoch": 6.35, + "learning_rate": 7.3620220764677655e-06, + "loss": 0.0026, + "step": 40000 + }, + { + "epoch": 6.43, + "learning_rate": 7.202047672372421e-06, + "loss": 0.0027, + "step": 40500 + }, + { + "epoch": 6.51, + "learning_rate": 7.042073268277076e-06, + "loss": 0.0024, + "step": 41000 + }, + { + "epoch": 6.59, + "learning_rate": 6.882098864181731e-06, + "loss": 0.0026, + "step": 41500 + }, + { + "epoch": 6.67, + "learning_rate": 6.722124460086387e-06, + "loss": 0.0025, + "step": 42000 + }, + { + "epoch": 6.74, + "learning_rate": 6.562150055991042e-06, + "loss": 0.0026, + "step": 42500 + }, + { + "epoch": 6.82, + "learning_rate": 6.402175651895697e-06, + "loss": 0.0025, + "step": 43000 + }, + { + "epoch": 6.9, + "learning_rate": 6.242201247800352e-06, + "loss": 0.0024, + "step": 43500 + }, + { + "epoch": 6.98, + "learning_rate": 6.0822268437050084e-06, + "loss": 0.0025, + "step": 44000 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.9924776931475762, + "eval_f1": 0.0, + "eval_loss": 0.04146205633878708, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 477.72, + "eval_samples_per_second": 422.034, + "eval_steps_per_second": 3.299, + "step": 44107 + }, + { + "epoch": 7.06, + "learning_rate": 5.922252439609663e-06, + "loss": 0.002, + "step": 44500 + }, + { + "epoch": 7.14, + "learning_rate": 5.762278035514318e-06, + "loss": 0.0019, + "step": 45000 + }, + { + "epoch": 7.22, + "learning_rate": 5.602303631418974e-06, + "loss": 0.0021, + "step": 45500 + }, + { + "epoch": 7.3, + "learning_rate": 5.442329227323628e-06, + "loss": 0.0021, + "step": 46000 + }, + { + "epoch": 7.38, + "learning_rate": 5.282354823228284e-06, + "loss": 0.0021, + "step": 46500 + }, + { + "epoch": 7.46, + "learning_rate": 5.12238041913294e-06, + "loss": 0.002, + "step": 47000 + }, + { + "epoch": 7.54, + "learning_rate": 4.962406015037594e-06, + "loss": 0.0019, + "step": 47500 + }, + { + "epoch": 7.62, + "learning_rate": 4.80243161094225e-06, + "loss": 0.002, + "step": 48000 + }, + { + "epoch": 7.7, + "learning_rate": 4.642457206846905e-06, + "loss": 0.002, + "step": 48500 + }, + { + "epoch": 7.78, + "learning_rate": 4.4824828027515596e-06, + "loss": 0.0023, + "step": 49000 + }, + { + "epoch": 7.86, + "learning_rate": 4.322508398656216e-06, + "loss": 0.002, + "step": 49500 + }, + { + "epoch": 7.94, + "learning_rate": 4.16253399456087e-06, + "loss": 0.002, + "step": 50000 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9924226198957804, + "eval_f1": 0.0, + "eval_loss": 0.044451288878917694, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 469.8304, + "eval_samples_per_second": 429.121, + "eval_steps_per_second": 3.354, + "step": 50408 + }, + { + "epoch": 8.01, + "learning_rate": 4.002559590465526e-06, + "loss": 0.002, + "step": 50500 + }, + { + "epoch": 8.09, + "learning_rate": 3.842585186370181e-06, + "loss": 0.0016, + "step": 51000 + }, + { + "epoch": 8.17, + "learning_rate": 3.6826107822748364e-06, + "loss": 0.0018, + "step": 51500 + }, + { + "epoch": 8.25, + "learning_rate": 3.5226363781794914e-06, + "loss": 0.0016, + "step": 52000 + }, + { + "epoch": 8.33, + "learning_rate": 3.362661974084147e-06, + "loss": 0.0017, + "step": 52500 + }, + { + "epoch": 8.41, + "learning_rate": 3.202687569988802e-06, + "loss": 0.0016, + "step": 53000 + }, + { + "epoch": 8.49, + "learning_rate": 3.042713165893457e-06, + "loss": 0.0019, + "step": 53500 + }, + { + "epoch": 8.57, + "learning_rate": 2.8827387617981124e-06, + "loss": 0.0017, + "step": 54000 + }, + { + "epoch": 8.65, + "learning_rate": 2.7227643577027678e-06, + "loss": 0.0016, + "step": 54500 + }, + { + "epoch": 8.73, + "learning_rate": 2.562789953607423e-06, + "loss": 0.0018, + "step": 55000 + }, + { + "epoch": 8.81, + "learning_rate": 2.4028155495120785e-06, + "loss": 0.0016, + "step": 55500 + }, + { + "epoch": 8.89, + "learning_rate": 2.2428411454167334e-06, + "loss": 0.0017, + "step": 56000 + }, + { + "epoch": 8.97, + "learning_rate": 2.082866741321389e-06, + "loss": 0.0016, + "step": 56500 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.9922128170317965, + "eval_f1": 0.0, + "eval_loss": 0.04637894406914711, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 473.5258, + "eval_samples_per_second": 425.772, + "eval_steps_per_second": 3.328, + "step": 56709 + }, + { + "epoch": 9.05, + "learning_rate": 1.922892337226044e-06, + "loss": 0.0017, + "step": 57000 + }, + { + "epoch": 9.13, + "learning_rate": 1.7629179331306991e-06, + "loss": 0.0016, + "step": 57500 + }, + { + "epoch": 9.2, + "learning_rate": 1.6029435290353545e-06, + "loss": 0.0014, + "step": 58000 + }, + { + "epoch": 9.28, + "learning_rate": 1.4429691249400096e-06, + "loss": 0.0013, + "step": 58500 + }, + { + "epoch": 9.36, + "learning_rate": 1.282994720844665e-06, + "loss": 0.0013, + "step": 59000 + }, + { + "epoch": 9.44, + "learning_rate": 1.1230203167493202e-06, + "loss": 0.0013, + "step": 59500 + }, + { + "epoch": 9.52, + "learning_rate": 9.630459126539753e-07, + "loss": 0.0015, + "step": 60000 + }, + { + "epoch": 9.6, + "learning_rate": 8.030715085586308e-07, + "loss": 0.0015, + "step": 60500 + }, + { + "epoch": 9.68, + "learning_rate": 6.43097104463286e-07, + "loss": 0.0015, + "step": 61000 + }, + { + "epoch": 9.76, + "learning_rate": 4.831227003679412e-07, + "loss": 0.0015, + "step": 61500 + }, + { + "epoch": 9.84, + "learning_rate": 3.2314829627259637e-07, + "loss": 0.0013, + "step": 62000 + }, + { + "epoch": 9.92, + "learning_rate": 1.6317389217725163e-07, + "loss": 0.0012, + "step": 62500 + }, + { + "epoch": 10.0, + "learning_rate": 3.199488081906895e-09, + "loss": 0.0015, + "step": 63000 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9924444743607788, + "eval_f1": 0.0, + "eval_loss": 0.05045689642429352, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 476.1579, + "eval_samples_per_second": 423.418, + "eval_steps_per_second": 3.31, + "step": 63010 + } + ], + "logging_steps": 500, + "max_steps": 63010, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 6.543994670930542e+17, + "trial_name": null, + "trial_params": null +} diff --git a/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-63010/training_args.bin b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-63010/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..353304a1247e3b1c2c9c128593afbb80600a515b --- /dev/null +++ b/rubert-base-cased-conversational-512-tatoeba_dataset/20-30-05/checkpoint-63010/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ae7a0bcc8e5c0968aa51ccef92422d7d995099a8b793f3c4819a8985402b6f +size 4027