Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- deberta-v3-finetuned/fold_0/0/checkpoint-500/config.json +42 -0
- deberta-v3-finetuned/fold_0/0/checkpoint-500/optimizer.pt +3 -0
- deberta-v3-finetuned/fold_0/0/checkpoint-500/pytorch_model.bin +3 -0
- deberta-v3-finetuned/fold_0/0/checkpoint-500/rng_state.pth +3 -0
- deberta-v3-finetuned/fold_0/0/checkpoint-500/scheduler.pt +3 -0
- deberta-v3-finetuned/fold_0/0/checkpoint-500/special_tokens_map.json +9 -0
- deberta-v3-finetuned/fold_0/0/checkpoint-500/tokenizer.json +0 -0
- deberta-v3-finetuned/fold_0/0/checkpoint-500/tokenizer_config.json +16 -0
- deberta-v3-finetuned/fold_0/0/checkpoint-500/trainer_state.json +70 -0
- deberta-v3-finetuned/fold_0/0/checkpoint-500/training_args.bin +3 -0
- deberta-v3-finetuned/fold_0/config.json +42 -0
- deberta-v3-finetuned/fold_0/pytorch_model.bin +3 -0
- deberta-v3-finetuned/fold_0/special_tokens_map.json +9 -0
- deberta-v3-finetuned/fold_0/tokenizer.json +0 -0
- deberta-v3-finetuned/fold_0/tokenizer_config.json +16 -0
- deberta-v3-finetuned/fold_1/1/checkpoint-1000/config.json +42 -0
- deberta-v3-finetuned/fold_1/1/checkpoint-1000/optimizer.pt +3 -0
- deberta-v3-finetuned/fold_1/1/checkpoint-1000/pytorch_model.bin +3 -0
- deberta-v3-finetuned/fold_1/1/checkpoint-1000/rng_state.pth +3 -0
- deberta-v3-finetuned/fold_1/1/checkpoint-1000/scheduler.pt +3 -0
- deberta-v3-finetuned/fold_1/1/checkpoint-1000/special_tokens_map.json +9 -0
- deberta-v3-finetuned/fold_1/1/checkpoint-1000/tokenizer.json +0 -0
- deberta-v3-finetuned/fold_1/1/checkpoint-1000/tokenizer_config.json +16 -0
- deberta-v3-finetuned/fold_1/1/checkpoint-1000/trainer_state.json +121 -0
- deberta-v3-finetuned/fold_1/1/checkpoint-1000/training_args.bin +3 -0
- deberta-v3-finetuned/fold_1/config.json +42 -0
- deberta-v3-finetuned/fold_1/pytorch_model.bin +3 -0
- deberta-v3-finetuned/fold_1/special_tokens_map.json +9 -0
- deberta-v3-finetuned/fold_1/tokenizer.json +0 -0
- deberta-v3-finetuned/fold_1/tokenizer_config.json +16 -0
- deberta-v3-finetuned/fold_2/2/checkpoint-400/config.json +42 -0
- deberta-v3-finetuned/fold_2/2/checkpoint-400/optimizer.pt +3 -0
- deberta-v3-finetuned/fold_2/2/checkpoint-400/pytorch_model.bin +3 -0
- deberta-v3-finetuned/fold_2/2/checkpoint-400/rng_state.pth +3 -0
- deberta-v3-finetuned/fold_2/2/checkpoint-400/scheduler.pt +3 -0
- deberta-v3-finetuned/fold_2/2/checkpoint-400/special_tokens_map.json +9 -0
- deberta-v3-finetuned/fold_2/2/checkpoint-400/tokenizer.json +0 -0
- deberta-v3-finetuned/fold_2/2/checkpoint-400/tokenizer_config.json +16 -0
- deberta-v3-finetuned/fold_2/2/checkpoint-400/trainer_state.json +55 -0
- deberta-v3-finetuned/fold_2/2/checkpoint-400/training_args.bin +3 -0
- deberta-v3-finetuned/fold_2/config.json +42 -0
- deberta-v3-finetuned/fold_2/pytorch_model.bin +3 -0
- deberta-v3-finetuned/fold_2/special_tokens_map.json +9 -0
- deberta-v3-finetuned/fold_2/tokenizer.json +0 -0
- deberta-v3-finetuned/fold_2/tokenizer_config.json +16 -0
- deberta-v3-finetuned/fold_3/3/checkpoint-100/config.json +42 -0
- deberta-v3-finetuned/fold_3/3/checkpoint-100/optimizer.pt +3 -0
- deberta-v3-finetuned/fold_3/3/checkpoint-100/pytorch_model.bin +3 -0
- deberta-v3-finetuned/fold_3/3/checkpoint-100/rng_state.pth +3 -0
- deberta-v3-finetuned/fold_3/3/checkpoint-100/scheduler.pt +3 -0
deberta-v3-finetuned/fold_0/0/checkpoint-500/config.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "/gpfs/home/jc3821/kaggle/microsoft-deberta-v3-large",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"DebertaV2ForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.007,
|
| 7 |
+
"hidden_act": "gelu",
|
| 8 |
+
"hidden_dropout_prob": 0.007,
|
| 9 |
+
"hidden_size": 1024,
|
| 10 |
+
"id2label": {
|
| 11 |
+
"0": "LABEL_0"
|
| 12 |
+
},
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 4096,
|
| 15 |
+
"label2id": {
|
| 16 |
+
"LABEL_0": 0
|
| 17 |
+
},
|
| 18 |
+
"layer_norm_eps": 1e-07,
|
| 19 |
+
"max_position_embeddings": 512,
|
| 20 |
+
"max_relative_positions": -1,
|
| 21 |
+
"model_type": "deberta-v2",
|
| 22 |
+
"norm_rel_ebd": "layer_norm",
|
| 23 |
+
"num_attention_heads": 16,
|
| 24 |
+
"num_hidden_layers": 24,
|
| 25 |
+
"pad_token_id": 0,
|
| 26 |
+
"pooler_dropout": 0,
|
| 27 |
+
"pooler_hidden_act": "gelu",
|
| 28 |
+
"pooler_hidden_size": 1024,
|
| 29 |
+
"pos_att_type": [
|
| 30 |
+
"p2c",
|
| 31 |
+
"c2p"
|
| 32 |
+
],
|
| 33 |
+
"position_biased_input": false,
|
| 34 |
+
"position_buckets": 256,
|
| 35 |
+
"problem_type": "regression",
|
| 36 |
+
"relative_attention": true,
|
| 37 |
+
"share_att_key": true,
|
| 38 |
+
"torch_dtype": "float32",
|
| 39 |
+
"transformers_version": "4.32.1",
|
| 40 |
+
"type_vocab_size": 0,
|
| 41 |
+
"vocab_size": 128100
|
| 42 |
+
}
|
deberta-v3-finetuned/fold_0/0/checkpoint-500/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ce4b3555aaabd6ebf364eef18675e77db495a9c88a5af48454ebfefcddf1b52
|
| 3 |
+
size 3480831547
|
deberta-v3-finetuned/fold_0/0/checkpoint-500/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bdc5261521c4981b187281dea24a24d8290fd2c4e222a5efff1cf0e9234c7cf5
|
| 3 |
+
size 1740387701
|
deberta-v3-finetuned/fold_0/0/checkpoint-500/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51783737039aaae77df9f6cc876318bdb54431cf6e9bffdfbb995a59239ef270
|
| 3 |
+
size 14575
|
deberta-v3-finetuned/fold_0/0/checkpoint-500/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:af2fa603561d1610ba73b457cac52ea6a0ab7ffa9c9c41a75a141811fc0185a3
|
| 3 |
+
size 627
|
deberta-v3-finetuned/fold_0/0/checkpoint-500/special_tokens_map.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"eos_token": "[SEP]",
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"pad_token": "[PAD]",
|
| 7 |
+
"sep_token": "[SEP]",
|
| 8 |
+
"unk_token": "[UNK]"
|
| 9 |
+
}
|
deberta-v3-finetuned/fold_0/0/checkpoint-500/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
deberta-v3-finetuned/fold_0/0/checkpoint-500/tokenizer_config.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"clean_up_tokenization_spaces": true,
|
| 4 |
+
"cls_token": "[CLS]",
|
| 5 |
+
"do_lower_case": false,
|
| 6 |
+
"eos_token": "[SEP]",
|
| 7 |
+
"mask_token": "[MASK]",
|
| 8 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 9 |
+
"pad_token": "[PAD]",
|
| 10 |
+
"sep_token": "[SEP]",
|
| 11 |
+
"sp_model_kwargs": {},
|
| 12 |
+
"split_by_punct": false,
|
| 13 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
| 14 |
+
"unk_token": "[UNK]",
|
| 15 |
+
"vocab_type": "spm"
|
| 16 |
+
}
|
deberta-v3-finetuned/fold_0/0/checkpoint-500/trainer_state.json
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": 0.4025963544845581,
|
| 3 |
+
"best_model_checkpoint": "/gpfs/home/jc3821/kaggle/content/deberta-v3-finetuned/fold_0/0/checkpoint-500",
|
| 4 |
+
"epoch": 1.953125,
|
| 5 |
+
"eval_steps": 100,
|
| 6 |
+
"global_step": 500,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.39,
|
| 13 |
+
"eval_loss": 0.20960840582847595,
|
| 14 |
+
"eval_rmse": 0.4578301012516022,
|
| 15 |
+
"eval_runtime": 22.7398,
|
| 16 |
+
"eval_samples_per_second": 90.458,
|
| 17 |
+
"eval_steps_per_second": 11.346,
|
| 18 |
+
"step": 100
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"epoch": 0.78,
|
| 22 |
+
"eval_loss": 0.23187659680843353,
|
| 23 |
+
"eval_rmse": 0.4815356731414795,
|
| 24 |
+
"eval_runtime": 22.6916,
|
| 25 |
+
"eval_samples_per_second": 90.65,
|
| 26 |
+
"eval_steps_per_second": 11.37,
|
| 27 |
+
"step": 200
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"epoch": 1.17,
|
| 31 |
+
"eval_loss": 0.16903835535049438,
|
| 32 |
+
"eval_rmse": 0.4111427366733551,
|
| 33 |
+
"eval_runtime": 22.6924,
|
| 34 |
+
"eval_samples_per_second": 90.647,
|
| 35 |
+
"eval_steps_per_second": 11.369,
|
| 36 |
+
"step": 300
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 1.56,
|
| 40 |
+
"eval_loss": 0.3062863349914551,
|
| 41 |
+
"eval_rmse": 0.5534313917160034,
|
| 42 |
+
"eval_runtime": 22.6866,
|
| 43 |
+
"eval_samples_per_second": 90.67,
|
| 44 |
+
"eval_steps_per_second": 11.372,
|
| 45 |
+
"step": 400
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"epoch": 1.95,
|
| 49 |
+
"learning_rate": 9.140625e-06,
|
| 50 |
+
"loss": 0.2205,
|
| 51 |
+
"step": 500
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 1.95,
|
| 55 |
+
"eval_loss": 0.16208384931087494,
|
| 56 |
+
"eval_rmse": 0.4025963544845581,
|
| 57 |
+
"eval_runtime": 22.6795,
|
| 58 |
+
"eval_samples_per_second": 90.699,
|
| 59 |
+
"eval_steps_per_second": 11.376,
|
| 60 |
+
"step": 500
|
| 61 |
+
}
|
| 62 |
+
],
|
| 63 |
+
"logging_steps": 500,
|
| 64 |
+
"max_steps": 1280,
|
| 65 |
+
"num_train_epochs": 5,
|
| 66 |
+
"save_steps": 100,
|
| 67 |
+
"total_flos": 5457160921939152.0,
|
| 68 |
+
"trial_name": null,
|
| 69 |
+
"trial_params": null
|
| 70 |
+
}
|
deberta-v3-finetuned/fold_0/0/checkpoint-500/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0439c9395e86496a1acd5f3dc9d68a57fea982eccfb5766d700c8191ec8b133
|
| 3 |
+
size 4091
|
deberta-v3-finetuned/fold_0/config.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "/gpfs/home/jc3821/kaggle/microsoft-deberta-v3-large",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"DebertaV2ForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.007,
|
| 7 |
+
"hidden_act": "gelu",
|
| 8 |
+
"hidden_dropout_prob": 0.007,
|
| 9 |
+
"hidden_size": 1024,
|
| 10 |
+
"id2label": {
|
| 11 |
+
"0": "LABEL_0"
|
| 12 |
+
},
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 4096,
|
| 15 |
+
"label2id": {
|
| 16 |
+
"LABEL_0": 0
|
| 17 |
+
},
|
| 18 |
+
"layer_norm_eps": 1e-07,
|
| 19 |
+
"max_position_embeddings": 512,
|
| 20 |
+
"max_relative_positions": -1,
|
| 21 |
+
"model_type": "deberta-v2",
|
| 22 |
+
"norm_rel_ebd": "layer_norm",
|
| 23 |
+
"num_attention_heads": 16,
|
| 24 |
+
"num_hidden_layers": 24,
|
| 25 |
+
"pad_token_id": 0,
|
| 26 |
+
"pooler_dropout": 0,
|
| 27 |
+
"pooler_hidden_act": "gelu",
|
| 28 |
+
"pooler_hidden_size": 1024,
|
| 29 |
+
"pos_att_type": [
|
| 30 |
+
"p2c",
|
| 31 |
+
"c2p"
|
| 32 |
+
],
|
| 33 |
+
"position_biased_input": false,
|
| 34 |
+
"position_buckets": 256,
|
| 35 |
+
"problem_type": "regression",
|
| 36 |
+
"relative_attention": true,
|
| 37 |
+
"share_att_key": true,
|
| 38 |
+
"torch_dtype": "float32",
|
| 39 |
+
"transformers_version": "4.32.1",
|
| 40 |
+
"type_vocab_size": 0,
|
| 41 |
+
"vocab_size": 128100
|
| 42 |
+
}
|
deberta-v3-finetuned/fold_0/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bdc5261521c4981b187281dea24a24d8290fd2c4e222a5efff1cf0e9234c7cf5
|
| 3 |
+
size 1740387701
|
deberta-v3-finetuned/fold_0/special_tokens_map.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"eos_token": "[SEP]",
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"pad_token": "[PAD]",
|
| 7 |
+
"sep_token": "[SEP]",
|
| 8 |
+
"unk_token": "[UNK]"
|
| 9 |
+
}
|
deberta-v3-finetuned/fold_0/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
deberta-v3-finetuned/fold_0/tokenizer_config.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"clean_up_tokenization_spaces": true,
|
| 4 |
+
"cls_token": "[CLS]",
|
| 5 |
+
"do_lower_case": false,
|
| 6 |
+
"eos_token": "[SEP]",
|
| 7 |
+
"mask_token": "[MASK]",
|
| 8 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 9 |
+
"pad_token": "[PAD]",
|
| 10 |
+
"sep_token": "[SEP]",
|
| 11 |
+
"sp_model_kwargs": {},
|
| 12 |
+
"split_by_punct": false,
|
| 13 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
| 14 |
+
"unk_token": "[UNK]",
|
| 15 |
+
"vocab_type": "spm"
|
| 16 |
+
}
|
deberta-v3-finetuned/fold_1/1/checkpoint-1000/config.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "/gpfs/home/jc3821/kaggle/microsoft-deberta-v3-large",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"DebertaV2ForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.007,
|
| 7 |
+
"hidden_act": "gelu",
|
| 8 |
+
"hidden_dropout_prob": 0.007,
|
| 9 |
+
"hidden_size": 1024,
|
| 10 |
+
"id2label": {
|
| 11 |
+
"0": "LABEL_0"
|
| 12 |
+
},
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 4096,
|
| 15 |
+
"label2id": {
|
| 16 |
+
"LABEL_0": 0
|
| 17 |
+
},
|
| 18 |
+
"layer_norm_eps": 1e-07,
|
| 19 |
+
"max_position_embeddings": 512,
|
| 20 |
+
"max_relative_positions": -1,
|
| 21 |
+
"model_type": "deberta-v2",
|
| 22 |
+
"norm_rel_ebd": "layer_norm",
|
| 23 |
+
"num_attention_heads": 16,
|
| 24 |
+
"num_hidden_layers": 24,
|
| 25 |
+
"pad_token_id": 0,
|
| 26 |
+
"pooler_dropout": 0,
|
| 27 |
+
"pooler_hidden_act": "gelu",
|
| 28 |
+
"pooler_hidden_size": 1024,
|
| 29 |
+
"pos_att_type": [
|
| 30 |
+
"p2c",
|
| 31 |
+
"c2p"
|
| 32 |
+
],
|
| 33 |
+
"position_biased_input": false,
|
| 34 |
+
"position_buckets": 256,
|
| 35 |
+
"problem_type": "regression",
|
| 36 |
+
"relative_attention": true,
|
| 37 |
+
"share_att_key": true,
|
| 38 |
+
"torch_dtype": "float32",
|
| 39 |
+
"transformers_version": "4.32.1",
|
| 40 |
+
"type_vocab_size": 0,
|
| 41 |
+
"vocab_size": 128100
|
| 42 |
+
}
|
deberta-v3-finetuned/fold_1/1/checkpoint-1000/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:635821f4fe588432a6200b884f1b1d97ceabcde528ef510f99a3074b07be7eb9
|
| 3 |
+
size 3480831547
|
deberta-v3-finetuned/fold_1/1/checkpoint-1000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f2cbd3bab0adc0d8c2db3cdd23f8fc8a30712e8f7908c9a31e7d2da1698518f
|
| 3 |
+
size 1740387701
|
deberta-v3-finetuned/fold_1/1/checkpoint-1000/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5458d3bcbcf4f0bc302eba1f212281704d725141b083eb254d6baf69117be06c
|
| 3 |
+
size 14575
|
deberta-v3-finetuned/fold_1/1/checkpoint-1000/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41a3073593c1c4cceb7a03282a47eee18756b89792798e8d3b798cb70f5a3bbe
|
| 3 |
+
size 627
|
deberta-v3-finetuned/fold_1/1/checkpoint-1000/special_tokens_map.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"eos_token": "[SEP]",
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"pad_token": "[PAD]",
|
| 7 |
+
"sep_token": "[SEP]",
|
| 8 |
+
"unk_token": "[UNK]"
|
| 9 |
+
}
|
deberta-v3-finetuned/fold_1/1/checkpoint-1000/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
deberta-v3-finetuned/fold_1/1/checkpoint-1000/tokenizer_config.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"clean_up_tokenization_spaces": true,
|
| 4 |
+
"cls_token": "[CLS]",
|
| 5 |
+
"do_lower_case": false,
|
| 6 |
+
"eos_token": "[SEP]",
|
| 7 |
+
"mask_token": "[MASK]",
|
| 8 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 9 |
+
"pad_token": "[PAD]",
|
| 10 |
+
"sep_token": "[SEP]",
|
| 11 |
+
"sp_model_kwargs": {},
|
| 12 |
+
"split_by_punct": false,
|
| 13 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
| 14 |
+
"unk_token": "[UNK]",
|
| 15 |
+
"vocab_type": "spm"
|
| 16 |
+
}
|
deberta-v3-finetuned/fold_1/1/checkpoint-1000/trainer_state.json
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": 0.4920215308666229,
|
| 3 |
+
"best_model_checkpoint": "/gpfs/home/jc3821/kaggle/content/deberta-v3-finetuned/fold_1/1/checkpoint-1000",
|
| 4 |
+
"epoch": 3.875968992248062,
|
| 5 |
+
"eval_steps": 100,
|
| 6 |
+
"global_step": 1000,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.39,
|
| 13 |
+
"eval_loss": 0.2649173140525818,
|
| 14 |
+
"eval_rmse": 0.5147011876106262,
|
| 15 |
+
"eval_runtime": 31.3445,
|
| 16 |
+
"eval_samples_per_second": 64.094,
|
| 17 |
+
"eval_steps_per_second": 8.04,
|
| 18 |
+
"step": 100
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"epoch": 0.78,
|
| 22 |
+
"eval_loss": 0.4311714172363281,
|
| 23 |
+
"eval_rmse": 0.6566364169120789,
|
| 24 |
+
"eval_runtime": 31.3047,
|
| 25 |
+
"eval_samples_per_second": 64.176,
|
| 26 |
+
"eval_steps_per_second": 8.05,
|
| 27 |
+
"step": 200
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"epoch": 1.16,
|
| 31 |
+
"eval_loss": 0.3980819880962372,
|
| 32 |
+
"eval_rmse": 0.630937397480011,
|
| 33 |
+
"eval_runtime": 31.303,
|
| 34 |
+
"eval_samples_per_second": 64.179,
|
| 35 |
+
"eval_steps_per_second": 8.05,
|
| 36 |
+
"step": 300
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 1.55,
|
| 40 |
+
"eval_loss": 0.2832517623901367,
|
| 41 |
+
"eval_rmse": 0.5322140455245972,
|
| 42 |
+
"eval_runtime": 31.2992,
|
| 43 |
+
"eval_samples_per_second": 64.187,
|
| 44 |
+
"eval_steps_per_second": 8.051,
|
| 45 |
+
"step": 400
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"epoch": 1.94,
|
| 49 |
+
"learning_rate": 9.186046511627908e-06,
|
| 50 |
+
"loss": 0.2244,
|
| 51 |
+
"step": 500
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 1.94,
|
| 55 |
+
"eval_loss": 0.26878467202186584,
|
| 56 |
+
"eval_rmse": 0.5184444785118103,
|
| 57 |
+
"eval_runtime": 31.2847,
|
| 58 |
+
"eval_samples_per_second": 64.217,
|
| 59 |
+
"eval_steps_per_second": 8.055,
|
| 60 |
+
"step": 500
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"epoch": 2.33,
|
| 64 |
+
"eval_loss": 0.2900581955909729,
|
| 65 |
+
"eval_rmse": 0.5385705232620239,
|
| 66 |
+
"eval_runtime": 31.3051,
|
| 67 |
+
"eval_samples_per_second": 64.175,
|
| 68 |
+
"eval_steps_per_second": 8.05,
|
| 69 |
+
"step": 600
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"epoch": 2.71,
|
| 73 |
+
"eval_loss": 0.3477973937988281,
|
| 74 |
+
"eval_rmse": 0.5897434949874878,
|
| 75 |
+
"eval_runtime": 31.2983,
|
| 76 |
+
"eval_samples_per_second": 64.189,
|
| 77 |
+
"eval_steps_per_second": 8.052,
|
| 78 |
+
"step": 700
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"epoch": 3.1,
|
| 82 |
+
"eval_loss": 0.27153390645980835,
|
| 83 |
+
"eval_rmse": 0.5210891366004944,
|
| 84 |
+
"eval_runtime": 31.2968,
|
| 85 |
+
"eval_samples_per_second": 64.192,
|
| 86 |
+
"eval_steps_per_second": 8.052,
|
| 87 |
+
"step": 800
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"epoch": 3.49,
|
| 91 |
+
"eval_loss": 0.24941422045230865,
|
| 92 |
+
"eval_rmse": 0.4994138777256012,
|
| 93 |
+
"eval_runtime": 31.3146,
|
| 94 |
+
"eval_samples_per_second": 64.155,
|
| 95 |
+
"eval_steps_per_second": 8.047,
|
| 96 |
+
"step": 900
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"epoch": 3.88,
|
| 100 |
+
"learning_rate": 3.372093023255814e-06,
|
| 101 |
+
"loss": 0.0945,
|
| 102 |
+
"step": 1000
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"epoch": 3.88,
|
| 106 |
+
"eval_loss": 0.24208517372608185,
|
| 107 |
+
"eval_rmse": 0.4920215308666229,
|
| 108 |
+
"eval_runtime": 31.2894,
|
| 109 |
+
"eval_samples_per_second": 64.207,
|
| 110 |
+
"eval_steps_per_second": 8.054,
|
| 111 |
+
"step": 1000
|
| 112 |
+
}
|
| 113 |
+
],
|
| 114 |
+
"logging_steps": 500,
|
| 115 |
+
"max_steps": 1290,
|
| 116 |
+
"num_train_epochs": 5,
|
| 117 |
+
"save_steps": 100,
|
| 118 |
+
"total_flos": 9605995008551688.0,
|
| 119 |
+
"trial_name": null,
|
| 120 |
+
"trial_params": null
|
| 121 |
+
}
|
deberta-v3-finetuned/fold_1/1/checkpoint-1000/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4eadc822c2797edd54906ce4e0b7f9ef7987f46ee2743298e1f123e0e41dc785
|
| 3 |
+
size 4091
|
deberta-v3-finetuned/fold_1/config.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "/gpfs/home/jc3821/kaggle/microsoft-deberta-v3-large",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"DebertaV2ForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.007,
|
| 7 |
+
"hidden_act": "gelu",
|
| 8 |
+
"hidden_dropout_prob": 0.007,
|
| 9 |
+
"hidden_size": 1024,
|
| 10 |
+
"id2label": {
|
| 11 |
+
"0": "LABEL_0"
|
| 12 |
+
},
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 4096,
|
| 15 |
+
"label2id": {
|
| 16 |
+
"LABEL_0": 0
|
| 17 |
+
},
|
| 18 |
+
"layer_norm_eps": 1e-07,
|
| 19 |
+
"max_position_embeddings": 512,
|
| 20 |
+
"max_relative_positions": -1,
|
| 21 |
+
"model_type": "deberta-v2",
|
| 22 |
+
"norm_rel_ebd": "layer_norm",
|
| 23 |
+
"num_attention_heads": 16,
|
| 24 |
+
"num_hidden_layers": 24,
|
| 25 |
+
"pad_token_id": 0,
|
| 26 |
+
"pooler_dropout": 0,
|
| 27 |
+
"pooler_hidden_act": "gelu",
|
| 28 |
+
"pooler_hidden_size": 1024,
|
| 29 |
+
"pos_att_type": [
|
| 30 |
+
"p2c",
|
| 31 |
+
"c2p"
|
| 32 |
+
],
|
| 33 |
+
"position_biased_input": false,
|
| 34 |
+
"position_buckets": 256,
|
| 35 |
+
"problem_type": "regression",
|
| 36 |
+
"relative_attention": true,
|
| 37 |
+
"share_att_key": true,
|
| 38 |
+
"torch_dtype": "float32",
|
| 39 |
+
"transformers_version": "4.32.1",
|
| 40 |
+
"type_vocab_size": 0,
|
| 41 |
+
"vocab_size": 128100
|
| 42 |
+
}
|
deberta-v3-finetuned/fold_1/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f2cbd3bab0adc0d8c2db3cdd23f8fc8a30712e8f7908c9a31e7d2da1698518f
|
| 3 |
+
size 1740387701
|
deberta-v3-finetuned/fold_1/special_tokens_map.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"eos_token": "[SEP]",
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"pad_token": "[PAD]",
|
| 7 |
+
"sep_token": "[SEP]",
|
| 8 |
+
"unk_token": "[UNK]"
|
| 9 |
+
}
|
deberta-v3-finetuned/fold_1/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
deberta-v3-finetuned/fold_1/tokenizer_config.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"clean_up_tokenization_spaces": true,
|
| 4 |
+
"cls_token": "[CLS]",
|
| 5 |
+
"do_lower_case": false,
|
| 6 |
+
"eos_token": "[SEP]",
|
| 7 |
+
"mask_token": "[MASK]",
|
| 8 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 9 |
+
"pad_token": "[PAD]",
|
| 10 |
+
"sep_token": "[SEP]",
|
| 11 |
+
"sp_model_kwargs": {},
|
| 12 |
+
"split_by_punct": false,
|
| 13 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
| 14 |
+
"unk_token": "[UNK]",
|
| 15 |
+
"vocab_type": "spm"
|
| 16 |
+
}
|
deberta-v3-finetuned/fold_2/2/checkpoint-400/config.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "/gpfs/home/jc3821/kaggle/microsoft-deberta-v3-large",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"DebertaV2ForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.007,
|
| 7 |
+
"hidden_act": "gelu",
|
| 8 |
+
"hidden_dropout_prob": 0.007,
|
| 9 |
+
"hidden_size": 1024,
|
| 10 |
+
"id2label": {
|
| 11 |
+
"0": "LABEL_0"
|
| 12 |
+
},
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 4096,
|
| 15 |
+
"label2id": {
|
| 16 |
+
"LABEL_0": 0
|
| 17 |
+
},
|
| 18 |
+
"layer_norm_eps": 1e-07,
|
| 19 |
+
"max_position_embeddings": 512,
|
| 20 |
+
"max_relative_positions": -1,
|
| 21 |
+
"model_type": "deberta-v2",
|
| 22 |
+
"norm_rel_ebd": "layer_norm",
|
| 23 |
+
"num_attention_heads": 16,
|
| 24 |
+
"num_hidden_layers": 24,
|
| 25 |
+
"pad_token_id": 0,
|
| 26 |
+
"pooler_dropout": 0,
|
| 27 |
+
"pooler_hidden_act": "gelu",
|
| 28 |
+
"pooler_hidden_size": 1024,
|
| 29 |
+
"pos_att_type": [
|
| 30 |
+
"p2c",
|
| 31 |
+
"c2p"
|
| 32 |
+
],
|
| 33 |
+
"position_biased_input": false,
|
| 34 |
+
"position_buckets": 256,
|
| 35 |
+
"problem_type": "regression",
|
| 36 |
+
"relative_attention": true,
|
| 37 |
+
"share_att_key": true,
|
| 38 |
+
"torch_dtype": "float32",
|
| 39 |
+
"transformers_version": "4.32.1",
|
| 40 |
+
"type_vocab_size": 0,
|
| 41 |
+
"vocab_size": 128100
|
| 42 |
+
}
|
deberta-v3-finetuned/fold_2/2/checkpoint-400/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce44670793ed58c21f8c2cbce6afc4efc891a30dfde7518e85135bd442780680
|
| 3 |
+
size 3480831547
|
deberta-v3-finetuned/fold_2/2/checkpoint-400/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d0bb0e3c058a48281a2b64af0f6ecdf014de51e36ea51b0251bfb5cc4c00691
|
| 3 |
+
size 1740387701
|
deberta-v3-finetuned/fold_2/2/checkpoint-400/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4e100a81cf298499260f07579dde148991722b91ee300d8212533f095c23a93
|
| 3 |
+
size 14575
|
deberta-v3-finetuned/fold_2/2/checkpoint-400/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7cd6229b27eb04441fb524f85b3a83bef58d5a81203fc33e818a099412769c8
|
| 3 |
+
size 627
|
deberta-v3-finetuned/fold_2/2/checkpoint-400/special_tokens_map.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"eos_token": "[SEP]",
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"pad_token": "[PAD]",
|
| 7 |
+
"sep_token": "[SEP]",
|
| 8 |
+
"unk_token": "[UNK]"
|
| 9 |
+
}
|
deberta-v3-finetuned/fold_2/2/checkpoint-400/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
deberta-v3-finetuned/fold_2/2/checkpoint-400/tokenizer_config.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"clean_up_tokenization_spaces": true,
|
| 4 |
+
"cls_token": "[CLS]",
|
| 5 |
+
"do_lower_case": false,
|
| 6 |
+
"eos_token": "[SEP]",
|
| 7 |
+
"mask_token": "[MASK]",
|
| 8 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 9 |
+
"pad_token": "[PAD]",
|
| 10 |
+
"sep_token": "[SEP]",
|
| 11 |
+
"sp_model_kwargs": {},
|
| 12 |
+
"split_by_punct": false,
|
| 13 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
| 14 |
+
"unk_token": "[UNK]",
|
| 15 |
+
"vocab_type": "spm"
|
| 16 |
+
}
|
deberta-v3-finetuned/fold_2/2/checkpoint-400/trainer_state.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": 0.43666166067123413,
|
| 3 |
+
"best_model_checkpoint": "/gpfs/home/jc3821/kaggle/content/deberta-v3-finetuned/fold_2/2/checkpoint-400",
|
| 4 |
+
"epoch": 1.5444015444015444,
|
| 5 |
+
"eval_steps": 100,
|
| 6 |
+
"global_step": 400,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.39,
|
| 13 |
+
"eval_loss": 0.31523793935775757,
|
| 14 |
+
"eval_rmse": 0.561460554599762,
|
| 15 |
+
"eval_runtime": 28.1344,
|
| 16 |
+
"eval_samples_per_second": 70.945,
|
| 17 |
+
"eval_steps_per_second": 8.886,
|
| 18 |
+
"step": 100
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"epoch": 0.77,
|
| 22 |
+
"eval_loss": 0.30585768818855286,
|
| 23 |
+
"eval_rmse": 0.5530440211296082,
|
| 24 |
+
"eval_runtime": 28.135,
|
| 25 |
+
"eval_samples_per_second": 70.944,
|
| 26 |
+
"eval_steps_per_second": 8.886,
|
| 27 |
+
"step": 200
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"epoch": 1.16,
|
| 31 |
+
"eval_loss": 0.33985063433647156,
|
| 32 |
+
"eval_rmse": 0.5829670429229736,
|
| 33 |
+
"eval_runtime": 28.1345,
|
| 34 |
+
"eval_samples_per_second": 70.945,
|
| 35 |
+
"eval_steps_per_second": 8.886,
|
| 36 |
+
"step": 300
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 1.54,
|
| 40 |
+
"eval_loss": 0.19067342579364777,
|
| 41 |
+
"eval_rmse": 0.43666166067123413,
|
| 42 |
+
"eval_runtime": 28.134,
|
| 43 |
+
"eval_samples_per_second": 70.946,
|
| 44 |
+
"eval_steps_per_second": 8.886,
|
| 45 |
+
"step": 400
|
| 46 |
+
}
|
| 47 |
+
],
|
| 48 |
+
"logging_steps": 500,
|
| 49 |
+
"max_steps": 1295,
|
| 50 |
+
"num_train_epochs": 5,
|
| 51 |
+
"save_steps": 100,
|
| 52 |
+
"total_flos": 4057351609521216.0,
|
| 53 |
+
"trial_name": null,
|
| 54 |
+
"trial_params": null
|
| 55 |
+
}
|
deberta-v3-finetuned/fold_2/2/checkpoint-400/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41d0d43d565b7b3f73ca58f024d9c5d913daab26a6cc112d5207845da19c1431
|
| 3 |
+
size 4091
|
deberta-v3-finetuned/fold_2/config.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "/gpfs/home/jc3821/kaggle/microsoft-deberta-v3-large",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"DebertaV2ForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.007,
|
| 7 |
+
"hidden_act": "gelu",
|
| 8 |
+
"hidden_dropout_prob": 0.007,
|
| 9 |
+
"hidden_size": 1024,
|
| 10 |
+
"id2label": {
|
| 11 |
+
"0": "LABEL_0"
|
| 12 |
+
},
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 4096,
|
| 15 |
+
"label2id": {
|
| 16 |
+
"LABEL_0": 0
|
| 17 |
+
},
|
| 18 |
+
"layer_norm_eps": 1e-07,
|
| 19 |
+
"max_position_embeddings": 512,
|
| 20 |
+
"max_relative_positions": -1,
|
| 21 |
+
"model_type": "deberta-v2",
|
| 22 |
+
"norm_rel_ebd": "layer_norm",
|
| 23 |
+
"num_attention_heads": 16,
|
| 24 |
+
"num_hidden_layers": 24,
|
| 25 |
+
"pad_token_id": 0,
|
| 26 |
+
"pooler_dropout": 0,
|
| 27 |
+
"pooler_hidden_act": "gelu",
|
| 28 |
+
"pooler_hidden_size": 1024,
|
| 29 |
+
"pos_att_type": [
|
| 30 |
+
"p2c",
|
| 31 |
+
"c2p"
|
| 32 |
+
],
|
| 33 |
+
"position_biased_input": false,
|
| 34 |
+
"position_buckets": 256,
|
| 35 |
+
"problem_type": "regression",
|
| 36 |
+
"relative_attention": true,
|
| 37 |
+
"share_att_key": true,
|
| 38 |
+
"torch_dtype": "float32",
|
| 39 |
+
"transformers_version": "4.32.1",
|
| 40 |
+
"type_vocab_size": 0,
|
| 41 |
+
"vocab_size": 128100
|
| 42 |
+
}
|
deberta-v3-finetuned/fold_2/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d0bb0e3c058a48281a2b64af0f6ecdf014de51e36ea51b0251bfb5cc4c00691
|
| 3 |
+
size 1740387701
|
deberta-v3-finetuned/fold_2/special_tokens_map.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"eos_token": "[SEP]",
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"pad_token": "[PAD]",
|
| 7 |
+
"sep_token": "[SEP]",
|
| 8 |
+
"unk_token": "[UNK]"
|
| 9 |
+
}
|
deberta-v3-finetuned/fold_2/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
deberta-v3-finetuned/fold_2/tokenizer_config.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"clean_up_tokenization_spaces": true,
|
| 4 |
+
"cls_token": "[CLS]",
|
| 5 |
+
"do_lower_case": false,
|
| 6 |
+
"eos_token": "[SEP]",
|
| 7 |
+
"mask_token": "[MASK]",
|
| 8 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 9 |
+
"pad_token": "[PAD]",
|
| 10 |
+
"sep_token": "[SEP]",
|
| 11 |
+
"sp_model_kwargs": {},
|
| 12 |
+
"split_by_punct": false,
|
| 13 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
| 14 |
+
"unk_token": "[UNK]",
|
| 15 |
+
"vocab_type": "spm"
|
| 16 |
+
}
|
deberta-v3-finetuned/fold_3/3/checkpoint-100/config.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "/gpfs/home/jc3821/kaggle/microsoft-deberta-v3-large",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"DebertaV2ForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.007,
|
| 7 |
+
"hidden_act": "gelu",
|
| 8 |
+
"hidden_dropout_prob": 0.007,
|
| 9 |
+
"hidden_size": 1024,
|
| 10 |
+
"id2label": {
|
| 11 |
+
"0": "LABEL_0"
|
| 12 |
+
},
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 4096,
|
| 15 |
+
"label2id": {
|
| 16 |
+
"LABEL_0": 0
|
| 17 |
+
},
|
| 18 |
+
"layer_norm_eps": 1e-07,
|
| 19 |
+
"max_position_embeddings": 512,
|
| 20 |
+
"max_relative_positions": -1,
|
| 21 |
+
"model_type": "deberta-v2",
|
| 22 |
+
"norm_rel_ebd": "layer_norm",
|
| 23 |
+
"num_attention_heads": 16,
|
| 24 |
+
"num_hidden_layers": 24,
|
| 25 |
+
"pad_token_id": 0,
|
| 26 |
+
"pooler_dropout": 0,
|
| 27 |
+
"pooler_hidden_act": "gelu",
|
| 28 |
+
"pooler_hidden_size": 1024,
|
| 29 |
+
"pos_att_type": [
|
| 30 |
+
"p2c",
|
| 31 |
+
"c2p"
|
| 32 |
+
],
|
| 33 |
+
"position_biased_input": false,
|
| 34 |
+
"position_buckets": 256,
|
| 35 |
+
"problem_type": "regression",
|
| 36 |
+
"relative_attention": true,
|
| 37 |
+
"share_att_key": true,
|
| 38 |
+
"torch_dtype": "float32",
|
| 39 |
+
"transformers_version": "4.32.1",
|
| 40 |
+
"type_vocab_size": 0,
|
| 41 |
+
"vocab_size": 128100
|
| 42 |
+
}
|
deberta-v3-finetuned/fold_3/3/checkpoint-100/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b20b449044c88ca077f2e4d53ed3e7965841a392bfcc938260acb2ff57021f6c
|
| 3 |
+
size 3480831547
|
deberta-v3-finetuned/fold_3/3/checkpoint-100/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c9b7956e94bf38e46350de37247628745d4153d0f030e66a40e9099c62a7e70
|
| 3 |
+
size 1740387701
|
deberta-v3-finetuned/fold_3/3/checkpoint-100/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72b166cc4874ed59847a59cc4b7fa887c9be4c1c2d459fe1b29872f6ec46e8ea
|
| 3 |
+
size 14575
|
deberta-v3-finetuned/fold_3/3/checkpoint-100/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:64a05a810a830e4742e4818737ab479a0943e4c2e2dab122df5475f155021251
|
| 3 |
+
size 627
|