Instructions to use dzungpham/graphcodebert-code-classification with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use dzungpham/graphcodebert-code-classification with Transformers:
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("dzungpham/graphcodebert-code-classification", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
upload best checkpoints 200 with f1 score 0.68
Browse files
- graphcodebert-robust/checkpoint-200/config.json +3 -2
- graphcodebert-robust/checkpoint-200/model.safetensors +1 -1
- graphcodebert-robust/checkpoint-200/optimizer.pt +2 -2
- graphcodebert-robust/checkpoint-200/rng_state.pth +2 -2
- graphcodebert-robust/checkpoint-200/scaler.pt +1 -1
- graphcodebert-robust/checkpoint-200/scheduler.pt +1 -1
- graphcodebert-robust/checkpoint-200/tokenizer.json +1 -6
- graphcodebert-robust/checkpoint-200/trainer_state.json +85 -85
- graphcodebert-robust/checkpoint-200/training_args.bin +1 -1
- graphcodebert-robust/checkpoint-400/model.safetensors +1 -1
- graphcodebert-robust/checkpoint-400/optimizer.pt +1 -1
- graphcodebert-robust/checkpoint-400/trainer_state.json +39 -39
- graphcodebert-robust/checkpoint-400/training_args.bin +1 -1
- graphcodebert-robust/checkpoint-600/model.safetensors +1 -1
- graphcodebert-robust/checkpoint-600/optimizer.pt +1 -1
- graphcodebert-robust/checkpoint-600/rng_state.pth +1 -1
- graphcodebert-robust/checkpoint-600/trainer_state.json +80 -80
- graphcodebert-robust/checkpoint-600/training_args.bin +1 -1
- graphcodebert-robust/checkpoint-800/model.safetensors +1 -1
- graphcodebert-robust/checkpoint-800/optimizer.pt +1 -1
- graphcodebert-robust/checkpoint-800/rng_state.pth +1 -1
- graphcodebert-robust/checkpoint-800/trainer_state.json +120 -120
- graphcodebert-robust/checkpoint-800/training_args.bin +1 -1
- graphcodebert-robust/training.log +45 -21
graphcodebert-robust/checkpoint-200/config.json
CHANGED
|
@@ -2,14 +2,14 @@
|
|
| 2 |
"architectures": [
|
| 3 |
"RobertaForSequenceClassification"
|
| 4 |
],
|
| 5 |
-
"attention_probs_dropout_prob": 0.
|
| 6 |
"bos_token_id": 0,
|
| 7 |
"classifier_dropout": null,
|
| 8 |
"dtype": "float32",
|
| 9 |
"eos_token_id": 2,
|
| 10 |
"gradient_checkpointing": false,
|
| 11 |
"hidden_act": "gelu",
|
| 12 |
-
"hidden_dropout_prob": 0.
|
| 13 |
"hidden_size": 768,
|
| 14 |
"initializer_range": 0.02,
|
| 15 |
"intermediate_size": 3072,
|
|
@@ -21,6 +21,7 @@
|
|
| 21 |
"output_past": true,
|
| 22 |
"pad_token_id": 1,
|
| 23 |
"position_embedding_type": "absolute",
|
|
|
|
| 24 |
"transformers_version": "4.56.0",
|
| 25 |
"type_vocab_size": 1,
|
| 26 |
"use_cache": true,
|
|
|
|
| 2 |
"architectures": [
|
| 3 |
"RobertaForSequenceClassification"
|
| 4 |
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
"bos_token_id": 0,
|
| 7 |
"classifier_dropout": null,
|
| 8 |
"dtype": "float32",
|
| 9 |
"eos_token_id": 2,
|
| 10 |
"gradient_checkpointing": false,
|
| 11 |
"hidden_act": "gelu",
|
| 12 |
+
"hidden_dropout_prob": 0.1,
|
| 13 |
"hidden_size": 768,
|
| 14 |
"initializer_range": 0.02,
|
| 15 |
"intermediate_size": 3072,
|
|
|
|
| 21 |
"output_past": true,
|
| 22 |
"pad_token_id": 1,
|
| 23 |
"position_embedding_type": "absolute",
|
| 24 |
+
"problem_type": "single_label_classification",
|
| 25 |
"transformers_version": "4.56.0",
|
| 26 |
"type_vocab_size": 1,
|
| 27 |
"use_cache": true,
|
graphcodebert-robust/checkpoint-200/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 498612824
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34f62f2e2935abbdd0f8d5567e447c234e77e119d414ca9ce31e3a1ce06552e2
|
| 3 |
size 498612824
|
graphcodebert-robust/checkpoint-200/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94ced15c772e225b8afaaa561ce73077f5f491b910b543982886ee79b2be71c0
|
| 3 |
+
size 4741859
|
graphcodebert-robust/checkpoint-200/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a69a2dd012809f4c1402b56a463f5f04ca5d8c3ea0ff42d1da133d0f80b1c5b9
|
| 3 |
+
size 14645
|
graphcodebert-robust/checkpoint-200/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1383
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b21c5349d5e7d02de630ebc1cb53ade1d9c6079eeb8594d223bb786011a0428b
|
| 3 |
size 1383
|
graphcodebert-robust/checkpoint-200/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1620ef2f1785b97a0cabdbea3b6cfd78a32feee0218de95157fc0dbbc14db4ba
|
| 3 |
size 1465
|
graphcodebert-robust/checkpoint-200/tokenizer.json
CHANGED
|
@@ -1,11 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"version": "1.0",
|
| 3 |
-
"truncation":
|
| 4 |
-
"direction": "Right",
|
| 5 |
-
"max_length": 512,
|
| 6 |
-
"strategy": "LongestFirst",
|
| 7 |
-
"stride": 0
|
| 8 |
-
},
|
| 9 |
"padding": null,
|
| 10 |
"added_tokens": [
|
| 11 |
{
|
|
|
|
| 1 |
{
|
| 2 |
"version": "1.0",
|
| 3 |
+
"truncation": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
"padding": null,
|
| 5 |
"added_tokens": [
|
| 6 |
{
|
graphcodebert-robust/checkpoint-200/trainer_state.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 1000,
|
| 7 |
"global_step": 200,
|
| 8 |
"is_hyper_param_search": false,
|
|
@@ -10,150 +10,150 @@
|
|
| 10 |
"is_world_process_zero": true,
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
-
"epoch": 0.
|
| 14 |
-
"grad_norm":
|
| 15 |
-
"learning_rate":
|
| 16 |
-
"loss": 0.
|
| 17 |
"step": 10
|
| 18 |
},
|
| 19 |
{
|
| 20 |
-
"epoch": 0.
|
| 21 |
-
"grad_norm":
|
| 22 |
-
"learning_rate":
|
| 23 |
-
"loss": 0.
|
| 24 |
"step": 20
|
| 25 |
},
|
| 26 |
{
|
| 27 |
-
"epoch": 0.
|
| 28 |
-
"grad_norm":
|
| 29 |
-
"learning_rate":
|
| 30 |
-
"loss": 0.
|
| 31 |
"step": 30
|
| 32 |
},
|
| 33 |
{
|
| 34 |
-
"epoch": 0.
|
| 35 |
-
"grad_norm":
|
| 36 |
-
"learning_rate":
|
| 37 |
-
"loss": 0.
|
| 38 |
"step": 40
|
| 39 |
},
|
| 40 |
{
|
| 41 |
-
"epoch": 0.
|
| 42 |
-
"grad_norm":
|
| 43 |
-
"learning_rate":
|
| 44 |
-
"loss": 0.
|
| 45 |
"step": 50
|
| 46 |
},
|
| 47 |
{
|
| 48 |
-
"epoch": 0.
|
| 49 |
-
"grad_norm":
|
| 50 |
-
"learning_rate":
|
| 51 |
-
"loss": 0.
|
| 52 |
"step": 60
|
| 53 |
},
|
| 54 |
{
|
| 55 |
-
"epoch": 0.
|
| 56 |
-
"grad_norm":
|
| 57 |
-
"learning_rate":
|
| 58 |
-
"loss": 0.
|
| 59 |
"step": 70
|
| 60 |
},
|
| 61 |
{
|
| 62 |
-
"epoch": 0.
|
| 63 |
-
"grad_norm":
|
| 64 |
-
"learning_rate":
|
| 65 |
-
"loss": 0.
|
| 66 |
"step": 80
|
| 67 |
},
|
| 68 |
{
|
| 69 |
-
"epoch": 0.
|
| 70 |
-
"grad_norm":
|
| 71 |
-
"learning_rate":
|
| 72 |
-
"loss": 0.
|
| 73 |
"step": 90
|
| 74 |
},
|
| 75 |
{
|
| 76 |
-
"epoch": 0.
|
| 77 |
-
"grad_norm":
|
| 78 |
-
"learning_rate":
|
| 79 |
-
"loss": 0.
|
| 80 |
"step": 100
|
| 81 |
},
|
| 82 |
{
|
| 83 |
-
"epoch": 0.
|
| 84 |
-
"grad_norm":
|
| 85 |
-
"learning_rate":
|
| 86 |
-
"loss": 0.
|
| 87 |
"step": 110
|
| 88 |
},
|
| 89 |
{
|
| 90 |
-
"epoch": 0.
|
| 91 |
-
"grad_norm":
|
| 92 |
-
"learning_rate":
|
| 93 |
-
"loss": 0.
|
| 94 |
"step": 120
|
| 95 |
},
|
| 96 |
{
|
| 97 |
-
"epoch": 0.
|
| 98 |
-
"grad_norm":
|
| 99 |
-
"learning_rate":
|
| 100 |
-
"loss": 0.
|
| 101 |
"step": 130
|
| 102 |
},
|
| 103 |
{
|
| 104 |
-
"epoch": 0.
|
| 105 |
-
"grad_norm":
|
| 106 |
-
"learning_rate":
|
| 107 |
-
"loss": 0.
|
| 108 |
"step": 140
|
| 109 |
},
|
| 110 |
{
|
| 111 |
-
"epoch": 0.
|
| 112 |
-
"grad_norm":
|
| 113 |
-
"learning_rate":
|
| 114 |
-
"loss": 0.
|
| 115 |
"step": 150
|
| 116 |
},
|
| 117 |
{
|
| 118 |
-
"epoch": 0.
|
| 119 |
-
"grad_norm":
|
| 120 |
-
"learning_rate":
|
| 121 |
-
"loss": 0.
|
| 122 |
"step": 160
|
| 123 |
},
|
| 124 |
{
|
| 125 |
-
"epoch": 0.
|
| 126 |
-
"grad_norm":
|
| 127 |
-
"learning_rate":
|
| 128 |
-
"loss": 0.
|
| 129 |
"step": 170
|
| 130 |
},
|
| 131 |
{
|
| 132 |
-
"epoch": 0.
|
| 133 |
-
"grad_norm":
|
| 134 |
-
"learning_rate":
|
| 135 |
-
"loss": 0.
|
| 136 |
"step": 180
|
| 137 |
},
|
| 138 |
{
|
| 139 |
-
"epoch": 0.
|
| 140 |
-
"grad_norm":
|
| 141 |
-
"learning_rate":
|
| 142 |
-
"loss": 0.
|
| 143 |
"step": 190
|
| 144 |
},
|
| 145 |
{
|
| 146 |
-
"epoch": 0.
|
| 147 |
-
"grad_norm":
|
| 148 |
-
"learning_rate":
|
| 149 |
-
"loss": 0.
|
| 150 |
"step": 200
|
| 151 |
}
|
| 152 |
],
|
| 153 |
"logging_steps": 10,
|
| 154 |
-
"max_steps":
|
| 155 |
"num_input_tokens_seen": 0,
|
| 156 |
-
"num_train_epochs":
|
| 157 |
"save_steps": 200,
|
| 158 |
"stateful_callbacks": {
|
| 159 |
"EarlyStoppingCallback": {
|
|
@@ -176,8 +176,8 @@
|
|
| 176 |
"attributes": {}
|
| 177 |
}
|
| 178 |
},
|
| 179 |
-
"total_flos":
|
| 180 |
-
"train_batch_size":
|
| 181 |
"trial_name": null,
|
| 182 |
"trial_params": null
|
| 183 |
}
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.02559836170485089,
|
| 6 |
"eval_steps": 1000,
|
| 7 |
"global_step": 200,
|
| 8 |
"is_hyper_param_search": false,
|
|
|
|
| 10 |
"is_world_process_zero": true,
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
+
"epoch": 0.0012799180852425445,
|
| 14 |
+
"grad_norm": 89788.1796875,
|
| 15 |
+
"learning_rate": 2.304147465437788e-08,
|
| 16 |
+
"loss": 0.7088,
|
| 17 |
"step": 10
|
| 18 |
},
|
| 19 |
{
|
| 20 |
+
"epoch": 0.002559836170485089,
|
| 21 |
+
"grad_norm": 39479.36328125,
|
| 22 |
+
"learning_rate": 4.86431131592422e-08,
|
| 23 |
+
"loss": 0.7087,
|
| 24 |
"step": 20
|
| 25 |
},
|
| 26 |
{
|
| 27 |
+
"epoch": 0.0038397542557276334,
|
| 28 |
+
"grad_norm": 82478.765625,
|
| 29 |
+
"learning_rate": 7.424475166410652e-08,
|
| 30 |
+
"loss": 0.7074,
|
| 31 |
"step": 30
|
| 32 |
},
|
| 33 |
{
|
| 34 |
+
"epoch": 0.005119672340970178,
|
| 35 |
+
"grad_norm": 58003.75390625,
|
| 36 |
+
"learning_rate": 9.984639016897082e-08,
|
| 37 |
+
"loss": 0.703,
|
| 38 |
"step": 40
|
| 39 |
},
|
| 40 |
{
|
| 41 |
+
"epoch": 0.006399590426212722,
|
| 42 |
+
"grad_norm": 95491.0859375,
|
| 43 |
+
"learning_rate": 1.2544802867383514e-07,
|
| 44 |
+
"loss": 0.7073,
|
| 45 |
"step": 50
|
| 46 |
},
|
| 47 |
{
|
| 48 |
+
"epoch": 0.007679508511455267,
|
| 49 |
+
"grad_norm": 44903.296875,
|
| 50 |
+
"learning_rate": 1.5104966717869944e-07,
|
| 51 |
+
"loss": 0.7061,
|
| 52 |
"step": 60
|
| 53 |
},
|
| 54 |
{
|
| 55 |
+
"epoch": 0.008959426596697812,
|
| 56 |
+
"grad_norm": 142410.484375,
|
| 57 |
+
"learning_rate": 1.7665130568356375e-07,
|
| 58 |
+
"loss": 0.7082,
|
| 59 |
"step": 70
|
| 60 |
},
|
| 61 |
{
|
| 62 |
+
"epoch": 0.010239344681940356,
|
| 63 |
+
"grad_norm": 148763.109375,
|
| 64 |
+
"learning_rate": 2.0225294418842808e-07,
|
| 65 |
+
"loss": 0.707,
|
| 66 |
"step": 80
|
| 67 |
},
|
| 68 |
{
|
| 69 |
+
"epoch": 0.011519262767182901,
|
| 70 |
+
"grad_norm": 62031.30859375,
|
| 71 |
+
"learning_rate": 2.2785458269329238e-07,
|
| 72 |
+
"loss": 0.7036,
|
| 73 |
"step": 90
|
| 74 |
},
|
| 75 |
{
|
| 76 |
+
"epoch": 0.012799180852425445,
|
| 77 |
+
"grad_norm": 135708.875,
|
| 78 |
+
"learning_rate": 2.5345622119815674e-07,
|
| 79 |
+
"loss": 0.7078,
|
| 80 |
"step": 100
|
| 81 |
},
|
| 82 |
{
|
| 83 |
+
"epoch": 0.01407909893766799,
|
| 84 |
+
"grad_norm": 91129.421875,
|
| 85 |
+
"learning_rate": 2.79057859703021e-07,
|
| 86 |
+
"loss": 0.7035,
|
| 87 |
"step": 110
|
| 88 |
},
|
| 89 |
{
|
| 90 |
+
"epoch": 0.015359017022910534,
|
| 91 |
+
"grad_norm": 39290.72265625,
|
| 92 |
+
"learning_rate": 3.0465949820788535e-07,
|
| 93 |
+
"loss": 0.7083,
|
| 94 |
"step": 120
|
| 95 |
},
|
| 96 |
{
|
| 97 |
+
"epoch": 0.016638935108153077,
|
| 98 |
+
"grad_norm": 49473.61328125,
|
| 99 |
+
"learning_rate": 3.302611367127496e-07,
|
| 100 |
+
"loss": 0.7023,
|
| 101 |
"step": 130
|
| 102 |
},
|
| 103 |
{
|
| 104 |
+
"epoch": 0.017918853193395624,
|
| 105 |
+
"grad_norm": 61292.984375,
|
| 106 |
+
"learning_rate": 3.5586277521761395e-07,
|
| 107 |
+
"loss": 0.7014,
|
| 108 |
"step": 140
|
| 109 |
},
|
| 110 |
{
|
| 111 |
+
"epoch": 0.019198771278638168,
|
| 112 |
+
"grad_norm": 79102.0390625,
|
| 113 |
+
"learning_rate": 3.814644137224783e-07,
|
| 114 |
+
"loss": 0.7041,
|
| 115 |
"step": 150
|
| 116 |
},
|
| 117 |
{
|
| 118 |
+
"epoch": 0.02047868936388071,
|
| 119 |
+
"grad_norm": 61779.62890625,
|
| 120 |
+
"learning_rate": 4.0706605222734256e-07,
|
| 121 |
+
"loss": 0.7039,
|
| 122 |
"step": 160
|
| 123 |
},
|
| 124 |
{
|
| 125 |
+
"epoch": 0.021758607449123255,
|
| 126 |
+
"grad_norm": 63492.18359375,
|
| 127 |
+
"learning_rate": 4.326676907322069e-07,
|
| 128 |
+
"loss": 0.7035,
|
| 129 |
"step": 170
|
| 130 |
},
|
| 131 |
{
|
| 132 |
+
"epoch": 0.023038525534365802,
|
| 133 |
+
"grad_norm": 44190.3203125,
|
| 134 |
+
"learning_rate": 4.582693292370712e-07,
|
| 135 |
+
"loss": 0.7019,
|
| 136 |
"step": 180
|
| 137 |
},
|
| 138 |
{
|
| 139 |
+
"epoch": 0.024318443619608346,
|
| 140 |
+
"grad_norm": 67509.15625,
|
| 141 |
+
"learning_rate": 4.838709677419355e-07,
|
| 142 |
+
"loss": 0.6991,
|
| 143 |
"step": 190
|
| 144 |
},
|
| 145 |
{
|
| 146 |
+
"epoch": 0.02559836170485089,
|
| 147 |
+
"grad_norm": 94820.5078125,
|
| 148 |
+
"learning_rate": 5.094726062467999e-07,
|
| 149 |
+
"loss": 0.7011,
|
| 150 |
"step": 200
|
| 151 |
}
|
| 152 |
],
|
| 153 |
"logging_steps": 10,
|
| 154 |
+
"max_steps": 39065,
|
| 155 |
"num_input_tokens_seen": 0,
|
| 156 |
+
"num_train_epochs": 5,
|
| 157 |
"save_steps": 200,
|
| 158 |
"stateful_callbacks": {
|
| 159 |
"EarlyStoppingCallback": {
|
|
|
|
| 176 |
"attributes": {}
|
| 177 |
}
|
| 178 |
},
|
| 179 |
+
"total_flos": 3367821508608000.0,
|
| 180 |
+
"train_batch_size": 64,
|
| 181 |
"trial_name": null,
|
| 182 |
"trial_params": null
|
| 183 |
}
|
graphcodebert-robust/checkpoint-200/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5841
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ec2974753acccea9af7a8eb9c2abfaaba85cdcf89c926488b103f5662876bb0
|
| 3 |
size 5841
|
graphcodebert-robust/checkpoint-400/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 498612824
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92bce3c4e38ffa8155e9197c360622fa05c939bec62afcbfa3bf8fd778f88527
|
| 3 |
size 498612824
|
graphcodebert-robust/checkpoint-400/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4741923
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a83a52f1a15705e175493b2425539a92f6edb4c30253eadc01cb8a3f3c98b492
|
| 3 |
size 4741923
|
graphcodebert-robust/checkpoint-400/trainer_state.json
CHANGED
|
@@ -151,142 +151,142 @@
|
|
| 151 |
},
|
| 152 |
{
|
| 153 |
"epoch": 0.01344,
|
| 154 |
-
"grad_norm":
|
| 155 |
"learning_rate": 5.350742447516642e-07,
|
| 156 |
-
"loss": 0.
|
| 157 |
"step": 210
|
| 158 |
},
|
| 159 |
{
|
| 160 |
"epoch": 0.01408,
|
| 161 |
-
"grad_norm":
|
| 162 |
"learning_rate": 5.606758832565284e-07,
|
| 163 |
-
"loss": 0.
|
| 164 |
"step": 220
|
| 165 |
},
|
| 166 |
{
|
| 167 |
"epoch": 0.01472,
|
| 168 |
-
"grad_norm":
|
| 169 |
"learning_rate": 5.862775217613928e-07,
|
| 170 |
-
"loss": 0.
|
| 171 |
"step": 230
|
| 172 |
},
|
| 173 |
{
|
| 174 |
"epoch": 0.01536,
|
| 175 |
-
"grad_norm":
|
| 176 |
"learning_rate": 6.118791602662571e-07,
|
| 177 |
-
"loss": 0.
|
| 178 |
"step": 240
|
| 179 |
},
|
| 180 |
{
|
| 181 |
"epoch": 0.016,
|
| 182 |
-
"grad_norm":
|
| 183 |
"learning_rate": 6.374807987711214e-07,
|
| 184 |
-
"loss": 0.
|
| 185 |
"step": 250
|
| 186 |
},
|
| 187 |
{
|
| 188 |
"epoch": 0.01664,
|
| 189 |
-
"grad_norm":
|
| 190 |
"learning_rate": 6.630824372759858e-07,
|
| 191 |
-
"loss": 0.
|
| 192 |
"step": 260
|
| 193 |
},
|
| 194 |
{
|
| 195 |
"epoch": 0.01728,
|
| 196 |
-
"grad_norm":
|
| 197 |
"learning_rate": 6.8868407578085e-07,
|
| 198 |
-
"loss": 0.
|
| 199 |
"step": 270
|
| 200 |
},
|
| 201 |
{
|
| 202 |
"epoch": 0.01792,
|
| 203 |
-
"grad_norm":
|
| 204 |
"learning_rate": 7.142857142857143e-07,
|
| 205 |
-
"loss": 0.
|
| 206 |
"step": 280
|
| 207 |
},
|
| 208 |
{
|
| 209 |
"epoch": 0.01856,
|
| 210 |
-
"grad_norm":
|
| 211 |
"learning_rate": 7.398873527905787e-07,
|
| 212 |
-
"loss": 0.
|
| 213 |
"step": 290
|
| 214 |
},
|
| 215 |
{
|
| 216 |
"epoch": 0.0192,
|
| 217 |
-
"grad_norm":
|
| 218 |
"learning_rate": 7.65488991295443e-07,
|
| 219 |
"loss": 0.7122,
|
| 220 |
"step": 300
|
| 221 |
},
|
| 222 |
{
|
| 223 |
"epoch": 0.01984,
|
| 224 |
-
"grad_norm":
|
| 225 |
"learning_rate": 7.910906298003073e-07,
|
| 226 |
-
"loss": 0.
|
| 227 |
"step": 310
|
| 228 |
},
|
| 229 |
{
|
| 230 |
"epoch": 0.02048,
|
| 231 |
-
"grad_norm":
|
| 232 |
"learning_rate": 8.166922683051716e-07,
|
| 233 |
-
"loss": 0.
|
| 234 |
"step": 320
|
| 235 |
},
|
| 236 |
{
|
| 237 |
"epoch": 0.02112,
|
| 238 |
-
"grad_norm":
|
| 239 |
"learning_rate": 8.422939068100359e-07,
|
| 240 |
-
"loss": 0.
|
| 241 |
"step": 330
|
| 242 |
},
|
| 243 |
{
|
| 244 |
"epoch": 0.02176,
|
| 245 |
-
"grad_norm":
|
| 246 |
"learning_rate": 8.678955453149002e-07,
|
| 247 |
-
"loss": 0.
|
| 248 |
"step": 340
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"epoch": 0.0224,
|
| 252 |
-
"grad_norm":
|
| 253 |
"learning_rate": 8.934971838197646e-07,
|
| 254 |
-
"loss": 0.
|
| 255 |
"step": 350
|
| 256 |
},
|
| 257 |
{
|
| 258 |
"epoch": 0.02304,
|
| 259 |
-
"grad_norm":
|
| 260 |
"learning_rate": 9.190988223246289e-07,
|
| 261 |
-
"loss": 0.
|
| 262 |
"step": 360
|
| 263 |
},
|
| 264 |
{
|
| 265 |
"epoch": 0.02368,
|
| 266 |
-
"grad_norm":
|
| 267 |
"learning_rate": 9.447004608294931e-07,
|
| 268 |
-
"loss": 0.
|
| 269 |
"step": 370
|
| 270 |
},
|
| 271 |
{
|
| 272 |
"epoch": 0.02432,
|
| 273 |
-
"grad_norm":
|
| 274 |
"learning_rate": 9.703020993343575e-07,
|
| 275 |
-
"loss": 0.
|
| 276 |
"step": 380
|
| 277 |
},
|
| 278 |
{
|
| 279 |
"epoch": 0.02496,
|
| 280 |
-
"grad_norm":
|
| 281 |
"learning_rate": 9.959037378392218e-07,
|
| 282 |
-
"loss": 0.
|
| 283 |
"step": 390
|
| 284 |
},
|
| 285 |
{
|
| 286 |
"epoch": 0.0256,
|
| 287 |
-
"grad_norm":
|
| 288 |
"learning_rate": 1.021505376344086e-06,
|
| 289 |
-
"loss": 0.
|
| 290 |
"step": 400
|
| 291 |
}
|
| 292 |
],
|
|
|
|
| 151 |
},
|
| 152 |
{
|
| 153 |
"epoch": 0.01344,
|
| 154 |
+
"grad_norm": 144219.625,
|
| 155 |
"learning_rate": 5.350742447516642e-07,
|
| 156 |
+
"loss": 0.7218,
|
| 157 |
"step": 210
|
| 158 |
},
|
| 159 |
{
|
| 160 |
"epoch": 0.01408,
|
| 161 |
+
"grad_norm": 105046.0234375,
|
| 162 |
"learning_rate": 5.606758832565284e-07,
|
| 163 |
+
"loss": 0.718,
|
| 164 |
"step": 220
|
| 165 |
},
|
| 166 |
{
|
| 167 |
"epoch": 0.01472,
|
| 168 |
+
"grad_norm": 126142.4296875,
|
| 169 |
"learning_rate": 5.862775217613928e-07,
|
| 170 |
+
"loss": 0.7107,
|
| 171 |
"step": 230
|
| 172 |
},
|
| 173 |
{
|
| 174 |
"epoch": 0.01536,
|
| 175 |
+
"grad_norm": 92423.2265625,
|
| 176 |
"learning_rate": 6.118791602662571e-07,
|
| 177 |
+
"loss": 0.7271,
|
| 178 |
"step": 240
|
| 179 |
},
|
| 180 |
{
|
| 181 |
"epoch": 0.016,
|
| 182 |
+
"grad_norm": 98091.828125,
|
| 183 |
"learning_rate": 6.374807987711214e-07,
|
| 184 |
+
"loss": 0.7123,
|
| 185 |
"step": 250
|
| 186 |
},
|
| 187 |
{
|
| 188 |
"epoch": 0.01664,
|
| 189 |
+
"grad_norm": 131949.578125,
|
| 190 |
"learning_rate": 6.630824372759858e-07,
|
| 191 |
+
"loss": 0.7204,
|
| 192 |
"step": 260
|
| 193 |
},
|
| 194 |
{
|
| 195 |
"epoch": 0.01728,
|
| 196 |
+
"grad_norm": 112228.5625,
|
| 197 |
"learning_rate": 6.8868407578085e-07,
|
| 198 |
+
"loss": 0.722,
|
| 199 |
"step": 270
|
| 200 |
},
|
| 201 |
{
|
| 202 |
"epoch": 0.01792,
|
| 203 |
+
"grad_norm": 64587.734375,
|
| 204 |
"learning_rate": 7.142857142857143e-07,
|
| 205 |
+
"loss": 0.7263,
|
| 206 |
"step": 280
|
| 207 |
},
|
| 208 |
{
|
| 209 |
"epoch": 0.01856,
|
| 210 |
+
"grad_norm": 99893.203125,
|
| 211 |
"learning_rate": 7.398873527905787e-07,
|
| 212 |
+
"loss": 0.7169,
|
| 213 |
"step": 290
|
| 214 |
},
|
| 215 |
{
|
| 216 |
"epoch": 0.0192,
|
| 217 |
+
"grad_norm": 135749.875,
|
| 218 |
"learning_rate": 7.65488991295443e-07,
|
| 219 |
"loss": 0.7122,
|
| 220 |
"step": 300
|
| 221 |
},
|
| 222 |
{
|
| 223 |
"epoch": 0.01984,
|
| 224 |
+
"grad_norm": 103292.5703125,
|
| 225 |
"learning_rate": 7.910906298003073e-07,
|
| 226 |
+
"loss": 0.7183,
|
| 227 |
"step": 310
|
| 228 |
},
|
| 229 |
{
|
| 230 |
"epoch": 0.02048,
|
| 231 |
+
"grad_norm": 86927.28125,
|
| 232 |
"learning_rate": 8.166922683051716e-07,
|
| 233 |
+
"loss": 0.7192,
|
| 234 |
"step": 320
|
| 235 |
},
|
| 236 |
{
|
| 237 |
"epoch": 0.02112,
|
| 238 |
+
"grad_norm": 153738.390625,
|
| 239 |
"learning_rate": 8.422939068100359e-07,
|
| 240 |
+
"loss": 0.711,
|
| 241 |
"step": 330
|
| 242 |
},
|
| 243 |
{
|
| 244 |
"epoch": 0.02176,
|
| 245 |
+
"grad_norm": 69994.7734375,
|
| 246 |
"learning_rate": 8.678955453149002e-07,
|
| 247 |
+
"loss": 0.7176,
|
| 248 |
"step": 340
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"epoch": 0.0224,
|
| 252 |
+
"grad_norm": 141370.6875,
|
| 253 |
"learning_rate": 8.934971838197646e-07,
|
| 254 |
+
"loss": 0.7105,
|
| 255 |
"step": 350
|
| 256 |
},
|
| 257 |
{
|
| 258 |
"epoch": 0.02304,
|
| 259 |
+
"grad_norm": 71139.453125,
|
| 260 |
"learning_rate": 9.190988223246289e-07,
|
| 261 |
+
"loss": 0.7126,
|
| 262 |
"step": 360
|
| 263 |
},
|
| 264 |
{
|
| 265 |
"epoch": 0.02368,
|
| 266 |
+
"grad_norm": 82039.1953125,
|
| 267 |
"learning_rate": 9.447004608294931e-07,
|
| 268 |
+
"loss": 0.7078,
|
| 269 |
"step": 370
|
| 270 |
},
|
| 271 |
{
|
| 272 |
"epoch": 0.02432,
|
| 273 |
+
"grad_norm": 71275.7890625,
|
| 274 |
"learning_rate": 9.703020993343575e-07,
|
| 275 |
+
"loss": 0.7145,
|
| 276 |
"step": 380
|
| 277 |
},
|
| 278 |
{
|
| 279 |
"epoch": 0.02496,
|
| 280 |
+
"grad_norm": 145801.21875,
|
| 281 |
"learning_rate": 9.959037378392218e-07,
|
| 282 |
+
"loss": 0.7102,
|
| 283 |
"step": 390
|
| 284 |
},
|
| 285 |
{
|
| 286 |
"epoch": 0.0256,
|
| 287 |
+
"grad_norm": 171507.0,
|
| 288 |
"learning_rate": 1.021505376344086e-06,
|
| 289 |
+
"loss": 0.7123,
|
| 290 |
"step": 400
|
| 291 |
}
|
| 292 |
],
|
graphcodebert-robust/checkpoint-400/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5841
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:82e524f8f7de87947806acfb17c136195f8d3668b26513da260f1a2f14442156
|
| 3 |
size 5841
|
graphcodebert-robust/checkpoint-600/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 498612824
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:320da2fc28dfd7f2b08f5a311e169db9c3172c660ca5f1f28958df59ff94a372
|
| 3 |
size 498612824
|
graphcodebert-robust/checkpoint-600/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4741923
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41ff1d1389d831b2bc7715b986dcf40f64372807ce80b3368515da1fcaa1cb7a
|
| 3 |
size 4741923
|
graphcodebert-robust/checkpoint-600/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14581
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d667b0153bf32427b60333b1fe4a206d72e36eefc1792fdf3d499d50e466bd30
|
| 3 |
size 14581
|
graphcodebert-robust/checkpoint-600/trainer_state.json
CHANGED
|
@@ -151,282 +151,282 @@
|
|
| 151 |
},
|
| 152 |
{
|
| 153 |
"epoch": 0.01344,
|
| 154 |
-
"grad_norm":
|
| 155 |
"learning_rate": 5.350742447516642e-07,
|
| 156 |
-
"loss": 0.
|
| 157 |
"step": 210
|
| 158 |
},
|
| 159 |
{
|
| 160 |
"epoch": 0.01408,
|
| 161 |
-
"grad_norm":
|
| 162 |
"learning_rate": 5.606758832565284e-07,
|
| 163 |
-
"loss": 0.
|
| 164 |
"step": 220
|
| 165 |
},
|
| 166 |
{
|
| 167 |
"epoch": 0.01472,
|
| 168 |
-
"grad_norm":
|
| 169 |
"learning_rate": 5.862775217613928e-07,
|
| 170 |
-
"loss": 0.
|
| 171 |
"step": 230
|
| 172 |
},
|
| 173 |
{
|
| 174 |
"epoch": 0.01536,
|
| 175 |
-
"grad_norm":
|
| 176 |
"learning_rate": 6.118791602662571e-07,
|
| 177 |
-
"loss": 0.
|
| 178 |
"step": 240
|
| 179 |
},
|
| 180 |
{
|
| 181 |
"epoch": 0.016,
|
| 182 |
-
"grad_norm":
|
| 183 |
"learning_rate": 6.374807987711214e-07,
|
| 184 |
-
"loss": 0.
|
| 185 |
"step": 250
|
| 186 |
},
|
| 187 |
{
|
| 188 |
"epoch": 0.01664,
|
| 189 |
-
"grad_norm":
|
| 190 |
"learning_rate": 6.630824372759858e-07,
|
| 191 |
-
"loss": 0.
|
| 192 |
"step": 260
|
| 193 |
},
|
| 194 |
{
|
| 195 |
"epoch": 0.01728,
|
| 196 |
-
"grad_norm":
|
| 197 |
"learning_rate": 6.8868407578085e-07,
|
| 198 |
-
"loss": 0.
|
| 199 |
"step": 270
|
| 200 |
},
|
| 201 |
{
|
| 202 |
"epoch": 0.01792,
|
| 203 |
-
"grad_norm":
|
| 204 |
"learning_rate": 7.142857142857143e-07,
|
| 205 |
-
"loss": 0.
|
| 206 |
"step": 280
|
| 207 |
},
|
| 208 |
{
|
| 209 |
"epoch": 0.01856,
|
| 210 |
-
"grad_norm":
|
| 211 |
"learning_rate": 7.398873527905787e-07,
|
| 212 |
-
"loss": 0.
|
| 213 |
"step": 290
|
| 214 |
},
|
| 215 |
{
|
| 216 |
"epoch": 0.0192,
|
| 217 |
-
"grad_norm":
|
| 218 |
"learning_rate": 7.65488991295443e-07,
|
| 219 |
"loss": 0.7122,
|
| 220 |
"step": 300
|
| 221 |
},
|
| 222 |
{
|
| 223 |
"epoch": 0.01984,
|
| 224 |
-
"grad_norm":
|
| 225 |
"learning_rate": 7.910906298003073e-07,
|
| 226 |
-
"loss": 0.
|
| 227 |
"step": 310
|
| 228 |
},
|
| 229 |
{
|
| 230 |
"epoch": 0.02048,
|
| 231 |
-
"grad_norm":
|
| 232 |
"learning_rate": 8.166922683051716e-07,
|
| 233 |
-
"loss": 0.
|
| 234 |
"step": 320
|
| 235 |
},
|
| 236 |
{
|
| 237 |
"epoch": 0.02112,
|
| 238 |
-
"grad_norm":
|
| 239 |
"learning_rate": 8.422939068100359e-07,
|
| 240 |
-
"loss": 0.
|
| 241 |
"step": 330
|
| 242 |
},
|
| 243 |
{
|
| 244 |
"epoch": 0.02176,
|
| 245 |
-
"grad_norm":
|
| 246 |
"learning_rate": 8.678955453149002e-07,
|
| 247 |
-
"loss": 0.
|
| 248 |
"step": 340
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"epoch": 0.0224,
|
| 252 |
-
"grad_norm":
|
| 253 |
"learning_rate": 8.934971838197646e-07,
|
| 254 |
-
"loss": 0.
|
| 255 |
"step": 350
|
| 256 |
},
|
| 257 |
{
|
| 258 |
"epoch": 0.02304,
|
| 259 |
-
"grad_norm":
|
| 260 |
"learning_rate": 9.190988223246289e-07,
|
| 261 |
-
"loss": 0.
|
| 262 |
"step": 360
|
| 263 |
},
|
| 264 |
{
|
| 265 |
"epoch": 0.02368,
|
| 266 |
-
"grad_norm":
|
| 267 |
"learning_rate": 9.447004608294931e-07,
|
| 268 |
-
"loss": 0.
|
| 269 |
"step": 370
|
| 270 |
},
|
| 271 |
{
|
| 272 |
"epoch": 0.02432,
|
| 273 |
-
"grad_norm":
|
| 274 |
"learning_rate": 9.703020993343575e-07,
|
| 275 |
-
"loss": 0.
|
| 276 |
"step": 380
|
| 277 |
},
|
| 278 |
{
|
| 279 |
"epoch": 0.02496,
|
| 280 |
-
"grad_norm":
|
| 281 |
"learning_rate": 9.959037378392218e-07,
|
| 282 |
-
"loss": 0.
|
| 283 |
"step": 390
|
| 284 |
},
|
| 285 |
{
|
| 286 |
"epoch": 0.0256,
|
| 287 |
-
"grad_norm":
|
| 288 |
"learning_rate": 1.021505376344086e-06,
|
| 289 |
-
"loss": 0.
|
| 290 |
"step": 400
|
| 291 |
},
|
| 292 |
{
|
| 293 |
"epoch": 0.02624,
|
| 294 |
-
"grad_norm":
|
| 295 |
"learning_rate": 1.0471070148489503e-06,
|
| 296 |
-
"loss": 0.
|
| 297 |
"step": 410
|
| 298 |
},
|
| 299 |
{
|
| 300 |
"epoch": 0.02688,
|
| 301 |
-
"grad_norm":
|
| 302 |
"learning_rate": 1.0727086533538148e-06,
|
| 303 |
-
"loss": 0.
|
| 304 |
"step": 420
|
| 305 |
},
|
| 306 |
{
|
| 307 |
"epoch": 0.02752,
|
| 308 |
-
"grad_norm":
|
| 309 |
"learning_rate": 1.0983102918586791e-06,
|
| 310 |
-
"loss": 0.
|
| 311 |
"step": 430
|
| 312 |
},
|
| 313 |
{
|
| 314 |
"epoch": 0.02816,
|
| 315 |
-
"grad_norm":
|
| 316 |
"learning_rate": 1.1239119303635434e-06,
|
| 317 |
-
"loss": 0.
|
| 318 |
"step": 440
|
| 319 |
},
|
| 320 |
{
|
| 321 |
"epoch": 0.0288,
|
| 322 |
-
"grad_norm":
|
| 323 |
"learning_rate": 1.1495135688684077e-06,
|
| 324 |
-
"loss": 0.
|
| 325 |
"step": 450
|
| 326 |
},
|
| 327 |
{
|
| 328 |
"epoch": 0.02944,
|
| 329 |
-
"grad_norm":
|
| 330 |
"learning_rate": 1.175115207373272e-06,
|
| 331 |
-
"loss": 0.
|
| 332 |
"step": 460
|
| 333 |
},
|
| 334 |
{
|
| 335 |
"epoch": 0.03008,
|
| 336 |
-
"grad_norm":
|
| 337 |
"learning_rate": 1.2007168458781362e-06,
|
| 338 |
-
"loss": 0.
|
| 339 |
"step": 470
|
| 340 |
},
|
| 341 |
{
|
| 342 |
"epoch": 0.03072,
|
| 343 |
-
"grad_norm":
|
| 344 |
"learning_rate": 1.2263184843830007e-06,
|
| 345 |
-
"loss": 0.
|
| 346 |
"step": 480
|
| 347 |
},
|
| 348 |
{
|
| 349 |
"epoch": 0.03136,
|
| 350 |
-
"grad_norm":
|
| 351 |
"learning_rate": 1.251920122887865e-06,
|
| 352 |
-
"loss": 0.
|
| 353 |
"step": 490
|
| 354 |
},
|
| 355 |
{
|
| 356 |
"epoch": 0.032,
|
| 357 |
-
"grad_norm":
|
| 358 |
"learning_rate": 1.2775217613927293e-06,
|
| 359 |
-
"loss": 0.
|
| 360 |
"step": 500
|
| 361 |
},
|
| 362 |
{
|
| 363 |
"epoch": 0.03264,
|
| 364 |
-
"grad_norm":
|
| 365 |
"learning_rate": 1.3031233998975938e-06,
|
| 366 |
-
"loss": 0.
|
| 367 |
"step": 510
|
| 368 |
},
|
| 369 |
{
|
| 370 |
"epoch": 0.03328,
|
| 371 |
-
"grad_norm":
|
| 372 |
"learning_rate": 1.3287250384024578e-06,
|
| 373 |
-
"loss": 0.
|
| 374 |
"step": 520
|
| 375 |
},
|
| 376 |
{
|
| 377 |
"epoch": 0.03392,
|
| 378 |
-
"grad_norm":
|
| 379 |
"learning_rate": 1.354326676907322e-06,
|
| 380 |
-
"loss": 0.
|
| 381 |
"step": 530
|
| 382 |
},
|
| 383 |
{
|
| 384 |
"epoch": 0.03456,
|
| 385 |
-
"grad_norm":
|
| 386 |
"learning_rate": 1.3799283154121864e-06,
|
| 387 |
-
"loss": 0.
|
| 388 |
"step": 540
|
| 389 |
},
|
| 390 |
{
|
| 391 |
"epoch": 0.0352,
|
| 392 |
-
"grad_norm":
|
| 393 |
"learning_rate": 1.4055299539170509e-06,
|
| 394 |
-
"loss": 0.
|
| 395 |
"step": 550
|
| 396 |
},
|
| 397 |
{
|
| 398 |
"epoch": 0.03584,
|
| 399 |
-
"grad_norm":
|
| 400 |
"learning_rate": 1.4311315924219151e-06,
|
| 401 |
-
"loss": 0.
|
| 402 |
"step": 560
|
| 403 |
},
|
| 404 |
{
|
| 405 |
"epoch": 0.03648,
|
| 406 |
-
"grad_norm":
|
| 407 |
"learning_rate": 1.4567332309267796e-06,
|
| 408 |
-
"loss": 0.
|
| 409 |
"step": 570
|
| 410 |
},
|
| 411 |
{
|
| 412 |
"epoch": 0.03712,
|
| 413 |
-
"grad_norm":
|
| 414 |
"learning_rate": 1.4823348694316437e-06,
|
| 415 |
-
"loss": 0.
|
| 416 |
"step": 580
|
| 417 |
},
|
| 418 |
{
|
| 419 |
"epoch": 0.03776,
|
| 420 |
-
"grad_norm":
|
| 421 |
"learning_rate": 1.507936507936508e-06,
|
| 422 |
-
"loss": 0.
|
| 423 |
"step": 590
|
| 424 |
},
|
| 425 |
{
|
| 426 |
"epoch": 0.0384,
|
| 427 |
-
"grad_norm":
|
| 428 |
"learning_rate": 1.5335381464413722e-06,
|
| 429 |
-
"loss": 0.
|
| 430 |
"step": 600
|
| 431 |
}
|
| 432 |
],
|
|
@@ -456,7 +456,7 @@
|
|
| 456 |
"attributes": {}
|
| 457 |
}
|
| 458 |
},
|
| 459 |
-
"total_flos":
|
| 460 |
"train_batch_size": 32,
|
| 461 |
"trial_name": null,
|
| 462 |
"trial_params": null
|
|
|
|
| 151 |
},
|
| 152 |
{
|
| 153 |
"epoch": 0.01344,
|
| 154 |
+
"grad_norm": 144219.625,
|
| 155 |
"learning_rate": 5.350742447516642e-07,
|
| 156 |
+
"loss": 0.7218,
|
| 157 |
"step": 210
|
| 158 |
},
|
| 159 |
{
|
| 160 |
"epoch": 0.01408,
|
| 161 |
+
"grad_norm": 105046.0234375,
|
| 162 |
"learning_rate": 5.606758832565284e-07,
|
| 163 |
+
"loss": 0.718,
|
| 164 |
"step": 220
|
| 165 |
},
|
| 166 |
{
|
| 167 |
"epoch": 0.01472,
|
| 168 |
+
"grad_norm": 126142.4296875,
|
| 169 |
"learning_rate": 5.862775217613928e-07,
|
| 170 |
+
"loss": 0.7107,
|
| 171 |
"step": 230
|
| 172 |
},
|
| 173 |
{
|
| 174 |
"epoch": 0.01536,
|
| 175 |
+
"grad_norm": 92423.2265625,
|
| 176 |
"learning_rate": 6.118791602662571e-07,
|
| 177 |
+
"loss": 0.7271,
|
| 178 |
"step": 240
|
| 179 |
},
|
| 180 |
{
|
| 181 |
"epoch": 0.016,
|
| 182 |
+
"grad_norm": 98091.828125,
|
| 183 |
"learning_rate": 6.374807987711214e-07,
|
| 184 |
+
"loss": 0.7123,
|
| 185 |
"step": 250
|
| 186 |
},
|
| 187 |
{
|
| 188 |
"epoch": 0.01664,
|
| 189 |
+
"grad_norm": 131949.578125,
|
| 190 |
"learning_rate": 6.630824372759858e-07,
|
| 191 |
+
"loss": 0.7204,
|
| 192 |
"step": 260
|
| 193 |
},
|
| 194 |
{
|
| 195 |
"epoch": 0.01728,
|
| 196 |
+
"grad_norm": 112228.5625,
|
| 197 |
"learning_rate": 6.8868407578085e-07,
|
| 198 |
+
"loss": 0.722,
|
| 199 |
"step": 270
|
| 200 |
},
|
| 201 |
{
|
| 202 |
"epoch": 0.01792,
|
| 203 |
+
"grad_norm": 64587.734375,
|
| 204 |
"learning_rate": 7.142857142857143e-07,
|
| 205 |
+
"loss": 0.7263,
|
| 206 |
"step": 280
|
| 207 |
},
|
| 208 |
{
|
| 209 |
"epoch": 0.01856,
|
| 210 |
+
"grad_norm": 99893.203125,
|
| 211 |
"learning_rate": 7.398873527905787e-07,
|
| 212 |
+
"loss": 0.7169,
|
| 213 |
"step": 290
|
| 214 |
},
|
| 215 |
{
|
| 216 |
"epoch": 0.0192,
|
| 217 |
+
"grad_norm": 135749.875,
|
| 218 |
"learning_rate": 7.65488991295443e-07,
|
| 219 |
"loss": 0.7122,
|
| 220 |
"step": 300
|
| 221 |
},
|
| 222 |
{
|
| 223 |
"epoch": 0.01984,
|
| 224 |
+
"grad_norm": 103292.5703125,
|
| 225 |
"learning_rate": 7.910906298003073e-07,
|
| 226 |
+
"loss": 0.7183,
|
| 227 |
"step": 310
|
| 228 |
},
|
| 229 |
{
|
| 230 |
"epoch": 0.02048,
|
| 231 |
+
"grad_norm": 86927.28125,
|
| 232 |
"learning_rate": 8.166922683051716e-07,
|
| 233 |
+
"loss": 0.7192,
|
| 234 |
"step": 320
|
| 235 |
},
|
| 236 |
{
|
| 237 |
"epoch": 0.02112,
|
| 238 |
+
"grad_norm": 153738.390625,
|
| 239 |
"learning_rate": 8.422939068100359e-07,
|
| 240 |
+
"loss": 0.711,
|
| 241 |
"step": 330
|
| 242 |
},
|
| 243 |
{
|
| 244 |
"epoch": 0.02176,
|
| 245 |
+
"grad_norm": 69994.7734375,
|
| 246 |
"learning_rate": 8.678955453149002e-07,
|
| 247 |
+
"loss": 0.7176,
|
| 248 |
"step": 340
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"epoch": 0.0224,
|
| 252 |
+
"grad_norm": 141370.6875,
|
| 253 |
"learning_rate": 8.934971838197646e-07,
|
| 254 |
+
"loss": 0.7105,
|
| 255 |
"step": 350
|
| 256 |
},
|
| 257 |
{
|
| 258 |
"epoch": 0.02304,
|
| 259 |
+
"grad_norm": 71139.453125,
|
| 260 |
"learning_rate": 9.190988223246289e-07,
|
| 261 |
+
"loss": 0.7126,
|
| 262 |
"step": 360
|
| 263 |
},
|
| 264 |
{
|
| 265 |
"epoch": 0.02368,
|
| 266 |
+
"grad_norm": 82039.1953125,
|
| 267 |
"learning_rate": 9.447004608294931e-07,
|
| 268 |
+
"loss": 0.7078,
|
| 269 |
"step": 370
|
| 270 |
},
|
| 271 |
{
|
| 272 |
"epoch": 0.02432,
|
| 273 |
+
"grad_norm": 71275.7890625,
|
| 274 |
"learning_rate": 9.703020993343575e-07,
|
| 275 |
+
"loss": 0.7145,
|
| 276 |
"step": 380
|
| 277 |
},
|
| 278 |
{
|
| 279 |
"epoch": 0.02496,
|
| 280 |
+
"grad_norm": 145801.21875,
|
| 281 |
"learning_rate": 9.959037378392218e-07,
|
| 282 |
+
"loss": 0.7102,
|
| 283 |
"step": 390
|
| 284 |
},
|
| 285 |
{
|
| 286 |
"epoch": 0.0256,
|
| 287 |
+
"grad_norm": 171507.0,
|
| 288 |
"learning_rate": 1.021505376344086e-06,
|
| 289 |
+
"loss": 0.7123,
|
| 290 |
"step": 400
|
| 291 |
},
|
| 292 |
{
|
| 293 |
"epoch": 0.02624,
|
| 294 |
+
"grad_norm": 79134.203125,
|
| 295 |
"learning_rate": 1.0471070148489503e-06,
|
| 296 |
+
"loss": 0.7083,
|
| 297 |
"step": 410
|
| 298 |
},
|
| 299 |
{
|
| 300 |
"epoch": 0.02688,
|
| 301 |
+
"grad_norm": 69231.640625,
|
| 302 |
"learning_rate": 1.0727086533538148e-06,
|
| 303 |
+
"loss": 0.7105,
|
| 304 |
"step": 420
|
| 305 |
},
|
| 306 |
{
|
| 307 |
"epoch": 0.02752,
|
| 308 |
+
"grad_norm": 113099.3984375,
|
| 309 |
"learning_rate": 1.0983102918586791e-06,
|
| 310 |
+
"loss": 0.7141,
|
| 311 |
"step": 430
|
| 312 |
},
|
| 313 |
{
|
| 314 |
"epoch": 0.02816,
|
| 315 |
+
"grad_norm": 121013.734375,
|
| 316 |
"learning_rate": 1.1239119303635434e-06,
|
| 317 |
+
"loss": 0.7146,
|
| 318 |
"step": 440
|
| 319 |
},
|
| 320 |
{
|
| 321 |
"epoch": 0.0288,
|
| 322 |
+
"grad_norm": 89184.609375,
|
| 323 |
"learning_rate": 1.1495135688684077e-06,
|
| 324 |
+
"loss": 0.7133,
|
| 325 |
"step": 450
|
| 326 |
},
|
| 327 |
{
|
| 328 |
"epoch": 0.02944,
|
| 329 |
+
"grad_norm": 176246.890625,
|
| 330 |
"learning_rate": 1.175115207373272e-06,
|
| 331 |
+
"loss": 0.7086,
|
| 332 |
"step": 460
|
| 333 |
},
|
| 334 |
{
|
| 335 |
"epoch": 0.03008,
|
| 336 |
+
"grad_norm": 88161.2265625,
|
| 337 |
"learning_rate": 1.2007168458781362e-06,
|
| 338 |
+
"loss": 0.709,
|
| 339 |
"step": 470
|
| 340 |
},
|
| 341 |
{
|
| 342 |
"epoch": 0.03072,
|
| 343 |
+
"grad_norm": 74441.015625,
|
| 344 |
"learning_rate": 1.2263184843830007e-06,
|
| 345 |
+
"loss": 0.7023,
|
| 346 |
"step": 480
|
| 347 |
},
|
| 348 |
{
|
| 349 |
"epoch": 0.03136,
|
| 350 |
+
"grad_norm": 96409.40625,
|
| 351 |
"learning_rate": 1.251920122887865e-06,
|
| 352 |
+
"loss": 0.715,
|
| 353 |
"step": 490
|
| 354 |
},
|
| 355 |
{
|
| 356 |
"epoch": 0.032,
|
| 357 |
+
"grad_norm": 81090.6484375,
|
| 358 |
"learning_rate": 1.2775217613927293e-06,
|
| 359 |
+
"loss": 0.7109,
|
| 360 |
"step": 500
|
| 361 |
},
|
| 362 |
{
|
| 363 |
"epoch": 0.03264,
|
| 364 |
+
"grad_norm": 98153.8828125,
|
| 365 |
"learning_rate": 1.3031233998975938e-06,
|
| 366 |
+
"loss": 0.7092,
|
| 367 |
"step": 510
|
| 368 |
},
|
| 369 |
{
|
| 370 |
"epoch": 0.03328,
|
| 371 |
+
"grad_norm": 78782.546875,
|
| 372 |
"learning_rate": 1.3287250384024578e-06,
|
| 373 |
+
"loss": 0.7048,
|
| 374 |
"step": 520
|
| 375 |
},
|
| 376 |
{
|
| 377 |
"epoch": 0.03392,
|
| 378 |
+
"grad_norm": 110360.5,
|
| 379 |
"learning_rate": 1.354326676907322e-06,
|
| 380 |
+
"loss": 0.7108,
|
| 381 |
"step": 530
|
| 382 |
},
|
| 383 |
{
|
| 384 |
"epoch": 0.03456,
|
| 385 |
+
"grad_norm": 88462.0703125,
|
| 386 |
"learning_rate": 1.3799283154121864e-06,
|
| 387 |
+
"loss": 0.7041,
|
| 388 |
"step": 540
|
| 389 |
},
|
| 390 |
{
|
| 391 |
"epoch": 0.0352,
|
| 392 |
+
"grad_norm": 97624.7421875,
|
| 393 |
"learning_rate": 1.4055299539170509e-06,
|
| 394 |
+
"loss": 0.7114,
|
| 395 |
"step": 550
|
| 396 |
},
|
| 397 |
{
|
| 398 |
"epoch": 0.03584,
|
| 399 |
+
"grad_norm": 99471.4375,
|
| 400 |
"learning_rate": 1.4311315924219151e-06,
|
| 401 |
+
"loss": 0.7191,
|
| 402 |
"step": 560
|
| 403 |
},
|
| 404 |
{
|
| 405 |
"epoch": 0.03648,
|
| 406 |
+
"grad_norm": 79087.90625,
|
| 407 |
"learning_rate": 1.4567332309267796e-06,
|
| 408 |
+
"loss": 0.7022,
|
| 409 |
"step": 570
|
| 410 |
},
|
| 411 |
{
|
| 412 |
"epoch": 0.03712,
|
| 413 |
+
"grad_norm": 65275.0,
|
| 414 |
"learning_rate": 1.4823348694316437e-06,
|
| 415 |
+
"loss": 0.7088,
|
| 416 |
"step": 580
|
| 417 |
},
|
| 418 |
{
|
| 419 |
"epoch": 0.03776,
|
| 420 |
+
"grad_norm": 153826.28125,
|
| 421 |
"learning_rate": 1.507936507936508e-06,
|
| 422 |
+
"loss": 0.7079,
|
| 423 |
"step": 590
|
| 424 |
},
|
| 425 |
{
|
| 426 |
"epoch": 0.0384,
|
| 427 |
+
"grad_norm": 64280.38671875,
|
| 428 |
"learning_rate": 1.5335381464413722e-06,
|
| 429 |
+
"loss": 0.7018,
|
| 430 |
"step": 600
|
| 431 |
}
|
| 432 |
],
|
|
|
|
| 456 |
"attributes": {}
|
| 457 |
}
|
| 458 |
},
|
| 459 |
+
"total_flos": 5049545152264320.0,
|
| 460 |
"train_batch_size": 32,
|
| 461 |
"trial_name": null,
|
| 462 |
"trial_params": null
|
graphcodebert-robust/checkpoint-600/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5841
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:82e524f8f7de87947806acfb17c136195f8d3668b26513da260f1a2f14442156
|
| 3 |
size 5841
|
graphcodebert-robust/checkpoint-800/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 498612824
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c038fee615aa3289704b6c8446543a8902b07b09cc79c21ef54c5fe8590f914e
|
| 3 |
size 498612824
|
graphcodebert-robust/checkpoint-800/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4741923
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:554c3a189d14a7538050afbd400501c37378790e4b17a4a388758bad08d098a0
|
| 3 |
size 4741923
|
graphcodebert-robust/checkpoint-800/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14581
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc41893e18124a5b4346e5ad2eec904a9b13636e7df7f9d4e28520206d9aac00
|
| 3 |
size 14581
|
graphcodebert-robust/checkpoint-800/trainer_state.json
CHANGED
|
@@ -151,422 +151,422 @@
|
|
| 151 |
},
|
| 152 |
{
|
| 153 |
"epoch": 0.01344,
|
| 154 |
-
"grad_norm":
|
| 155 |
"learning_rate": 5.350742447516642e-07,
|
| 156 |
-
"loss": 0.
|
| 157 |
"step": 210
|
| 158 |
},
|
| 159 |
{
|
| 160 |
"epoch": 0.01408,
|
| 161 |
-
"grad_norm":
|
| 162 |
"learning_rate": 5.606758832565284e-07,
|
| 163 |
-
"loss": 0.
|
| 164 |
"step": 220
|
| 165 |
},
|
| 166 |
{
|
| 167 |
"epoch": 0.01472,
|
| 168 |
-
"grad_norm":
|
| 169 |
"learning_rate": 5.862775217613928e-07,
|
| 170 |
-
"loss": 0.
|
| 171 |
"step": 230
|
| 172 |
},
|
| 173 |
{
|
| 174 |
"epoch": 0.01536,
|
| 175 |
-
"grad_norm":
|
| 176 |
"learning_rate": 6.118791602662571e-07,
|
| 177 |
-
"loss": 0.
|
| 178 |
"step": 240
|
| 179 |
},
|
| 180 |
{
|
| 181 |
"epoch": 0.016,
|
| 182 |
-
"grad_norm":
|
| 183 |
"learning_rate": 6.374807987711214e-07,
|
| 184 |
-
"loss": 0.
|
| 185 |
"step": 250
|
| 186 |
},
|
| 187 |
{
|
| 188 |
"epoch": 0.01664,
|
| 189 |
-
"grad_norm":
|
| 190 |
"learning_rate": 6.630824372759858e-07,
|
| 191 |
-
"loss": 0.
|
| 192 |
"step": 260
|
| 193 |
},
|
| 194 |
{
|
| 195 |
"epoch": 0.01728,
|
| 196 |
-
"grad_norm":
|
| 197 |
"learning_rate": 6.8868407578085e-07,
|
| 198 |
-
"loss": 0.
|
| 199 |
"step": 270
|
| 200 |
},
|
| 201 |
{
|
| 202 |
"epoch": 0.01792,
|
| 203 |
-
"grad_norm":
|
| 204 |
"learning_rate": 7.142857142857143e-07,
|
| 205 |
-
"loss": 0.
|
| 206 |
"step": 280
|
| 207 |
},
|
| 208 |
{
|
| 209 |
"epoch": 0.01856,
|
| 210 |
-
"grad_norm":
|
| 211 |
"learning_rate": 7.398873527905787e-07,
|
| 212 |
-
"loss": 0.
|
| 213 |
"step": 290
|
| 214 |
},
|
| 215 |
{
|
| 216 |
"epoch": 0.0192,
|
| 217 |
-
"grad_norm":
|
| 218 |
"learning_rate": 7.65488991295443e-07,
|
| 219 |
"loss": 0.7122,
|
| 220 |
"step": 300
|
| 221 |
},
|
| 222 |
{
|
| 223 |
"epoch": 0.01984,
|
| 224 |
-
"grad_norm":
|
| 225 |
"learning_rate": 7.910906298003073e-07,
|
| 226 |
-
"loss": 0.
|
| 227 |
"step": 310
|
| 228 |
},
|
| 229 |
{
|
| 230 |
"epoch": 0.02048,
|
| 231 |
-
"grad_norm":
|
| 232 |
"learning_rate": 8.166922683051716e-07,
|
| 233 |
-
"loss": 0.
|
| 234 |
"step": 320
|
| 235 |
},
|
| 236 |
{
|
| 237 |
"epoch": 0.02112,
|
| 238 |
-
"grad_norm":
|
| 239 |
"learning_rate": 8.422939068100359e-07,
|
| 240 |
-
"loss": 0.
|
| 241 |
"step": 330
|
| 242 |
},
|
| 243 |
{
|
| 244 |
"epoch": 0.02176,
|
| 245 |
-
"grad_norm":
|
| 246 |
"learning_rate": 8.678955453149002e-07,
|
| 247 |
-
"loss": 0.
|
| 248 |
"step": 340
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"epoch": 0.0224,
|
| 252 |
-
"grad_norm":
|
| 253 |
"learning_rate": 8.934971838197646e-07,
|
| 254 |
-
"loss": 0.
|
| 255 |
"step": 350
|
| 256 |
},
|
| 257 |
{
|
| 258 |
"epoch": 0.02304,
|
| 259 |
-
"grad_norm":
|
| 260 |
"learning_rate": 9.190988223246289e-07,
|
| 261 |
-
"loss": 0.
|
| 262 |
"step": 360
|
| 263 |
},
|
| 264 |
{
|
| 265 |
"epoch": 0.02368,
|
| 266 |
-
"grad_norm":
|
| 267 |
"learning_rate": 9.447004608294931e-07,
|
| 268 |
-
"loss": 0.
|
| 269 |
"step": 370
|
| 270 |
},
|
| 271 |
{
|
| 272 |
"epoch": 0.02432,
|
| 273 |
-
"grad_norm":
|
| 274 |
"learning_rate": 9.703020993343575e-07,
|
| 275 |
-
"loss": 0.
|
| 276 |
"step": 380
|
| 277 |
},
|
| 278 |
{
|
| 279 |
"epoch": 0.02496,
|
| 280 |
-
"grad_norm":
|
| 281 |
"learning_rate": 9.959037378392218e-07,
|
| 282 |
-
"loss": 0.
|
| 283 |
"step": 390
|
| 284 |
},
|
| 285 |
{
|
| 286 |
"epoch": 0.0256,
|
| 287 |
-
"grad_norm":
|
| 288 |
"learning_rate": 1.021505376344086e-06,
|
| 289 |
-
"loss": 0.
|
| 290 |
"step": 400
|
| 291 |
},
|
| 292 |
{
|
| 293 |
"epoch": 0.02624,
|
| 294 |
-
"grad_norm":
|
| 295 |
"learning_rate": 1.0471070148489503e-06,
|
| 296 |
-
"loss": 0.
|
| 297 |
"step": 410
|
| 298 |
},
|
| 299 |
{
|
| 300 |
"epoch": 0.02688,
|
| 301 |
-
"grad_norm":
|
| 302 |
"learning_rate": 1.0727086533538148e-06,
|
| 303 |
-
"loss": 0.
|
| 304 |
"step": 420
|
| 305 |
},
|
| 306 |
{
|
| 307 |
"epoch": 0.02752,
|
| 308 |
-
"grad_norm":
|
| 309 |
"learning_rate": 1.0983102918586791e-06,
|
| 310 |
-
"loss": 0.
|
| 311 |
"step": 430
|
| 312 |
},
|
| 313 |
{
|
| 314 |
"epoch": 0.02816,
|
| 315 |
-
"grad_norm":
|
| 316 |
"learning_rate": 1.1239119303635434e-06,
|
| 317 |
-
"loss": 0.
|
| 318 |
"step": 440
|
| 319 |
},
|
| 320 |
{
|
| 321 |
"epoch": 0.0288,
|
| 322 |
-
"grad_norm":
|
| 323 |
"learning_rate": 1.1495135688684077e-06,
|
| 324 |
-
"loss": 0.
|
| 325 |
"step": 450
|
| 326 |
},
|
| 327 |
{
|
| 328 |
"epoch": 0.02944,
|
| 329 |
-
"grad_norm":
|
| 330 |
"learning_rate": 1.175115207373272e-06,
|
| 331 |
-
"loss": 0.
|
| 332 |
"step": 460
|
| 333 |
},
|
| 334 |
{
|
| 335 |
"epoch": 0.03008,
|
| 336 |
-
"grad_norm":
|
| 337 |
"learning_rate": 1.2007168458781362e-06,
|
| 338 |
-
"loss": 0.
|
| 339 |
"step": 470
|
| 340 |
},
|
| 341 |
{
|
| 342 |
"epoch": 0.03072,
|
| 343 |
-
"grad_norm":
|
| 344 |
"learning_rate": 1.2263184843830007e-06,
|
| 345 |
-
"loss": 0.
|
| 346 |
"step": 480
|
| 347 |
},
|
| 348 |
{
|
| 349 |
"epoch": 0.03136,
|
| 350 |
-
"grad_norm":
|
| 351 |
"learning_rate": 1.251920122887865e-06,
|
| 352 |
-
"loss": 0.
|
| 353 |
"step": 490
|
| 354 |
},
|
| 355 |
{
|
| 356 |
"epoch": 0.032,
|
| 357 |
-
"grad_norm":
|
| 358 |
"learning_rate": 1.2775217613927293e-06,
|
| 359 |
-
"loss": 0.
|
| 360 |
"step": 500
|
| 361 |
},
|
| 362 |
{
|
| 363 |
"epoch": 0.03264,
|
| 364 |
-
"grad_norm":
|
| 365 |
"learning_rate": 1.3031233998975938e-06,
|
| 366 |
-
"loss": 0.
|
| 367 |
"step": 510
|
| 368 |
},
|
| 369 |
{
|
| 370 |
"epoch": 0.03328,
|
| 371 |
-
"grad_norm":
|
| 372 |
"learning_rate": 1.3287250384024578e-06,
|
| 373 |
-
"loss": 0.
|
| 374 |
"step": 520
|
| 375 |
},
|
| 376 |
{
|
| 377 |
"epoch": 0.03392,
|
| 378 |
-
"grad_norm":
|
| 379 |
"learning_rate": 1.354326676907322e-06,
|
| 380 |
-
"loss": 0.
|
| 381 |
"step": 530
|
| 382 |
},
|
| 383 |
{
|
| 384 |
"epoch": 0.03456,
|
| 385 |
-
"grad_norm":
|
| 386 |
"learning_rate": 1.3799283154121864e-06,
|
| 387 |
-
"loss": 0.
|
| 388 |
"step": 540
|
| 389 |
},
|
| 390 |
{
|
| 391 |
"epoch": 0.0352,
|
| 392 |
-
"grad_norm":
|
| 393 |
"learning_rate": 1.4055299539170509e-06,
|
| 394 |
-
"loss": 0.
|
| 395 |
"step": 550
|
| 396 |
},
|
| 397 |
{
|
| 398 |
"epoch": 0.03584,
|
| 399 |
-
"grad_norm":
|
| 400 |
"learning_rate": 1.4311315924219151e-06,
|
| 401 |
-
"loss": 0.
|
| 402 |
"step": 560
|
| 403 |
},
|
| 404 |
{
|
| 405 |
"epoch": 0.03648,
|
| 406 |
-
"grad_norm":
|
| 407 |
"learning_rate": 1.4567332309267796e-06,
|
| 408 |
-
"loss": 0.
|
| 409 |
"step": 570
|
| 410 |
},
|
| 411 |
{
|
| 412 |
"epoch": 0.03712,
|
| 413 |
-
"grad_norm":
|
| 414 |
"learning_rate": 1.4823348694316437e-06,
|
| 415 |
-
"loss": 0.
|
| 416 |
"step": 580
|
| 417 |
},
|
| 418 |
{
|
| 419 |
"epoch": 0.03776,
|
| 420 |
-
"grad_norm":
|
| 421 |
"learning_rate": 1.507936507936508e-06,
|
| 422 |
-
"loss": 0.
|
| 423 |
"step": 590
|
| 424 |
},
|
| 425 |
{
|
| 426 |
"epoch": 0.0384,
|
| 427 |
-
"grad_norm":
|
| 428 |
"learning_rate": 1.5335381464413722e-06,
|
| 429 |
-
"loss": 0.
|
| 430 |
"step": 600
|
| 431 |
},
|
| 432 |
{
|
| 433 |
"epoch": 0.03904,
|
| 434 |
-
"grad_norm":
|
| 435 |
"learning_rate": 1.5591397849462367e-06,
|
| 436 |
-
"loss": 0.
|
| 437 |
"step": 610
|
| 438 |
},
|
| 439 |
{
|
| 440 |
"epoch": 0.03968,
|
| 441 |
-
"grad_norm":
|
| 442 |
"learning_rate": 1.584741423451101e-06,
|
| 443 |
-
"loss": 0.
|
| 444 |
"step": 620
|
| 445 |
},
|
| 446 |
{
|
| 447 |
"epoch": 0.04032,
|
| 448 |
-
"grad_norm":
|
| 449 |
"learning_rate": 1.6103430619559655e-06,
|
| 450 |
-
"loss": 0.
|
| 451 |
"step": 630
|
| 452 |
},
|
| 453 |
{
|
| 454 |
"epoch": 0.04096,
|
| 455 |
-
"grad_norm":
|
| 456 |
"learning_rate": 1.6359447004608298e-06,
|
| 457 |
-
"loss": 0.
|
| 458 |
"step": 640
|
| 459 |
},
|
| 460 |
{
|
| 461 |
"epoch": 0.0416,
|
| 462 |
-
"grad_norm":
|
| 463 |
"learning_rate": 1.6615463389656938e-06,
|
| 464 |
-
"loss": 0.
|
| 465 |
"step": 650
|
| 466 |
},
|
| 467 |
{
|
| 468 |
"epoch": 0.04224,
|
| 469 |
-
"grad_norm":
|
| 470 |
"learning_rate": 1.6871479774705581e-06,
|
| 471 |
-
"loss": 0.
|
| 472 |
"step": 660
|
| 473 |
},
|
| 474 |
{
|
| 475 |
"epoch": 0.04288,
|
| 476 |
-
"grad_norm":
|
| 477 |
"learning_rate": 1.7127496159754226e-06,
|
| 478 |
-
"loss": 0.
|
| 479 |
"step": 670
|
| 480 |
},
|
| 481 |
{
|
| 482 |
"epoch": 0.04352,
|
| 483 |
-
"grad_norm":
|
| 484 |
"learning_rate": 1.7383512544802869e-06,
|
| 485 |
-
"loss": 0.
|
| 486 |
"step": 680
|
| 487 |
},
|
| 488 |
{
|
| 489 |
"epoch": 0.04416,
|
| 490 |
-
"grad_norm":
|
| 491 |
"learning_rate": 1.7639528929851512e-06,
|
| 492 |
-
"loss": 0.
|
| 493 |
"step": 690
|
| 494 |
},
|
| 495 |
{
|
| 496 |
"epoch": 0.0448,
|
| 497 |
-
"grad_norm":
|
| 498 |
"learning_rate": 1.7895545314900157e-06,
|
| 499 |
-
"loss": 0.
|
| 500 |
"step": 700
|
| 501 |
},
|
| 502 |
{
|
| 503 |
"epoch": 0.04544,
|
| 504 |
-
"grad_norm":
|
| 505 |
"learning_rate": 1.8151561699948797e-06,
|
| 506 |
-
"loss": 0.
|
| 507 |
"step": 710
|
| 508 |
},
|
| 509 |
{
|
| 510 |
"epoch": 0.04608,
|
| 511 |
-
"grad_norm":
|
| 512 |
"learning_rate": 1.840757808499744e-06,
|
| 513 |
-
"loss": 0.
|
| 514 |
"step": 720
|
| 515 |
},
|
| 516 |
{
|
| 517 |
"epoch": 0.04672,
|
| 518 |
-
"grad_norm":
|
| 519 |
"learning_rate": 1.8663594470046085e-06,
|
| 520 |
-
"loss": 0.
|
| 521 |
"step": 730
|
| 522 |
},
|
| 523 |
{
|
| 524 |
"epoch": 0.04736,
|
| 525 |
-
"grad_norm":
|
| 526 |
"learning_rate": 1.8919610855094728e-06,
|
| 527 |
-
"loss": 0.
|
| 528 |
"step": 740
|
| 529 |
},
|
| 530 |
{
|
| 531 |
"epoch": 0.048,
|
| 532 |
-
"grad_norm":
|
| 533 |
"learning_rate": 1.9175627240143373e-06,
|
| 534 |
-
"loss": 0.
|
| 535 |
"step": 750
|
| 536 |
},
|
| 537 |
{
|
| 538 |
"epoch": 0.04864,
|
| 539 |
-
"grad_norm":
|
| 540 |
"learning_rate": 1.9431643625192015e-06,
|
| 541 |
-
"loss": 0.
|
| 542 |
"step": 760
|
| 543 |
},
|
| 544 |
{
|
| 545 |
"epoch": 0.04928,
|
| 546 |
-
"grad_norm":
|
| 547 |
"learning_rate": 1.9687660010240654e-06,
|
| 548 |
-
"loss": 0.
|
| 549 |
"step": 770
|
| 550 |
},
|
| 551 |
{
|
| 552 |
"epoch": 0.04992,
|
| 553 |
-
"grad_norm":
|
| 554 |
"learning_rate": 1.99436763952893e-06,
|
| 555 |
-
"loss": 0.
|
| 556 |
"step": 780
|
| 557 |
},
|
| 558 |
{
|
| 559 |
"epoch": 0.05056,
|
| 560 |
-
"grad_norm":
|
| 561 |
"learning_rate": 2.0199692780337944e-06,
|
| 562 |
-
"loss": 0.
|
| 563 |
"step": 790
|
| 564 |
},
|
| 565 |
{
|
| 566 |
"epoch": 0.0512,
|
| 567 |
-
"grad_norm":
|
| 568 |
"learning_rate": 2.0455709165386586e-06,
|
| 569 |
-
"loss": 0.
|
| 570 |
"step": 800
|
| 571 |
}
|
| 572 |
],
|
|
@@ -596,7 +596,7 @@
|
|
| 596 |
"attributes": {}
|
| 597 |
}
|
| 598 |
},
|
| 599 |
-
"total_flos":
|
| 600 |
"train_batch_size": 32,
|
| 601 |
"trial_name": null,
|
| 602 |
"trial_params": null
|
|
|
|
| 151 |
},
|
| 152 |
{
|
| 153 |
"epoch": 0.01344,
|
| 154 |
+
"grad_norm": 144219.625,
|
| 155 |
"learning_rate": 5.350742447516642e-07,
|
| 156 |
+
"loss": 0.7218,
|
| 157 |
"step": 210
|
| 158 |
},
|
| 159 |
{
|
| 160 |
"epoch": 0.01408,
|
| 161 |
+
"grad_norm": 105046.0234375,
|
| 162 |
"learning_rate": 5.606758832565284e-07,
|
| 163 |
+
"loss": 0.718,
|
| 164 |
"step": 220
|
| 165 |
},
|
| 166 |
{
|
| 167 |
"epoch": 0.01472,
|
| 168 |
+
"grad_norm": 126142.4296875,
|
| 169 |
"learning_rate": 5.862775217613928e-07,
|
| 170 |
+
"loss": 0.7107,
|
| 171 |
"step": 230
|
| 172 |
},
|
| 173 |
{
|
| 174 |
"epoch": 0.01536,
|
| 175 |
+
"grad_norm": 92423.2265625,
|
| 176 |
"learning_rate": 6.118791602662571e-07,
|
| 177 |
+
"loss": 0.7271,
|
| 178 |
"step": 240
|
| 179 |
},
|
| 180 |
{
|
| 181 |
"epoch": 0.016,
|
| 182 |
+
"grad_norm": 98091.828125,
|
| 183 |
"learning_rate": 6.374807987711214e-07,
|
| 184 |
+
"loss": 0.7123,
|
| 185 |
"step": 250
|
| 186 |
},
|
| 187 |
{
|
| 188 |
"epoch": 0.01664,
|
| 189 |
+
"grad_norm": 131949.578125,
|
| 190 |
"learning_rate": 6.630824372759858e-07,
|
| 191 |
+
"loss": 0.7204,
|
| 192 |
"step": 260
|
| 193 |
},
|
| 194 |
{
|
| 195 |
"epoch": 0.01728,
|
| 196 |
+
"grad_norm": 112228.5625,
|
| 197 |
"learning_rate": 6.8868407578085e-07,
|
| 198 |
+
"loss": 0.722,
|
| 199 |
"step": 270
|
| 200 |
},
|
| 201 |
{
|
| 202 |
"epoch": 0.01792,
|
| 203 |
+
"grad_norm": 64587.734375,
|
| 204 |
"learning_rate": 7.142857142857143e-07,
|
| 205 |
+
"loss": 0.7263,
|
| 206 |
"step": 280
|
| 207 |
},
|
| 208 |
{
|
| 209 |
"epoch": 0.01856,
|
| 210 |
+
"grad_norm": 99893.203125,
|
| 211 |
"learning_rate": 7.398873527905787e-07,
|
| 212 |
+
"loss": 0.7169,
|
| 213 |
"step": 290
|
| 214 |
},
|
| 215 |
{
|
| 216 |
"epoch": 0.0192,
|
| 217 |
+
"grad_norm": 135749.875,
|
| 218 |
"learning_rate": 7.65488991295443e-07,
|
| 219 |
"loss": 0.7122,
|
| 220 |
"step": 300
|
| 221 |
},
|
| 222 |
{
|
| 223 |
"epoch": 0.01984,
|
| 224 |
+
"grad_norm": 103292.5703125,
|
| 225 |
"learning_rate": 7.910906298003073e-07,
|
| 226 |
+
"loss": 0.7183,
|
| 227 |
"step": 310
|
| 228 |
},
|
| 229 |
{
|
| 230 |
"epoch": 0.02048,
|
| 231 |
+
"grad_norm": 86927.28125,
|
| 232 |
"learning_rate": 8.166922683051716e-07,
|
| 233 |
+
"loss": 0.7192,
|
| 234 |
"step": 320
|
| 235 |
},
|
| 236 |
{
|
| 237 |
"epoch": 0.02112,
|
| 238 |
+
"grad_norm": 153738.390625,
|
| 239 |
"learning_rate": 8.422939068100359e-07,
|
| 240 |
+
"loss": 0.711,
|
| 241 |
"step": 330
|
| 242 |
},
|
| 243 |
{
|
| 244 |
"epoch": 0.02176,
|
| 245 |
+
"grad_norm": 69994.7734375,
|
| 246 |
"learning_rate": 8.678955453149002e-07,
|
| 247 |
+
"loss": 0.7176,
|
| 248 |
"step": 340
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"epoch": 0.0224,
|
| 252 |
+
"grad_norm": 141370.6875,
|
| 253 |
"learning_rate": 8.934971838197646e-07,
|
| 254 |
+
"loss": 0.7105,
|
| 255 |
"step": 350
|
| 256 |
},
|
| 257 |
{
|
| 258 |
"epoch": 0.02304,
|
| 259 |
+
"grad_norm": 71139.453125,
|
| 260 |
"learning_rate": 9.190988223246289e-07,
|
| 261 |
+
"loss": 0.7126,
|
| 262 |
"step": 360
|
| 263 |
},
|
| 264 |
{
|
| 265 |
"epoch": 0.02368,
|
| 266 |
+
"grad_norm": 82039.1953125,
|
| 267 |
"learning_rate": 9.447004608294931e-07,
|
| 268 |
+
"loss": 0.7078,
|
| 269 |
"step": 370
|
| 270 |
},
|
| 271 |
{
|
| 272 |
"epoch": 0.02432,
|
| 273 |
+
"grad_norm": 71275.7890625,
|
| 274 |
"learning_rate": 9.703020993343575e-07,
|
| 275 |
+
"loss": 0.7145,
|
| 276 |
"step": 380
|
| 277 |
},
|
| 278 |
{
|
| 279 |
"epoch": 0.02496,
|
| 280 |
+
"grad_norm": 145801.21875,
|
| 281 |
"learning_rate": 9.959037378392218e-07,
|
| 282 |
+
"loss": 0.7102,
|
| 283 |
"step": 390
|
| 284 |
},
|
| 285 |
{
|
| 286 |
"epoch": 0.0256,
|
| 287 |
+
"grad_norm": 171507.0,
|
| 288 |
"learning_rate": 1.021505376344086e-06,
|
| 289 |
+
"loss": 0.7123,
|
| 290 |
"step": 400
|
| 291 |
},
|
| 292 |
{
|
| 293 |
"epoch": 0.02624,
|
| 294 |
+
"grad_norm": 79134.203125,
|
| 295 |
"learning_rate": 1.0471070148489503e-06,
|
| 296 |
+
"loss": 0.7083,
|
| 297 |
"step": 410
|
| 298 |
},
|
| 299 |
{
|
| 300 |
"epoch": 0.02688,
|
| 301 |
+
"grad_norm": 69231.640625,
|
| 302 |
"learning_rate": 1.0727086533538148e-06,
|
| 303 |
+
"loss": 0.7105,
|
| 304 |
"step": 420
|
| 305 |
},
|
| 306 |
{
|
| 307 |
"epoch": 0.02752,
|
| 308 |
+
"grad_norm": 113099.3984375,
|
| 309 |
"learning_rate": 1.0983102918586791e-06,
|
| 310 |
+
"loss": 0.7141,
|
| 311 |
"step": 430
|
| 312 |
},
|
| 313 |
{
|
| 314 |
"epoch": 0.02816,
|
| 315 |
+
"grad_norm": 121013.734375,
|
| 316 |
"learning_rate": 1.1239119303635434e-06,
|
| 317 |
+
"loss": 0.7146,
|
| 318 |
"step": 440
|
| 319 |
},
|
| 320 |
{
|
| 321 |
"epoch": 0.0288,
|
| 322 |
+
"grad_norm": 89184.609375,
|
| 323 |
"learning_rate": 1.1495135688684077e-06,
|
| 324 |
+
"loss": 0.7133,
|
| 325 |
"step": 450
|
| 326 |
},
|
| 327 |
{
|
| 328 |
"epoch": 0.02944,
|
| 329 |
+
"grad_norm": 176246.890625,
|
| 330 |
"learning_rate": 1.175115207373272e-06,
|
| 331 |
+
"loss": 0.7086,
|
| 332 |
"step": 460
|
| 333 |
},
|
| 334 |
{
|
| 335 |
"epoch": 0.03008,
|
| 336 |
+
"grad_norm": 88161.2265625,
|
| 337 |
"learning_rate": 1.2007168458781362e-06,
|
| 338 |
+
"loss": 0.709,
|
| 339 |
"step": 470
|
| 340 |
},
|
| 341 |
{
|
| 342 |
"epoch": 0.03072,
|
| 343 |
+
"grad_norm": 74441.015625,
|
| 344 |
"learning_rate": 1.2263184843830007e-06,
|
| 345 |
+
"loss": 0.7023,
|
| 346 |
"step": 480
|
| 347 |
},
|
| 348 |
{
|
| 349 |
"epoch": 0.03136,
|
| 350 |
+
"grad_norm": 96409.40625,
|
| 351 |
"learning_rate": 1.251920122887865e-06,
|
| 352 |
+
"loss": 0.715,
|
| 353 |
"step": 490
|
| 354 |
},
|
| 355 |
{
|
| 356 |
"epoch": 0.032,
|
| 357 |
+
"grad_norm": 81090.6484375,
|
| 358 |
"learning_rate": 1.2775217613927293e-06,
|
| 359 |
+
"loss": 0.7109,
|
| 360 |
"step": 500
|
| 361 |
},
|
| 362 |
{
|
| 363 |
"epoch": 0.03264,
|
| 364 |
+
"grad_norm": 98153.8828125,
|
| 365 |
"learning_rate": 1.3031233998975938e-06,
|
| 366 |
+
"loss": 0.7092,
|
| 367 |
"step": 510
|
| 368 |
},
|
| 369 |
{
|
| 370 |
"epoch": 0.03328,
|
| 371 |
+
"grad_norm": 78782.546875,
|
| 372 |
"learning_rate": 1.3287250384024578e-06,
|
| 373 |
+
"loss": 0.7048,
|
| 374 |
"step": 520
|
| 375 |
},
|
| 376 |
{
|
| 377 |
"epoch": 0.03392,
|
| 378 |
+
"grad_norm": 110360.5,
|
| 379 |
"learning_rate": 1.354326676907322e-06,
|
| 380 |
+
"loss": 0.7108,
|
| 381 |
"step": 530
|
| 382 |
},
|
| 383 |
{
|
| 384 |
"epoch": 0.03456,
|
| 385 |
+
"grad_norm": 88462.0703125,
|
| 386 |
"learning_rate": 1.3799283154121864e-06,
|
| 387 |
+
"loss": 0.7041,
|
| 388 |
"step": 540
|
| 389 |
},
|
| 390 |
{
|
| 391 |
"epoch": 0.0352,
|
| 392 |
+
"grad_norm": 97624.7421875,
|
| 393 |
"learning_rate": 1.4055299539170509e-06,
|
| 394 |
+
"loss": 0.7114,
|
| 395 |
"step": 550
|
| 396 |
},
|
| 397 |
{
|
| 398 |
"epoch": 0.03584,
|
| 399 |
+
"grad_norm": 99471.4375,
|
| 400 |
"learning_rate": 1.4311315924219151e-06,
|
| 401 |
+
"loss": 0.7191,
|
| 402 |
"step": 560
|
| 403 |
},
|
| 404 |
{
|
| 405 |
"epoch": 0.03648,
|
| 406 |
+
"grad_norm": 79087.90625,
|
| 407 |
"learning_rate": 1.4567332309267796e-06,
|
| 408 |
+
"loss": 0.7022,
|
| 409 |
"step": 570
|
| 410 |
},
|
| 411 |
{
|
| 412 |
"epoch": 0.03712,
|
| 413 |
+
"grad_norm": 65275.0,
|
| 414 |
"learning_rate": 1.4823348694316437e-06,
|
| 415 |
+
"loss": 0.7088,
|
| 416 |
"step": 580
|
| 417 |
},
|
| 418 |
{
|
| 419 |
"epoch": 0.03776,
|
| 420 |
+
"grad_norm": 153826.28125,
|
| 421 |
"learning_rate": 1.507936507936508e-06,
|
| 422 |
+
"loss": 0.7079,
|
| 423 |
"step": 590
|
| 424 |
},
|
| 425 |
{
|
| 426 |
"epoch": 0.0384,
|
| 427 |
+
"grad_norm": 64280.38671875,
|
| 428 |
"learning_rate": 1.5335381464413722e-06,
|
| 429 |
+
"loss": 0.7018,
|
| 430 |
"step": 600
|
| 431 |
},
|
| 432 |
{
|
| 433 |
"epoch": 0.03904,
|
| 434 |
+
"grad_norm": 65060.80078125,
|
| 435 |
"learning_rate": 1.5591397849462367e-06,
|
| 436 |
+
"loss": 0.7027,
|
| 437 |
"step": 610
|
| 438 |
},
|
| 439 |
{
|
| 440 |
"epoch": 0.03968,
|
| 441 |
+
"grad_norm": 77339.2890625,
|
| 442 |
"learning_rate": 1.584741423451101e-06,
|
| 443 |
+
"loss": 0.7038,
|
| 444 |
"step": 620
|
| 445 |
},
|
| 446 |
{
|
| 447 |
"epoch": 0.04032,
|
| 448 |
+
"grad_norm": 123140.5546875,
|
| 449 |
"learning_rate": 1.6103430619559655e-06,
|
| 450 |
+
"loss": 0.7019,
|
| 451 |
"step": 630
|
| 452 |
},
|
| 453 |
{
|
| 454 |
"epoch": 0.04096,
|
| 455 |
+
"grad_norm": 67502.71875,
|
| 456 |
"learning_rate": 1.6359447004608298e-06,
|
| 457 |
+
"loss": 0.7094,
|
| 458 |
"step": 640
|
| 459 |
},
|
| 460 |
{
|
| 461 |
"epoch": 0.0416,
|
| 462 |
+
"grad_norm": 95452.1796875,
|
| 463 |
"learning_rate": 1.6615463389656938e-06,
|
| 464 |
+
"loss": 0.6998,
|
| 465 |
"step": 650
|
| 466 |
},
|
| 467 |
{
|
| 468 |
"epoch": 0.04224,
|
| 469 |
+
"grad_norm": 68556.421875,
|
| 470 |
"learning_rate": 1.6871479774705581e-06,
|
| 471 |
+
"loss": 0.694,
|
| 472 |
"step": 660
|
| 473 |
},
|
| 474 |
{
|
| 475 |
"epoch": 0.04288,
|
| 476 |
+
"grad_norm": 78265.8046875,
|
| 477 |
"learning_rate": 1.7127496159754226e-06,
|
| 478 |
+
"loss": 0.7051,
|
| 479 |
"step": 670
|
| 480 |
},
|
| 481 |
{
|
| 482 |
"epoch": 0.04352,
|
| 483 |
+
"grad_norm": 93559.3359375,
|
| 484 |
"learning_rate": 1.7383512544802869e-06,
|
| 485 |
+
"loss": 0.6997,
|
| 486 |
"step": 680
|
| 487 |
},
|
| 488 |
{
|
| 489 |
"epoch": 0.04416,
|
| 490 |
+
"grad_norm": 88091.9375,
|
| 491 |
"learning_rate": 1.7639528929851512e-06,
|
| 492 |
+
"loss": 0.6963,
|
| 493 |
"step": 690
|
| 494 |
},
|
| 495 |
{
|
| 496 |
"epoch": 0.0448,
|
| 497 |
+
"grad_norm": 73024.359375,
|
| 498 |
"learning_rate": 1.7895545314900157e-06,
|
| 499 |
+
"loss": 0.7021,
|
| 500 |
"step": 700
|
| 501 |
},
|
| 502 |
{
|
| 503 |
"epoch": 0.04544,
|
| 504 |
+
"grad_norm": 100058.2890625,
|
| 505 |
"learning_rate": 1.8151561699948797e-06,
|
| 506 |
+
"loss": 0.7022,
|
| 507 |
"step": 710
|
| 508 |
},
|
| 509 |
{
|
| 510 |
"epoch": 0.04608,
|
| 511 |
+
"grad_norm": 99197.1953125,
|
| 512 |
"learning_rate": 1.840757808499744e-06,
|
| 513 |
+
"loss": 0.7017,
|
| 514 |
"step": 720
|
| 515 |
},
|
| 516 |
{
|
| 517 |
"epoch": 0.04672,
|
| 518 |
+
"grad_norm": 102018.984375,
|
| 519 |
"learning_rate": 1.8663594470046085e-06,
|
| 520 |
+
"loss": 0.6985,
|
| 521 |
"step": 730
|
| 522 |
},
|
| 523 |
{
|
| 524 |
"epoch": 0.04736,
|
| 525 |
+
"grad_norm": 101586.0234375,
|
| 526 |
"learning_rate": 1.8919610855094728e-06,
|
| 527 |
+
"loss": 0.6991,
|
| 528 |
"step": 740
|
| 529 |
},
|
| 530 |
{
|
| 531 |
"epoch": 0.048,
|
| 532 |
+
"grad_norm": 151948.25,
|
| 533 |
"learning_rate": 1.9175627240143373e-06,
|
| 534 |
+
"loss": 0.6977,
|
| 535 |
"step": 750
|
| 536 |
},
|
| 537 |
{
|
| 538 |
"epoch": 0.04864,
|
| 539 |
+
"grad_norm": 88698.7109375,
|
| 540 |
"learning_rate": 1.9431643625192015e-06,
|
| 541 |
+
"loss": 0.6961,
|
| 542 |
"step": 760
|
| 543 |
},
|
| 544 |
{
|
| 545 |
"epoch": 0.04928,
|
| 546 |
+
"grad_norm": 82451.9296875,
|
| 547 |
"learning_rate": 1.9687660010240654e-06,
|
| 548 |
+
"loss": 0.6898,
|
| 549 |
"step": 770
|
| 550 |
},
|
| 551 |
{
|
| 552 |
"epoch": 0.04992,
|
| 553 |
+
"grad_norm": 82236.453125,
|
| 554 |
"learning_rate": 1.99436763952893e-06,
|
| 555 |
+
"loss": 0.6886,
|
| 556 |
"step": 780
|
| 557 |
},
|
| 558 |
{
|
| 559 |
"epoch": 0.05056,
|
| 560 |
+
"grad_norm": 155064.484375,
|
| 561 |
"learning_rate": 2.0199692780337944e-06,
|
| 562 |
+
"loss": 0.6921,
|
| 563 |
"step": 790
|
| 564 |
},
|
| 565 |
{
|
| 566 |
"epoch": 0.0512,
|
| 567 |
+
"grad_norm": 72238.6328125,
|
| 568 |
"learning_rate": 2.0455709165386586e-06,
|
| 569 |
+
"loss": 0.6932,
|
| 570 |
"step": 800
|
| 571 |
}
|
| 572 |
],
|
|
|
|
| 596 |
"attributes": {}
|
| 597 |
}
|
| 598 |
},
|
| 599 |
+
"total_flos": 6733455906568320.0,
|
| 600 |
"train_batch_size": 32,
|
| 601 |
"trial_name": null,
|
| 602 |
"trial_params": null
|
graphcodebert-robust/checkpoint-800/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5841
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:82e524f8f7de87947806acfb17c136195f8d3668b26513da260f1a2f14442156
|
| 3 |
size 5841
|
graphcodebert-robust/training.log
CHANGED
|
@@ -1,10 +1,34 @@
|
|
| 1 |
-
2026-04-
|
| 2 |
-
2026-04-
|
| 3 |
-
2026-04-
|
| 4 |
-
2026-04-
|
| 5 |
-
2026-04-
|
| 6 |
-
2026-04-
|
| 7 |
-
2026-04-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
RobertaForSequenceClassification(
|
| 9 |
(roberta): RobertaModel(
|
| 10 |
(embeddings): RobertaEmbeddings(
|
|
@@ -12,7 +36,7 @@ RobertaForSequenceClassification(
|
|
| 12 |
(position_embeddings): Embedding(514, 768, padding_idx=1)
|
| 13 |
(token_type_embeddings): Embedding(1, 768)
|
| 14 |
(LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 15 |
-
(dropout): Dropout(p=0.
|
| 16 |
)
|
| 17 |
(encoder): RobertaEncoder(
|
| 18 |
(layer): ModuleList(
|
|
@@ -22,12 +46,12 @@ RobertaForSequenceClassification(
|
|
| 22 |
(query): Linear(in_features=768, out_features=768, bias=True)
|
| 23 |
(key): Linear(in_features=768, out_features=768, bias=True)
|
| 24 |
(value): Linear(in_features=768, out_features=768, bias=True)
|
| 25 |
-
(dropout): Dropout(p=0.
|
| 26 |
)
|
| 27 |
(output): RobertaSelfOutput(
|
| 28 |
(dense): Linear(in_features=768, out_features=768, bias=True)
|
| 29 |
(LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 30 |
-
(dropout): Dropout(p=0.
|
| 31 |
)
|
| 32 |
)
|
| 33 |
(intermediate): RobertaIntermediate(
|
|
@@ -37,7 +61,7 @@ RobertaForSequenceClassification(
|
|
| 37 |
(output): RobertaOutput(
|
| 38 |
(dense): Linear(in_features=3072, out_features=768, bias=True)
|
| 39 |
(LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 40 |
-
(dropout): Dropout(p=0.
|
| 41 |
)
|
| 42 |
)
|
| 43 |
)
|
|
@@ -45,16 +69,16 @@ RobertaForSequenceClassification(
|
|
| 45 |
)
|
| 46 |
(classifier): RobertaClassificationHead(
|
| 47 |
(dense): Linear(in_features=768, out_features=768, bias=True)
|
| 48 |
-
(dropout): Dropout(p=0.
|
| 49 |
(out_proj): Linear(in_features=768, out_features=2, bias=True)
|
| 50 |
)
|
| 51 |
)
|
| 52 |
-
2026-04-
|
| 53 |
-
2026-04-
|
| 54 |
-
2026-04-
|
| 55 |
-
2026-04-
|
| 56 |
-
2026-04-
|
| 57 |
-
2026-04-
|
| 58 |
-
2026-04-
|
| 59 |
-
2026-04-
|
| 60 |
-
2026-04-
|
|
|
|
| 1 |
+
2026-04-17 08:00:34,522 - INFO - train_pipeline - Logging to ./output_checkpoints/graphcodebert-robust/training.log
|
| 2 |
+
2026-04-17 08:00:34,525 - INFO - train_pipeline - ===== Training Configuration =====
|
| 3 |
+
2026-04-17 08:00:34,526 - INFO - train_pipeline - model_name : microsoft/graphcodebert-base
|
| 4 |
+
2026-04-17 08:00:34,528 - INFO - train_pipeline - output_dir : ./output_checkpoints/graphcodebert-robust
|
| 5 |
+
2026-04-17 08:00:34,529 - INFO - train_pipeline - num_epochs : 5
|
| 6 |
+
2026-04-17 08:00:34,531 - INFO - train_pipeline - batch_size : 32
|
| 7 |
+
2026-04-17 08:00:34,533 - INFO - train_pipeline - learning_rate : 2e-05
|
| 8 |
+
2026-04-17 08:00:34,535 - INFO - train_pipeline - max_length : 512
|
| 9 |
+
2026-04-17 08:00:34,536 - INFO - train_pipeline - num_labels : 2
|
| 10 |
+
2026-04-17 08:00:34,538 - INFO - train_pipeline - use_wandb : True
|
| 11 |
+
2026-04-17 08:00:34,540 - INFO - train_pipeline - freeze_base : True
|
| 12 |
+
2026-04-17 08:00:34,541 - INFO - train_pipeline - loss_type : r-drop
|
| 13 |
+
2026-04-17 08:00:34,542 - INFO - train_pipeline - focal_alpha : 1.0
|
| 14 |
+
2026-04-17 08:00:34,544 - INFO - train_pipeline - focal_gamma : 2.0
|
| 15 |
+
2026-04-17 08:00:34,545 - INFO - train_pipeline - r_drop_alpha : 4.0
|
| 16 |
+
2026-04-17 08:00:34,546 - INFO - train_pipeline - infonce_temperature : 0.07
|
| 17 |
+
2026-04-17 08:00:34,548 - INFO - train_pipeline - infonce_weight : 0.5
|
| 18 |
+
2026-04-17 08:00:34,550 - INFO - train_pipeline - seed : 42
|
| 19 |
+
2026-04-17 08:00:34,552 - INFO - train_pipeline - resume_from_checkpoint : None
|
| 20 |
+
2026-04-17 08:00:34,553 - INFO - train_pipeline - label_smoothing : 0.1
|
| 21 |
+
2026-04-17 08:00:34,554 - INFO - train_pipeline - adversarial_epsilon : 0.5
|
| 22 |
+
2026-04-17 08:00:34,556 - INFO - train_pipeline - use_swa : True
|
| 23 |
+
2026-04-17 08:00:34,557 - INFO - train_pipeline - swa_start_epoch : 2
|
| 24 |
+
2026-04-17 08:00:34,558 - INFO - train_pipeline - swa_lr : 1e-05
|
| 25 |
+
2026-04-17 08:00:34,559 - INFO - train_pipeline - data_augmentation : True
|
| 26 |
+
2026-04-17 08:00:34,561 - INFO - train_pipeline - aug_rename_prob : 0.3
|
| 27 |
+
2026-04-17 08:00:34,562 - INFO - train_pipeline - aug_format_prob : 0.3
|
| 28 |
+
2026-04-17 08:00:34,564 - INFO - train_pipeline - =================================
|
| 29 |
+
2026-04-17 08:00:35,711 - INFO - train_pipeline - Model placed on cuda
|
| 30 |
+
2026-04-17 08:00:35,716 - INFO - train_pipeline - ===== Model Architecture =====
|
| 31 |
+
2026-04-17 08:00:35,718 - INFO - train_pipeline -
|
| 32 |
RobertaForSequenceClassification(
|
| 33 |
(roberta): RobertaModel(
|
| 34 |
(embeddings): RobertaEmbeddings(
|
|
|
|
| 36 |
(position_embeddings): Embedding(514, 768, padding_idx=1)
|
| 37 |
(token_type_embeddings): Embedding(1, 768)
|
| 38 |
(LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 39 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
| 40 |
)
|
| 41 |
(encoder): RobertaEncoder(
|
| 42 |
(layer): ModuleList(
|
|
|
|
| 46 |
(query): Linear(in_features=768, out_features=768, bias=True)
|
| 47 |
(key): Linear(in_features=768, out_features=768, bias=True)
|
| 48 |
(value): Linear(in_features=768, out_features=768, bias=True)
|
| 49 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
| 50 |
)
|
| 51 |
(output): RobertaSelfOutput(
|
| 52 |
(dense): Linear(in_features=768, out_features=768, bias=True)
|
| 53 |
(LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 54 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
| 55 |
)
|
| 56 |
)
|
| 57 |
(intermediate): RobertaIntermediate(
|
|
|
|
| 61 |
(output): RobertaOutput(
|
| 62 |
(dense): Linear(in_features=3072, out_features=768, bias=True)
|
| 63 |
(LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 64 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
| 65 |
)
|
| 66 |
)
|
| 67 |
)
|
|
|
|
| 69 |
)
|
| 70 |
(classifier): RobertaClassificationHead(
|
| 71 |
(dense): Linear(in_features=768, out_features=768, bias=True)
|
| 72 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
| 73 |
(out_proj): Linear(in_features=768, out_features=2, bias=True)
|
| 74 |
)
|
| 75 |
)
|
| 76 |
+
2026-04-17 08:00:35,722 - INFO - train_pipeline - ===== Parameter Summary =====
|
| 77 |
+
2026-04-17 08:00:35,723 - INFO - train_pipeline - Total Parameters: 124,647,170
|
| 78 |
+
2026-04-17 08:00:35,724 - INFO - train_pipeline - Trainable Parameters: 592,130
|
| 79 |
+
2026-04-17 08:00:35,725 - INFO - train_pipeline - Non-trainable Parameters: 124,055,040
|
| 80 |
+
2026-04-17 08:00:35,727 - INFO - train_pipeline - ===== Tokenizer Summary =====
|
| 81 |
+
2026-04-17 08:00:35,747 - INFO - train_pipeline - Vocab size: 50265 | Special tokens: ['<s>', '</s>', '<unk>', '<pad>', '<mask>']
|
| 82 |
+
2026-04-17 08:00:35,749 - INFO - train_pipeline - ===== End of Architecture Log =====
|
| 83 |
+
2026-04-17 08:00:35,751 - INFO - train_pipeline - Data augmentation enabled (rename=0.3, format=0.3)
|
| 84 |
+
2026-04-17 08:00:36,645 - INFO - train_pipeline - === Starting training with robust regularisation ===
|