dzungpham committed on
Commit
90ab2a3
·
verified ·
1 Parent(s): 647ce03

upload checkpoints

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. graphcodebert-vanilla/checkpoint-100/config.json +29 -0
  2. graphcodebert-vanilla/checkpoint-100/config_hyperparams.json +53 -0
  3. graphcodebert-vanilla/checkpoint-100/hyperparams.json +39 -0
  4. graphcodebert-vanilla/checkpoint-100/merges.txt +0 -0
  5. graphcodebert-vanilla/checkpoint-100/model.safetensors +3 -0
  6. graphcodebert-vanilla/checkpoint-100/optimizer.pt +3 -0
  7. graphcodebert-vanilla/checkpoint-100/rng_state.pth +3 -0
  8. graphcodebert-vanilla/checkpoint-100/scaler.pt +3 -0
  9. graphcodebert-vanilla/checkpoint-100/scheduler.pt +3 -0
  10. graphcodebert-vanilla/checkpoint-100/special_tokens_map.json +51 -0
  11. graphcodebert-vanilla/checkpoint-100/tokenizer.json +0 -0
  12. graphcodebert-vanilla/checkpoint-100/tokenizer_config.json +58 -0
  13. graphcodebert-vanilla/checkpoint-100/trainer_state.json +207 -0
  14. graphcodebert-vanilla/checkpoint-100/training_args.bin +3 -0
  15. graphcodebert-vanilla/checkpoint-100/vocab.json +0 -0
  16. graphcodebert-vanilla/checkpoint-200/config.json +29 -0
  17. graphcodebert-vanilla/checkpoint-200/config_hyperparams.json +53 -0
  18. graphcodebert-vanilla/checkpoint-200/hyperparams.json +39 -0
  19. graphcodebert-vanilla/checkpoint-200/merges.txt +0 -0
  20. graphcodebert-vanilla/checkpoint-200/model.safetensors +3 -0
  21. graphcodebert-vanilla/checkpoint-200/optimizer.pt +3 -0
  22. graphcodebert-vanilla/checkpoint-200/rng_state.pth +3 -0
  23. graphcodebert-vanilla/checkpoint-200/scaler.pt +3 -0
  24. graphcodebert-vanilla/checkpoint-200/scheduler.pt +3 -0
  25. graphcodebert-vanilla/checkpoint-200/special_tokens_map.json +51 -0
  26. graphcodebert-vanilla/checkpoint-200/tokenizer.json +0 -0
  27. graphcodebert-vanilla/checkpoint-200/tokenizer_config.json +58 -0
  28. graphcodebert-vanilla/checkpoint-200/trainer_state.json +371 -0
  29. graphcodebert-vanilla/checkpoint-200/training_args.bin +3 -0
  30. graphcodebert-vanilla/checkpoint-200/vocab.json +0 -0
  31. graphcodebert-vanilla/checkpoint-300/config.json +29 -0
  32. graphcodebert-vanilla/checkpoint-300/config_hyperparams.json +53 -0
  33. graphcodebert-vanilla/checkpoint-300/hyperparams.json +39 -0
  34. graphcodebert-vanilla/checkpoint-300/merges.txt +0 -0
  35. graphcodebert-vanilla/checkpoint-300/model.safetensors +3 -0
  36. graphcodebert-vanilla/checkpoint-300/optimizer.pt +3 -0
  37. graphcodebert-vanilla/checkpoint-300/rng_state.pth +3 -0
  38. graphcodebert-vanilla/checkpoint-300/scaler.pt +3 -0
  39. graphcodebert-vanilla/checkpoint-300/scheduler.pt +3 -0
  40. graphcodebert-vanilla/checkpoint-300/special_tokens_map.json +51 -0
  41. graphcodebert-vanilla/checkpoint-300/tokenizer.json +0 -0
  42. graphcodebert-vanilla/checkpoint-300/tokenizer_config.json +58 -0
  43. graphcodebert-vanilla/checkpoint-300/trainer_state.json +535 -0
  44. graphcodebert-vanilla/checkpoint-300/training_args.bin +3 -0
  45. graphcodebert-vanilla/checkpoint-300/vocab.json +0 -0
  46. graphcodebert-vanilla/final_model/config.json +29 -0
  47. graphcodebert-vanilla/final_model/merges.txt +0 -0
  48. graphcodebert-vanilla/final_model/model.safetensors +3 -0
  49. graphcodebert-vanilla/final_model/special_tokens_map.json +51 -0
  50. graphcodebert-vanilla/final_model/tokenizer.json +0 -0
graphcodebert-vanilla/checkpoint-100/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "RobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.3,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": 0.3,
8
+ "dtype": "float32",
9
+ "eos_token_id": 2,
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.3,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_norm_eps": 1e-05,
17
+ "max_position_embeddings": 514,
18
+ "model_type": "roberta",
19
+ "num_attention_heads": 12,
20
+ "num_hidden_layers": 12,
21
+ "output_past": true,
22
+ "pad_token_id": 1,
23
+ "position_embedding_type": "absolute",
24
+ "problem_type": "single_label_classification",
25
+ "transformers_version": "4.56.0",
26
+ "type_vocab_size": 1,
27
+ "use_cache": true,
28
+ "vocab_size": 50265
29
+ }
graphcodebert-vanilla/checkpoint-100/config_hyperparams.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train_config": {
3
+ "model_name": "microsoft/graphcodebert-base",
4
+ "num_epochs": 3,
5
+ "batch_size": 256,
6
+ "learning_rate": 2e-05,
7
+ "max_length": 512,
8
+ "num_labels": 2,
9
+ "loss_type": "ce",
10
+ "focal_alpha": 1.0,
11
+ "focal_gamma": 2.0,
12
+ "r_drop_alpha": 6.0,
13
+ "infonce_temperature": 0.07,
14
+ "infonce_weight": 0.5,
15
+ "label_smoothing": 0,
16
+ "adversarial_epsilon": 0,
17
+ "use_swa": false,
18
+ "swa_start_epoch": 0,
19
+ "swa_lr": 1e-05,
20
+ "data_augmentation": false,
21
+ "aug_rename_prob": 0.0,
22
+ "aug_format_prob": 0.0,
23
+ "freeze_base": true,
24
+ "seed": 42,
25
+ "use_wandb": true,
26
+ "mixup_alpha": 0.0,
27
+ "low_pass_keep_ratio": 0.5,
28
+ "freq_consistency_weight": 0.0
29
+ },
30
+ "training_arguments": {
31
+ "output_dir": "output_checkpoints/graphcodebert-vanilla/",
32
+ "num_train_epochs": 3,
33
+ "per_device_train_batch_size": 256,
34
+ "per_device_eval_batch_size": 512,
35
+ "learning_rate": 2e-05,
36
+ "warmup_steps": 612,
37
+ "weight_decay": 0.1,
38
+ "logging_steps": 5,
39
+ "eval_steps": 50,
40
+ "save_steps": 100,
41
+ "metric_for_best_model": "macro_f1",
42
+ "greater_is_better": true,
43
+ "save_total_limit": 5,
44
+ "fp16": true,
45
+ "seed": 42
46
+ },
47
+ "training_state": {
48
+ "global_step": 100,
49
+ "epoch": 0.09784735812133072,
50
+ "best_metric": 0.5260673147136775,
51
+ "best_model_checkpoint": "output_checkpoints/graphcodebert-vanilla/checkpoint-100"
52
+ }
53
+ }
graphcodebert-vanilla/checkpoint-100/hyperparams.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "microsoft/graphcodebert-base",
3
+ "output_dir": "output_checkpoints/graphcodebert-vanilla/",
4
+ "num_epochs": 3,
5
+ "max_steps": -1,
6
+ "batch_size": 256,
7
+ "learning_rate": 2e-05,
8
+ "max_length": 512,
9
+ "num_labels": 2,
10
+ "use_wandb": true,
11
+ "freeze_base": true,
12
+ "loss_type": "ce",
13
+ "focal_alpha": 1.0,
14
+ "focal_gamma": 2.0,
15
+ "r_drop_alpha": 6.0,
16
+ "infonce_temperature": 0.07,
17
+ "infonce_weight": 0.5,
18
+ "seed": 42,
19
+ "wandb_run_name": "graphcodebert-vanilla",
20
+ "resume_from_checkpoint": null,
21
+ "save_steps": 100,
22
+ "eval_steps": 50,
23
+ "logging_steps": 5,
24
+ "label_smoothing": 0,
25
+ "adversarial_epsilon": 0,
26
+ "use_swa": false,
27
+ "swa_start_epoch": 0,
28
+ "swa_lr": 1e-05,
29
+ "data_augmentation": false,
30
+ "aug_rename_prob": 0.0,
31
+ "aug_format_prob": 0.0,
32
+ "mixup_alpha": 0.0,
33
+ "low_pass_keep_ratio": 0.5,
34
+ "freq_consistency_weight": 0.0,
35
+ "hidden_dropout_prob": 0.3,
36
+ "attention_probs_dropout_prob": 0.3,
37
+ "classifier_dropout": 0.3,
38
+ "device": "cuda"
39
+ }
graphcodebert-vanilla/checkpoint-100/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-vanilla/checkpoint-100/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da3f281851fd71b6943f4e6fc58ef17ba54c6d167a319cabac7deab1eafcd599
3
+ size 498612824
graphcodebert-vanilla/checkpoint-100/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54f9e03da26cb2cf86cb6f1f437f201f3cbc236e6729b9abd7d153a24ce31ee8
3
+ size 4741859
graphcodebert-vanilla/checkpoint-100/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddddccf76b63f161c92bc8774c3df6f375bf2ce43e44a910de13434d0630025e
3
+ size 14645
graphcodebert-vanilla/checkpoint-100/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b21c5349d5e7d02de630ebc1cb53ade1d9c6079eeb8594d223bb786011a0428b
3
+ size 1383
graphcodebert-vanilla/checkpoint-100/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b683a9e0270ef59bda524139bbaf1cd9071993f5d3a698ac0dcacdd374cee064
3
+ size 1465
graphcodebert-vanilla/checkpoint-100/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
graphcodebert-vanilla/checkpoint-100/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-vanilla/checkpoint-100/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": false,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "extra_special_tokens": {},
51
+ "mask_token": "<mask>",
52
+ "model_max_length": 512,
53
+ "pad_token": "<pad>",
54
+ "sep_token": "</s>",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": "<unk>"
58
+ }
graphcodebert-vanilla/checkpoint-100/trainer_state.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 100,
3
+ "best_metric": 0.5260673147136775,
4
+ "best_model_checkpoint": "output_checkpoints/graphcodebert-vanilla/checkpoint-100",
5
+ "epoch": 0.09784735812133072,
6
+ "eval_steps": 50,
7
+ "global_step": 100,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.004892367906066536,
14
+ "grad_norm": 33435.953125,
15
+ "learning_rate": 1.3071895424836603e-07,
16
+ "loss": 0.7006,
17
+ "step": 5
18
+ },
19
+ {
20
+ "epoch": 0.009784735812133072,
21
+ "grad_norm": 34831.7421875,
22
+ "learning_rate": 2.9411764705882356e-07,
23
+ "loss": 0.7023,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.014677103718199608,
28
+ "grad_norm": 32986.62109375,
29
+ "learning_rate": 4.5751633986928105e-07,
30
+ "loss": 0.7019,
31
+ "step": 15
32
+ },
33
+ {
34
+ "epoch": 0.019569471624266144,
35
+ "grad_norm": 34257.4921875,
36
+ "learning_rate": 6.209150326797386e-07,
37
+ "loss": 0.7025,
38
+ "step": 20
39
+ },
40
+ {
41
+ "epoch": 0.02446183953033268,
42
+ "grad_norm": 35511.3359375,
43
+ "learning_rate": 7.843137254901962e-07,
44
+ "loss": 0.7032,
45
+ "step": 25
46
+ },
47
+ {
48
+ "epoch": 0.029354207436399216,
49
+ "grad_norm": 33043.3515625,
50
+ "learning_rate": 9.477124183006536e-07,
51
+ "loss": 0.6968,
52
+ "step": 30
53
+ },
54
+ {
55
+ "epoch": 0.03424657534246575,
56
+ "grad_norm": 38538.30078125,
57
+ "learning_rate": 1.111111111111111e-06,
58
+ "loss": 0.6992,
59
+ "step": 35
60
+ },
61
+ {
62
+ "epoch": 0.03913894324853229,
63
+ "grad_norm": 43255.37890625,
64
+ "learning_rate": 1.2745098039215686e-06,
65
+ "loss": 0.6991,
66
+ "step": 40
67
+ },
68
+ {
69
+ "epoch": 0.04403131115459882,
70
+ "grad_norm": 34170.8046875,
71
+ "learning_rate": 1.4379084967320261e-06,
72
+ "loss": 0.6996,
73
+ "step": 45
74
+ },
75
+ {
76
+ "epoch": 0.04892367906066536,
77
+ "grad_norm": 35399.70703125,
78
+ "learning_rate": 1.6013071895424837e-06,
79
+ "loss": 0.6937,
80
+ "step": 50
81
+ },
82
+ {
83
+ "epoch": 0.04892367906066536,
84
+ "eval_accuracy": 0.771,
85
+ "eval_loss": 0.6569265127182007,
86
+ "eval_macro_f1": 0.5082027063884254,
87
+ "eval_precision": 0.6109971743687904,
88
+ "eval_recall": 0.5281120325963375,
89
+ "eval_runtime": 17.5156,
90
+ "eval_samples_per_second": 57.092,
91
+ "eval_steps_per_second": 0.057,
92
+ "step": 50
93
+ },
94
+ {
95
+ "epoch": 0.053816046966731895,
96
+ "grad_norm": 34677.1796875,
97
+ "learning_rate": 1.7647058823529414e-06,
98
+ "loss": 0.6963,
99
+ "step": 55
100
+ },
101
+ {
102
+ "epoch": 0.05870841487279843,
103
+ "grad_norm": 31789.291015625,
104
+ "learning_rate": 1.928104575163399e-06,
105
+ "loss": 0.6971,
106
+ "step": 60
107
+ },
108
+ {
109
+ "epoch": 0.06360078277886497,
110
+ "grad_norm": 30762.345703125,
111
+ "learning_rate": 2.0915032679738565e-06,
112
+ "loss": 0.6946,
113
+ "step": 65
114
+ },
115
+ {
116
+ "epoch": 0.0684931506849315,
117
+ "grad_norm": 29590.083984375,
118
+ "learning_rate": 2.254901960784314e-06,
119
+ "loss": 0.693,
120
+ "step": 70
121
+ },
122
+ {
123
+ "epoch": 0.07338551859099804,
124
+ "grad_norm": 29346.84765625,
125
+ "learning_rate": 2.4183006535947716e-06,
126
+ "loss": 0.6905,
127
+ "step": 75
128
+ },
129
+ {
130
+ "epoch": 0.07827788649706457,
131
+ "grad_norm": 37289.77734375,
132
+ "learning_rate": 2.581699346405229e-06,
133
+ "loss": 0.6942,
134
+ "step": 80
135
+ },
136
+ {
137
+ "epoch": 0.08317025440313111,
138
+ "grad_norm": 33970.94140625,
139
+ "learning_rate": 2.7450980392156867e-06,
140
+ "loss": 0.69,
141
+ "step": 85
142
+ },
143
+ {
144
+ "epoch": 0.08806262230919765,
145
+ "grad_norm": 28671.583984375,
146
+ "learning_rate": 2.9084967320261443e-06,
147
+ "loss": 0.6889,
148
+ "step": 90
149
+ },
150
+ {
151
+ "epoch": 0.09295499021526418,
152
+ "grad_norm": 30156.005859375,
153
+ "learning_rate": 3.071895424836602e-06,
154
+ "loss": 0.6866,
155
+ "step": 95
156
+ },
157
+ {
158
+ "epoch": 0.09784735812133072,
159
+ "grad_norm": 33484.71484375,
160
+ "learning_rate": 3.2352941176470594e-06,
161
+ "loss": 0.6888,
162
+ "step": 100
163
+ },
164
+ {
165
+ "epoch": 0.09784735812133072,
166
+ "eval_accuracy": 0.767,
167
+ "eval_loss": 0.6559821367263794,
168
+ "eval_macro_f1": 0.5260673147136775,
169
+ "eval_precision": 0.6062366452991452,
170
+ "eval_recall": 0.5367285927824044,
171
+ "eval_runtime": 17.3972,
172
+ "eval_samples_per_second": 57.481,
173
+ "eval_steps_per_second": 0.057,
174
+ "step": 100
175
+ }
176
+ ],
177
+ "logging_steps": 5,
178
+ "max_steps": 3066,
179
+ "num_input_tokens_seen": 0,
180
+ "num_train_epochs": 3,
181
+ "save_steps": 100,
182
+ "stateful_callbacks": {
183
+ "EarlyStoppingCallback": {
184
+ "args": {
185
+ "early_stopping_patience": 3,
186
+ "early_stopping_threshold": 0.0
187
+ },
188
+ "attributes": {
189
+ "early_stopping_patience_counter": 0
190
+ }
191
+ },
192
+ "TrainerControl": {
193
+ "args": {
194
+ "should_epoch_stop": false,
195
+ "should_evaluate": false,
196
+ "should_log": false,
197
+ "should_save": true,
198
+ "should_training_stop": false
199
+ },
200
+ "attributes": {}
201
+ }
202
+ },
203
+ "total_flos": 1.3471286034432e+16,
204
+ "train_batch_size": 512,
205
+ "trial_name": null,
206
+ "trial_params": null
207
+ }
graphcodebert-vanilla/checkpoint-100/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3d448040cc8193c39a3aa43e3972db0566234234cb8950ae351de6781f2f556
3
+ size 5905
graphcodebert-vanilla/checkpoint-100/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-vanilla/checkpoint-200/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "RobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.3,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": 0.3,
8
+ "dtype": "float32",
9
+ "eos_token_id": 2,
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.3,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_norm_eps": 1e-05,
17
+ "max_position_embeddings": 514,
18
+ "model_type": "roberta",
19
+ "num_attention_heads": 12,
20
+ "num_hidden_layers": 12,
21
+ "output_past": true,
22
+ "pad_token_id": 1,
23
+ "position_embedding_type": "absolute",
24
+ "problem_type": "single_label_classification",
25
+ "transformers_version": "4.56.0",
26
+ "type_vocab_size": 1,
27
+ "use_cache": true,
28
+ "vocab_size": 50265
29
+ }
graphcodebert-vanilla/checkpoint-200/config_hyperparams.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train_config": {
3
+ "model_name": "microsoft/graphcodebert-base",
4
+ "num_epochs": 3,
5
+ "batch_size": 256,
6
+ "learning_rate": 2e-05,
7
+ "max_length": 512,
8
+ "num_labels": 2,
9
+ "loss_type": "ce",
10
+ "focal_alpha": 1.0,
11
+ "focal_gamma": 2.0,
12
+ "r_drop_alpha": 6.0,
13
+ "infonce_temperature": 0.07,
14
+ "infonce_weight": 0.5,
15
+ "label_smoothing": 0,
16
+ "adversarial_epsilon": 0,
17
+ "use_swa": false,
18
+ "swa_start_epoch": 0,
19
+ "swa_lr": 1e-05,
20
+ "data_augmentation": false,
21
+ "aug_rename_prob": 0.0,
22
+ "aug_format_prob": 0.0,
23
+ "freeze_base": true,
24
+ "seed": 42,
25
+ "use_wandb": true,
26
+ "mixup_alpha": 0.0,
27
+ "low_pass_keep_ratio": 0.5,
28
+ "freq_consistency_weight": 0.0
29
+ },
30
+ "training_arguments": {
31
+ "output_dir": "output_checkpoints/graphcodebert-vanilla/",
32
+ "num_train_epochs": 3,
33
+ "per_device_train_batch_size": 256,
34
+ "per_device_eval_batch_size": 512,
35
+ "learning_rate": 2e-05,
36
+ "warmup_steps": 612,
37
+ "weight_decay": 0.1,
38
+ "logging_steps": 5,
39
+ "eval_steps": 50,
40
+ "save_steps": 100,
41
+ "metric_for_best_model": "macro_f1",
42
+ "greater_is_better": true,
43
+ "save_total_limit": 5,
44
+ "fp16": true,
45
+ "seed": 42
46
+ },
47
+ "training_state": {
48
+ "global_step": 200,
49
+ "epoch": 0.19569471624266144,
50
+ "best_metric": 0.5962545254252696,
51
+ "best_model_checkpoint": "output_checkpoints/graphcodebert-vanilla/checkpoint-200"
52
+ }
53
+ }
graphcodebert-vanilla/checkpoint-200/hyperparams.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "microsoft/graphcodebert-base",
3
+ "output_dir": "output_checkpoints/graphcodebert-vanilla/",
4
+ "num_epochs": 3,
5
+ "max_steps": -1,
6
+ "batch_size": 256,
7
+ "learning_rate": 2e-05,
8
+ "max_length": 512,
9
+ "num_labels": 2,
10
+ "use_wandb": true,
11
+ "freeze_base": true,
12
+ "loss_type": "ce",
13
+ "focal_alpha": 1.0,
14
+ "focal_gamma": 2.0,
15
+ "r_drop_alpha": 6.0,
16
+ "infonce_temperature": 0.07,
17
+ "infonce_weight": 0.5,
18
+ "seed": 42,
19
+ "wandb_run_name": "graphcodebert-vanilla",
20
+ "resume_from_checkpoint": null,
21
+ "save_steps": 100,
22
+ "eval_steps": 50,
23
+ "logging_steps": 5,
24
+ "label_smoothing": 0,
25
+ "adversarial_epsilon": 0,
26
+ "use_swa": false,
27
+ "swa_start_epoch": 0,
28
+ "swa_lr": 1e-05,
29
+ "data_augmentation": false,
30
+ "aug_rename_prob": 0.0,
31
+ "aug_format_prob": 0.0,
32
+ "mixup_alpha": 0.0,
33
+ "low_pass_keep_ratio": 0.5,
34
+ "freq_consistency_weight": 0.0,
35
+ "hidden_dropout_prob": 0.3,
36
+ "attention_probs_dropout_prob": 0.3,
37
+ "classifier_dropout": 0.3,
38
+ "device": "cuda"
39
+ }
graphcodebert-vanilla/checkpoint-200/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-vanilla/checkpoint-200/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0aeaccc7446a00f35bec59d86fa902e66ebf161710cff77f1fbc7e23c5c62aa4
3
+ size 498612824
graphcodebert-vanilla/checkpoint-200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b722cecb86725f9e176547eea1fa82aaf7883c091259493a9743214cfe3e4807
3
+ size 4741859
graphcodebert-vanilla/checkpoint-200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2533ee5d1fa769cc2164b95d88e0df14f136fb5e6d1e47fc9541a03a10815bcb
3
+ size 14645
graphcodebert-vanilla/checkpoint-200/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b21c5349d5e7d02de630ebc1cb53ade1d9c6079eeb8594d223bb786011a0428b
3
+ size 1383
graphcodebert-vanilla/checkpoint-200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1a24cde04d57e738ef50ad7ff8ffdc9c34b1e5155cdccf9430834307ea21fd7
3
+ size 1465
graphcodebert-vanilla/checkpoint-200/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
graphcodebert-vanilla/checkpoint-200/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-vanilla/checkpoint-200/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": false,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "extra_special_tokens": {},
51
+ "mask_token": "<mask>",
52
+ "model_max_length": 512,
53
+ "pad_token": "<pad>",
54
+ "sep_token": "</s>",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": "<unk>"
58
+ }
graphcodebert-vanilla/checkpoint-200/trainer_state.json ADDED
@@ -0,0 +1,371 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 200,
3
+ "best_metric": 0.5962545254252696,
4
+ "best_model_checkpoint": "output_checkpoints/graphcodebert-vanilla/checkpoint-200",
5
+ "epoch": 0.19569471624266144,
6
+ "eval_steps": 50,
7
+ "global_step": 200,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.004892367906066536,
14
+ "grad_norm": 33435.953125,
15
+ "learning_rate": 1.3071895424836603e-07,
16
+ "loss": 0.7006,
17
+ "step": 5
18
+ },
19
+ {
20
+ "epoch": 0.009784735812133072,
21
+ "grad_norm": 34831.7421875,
22
+ "learning_rate": 2.9411764705882356e-07,
23
+ "loss": 0.7023,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.014677103718199608,
28
+ "grad_norm": 32986.62109375,
29
+ "learning_rate": 4.5751633986928105e-07,
30
+ "loss": 0.7019,
31
+ "step": 15
32
+ },
33
+ {
34
+ "epoch": 0.019569471624266144,
35
+ "grad_norm": 34257.4921875,
36
+ "learning_rate": 6.209150326797386e-07,
37
+ "loss": 0.7025,
38
+ "step": 20
39
+ },
40
+ {
41
+ "epoch": 0.02446183953033268,
42
+ "grad_norm": 35511.3359375,
43
+ "learning_rate": 7.843137254901962e-07,
44
+ "loss": 0.7032,
45
+ "step": 25
46
+ },
47
+ {
48
+ "epoch": 0.029354207436399216,
49
+ "grad_norm": 33043.3515625,
50
+ "learning_rate": 9.477124183006536e-07,
51
+ "loss": 0.6968,
52
+ "step": 30
53
+ },
54
+ {
55
+ "epoch": 0.03424657534246575,
56
+ "grad_norm": 38538.30078125,
57
+ "learning_rate": 1.111111111111111e-06,
58
+ "loss": 0.6992,
59
+ "step": 35
60
+ },
61
+ {
62
+ "epoch": 0.03913894324853229,
63
+ "grad_norm": 43255.37890625,
64
+ "learning_rate": 1.2745098039215686e-06,
65
+ "loss": 0.6991,
66
+ "step": 40
67
+ },
68
+ {
69
+ "epoch": 0.04403131115459882,
70
+ "grad_norm": 34170.8046875,
71
+ "learning_rate": 1.4379084967320261e-06,
72
+ "loss": 0.6996,
73
+ "step": 45
74
+ },
75
+ {
76
+ "epoch": 0.04892367906066536,
77
+ "grad_norm": 35399.70703125,
78
+ "learning_rate": 1.6013071895424837e-06,
79
+ "loss": 0.6937,
80
+ "step": 50
81
+ },
82
+ {
83
+ "epoch": 0.04892367906066536,
84
+ "eval_accuracy": 0.771,
85
+ "eval_loss": 0.6569265127182007,
86
+ "eval_macro_f1": 0.5082027063884254,
87
+ "eval_precision": 0.6109971743687904,
88
+ "eval_recall": 0.5281120325963375,
89
+ "eval_runtime": 17.5156,
90
+ "eval_samples_per_second": 57.092,
91
+ "eval_steps_per_second": 0.057,
92
+ "step": 50
93
+ },
94
+ {
95
+ "epoch": 0.053816046966731895,
96
+ "grad_norm": 34677.1796875,
97
+ "learning_rate": 1.7647058823529414e-06,
98
+ "loss": 0.6963,
99
+ "step": 55
100
+ },
101
+ {
102
+ "epoch": 0.05870841487279843,
103
+ "grad_norm": 31789.291015625,
104
+ "learning_rate": 1.928104575163399e-06,
105
+ "loss": 0.6971,
106
+ "step": 60
107
+ },
108
+ {
109
+ "epoch": 0.06360078277886497,
110
+ "grad_norm": 30762.345703125,
111
+ "learning_rate": 2.0915032679738565e-06,
112
+ "loss": 0.6946,
113
+ "step": 65
114
+ },
115
+ {
116
+ "epoch": 0.0684931506849315,
117
+ "grad_norm": 29590.083984375,
118
+ "learning_rate": 2.254901960784314e-06,
119
+ "loss": 0.693,
120
+ "step": 70
121
+ },
122
+ {
123
+ "epoch": 0.07338551859099804,
124
+ "grad_norm": 29346.84765625,
125
+ "learning_rate": 2.4183006535947716e-06,
126
+ "loss": 0.6905,
127
+ "step": 75
128
+ },
129
+ {
130
+ "epoch": 0.07827788649706457,
131
+ "grad_norm": 37289.77734375,
132
+ "learning_rate": 2.581699346405229e-06,
133
+ "loss": 0.6942,
134
+ "step": 80
135
+ },
136
+ {
137
+ "epoch": 0.08317025440313111,
138
+ "grad_norm": 33970.94140625,
139
+ "learning_rate": 2.7450980392156867e-06,
140
+ "loss": 0.69,
141
+ "step": 85
142
+ },
143
+ {
144
+ "epoch": 0.08806262230919765,
145
+ "grad_norm": 28671.583984375,
146
+ "learning_rate": 2.9084967320261443e-06,
147
+ "loss": 0.6889,
148
+ "step": 90
149
+ },
150
+ {
151
+ "epoch": 0.09295499021526418,
152
+ "grad_norm": 30156.005859375,
153
+ "learning_rate": 3.071895424836602e-06,
154
+ "loss": 0.6866,
155
+ "step": 95
156
+ },
157
+ {
158
+ "epoch": 0.09784735812133072,
159
+ "grad_norm": 33484.71484375,
160
+ "learning_rate": 3.2352941176470594e-06,
161
+ "loss": 0.6888,
162
+ "step": 100
163
+ },
164
+ {
165
+ "epoch": 0.09784735812133072,
166
+ "eval_accuracy": 0.767,
167
+ "eval_loss": 0.6559821367263794,
168
+ "eval_macro_f1": 0.5260673147136775,
169
+ "eval_precision": 0.6062366452991452,
170
+ "eval_recall": 0.5367285927824044,
171
+ "eval_runtime": 17.3972,
172
+ "eval_samples_per_second": 57.481,
173
+ "eval_steps_per_second": 0.057,
174
+ "step": 100
175
+ },
176
+ {
177
+ "epoch": 0.10273972602739725,
178
+ "grad_norm": 35390.96484375,
179
+ "learning_rate": 3.398692810457517e-06,
180
+ "loss": 0.6893,
181
+ "step": 105
182
+ },
183
+ {
184
+ "epoch": 0.10763209393346379,
185
+ "grad_norm": 34452.11328125,
186
+ "learning_rate": 3.562091503267974e-06,
187
+ "loss": 0.6849,
188
+ "step": 110
189
+ },
190
+ {
191
+ "epoch": 0.11252446183953033,
192
+ "grad_norm": 41847.0625,
193
+ "learning_rate": 3.7254901960784316e-06,
194
+ "loss": 0.6853,
195
+ "step": 115
196
+ },
197
+ {
198
+ "epoch": 0.11741682974559686,
199
+ "grad_norm": 31951.4140625,
200
+ "learning_rate": 3.88888888888889e-06,
201
+ "loss": 0.6802,
202
+ "step": 120
203
+ },
204
+ {
205
+ "epoch": 0.1223091976516634,
206
+ "grad_norm": 34176.79296875,
207
+ "learning_rate": 4.052287581699347e-06,
208
+ "loss": 0.6871,
209
+ "step": 125
210
+ },
211
+ {
212
+ "epoch": 0.12720156555772993,
213
+ "grad_norm": 35482.6640625,
214
+ "learning_rate": 4.215686274509805e-06,
215
+ "loss": 0.6789,
216
+ "step": 130
217
+ },
218
+ {
219
+ "epoch": 0.13209393346379647,
220
+ "grad_norm": 60289.4296875,
221
+ "learning_rate": 4.379084967320262e-06,
222
+ "loss": 0.6844,
223
+ "step": 135
224
+ },
225
+ {
226
+ "epoch": 0.136986301369863,
227
+ "grad_norm": 41964.47265625,
228
+ "learning_rate": 4.542483660130719e-06,
229
+ "loss": 0.6808,
230
+ "step": 140
231
+ },
232
+ {
233
+ "epoch": 0.14187866927592954,
234
+ "grad_norm": 35805.8203125,
235
+ "learning_rate": 4.705882352941177e-06,
236
+ "loss": 0.6748,
237
+ "step": 145
238
+ },
239
+ {
240
+ "epoch": 0.14677103718199608,
241
+ "grad_norm": 25688.607421875,
242
+ "learning_rate": 4.869281045751634e-06,
243
+ "loss": 0.6769,
244
+ "step": 150
245
+ },
246
+ {
247
+ "epoch": 0.14677103718199608,
248
+ "eval_accuracy": 0.728,
249
+ "eval_loss": 0.6580190658569336,
250
+ "eval_macro_f1": 0.5668100015926103,
251
+ "eval_precision": 0.5782073308365263,
252
+ "eval_recall": 0.5627889260176255,
253
+ "eval_runtime": 17.3848,
254
+ "eval_samples_per_second": 57.521,
255
+ "eval_steps_per_second": 0.058,
256
+ "step": 150
257
+ },
258
+ {
259
+ "epoch": 0.15166340508806261,
260
+ "grad_norm": 30171.484375,
261
+ "learning_rate": 5.032679738562092e-06,
262
+ "loss": 0.6706,
263
+ "step": 155
264
+ },
265
+ {
266
+ "epoch": 0.15655577299412915,
267
+ "grad_norm": 29393.716796875,
268
+ "learning_rate": 5.19607843137255e-06,
269
+ "loss": 0.6764,
270
+ "step": 160
271
+ },
272
+ {
273
+ "epoch": 0.16144814090019569,
274
+ "grad_norm": 34377.2578125,
275
+ "learning_rate": 5.359477124183007e-06,
276
+ "loss": 0.6688,
277
+ "step": 165
278
+ },
279
+ {
280
+ "epoch": 0.16634050880626222,
281
+ "grad_norm": 34805.3671875,
282
+ "learning_rate": 5.522875816993465e-06,
283
+ "loss": 0.6673,
284
+ "step": 170
285
+ },
286
+ {
287
+ "epoch": 0.17123287671232876,
288
+ "grad_norm": 29948.095703125,
289
+ "learning_rate": 5.686274509803922e-06,
290
+ "loss": 0.6652,
291
+ "step": 175
292
+ },
293
+ {
294
+ "epoch": 0.1761252446183953,
295
+ "grad_norm": 34735.33984375,
296
+ "learning_rate": 5.84967320261438e-06,
297
+ "loss": 0.6709,
298
+ "step": 180
299
+ },
300
+ {
301
+ "epoch": 0.18101761252446183,
302
+ "grad_norm": 36469.0703125,
303
+ "learning_rate": 6.0130718954248365e-06,
304
+ "loss": 0.6606,
305
+ "step": 185
306
+ },
307
+ {
308
+ "epoch": 0.18590998043052837,
309
+ "grad_norm": 27843.798828125,
310
+ "learning_rate": 6.176470588235295e-06,
311
+ "loss": 0.6666,
312
+ "step": 190
313
+ },
314
+ {
315
+ "epoch": 0.1908023483365949,
316
+ "grad_norm": 27965.966796875,
317
+ "learning_rate": 6.3398692810457515e-06,
318
+ "loss": 0.6626,
319
+ "step": 195
320
+ },
321
+ {
322
+ "epoch": 0.19569471624266144,
323
+ "grad_norm": 28469.09375,
324
+ "learning_rate": 6.5032679738562095e-06,
325
+ "loss": 0.6601,
326
+ "step": 200
327
+ },
328
+ {
329
+ "epoch": 0.19569471624266144,
330
+ "eval_accuracy": 0.698,
331
+ "eval_loss": 0.6574791669845581,
332
+ "eval_macro_f1": 0.5962545254252696,
333
+ "eval_precision": 0.591974921630094,
334
+ "eval_recall": 0.605831327804422,
335
+ "eval_runtime": 17.3909,
336
+ "eval_samples_per_second": 57.501,
337
+ "eval_steps_per_second": 0.058,
338
+ "step": 200
339
+ }
340
+ ],
341
+ "logging_steps": 5,
342
+ "max_steps": 3066,
343
+ "num_input_tokens_seen": 0,
344
+ "num_train_epochs": 3,
345
+ "save_steps": 100,
346
+ "stateful_callbacks": {
347
+ "EarlyStoppingCallback": {
348
+ "args": {
349
+ "early_stopping_patience": 3,
350
+ "early_stopping_threshold": 0.0
351
+ },
352
+ "attributes": {
353
+ "early_stopping_patience_counter": 0
354
+ }
355
+ },
356
+ "TrainerControl": {
357
+ "args": {
358
+ "should_epoch_stop": false,
359
+ "should_evaluate": false,
360
+ "should_log": false,
361
+ "should_save": true,
362
+ "should_training_stop": false
363
+ },
364
+ "attributes": {}
365
+ }
366
+ },
367
+ "total_flos": 2.6942572068864e+16,
368
+ "train_batch_size": 512,
369
+ "trial_name": null,
370
+ "trial_params": null
371
+ }
graphcodebert-vanilla/checkpoint-200/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3d448040cc8193c39a3aa43e3972db0566234234cb8950ae351de6781f2f556
3
+ size 5905
graphcodebert-vanilla/checkpoint-200/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-vanilla/checkpoint-300/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "RobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.3,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": 0.3,
8
+ "dtype": "float32",
9
+ "eos_token_id": 2,
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.3,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_norm_eps": 1e-05,
17
+ "max_position_embeddings": 514,
18
+ "model_type": "roberta",
19
+ "num_attention_heads": 12,
20
+ "num_hidden_layers": 12,
21
+ "output_past": true,
22
+ "pad_token_id": 1,
23
+ "position_embedding_type": "absolute",
24
+ "problem_type": "single_label_classification",
25
+ "transformers_version": "4.56.0",
26
+ "type_vocab_size": 1,
27
+ "use_cache": true,
28
+ "vocab_size": 50265
29
+ }
graphcodebert-vanilla/checkpoint-300/config_hyperparams.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train_config": {
3
+ "model_name": "microsoft/graphcodebert-base",
4
+ "num_epochs": 3,
5
+ "batch_size": 256,
6
+ "learning_rate": 2e-05,
7
+ "max_length": 512,
8
+ "num_labels": 2,
9
+ "loss_type": "ce",
10
+ "focal_alpha": 1.0,
11
+ "focal_gamma": 2.0,
12
+ "r_drop_alpha": 6.0,
13
+ "infonce_temperature": 0.07,
14
+ "infonce_weight": 0.5,
15
+ "label_smoothing": 0,
16
+ "adversarial_epsilon": 0,
17
+ "use_swa": false,
18
+ "swa_start_epoch": 0,
19
+ "swa_lr": 1e-05,
20
+ "data_augmentation": false,
21
+ "aug_rename_prob": 0.0,
22
+ "aug_format_prob": 0.0,
23
+ "freeze_base": true,
24
+ "seed": 42,
25
+ "use_wandb": true,
26
+ "mixup_alpha": 0.0,
27
+ "low_pass_keep_ratio": 0.5,
28
+ "freq_consistency_weight": 0.0
29
+ },
30
+ "training_arguments": {
31
+ "output_dir": "output_checkpoints/graphcodebert-vanilla/",
32
+ "num_train_epochs": 3,
33
+ "per_device_train_batch_size": 256,
34
+ "per_device_eval_batch_size": 512,
35
+ "learning_rate": 2e-05,
36
+ "warmup_steps": 612,
37
+ "weight_decay": 0.1,
38
+ "logging_steps": 5,
39
+ "eval_steps": 50,
40
+ "save_steps": 100,
41
+ "metric_for_best_model": "macro_f1",
42
+ "greater_is_better": true,
43
+ "save_total_limit": 5,
44
+ "fp16": true,
45
+ "seed": 42
46
+ },
47
+ "training_state": {
48
+ "global_step": 300,
49
+ "epoch": 0.29354207436399216,
50
+ "best_metric": 0.5962545254252696,
51
+ "best_model_checkpoint": "output_checkpoints/graphcodebert-vanilla/checkpoint-200"
52
+ }
53
+ }
graphcodebert-vanilla/checkpoint-300/hyperparams.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "microsoft/graphcodebert-base",
3
+ "output_dir": "output_checkpoints/graphcodebert-vanilla/",
4
+ "num_epochs": 3,
5
+ "max_steps": -1,
6
+ "batch_size": 256,
7
+ "learning_rate": 2e-05,
8
+ "max_length": 512,
9
+ "num_labels": 2,
10
+ "use_wandb": true,
11
+ "freeze_base": true,
12
+ "loss_type": "ce",
13
+ "focal_alpha": 1.0,
14
+ "focal_gamma": 2.0,
15
+ "r_drop_alpha": 6.0,
16
+ "infonce_temperature": 0.07,
17
+ "infonce_weight": 0.5,
18
+ "seed": 42,
19
+ "wandb_run_name": "graphcodebert-vanilla",
20
+ "resume_from_checkpoint": null,
21
+ "save_steps": 100,
22
+ "eval_steps": 50,
23
+ "logging_steps": 5,
24
+ "label_smoothing": 0,
25
+ "adversarial_epsilon": 0,
26
+ "use_swa": false,
27
+ "swa_start_epoch": 0,
28
+ "swa_lr": 1e-05,
29
+ "data_augmentation": false,
30
+ "aug_rename_prob": 0.0,
31
+ "aug_format_prob": 0.0,
32
+ "mixup_alpha": 0.0,
33
+ "low_pass_keep_ratio": 0.5,
34
+ "freq_consistency_weight": 0.0,
35
+ "hidden_dropout_prob": 0.3,
36
+ "attention_probs_dropout_prob": 0.3,
37
+ "classifier_dropout": 0.3,
38
+ "device": "cuda"
39
+ }
graphcodebert-vanilla/checkpoint-300/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-vanilla/checkpoint-300/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edd3840232c2ce57c3ca99599da5b4b6c3d927d433e8f6cade8f19eb82e1c7d4
3
+ size 498612824
graphcodebert-vanilla/checkpoint-300/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3bc2bb4faa59c5b4d4444402a371d35e399ec5c0395497516d5b8a18204541d
3
+ size 4741859
graphcodebert-vanilla/checkpoint-300/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16d01df9df5e3d357f43a862c0d6dbd8af3871aefdeaa647afae0764a9686751
3
+ size 14645
graphcodebert-vanilla/checkpoint-300/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b21c5349d5e7d02de630ebc1cb53ade1d9c6079eeb8594d223bb786011a0428b
3
+ size 1383
graphcodebert-vanilla/checkpoint-300/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82af26824e4d089a1392ee4f564d426e3fd92b73b1c7ab1766017647df7f455d
3
+ size 1465
graphcodebert-vanilla/checkpoint-300/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
graphcodebert-vanilla/checkpoint-300/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-vanilla/checkpoint-300/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": false,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "extra_special_tokens": {},
51
+ "mask_token": "<mask>",
52
+ "model_max_length": 512,
53
+ "pad_token": "<pad>",
54
+ "sep_token": "</s>",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": "<unk>"
58
+ }
graphcodebert-vanilla/checkpoint-300/trainer_state.json ADDED
@@ -0,0 +1,535 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 200,
3
+ "best_metric": 0.5962545254252696,
4
+ "best_model_checkpoint": "output_checkpoints/graphcodebert-vanilla/checkpoint-200",
5
+ "epoch": 0.29354207436399216,
6
+ "eval_steps": 50,
7
+ "global_step": 300,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.004892367906066536,
14
+ "grad_norm": 33435.953125,
15
+ "learning_rate": 1.3071895424836603e-07,
16
+ "loss": 0.7006,
17
+ "step": 5
18
+ },
19
+ {
20
+ "epoch": 0.009784735812133072,
21
+ "grad_norm": 34831.7421875,
22
+ "learning_rate": 2.9411764705882356e-07,
23
+ "loss": 0.7023,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.014677103718199608,
28
+ "grad_norm": 32986.62109375,
29
+ "learning_rate": 4.5751633986928105e-07,
30
+ "loss": 0.7019,
31
+ "step": 15
32
+ },
33
+ {
34
+ "epoch": 0.019569471624266144,
35
+ "grad_norm": 34257.4921875,
36
+ "learning_rate": 6.209150326797386e-07,
37
+ "loss": 0.7025,
38
+ "step": 20
39
+ },
40
+ {
41
+ "epoch": 0.02446183953033268,
42
+ "grad_norm": 35511.3359375,
43
+ "learning_rate": 7.843137254901962e-07,
44
+ "loss": 0.7032,
45
+ "step": 25
46
+ },
47
+ {
48
+ "epoch": 0.029354207436399216,
49
+ "grad_norm": 33043.3515625,
50
+ "learning_rate": 9.477124183006536e-07,
51
+ "loss": 0.6968,
52
+ "step": 30
53
+ },
54
+ {
55
+ "epoch": 0.03424657534246575,
56
+ "grad_norm": 38538.30078125,
57
+ "learning_rate": 1.111111111111111e-06,
58
+ "loss": 0.6992,
59
+ "step": 35
60
+ },
61
+ {
62
+ "epoch": 0.03913894324853229,
63
+ "grad_norm": 43255.37890625,
64
+ "learning_rate": 1.2745098039215686e-06,
65
+ "loss": 0.6991,
66
+ "step": 40
67
+ },
68
+ {
69
+ "epoch": 0.04403131115459882,
70
+ "grad_norm": 34170.8046875,
71
+ "learning_rate": 1.4379084967320261e-06,
72
+ "loss": 0.6996,
73
+ "step": 45
74
+ },
75
+ {
76
+ "epoch": 0.04892367906066536,
77
+ "grad_norm": 35399.70703125,
78
+ "learning_rate": 1.6013071895424837e-06,
79
+ "loss": 0.6937,
80
+ "step": 50
81
+ },
82
+ {
83
+ "epoch": 0.04892367906066536,
84
+ "eval_accuracy": 0.771,
85
+ "eval_loss": 0.6569265127182007,
86
+ "eval_macro_f1": 0.5082027063884254,
87
+ "eval_precision": 0.6109971743687904,
88
+ "eval_recall": 0.5281120325963375,
89
+ "eval_runtime": 17.5156,
90
+ "eval_samples_per_second": 57.092,
91
+ "eval_steps_per_second": 0.057,
92
+ "step": 50
93
+ },
94
+ {
95
+ "epoch": 0.053816046966731895,
96
+ "grad_norm": 34677.1796875,
97
+ "learning_rate": 1.7647058823529414e-06,
98
+ "loss": 0.6963,
99
+ "step": 55
100
+ },
101
+ {
102
+ "epoch": 0.05870841487279843,
103
+ "grad_norm": 31789.291015625,
104
+ "learning_rate": 1.928104575163399e-06,
105
+ "loss": 0.6971,
106
+ "step": 60
107
+ },
108
+ {
109
+ "epoch": 0.06360078277886497,
110
+ "grad_norm": 30762.345703125,
111
+ "learning_rate": 2.0915032679738565e-06,
112
+ "loss": 0.6946,
113
+ "step": 65
114
+ },
115
+ {
116
+ "epoch": 0.0684931506849315,
117
+ "grad_norm": 29590.083984375,
118
+ "learning_rate": 2.254901960784314e-06,
119
+ "loss": 0.693,
120
+ "step": 70
121
+ },
122
+ {
123
+ "epoch": 0.07338551859099804,
124
+ "grad_norm": 29346.84765625,
125
+ "learning_rate": 2.4183006535947716e-06,
126
+ "loss": 0.6905,
127
+ "step": 75
128
+ },
129
+ {
130
+ "epoch": 0.07827788649706457,
131
+ "grad_norm": 37289.77734375,
132
+ "learning_rate": 2.581699346405229e-06,
133
+ "loss": 0.6942,
134
+ "step": 80
135
+ },
136
+ {
137
+ "epoch": 0.08317025440313111,
138
+ "grad_norm": 33970.94140625,
139
+ "learning_rate": 2.7450980392156867e-06,
140
+ "loss": 0.69,
141
+ "step": 85
142
+ },
143
+ {
144
+ "epoch": 0.08806262230919765,
145
+ "grad_norm": 28671.583984375,
146
+ "learning_rate": 2.9084967320261443e-06,
147
+ "loss": 0.6889,
148
+ "step": 90
149
+ },
150
+ {
151
+ "epoch": 0.09295499021526418,
152
+ "grad_norm": 30156.005859375,
153
+ "learning_rate": 3.071895424836602e-06,
154
+ "loss": 0.6866,
155
+ "step": 95
156
+ },
157
+ {
158
+ "epoch": 0.09784735812133072,
159
+ "grad_norm": 33484.71484375,
160
+ "learning_rate": 3.2352941176470594e-06,
161
+ "loss": 0.6888,
162
+ "step": 100
163
+ },
164
+ {
165
+ "epoch": 0.09784735812133072,
166
+ "eval_accuracy": 0.767,
167
+ "eval_loss": 0.6559821367263794,
168
+ "eval_macro_f1": 0.5260673147136775,
169
+ "eval_precision": 0.6062366452991452,
170
+ "eval_recall": 0.5367285927824044,
171
+ "eval_runtime": 17.3972,
172
+ "eval_samples_per_second": 57.481,
173
+ "eval_steps_per_second": 0.057,
174
+ "step": 100
175
+ },
176
+ {
177
+ "epoch": 0.10273972602739725,
178
+ "grad_norm": 35390.96484375,
179
+ "learning_rate": 3.398692810457517e-06,
180
+ "loss": 0.6893,
181
+ "step": 105
182
+ },
183
+ {
184
+ "epoch": 0.10763209393346379,
185
+ "grad_norm": 34452.11328125,
186
+ "learning_rate": 3.562091503267974e-06,
187
+ "loss": 0.6849,
188
+ "step": 110
189
+ },
190
+ {
191
+ "epoch": 0.11252446183953033,
192
+ "grad_norm": 41847.0625,
193
+ "learning_rate": 3.7254901960784316e-06,
194
+ "loss": 0.6853,
195
+ "step": 115
196
+ },
197
+ {
198
+ "epoch": 0.11741682974559686,
199
+ "grad_norm": 31951.4140625,
200
+ "learning_rate": 3.88888888888889e-06,
201
+ "loss": 0.6802,
202
+ "step": 120
203
+ },
204
+ {
205
+ "epoch": 0.1223091976516634,
206
+ "grad_norm": 34176.79296875,
207
+ "learning_rate": 4.052287581699347e-06,
208
+ "loss": 0.6871,
209
+ "step": 125
210
+ },
211
+ {
212
+ "epoch": 0.12720156555772993,
213
+ "grad_norm": 35482.6640625,
214
+ "learning_rate": 4.215686274509805e-06,
215
+ "loss": 0.6789,
216
+ "step": 130
217
+ },
218
+ {
219
+ "epoch": 0.13209393346379647,
220
+ "grad_norm": 60289.4296875,
221
+ "learning_rate": 4.379084967320262e-06,
222
+ "loss": 0.6844,
223
+ "step": 135
224
+ },
225
+ {
226
+ "epoch": 0.136986301369863,
227
+ "grad_norm": 41964.47265625,
228
+ "learning_rate": 4.542483660130719e-06,
229
+ "loss": 0.6808,
230
+ "step": 140
231
+ },
232
+ {
233
+ "epoch": 0.14187866927592954,
234
+ "grad_norm": 35805.8203125,
235
+ "learning_rate": 4.705882352941177e-06,
236
+ "loss": 0.6748,
237
+ "step": 145
238
+ },
239
+ {
240
+ "epoch": 0.14677103718199608,
241
+ "grad_norm": 25688.607421875,
242
+ "learning_rate": 4.869281045751634e-06,
243
+ "loss": 0.6769,
244
+ "step": 150
245
+ },
246
+ {
247
+ "epoch": 0.14677103718199608,
248
+ "eval_accuracy": 0.728,
249
+ "eval_loss": 0.6580190658569336,
250
+ "eval_macro_f1": 0.5668100015926103,
251
+ "eval_precision": 0.5782073308365263,
252
+ "eval_recall": 0.5627889260176255,
253
+ "eval_runtime": 17.3848,
254
+ "eval_samples_per_second": 57.521,
255
+ "eval_steps_per_second": 0.058,
256
+ "step": 150
257
+ },
258
+ {
259
+ "epoch": 0.15166340508806261,
260
+ "grad_norm": 30171.484375,
261
+ "learning_rate": 5.032679738562092e-06,
262
+ "loss": 0.6706,
263
+ "step": 155
264
+ },
265
+ {
266
+ "epoch": 0.15655577299412915,
267
+ "grad_norm": 29393.716796875,
268
+ "learning_rate": 5.19607843137255e-06,
269
+ "loss": 0.6764,
270
+ "step": 160
271
+ },
272
+ {
273
+ "epoch": 0.16144814090019569,
274
+ "grad_norm": 34377.2578125,
275
+ "learning_rate": 5.359477124183007e-06,
276
+ "loss": 0.6688,
277
+ "step": 165
278
+ },
279
+ {
280
+ "epoch": 0.16634050880626222,
281
+ "grad_norm": 34805.3671875,
282
+ "learning_rate": 5.522875816993465e-06,
283
+ "loss": 0.6673,
284
+ "step": 170
285
+ },
286
+ {
287
+ "epoch": 0.17123287671232876,
288
+ "grad_norm": 29948.095703125,
289
+ "learning_rate": 5.686274509803922e-06,
290
+ "loss": 0.6652,
291
+ "step": 175
292
+ },
293
+ {
294
+ "epoch": 0.1761252446183953,
295
+ "grad_norm": 34735.33984375,
296
+ "learning_rate": 5.84967320261438e-06,
297
+ "loss": 0.6709,
298
+ "step": 180
299
+ },
300
+ {
301
+ "epoch": 0.18101761252446183,
302
+ "grad_norm": 36469.0703125,
303
+ "learning_rate": 6.0130718954248365e-06,
304
+ "loss": 0.6606,
305
+ "step": 185
306
+ },
307
+ {
308
+ "epoch": 0.18590998043052837,
309
+ "grad_norm": 27843.798828125,
310
+ "learning_rate": 6.176470588235295e-06,
311
+ "loss": 0.6666,
312
+ "step": 190
313
+ },
314
+ {
315
+ "epoch": 0.1908023483365949,
316
+ "grad_norm": 27965.966796875,
317
+ "learning_rate": 6.3398692810457515e-06,
318
+ "loss": 0.6626,
319
+ "step": 195
320
+ },
321
+ {
322
+ "epoch": 0.19569471624266144,
323
+ "grad_norm": 28469.09375,
324
+ "learning_rate": 6.5032679738562095e-06,
325
+ "loss": 0.6601,
326
+ "step": 200
327
+ },
328
+ {
329
+ "epoch": 0.19569471624266144,
330
+ "eval_accuracy": 0.698,
331
+ "eval_loss": 0.6574791669845581,
332
+ "eval_macro_f1": 0.5962545254252696,
333
+ "eval_precision": 0.591974921630094,
334
+ "eval_recall": 0.605831327804422,
335
+ "eval_runtime": 17.3909,
336
+ "eval_samples_per_second": 57.501,
337
+ "eval_steps_per_second": 0.058,
338
+ "step": 200
339
+ },
340
+ {
341
+ "epoch": 0.20058708414872797,
342
+ "grad_norm": 26626.736328125,
343
+ "learning_rate": 6.666666666666667e-06,
344
+ "loss": 0.6598,
345
+ "step": 205
346
+ },
347
+ {
348
+ "epoch": 0.2054794520547945,
349
+ "grad_norm": 26082.599609375,
350
+ "learning_rate": 6.830065359477125e-06,
351
+ "loss": 0.6568,
352
+ "step": 210
353
+ },
354
+ {
355
+ "epoch": 0.21037181996086105,
356
+ "grad_norm": 33339.375,
357
+ "learning_rate": 6.993464052287582e-06,
358
+ "loss": 0.6563,
359
+ "step": 215
360
+ },
361
+ {
362
+ "epoch": 0.21526418786692758,
363
+ "grad_norm": 30894.603515625,
364
+ "learning_rate": 7.15686274509804e-06,
365
+ "loss": 0.652,
366
+ "step": 220
367
+ },
368
+ {
369
+ "epoch": 0.22015655577299412,
370
+ "grad_norm": 29593.958984375,
371
+ "learning_rate": 7.320261437908497e-06,
372
+ "loss": 0.6491,
373
+ "step": 225
374
+ },
375
+ {
376
+ "epoch": 0.22504892367906065,
377
+ "grad_norm": 39939.2890625,
378
+ "learning_rate": 7.483660130718955e-06,
379
+ "loss": 0.6479,
380
+ "step": 230
381
+ },
382
+ {
383
+ "epoch": 0.2299412915851272,
384
+ "grad_norm": 32097.01953125,
385
+ "learning_rate": 7.647058823529411e-06,
386
+ "loss": 0.6464,
387
+ "step": 235
388
+ },
389
+ {
390
+ "epoch": 0.23483365949119372,
391
+ "grad_norm": 31191.142578125,
392
+ "learning_rate": 7.81045751633987e-06,
393
+ "loss": 0.6469,
394
+ "step": 240
395
+ },
396
+ {
397
+ "epoch": 0.23972602739726026,
398
+ "grad_norm": 30402.431640625,
399
+ "learning_rate": 7.973856209150329e-06,
400
+ "loss": 0.6407,
401
+ "step": 245
402
+ },
403
+ {
404
+ "epoch": 0.2446183953033268,
405
+ "grad_norm": 30825.1328125,
406
+ "learning_rate": 8.137254901960784e-06,
407
+ "loss": 0.6391,
408
+ "step": 250
409
+ },
410
+ {
411
+ "epoch": 0.2446183953033268,
412
+ "eval_accuracy": 0.623,
413
+ "eval_loss": 0.6709860563278198,
414
+ "eval_macro_f1": 0.5814715106436125,
415
+ "eval_precision": 0.6025402726146221,
416
+ "eval_recall": 0.6470932816224296,
417
+ "eval_runtime": 17.5511,
418
+ "eval_samples_per_second": 56.977,
419
+ "eval_steps_per_second": 0.057,
420
+ "step": 250
421
+ },
422
+ {
423
+ "epoch": 0.24951076320939333,
424
+ "grad_norm": 26645.732421875,
425
+ "learning_rate": 8.300653594771243e-06,
426
+ "loss": 0.6397,
427
+ "step": 255
428
+ },
429
+ {
430
+ "epoch": 0.25440313111545987,
431
+ "grad_norm": 29653.92578125,
432
+ "learning_rate": 8.4640522875817e-06,
433
+ "loss": 0.6437,
434
+ "step": 260
435
+ },
436
+ {
437
+ "epoch": 0.25929549902152643,
438
+ "grad_norm": 31826.6484375,
439
+ "learning_rate": 8.627450980392157e-06,
440
+ "loss": 0.6373,
441
+ "step": 265
442
+ },
443
+ {
444
+ "epoch": 0.26418786692759294,
445
+ "grad_norm": 35353.4765625,
446
+ "learning_rate": 8.790849673202614e-06,
447
+ "loss": 0.6319,
448
+ "step": 270
449
+ },
450
+ {
451
+ "epoch": 0.2690802348336595,
452
+ "grad_norm": 30883.482421875,
453
+ "learning_rate": 8.954248366013073e-06,
454
+ "loss": 0.6355,
455
+ "step": 275
456
+ },
457
+ {
458
+ "epoch": 0.273972602739726,
459
+ "grad_norm": 26454.451171875,
460
+ "learning_rate": 9.11764705882353e-06,
461
+ "loss": 0.6261,
462
+ "step": 280
463
+ },
464
+ {
465
+ "epoch": 0.2788649706457926,
466
+ "grad_norm": 27102.65234375,
467
+ "learning_rate": 9.281045751633987e-06,
468
+ "loss": 0.6291,
469
+ "step": 285
470
+ },
471
+ {
472
+ "epoch": 0.2837573385518591,
473
+ "grad_norm": 26538.755859375,
474
+ "learning_rate": 9.444444444444445e-06,
475
+ "loss": 0.6265,
476
+ "step": 290
477
+ },
478
+ {
479
+ "epoch": 0.28864970645792565,
480
+ "grad_norm": 27230.40625,
481
+ "learning_rate": 9.607843137254903e-06,
482
+ "loss": 0.617,
483
+ "step": 295
484
+ },
485
+ {
486
+ "epoch": 0.29354207436399216,
487
+ "grad_norm": 27588.068359375,
488
+ "learning_rate": 9.77124183006536e-06,
489
+ "loss": 0.618,
490
+ "step": 300
491
+ },
492
+ {
493
+ "epoch": 0.29354207436399216,
494
+ "eval_accuracy": 0.59,
495
+ "eval_loss": 0.6805335283279419,
496
+ "eval_macro_f1": 0.5635902261241277,
497
+ "eval_precision": 0.6075875666060336,
498
+ "eval_recall": 0.6546334931985156,
499
+ "eval_runtime": 17.4391,
500
+ "eval_samples_per_second": 57.342,
501
+ "eval_steps_per_second": 0.057,
502
+ "step": 300
503
+ }
504
+ ],
505
+ "logging_steps": 5,
506
+ "max_steps": 3066,
507
+ "num_input_tokens_seen": 0,
508
+ "num_train_epochs": 3,
509
+ "save_steps": 100,
510
+ "stateful_callbacks": {
511
+ "EarlyStoppingCallback": {
512
+ "args": {
513
+ "early_stopping_patience": 3,
514
+ "early_stopping_threshold": 0.0
515
+ },
516
+ "attributes": {
517
+ "early_stopping_patience_counter": 2
518
+ }
519
+ },
520
+ "TrainerControl": {
521
+ "args": {
522
+ "should_epoch_stop": false,
523
+ "should_evaluate": false,
524
+ "should_log": false,
525
+ "should_save": true,
526
+ "should_training_stop": false
527
+ },
528
+ "attributes": {}
529
+ }
530
+ },
531
+ "total_flos": 4.0413858103296e+16,
532
+ "train_batch_size": 512,
533
+ "trial_name": null,
534
+ "trial_params": null
535
+ }
graphcodebert-vanilla/checkpoint-300/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3d448040cc8193c39a3aa43e3972db0566234234cb8950ae351de6781f2f556
3
+ size 5905
graphcodebert-vanilla/checkpoint-300/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-vanilla/final_model/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "RobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.3,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": 0.3,
8
+ "dtype": "float32",
9
+ "eos_token_id": 2,
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.3,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_norm_eps": 1e-05,
17
+ "max_position_embeddings": 514,
18
+ "model_type": "roberta",
19
+ "num_attention_heads": 12,
20
+ "num_hidden_layers": 12,
21
+ "output_past": true,
22
+ "pad_token_id": 1,
23
+ "position_embedding_type": "absolute",
24
+ "problem_type": "single_label_classification",
25
+ "transformers_version": "4.56.0",
26
+ "type_vocab_size": 1,
27
+ "use_cache": true,
28
+ "vocab_size": 50265
29
+ }
graphcodebert-vanilla/final_model/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
graphcodebert-vanilla/final_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:babd5890473a83a2cc134eea6510f56a09e9b665511011c2ddbd1e2d9d7bbf66
3
+ size 498612824
graphcodebert-vanilla/final_model/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
graphcodebert-vanilla/final_model/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff