thilaksai77 commited on
Commit
a19cf62
·
verified ·
1 Parent(s): 352e4c7

Upload folder using huggingface_hub

Browse files
checkpoint-105/config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "dim": 768,
8
+ "dropout": 0.1,
9
+ "hidden_dim": 3072,
10
+ "id2label": {
11
+ "0": "LABEL_0",
12
+ "1": "LABEL_1",
13
+ "2": "LABEL_2",
14
+ "3": "LABEL_3",
15
+ "4": "LABEL_4",
16
+ "5": "LABEL_5",
17
+ "6": "LABEL_6"
18
+ },
19
+ "initializer_range": 0.02,
20
+ "label2id": {
21
+ "LABEL_0": 0,
22
+ "LABEL_1": 1,
23
+ "LABEL_2": 2,
24
+ "LABEL_3": 3,
25
+ "LABEL_4": 4,
26
+ "LABEL_5": 5,
27
+ "LABEL_6": 6
28
+ },
29
+ "max_position_embeddings": 512,
30
+ "model_type": "distilbert",
31
+ "n_heads": 12,
32
+ "n_layers": 6,
33
+ "pad_token_id": 0,
34
+ "problem_type": "single_label_classification",
35
+ "qa_dropout": 0.1,
36
+ "seq_classif_dropout": 0.2,
37
+ "sinusoidal_pos_embds": false,
38
+ "tie_weights_": true,
39
+ "torch_dtype": "float32",
40
+ "transformers_version": "4.51.3",
41
+ "vocab_size": 30522
42
+ }
checkpoint-105/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9674d4cbbf736675ae2019e96130ab18f551beff985e1169feb4d88a957ba80e
3
+ size 267847948
checkpoint-105/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e1f47852ccf7c02315b15f7e5ff3145c06d963002b3646c9fdcd94364dbcfba
3
+ size 535755130
checkpoint-105/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b182573f61d8bcf5eaefcbf8f98d8734b6db51b44ad36aed3a305c431539fa1
3
+ size 13990
checkpoint-105/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84c6f4dcfbdba540c43139013dc73d454183ce3b0666c57d85ed4d09b51bd261
3
+ size 1064
checkpoint-105/trainer_state.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 105,
3
+ "best_metric": 0.40600600600600606,
4
+ "best_model_checkpoint": "./emotion_classifier/checkpoint-105",
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 105,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.47619047619047616,
14
+ "grad_norm": 3.9467227458953857,
15
+ "learning_rate": 4.5333333333333335e-05,
16
+ "loss": 1.5659,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.9523809523809523,
21
+ "grad_norm": 5.448686122894287,
22
+ "learning_rate": 4.057142857142857e-05,
23
+ "loss": 1.5183,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 1.0,
28
+ "eval_accuracy": 0.5047619047619047,
29
+ "eval_f1": 0.40600600600600606,
30
+ "eval_loss": 1.4387609958648682,
31
+ "eval_runtime": 0.7949,
32
+ "eval_samples_per_second": 132.095,
33
+ "eval_steps_per_second": 17.613,
34
+ "step": 105
35
+ }
36
+ ],
37
+ "logging_steps": 50,
38
+ "max_steps": 525,
39
+ "num_input_tokens_seen": 0,
40
+ "num_train_epochs": 5,
41
+ "save_steps": 500,
42
+ "stateful_callbacks": {
43
+ "TrainerControl": {
44
+ "args": {
45
+ "should_epoch_stop": false,
46
+ "should_evaluate": false,
47
+ "should_log": false,
48
+ "should_save": true,
49
+ "should_training_stop": false
50
+ },
51
+ "attributes": {}
52
+ }
53
+ },
54
+ "total_flos": 27820634204160.0,
55
+ "train_batch_size": 8,
56
+ "trial_name": null,
57
+ "trial_params": null
58
+ }
checkpoint-105/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a49a894af2c442b2f9e672d65a2bd022e494e122da2ed91ab9df1e3b6223efc4
3
+ size 5240
checkpoint-210/config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "dim": 768,
8
+ "dropout": 0.1,
9
+ "hidden_dim": 3072,
10
+ "id2label": {
11
+ "0": "LABEL_0",
12
+ "1": "LABEL_1",
13
+ "2": "LABEL_2",
14
+ "3": "LABEL_3",
15
+ "4": "LABEL_4",
16
+ "5": "LABEL_5",
17
+ "6": "LABEL_6"
18
+ },
19
+ "initializer_range": 0.02,
20
+ "label2id": {
21
+ "LABEL_0": 0,
22
+ "LABEL_1": 1,
23
+ "LABEL_2": 2,
24
+ "LABEL_3": 3,
25
+ "LABEL_4": 4,
26
+ "LABEL_5": 5,
27
+ "LABEL_6": 6
28
+ },
29
+ "max_position_embeddings": 512,
30
+ "model_type": "distilbert",
31
+ "n_heads": 12,
32
+ "n_layers": 6,
33
+ "pad_token_id": 0,
34
+ "problem_type": "single_label_classification",
35
+ "qa_dropout": 0.1,
36
+ "seq_classif_dropout": 0.2,
37
+ "sinusoidal_pos_embds": false,
38
+ "tie_weights_": true,
39
+ "torch_dtype": "float32",
40
+ "transformers_version": "4.51.3",
41
+ "vocab_size": 30522
42
+ }
checkpoint-210/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e32c326b08530484bb680be45def3c0c05aa51062e9bd4ea3ecbdac86d45554
3
+ size 267847948
checkpoint-210/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54189f627d80098080472a760995abaa8dc795b1a41dfeef78aee57343862600
3
+ size 535755130
checkpoint-210/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06e55bfc8723f269a626afca0be6f7def5753f3bb265436b94c5580b703cfcc7
3
+ size 13990
checkpoint-210/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15ed379ceff0536ca0daf23c0161da7311b05f76788d30fa7fb573968607c2e1
3
+ size 1064
checkpoint-210/trainer_state.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 210,
3
+ "best_metric": 0.48246327929496247,
4
+ "best_model_checkpoint": "./emotion_classifier/checkpoint-210",
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 210,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.47619047619047616,
14
+ "grad_norm": 3.9467227458953857,
15
+ "learning_rate": 4.5333333333333335e-05,
16
+ "loss": 1.5659,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.9523809523809523,
21
+ "grad_norm": 5.448686122894287,
22
+ "learning_rate": 4.057142857142857e-05,
23
+ "loss": 1.5183,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 1.0,
28
+ "eval_accuracy": 0.5047619047619047,
29
+ "eval_f1": 0.40600600600600606,
30
+ "eval_loss": 1.4387609958648682,
31
+ "eval_runtime": 0.7949,
32
+ "eval_samples_per_second": 132.095,
33
+ "eval_steps_per_second": 17.613,
34
+ "step": 105
35
+ },
36
+ {
37
+ "epoch": 1.4285714285714286,
38
+ "grad_norm": 16.634593963623047,
39
+ "learning_rate": 3.580952380952381e-05,
40
+ "loss": 1.272,
41
+ "step": 150
42
+ },
43
+ {
44
+ "epoch": 1.9047619047619047,
45
+ "grad_norm": 6.955718517303467,
46
+ "learning_rate": 3.104761904761905e-05,
47
+ "loss": 1.2448,
48
+ "step": 200
49
+ },
50
+ {
51
+ "epoch": 2.0,
52
+ "eval_accuracy": 0.5238095238095238,
53
+ "eval_f1": 0.48246327929496247,
54
+ "eval_loss": 1.3625617027282715,
55
+ "eval_runtime": 0.6094,
56
+ "eval_samples_per_second": 172.306,
57
+ "eval_steps_per_second": 22.974,
58
+ "step": 210
59
+ }
60
+ ],
61
+ "logging_steps": 50,
62
+ "max_steps": 525,
63
+ "num_input_tokens_seen": 0,
64
+ "num_train_epochs": 5,
65
+ "save_steps": 500,
66
+ "stateful_callbacks": {
67
+ "TrainerControl": {
68
+ "args": {
69
+ "should_epoch_stop": false,
70
+ "should_evaluate": false,
71
+ "should_log": false,
72
+ "should_save": true,
73
+ "should_training_stop": false
74
+ },
75
+ "attributes": {}
76
+ }
77
+ },
78
+ "total_flos": 55641268408320.0,
79
+ "train_batch_size": 8,
80
+ "trial_name": null,
81
+ "trial_params": null
82
+ }
checkpoint-210/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a49a894af2c442b2f9e672d65a2bd022e494e122da2ed91ab9df1e3b6223efc4
3
+ size 5240
checkpoint-315/config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "dim": 768,
8
+ "dropout": 0.1,
9
+ "hidden_dim": 3072,
10
+ "id2label": {
11
+ "0": "LABEL_0",
12
+ "1": "LABEL_1",
13
+ "2": "LABEL_2",
14
+ "3": "LABEL_3",
15
+ "4": "LABEL_4",
16
+ "5": "LABEL_5",
17
+ "6": "LABEL_6"
18
+ },
19
+ "initializer_range": 0.02,
20
+ "label2id": {
21
+ "LABEL_0": 0,
22
+ "LABEL_1": 1,
23
+ "LABEL_2": 2,
24
+ "LABEL_3": 3,
25
+ "LABEL_4": 4,
26
+ "LABEL_5": 5,
27
+ "LABEL_6": 6
28
+ },
29
+ "max_position_embeddings": 512,
30
+ "model_type": "distilbert",
31
+ "n_heads": 12,
32
+ "n_layers": 6,
33
+ "pad_token_id": 0,
34
+ "problem_type": "single_label_classification",
35
+ "qa_dropout": 0.1,
36
+ "seq_classif_dropout": 0.2,
37
+ "sinusoidal_pos_embds": false,
38
+ "tie_weights_": true,
39
+ "torch_dtype": "float32",
40
+ "transformers_version": "4.51.3",
41
+ "vocab_size": 30522
42
+ }
checkpoint-315/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0251a64f98594d73df6c25e207523f85b989e8038dbeb3edfa7dbadf4233a58
3
+ size 267847948
checkpoint-315/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20a0e08d90f2c4e4c72d2fc5aa7c85384da5095b3850e2cd33f653cfbf1582b9
3
+ size 535755130
checkpoint-315/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32d32051101ec51c2b04c4ee6a6d2c7f40562e56836cbb02d6e6e3126490484d
3
+ size 13990
checkpoint-315/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55ff7933474f721fed33083fbae70a6999309b5b3222d037455709e97a053f9f
3
+ size 1064
checkpoint-315/trainer_state.json ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 315,
3
+ "best_metric": 0.4981603538892402,
4
+ "best_model_checkpoint": "./emotion_classifier/checkpoint-315",
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 315,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.47619047619047616,
14
+ "grad_norm": 3.9467227458953857,
15
+ "learning_rate": 4.5333333333333335e-05,
16
+ "loss": 1.5659,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.9523809523809523,
21
+ "grad_norm": 5.448686122894287,
22
+ "learning_rate": 4.057142857142857e-05,
23
+ "loss": 1.5183,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 1.0,
28
+ "eval_accuracy": 0.5047619047619047,
29
+ "eval_f1": 0.40600600600600606,
30
+ "eval_loss": 1.4387609958648682,
31
+ "eval_runtime": 0.7949,
32
+ "eval_samples_per_second": 132.095,
33
+ "eval_steps_per_second": 17.613,
34
+ "step": 105
35
+ },
36
+ {
37
+ "epoch": 1.4285714285714286,
38
+ "grad_norm": 16.634593963623047,
39
+ "learning_rate": 3.580952380952381e-05,
40
+ "loss": 1.272,
41
+ "step": 150
42
+ },
43
+ {
44
+ "epoch": 1.9047619047619047,
45
+ "grad_norm": 6.955718517303467,
46
+ "learning_rate": 3.104761904761905e-05,
47
+ "loss": 1.2448,
48
+ "step": 200
49
+ },
50
+ {
51
+ "epoch": 2.0,
52
+ "eval_accuracy": 0.5238095238095238,
53
+ "eval_f1": 0.48246327929496247,
54
+ "eval_loss": 1.3625617027282715,
55
+ "eval_runtime": 0.6094,
56
+ "eval_samples_per_second": 172.306,
57
+ "eval_steps_per_second": 22.974,
58
+ "step": 210
59
+ },
60
+ {
61
+ "epoch": 2.380952380952381,
62
+ "grad_norm": 6.055703639984131,
63
+ "learning_rate": 2.6285714285714286e-05,
64
+ "loss": 1.0137,
65
+ "step": 250
66
+ },
67
+ {
68
+ "epoch": 2.857142857142857,
69
+ "grad_norm": 8.498906135559082,
70
+ "learning_rate": 2.1523809523809525e-05,
71
+ "loss": 0.8427,
72
+ "step": 300
73
+ },
74
+ {
75
+ "epoch": 3.0,
76
+ "eval_accuracy": 0.5333333333333333,
77
+ "eval_f1": 0.4981603538892402,
78
+ "eval_loss": 1.4221067428588867,
79
+ "eval_runtime": 0.6125,
80
+ "eval_samples_per_second": 171.431,
81
+ "eval_steps_per_second": 22.857,
82
+ "step": 315
83
+ }
84
+ ],
85
+ "logging_steps": 50,
86
+ "max_steps": 525,
87
+ "num_input_tokens_seen": 0,
88
+ "num_train_epochs": 5,
89
+ "save_steps": 500,
90
+ "stateful_callbacks": {
91
+ "TrainerControl": {
92
+ "args": {
93
+ "should_epoch_stop": false,
94
+ "should_evaluate": false,
95
+ "should_log": false,
96
+ "should_save": true,
97
+ "should_training_stop": false
98
+ },
99
+ "attributes": {}
100
+ }
101
+ },
102
+ "total_flos": 83461902612480.0,
103
+ "train_batch_size": 8,
104
+ "trial_name": null,
105
+ "trial_params": null
106
+ }
checkpoint-315/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a49a894af2c442b2f9e672d65a2bd022e494e122da2ed91ab9df1e3b6223efc4
3
+ size 5240
checkpoint-420/config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "dim": 768,
8
+ "dropout": 0.1,
9
+ "hidden_dim": 3072,
10
+ "id2label": {
11
+ "0": "LABEL_0",
12
+ "1": "LABEL_1",
13
+ "2": "LABEL_2",
14
+ "3": "LABEL_3",
15
+ "4": "LABEL_4",
16
+ "5": "LABEL_5",
17
+ "6": "LABEL_6"
18
+ },
19
+ "initializer_range": 0.02,
20
+ "label2id": {
21
+ "LABEL_0": 0,
22
+ "LABEL_1": 1,
23
+ "LABEL_2": 2,
24
+ "LABEL_3": 3,
25
+ "LABEL_4": 4,
26
+ "LABEL_5": 5,
27
+ "LABEL_6": 6
28
+ },
29
+ "max_position_embeddings": 512,
30
+ "model_type": "distilbert",
31
+ "n_heads": 12,
32
+ "n_layers": 6,
33
+ "pad_token_id": 0,
34
+ "problem_type": "single_label_classification",
35
+ "qa_dropout": 0.1,
36
+ "seq_classif_dropout": 0.2,
37
+ "sinusoidal_pos_embds": false,
38
+ "tie_weights_": true,
39
+ "torch_dtype": "float32",
40
+ "transformers_version": "4.51.3",
41
+ "vocab_size": 30522
42
+ }
checkpoint-420/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:377b91f2fb91021914da4fa399ee8a6ab4886cb83e20678771cdf05a8dbbd700
3
+ size 267847948
checkpoint-420/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0055dcd2fe8d96751e3a18567935daf5bbf920f37ab8c4f893862e8c7b7bdf35
3
+ size 535755130
checkpoint-420/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fed77b14fe062f6db72d68cedd6fd95bae3305b7a735eef3c85da43fd15d476
3
+ size 13990
checkpoint-420/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5911cac5d0bf7691467c616988b4571da0505eb3093bd9c9df30f3813b102738
3
+ size 1064
checkpoint-420/trainer_state.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 315,
3
+ "best_metric": 0.4981603538892402,
4
+ "best_model_checkpoint": "./emotion_classifier/checkpoint-315",
5
+ "epoch": 4.0,
6
+ "eval_steps": 500,
7
+ "global_step": 420,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.47619047619047616,
14
+ "grad_norm": 3.9467227458953857,
15
+ "learning_rate": 4.5333333333333335e-05,
16
+ "loss": 1.5659,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.9523809523809523,
21
+ "grad_norm": 5.448686122894287,
22
+ "learning_rate": 4.057142857142857e-05,
23
+ "loss": 1.5183,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 1.0,
28
+ "eval_accuracy": 0.5047619047619047,
29
+ "eval_f1": 0.40600600600600606,
30
+ "eval_loss": 1.4387609958648682,
31
+ "eval_runtime": 0.7949,
32
+ "eval_samples_per_second": 132.095,
33
+ "eval_steps_per_second": 17.613,
34
+ "step": 105
35
+ },
36
+ {
37
+ "epoch": 1.4285714285714286,
38
+ "grad_norm": 16.634593963623047,
39
+ "learning_rate": 3.580952380952381e-05,
40
+ "loss": 1.272,
41
+ "step": 150
42
+ },
43
+ {
44
+ "epoch": 1.9047619047619047,
45
+ "grad_norm": 6.955718517303467,
46
+ "learning_rate": 3.104761904761905e-05,
47
+ "loss": 1.2448,
48
+ "step": 200
49
+ },
50
+ {
51
+ "epoch": 2.0,
52
+ "eval_accuracy": 0.5238095238095238,
53
+ "eval_f1": 0.48246327929496247,
54
+ "eval_loss": 1.3625617027282715,
55
+ "eval_runtime": 0.6094,
56
+ "eval_samples_per_second": 172.306,
57
+ "eval_steps_per_second": 22.974,
58
+ "step": 210
59
+ },
60
+ {
61
+ "epoch": 2.380952380952381,
62
+ "grad_norm": 6.055703639984131,
63
+ "learning_rate": 2.6285714285714286e-05,
64
+ "loss": 1.0137,
65
+ "step": 250
66
+ },
67
+ {
68
+ "epoch": 2.857142857142857,
69
+ "grad_norm": 8.498906135559082,
70
+ "learning_rate": 2.1523809523809525e-05,
71
+ "loss": 0.8427,
72
+ "step": 300
73
+ },
74
+ {
75
+ "epoch": 3.0,
76
+ "eval_accuracy": 0.5333333333333333,
77
+ "eval_f1": 0.4981603538892402,
78
+ "eval_loss": 1.4221067428588867,
79
+ "eval_runtime": 0.6125,
80
+ "eval_samples_per_second": 171.431,
81
+ "eval_steps_per_second": 22.857,
82
+ "step": 315
83
+ },
84
+ {
85
+ "epoch": 3.3333333333333335,
86
+ "grad_norm": 6.134776592254639,
87
+ "learning_rate": 1.676190476190476e-05,
88
+ "loss": 0.7736,
89
+ "step": 350
90
+ },
91
+ {
92
+ "epoch": 3.8095238095238093,
93
+ "grad_norm": 5.722315788269043,
94
+ "learning_rate": 1.2e-05,
95
+ "loss": 0.5872,
96
+ "step": 400
97
+ },
98
+ {
99
+ "epoch": 4.0,
100
+ "eval_accuracy": 0.47619047619047616,
101
+ "eval_f1": 0.46248745780765493,
102
+ "eval_loss": 1.577803373336792,
103
+ "eval_runtime": 0.6116,
104
+ "eval_samples_per_second": 171.683,
105
+ "eval_steps_per_second": 22.891,
106
+ "step": 420
107
+ }
108
+ ],
109
+ "logging_steps": 50,
110
+ "max_steps": 525,
111
+ "num_input_tokens_seen": 0,
112
+ "num_train_epochs": 5,
113
+ "save_steps": 500,
114
+ "stateful_callbacks": {
115
+ "TrainerControl": {
116
+ "args": {
117
+ "should_epoch_stop": false,
118
+ "should_evaluate": false,
119
+ "should_log": false,
120
+ "should_save": true,
121
+ "should_training_stop": false
122
+ },
123
+ "attributes": {}
124
+ }
125
+ },
126
+ "total_flos": 111282536816640.0,
127
+ "train_batch_size": 8,
128
+ "trial_name": null,
129
+ "trial_params": null
130
+ }
checkpoint-420/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a49a894af2c442b2f9e672d65a2bd022e494e122da2ed91ab9df1e3b6223efc4
3
+ size 5240
checkpoint-525/config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "dim": 768,
8
+ "dropout": 0.1,
9
+ "hidden_dim": 3072,
10
+ "id2label": {
11
+ "0": "LABEL_0",
12
+ "1": "LABEL_1",
13
+ "2": "LABEL_2",
14
+ "3": "LABEL_3",
15
+ "4": "LABEL_4",
16
+ "5": "LABEL_5",
17
+ "6": "LABEL_6"
18
+ },
19
+ "initializer_range": 0.02,
20
+ "label2id": {
21
+ "LABEL_0": 0,
22
+ "LABEL_1": 1,
23
+ "LABEL_2": 2,
24
+ "LABEL_3": 3,
25
+ "LABEL_4": 4,
26
+ "LABEL_5": 5,
27
+ "LABEL_6": 6
28
+ },
29
+ "max_position_embeddings": 512,
30
+ "model_type": "distilbert",
31
+ "n_heads": 12,
32
+ "n_layers": 6,
33
+ "pad_token_id": 0,
34
+ "problem_type": "single_label_classification",
35
+ "qa_dropout": 0.1,
36
+ "seq_classif_dropout": 0.2,
37
+ "sinusoidal_pos_embds": false,
38
+ "tie_weights_": true,
39
+ "torch_dtype": "float32",
40
+ "transformers_version": "4.51.3",
41
+ "vocab_size": 30522
42
+ }
checkpoint-525/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c06cb77a2a9a78921fbd895c24d9bdd78636312bcebd9060d23f3a7bae9efa66
3
+ size 267847948
checkpoint-525/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de371b01e484f2e92c7ca565b17eecb2a61b09fad1b95bc7af232f387f51040d
3
+ size 535755130
checkpoint-525/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:013d13e4b269a8648da58d2f40bf48e004388ea0631771b8c8e588ab75375092
3
+ size 13990
checkpoint-525/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40e2ec906fb7f5cf8c3a005865d08edd02cda76be349266c5ce9d66daa3b6a5e
3
+ size 1064
checkpoint-525/trainer_state.json ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 315,
3
+ "best_metric": 0.4981603538892402,
4
+ "best_model_checkpoint": "./emotion_classifier/checkpoint-315",
5
+ "epoch": 5.0,
6
+ "eval_steps": 500,
7
+ "global_step": 525,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.47619047619047616,
14
+ "grad_norm": 3.9467227458953857,
15
+ "learning_rate": 4.5333333333333335e-05,
16
+ "loss": 1.5659,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.9523809523809523,
21
+ "grad_norm": 5.448686122894287,
22
+ "learning_rate": 4.057142857142857e-05,
23
+ "loss": 1.5183,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 1.0,
28
+ "eval_accuracy": 0.5047619047619047,
29
+ "eval_f1": 0.40600600600600606,
30
+ "eval_loss": 1.4387609958648682,
31
+ "eval_runtime": 0.7949,
32
+ "eval_samples_per_second": 132.095,
33
+ "eval_steps_per_second": 17.613,
34
+ "step": 105
35
+ },
36
+ {
37
+ "epoch": 1.4285714285714286,
38
+ "grad_norm": 16.634593963623047,
39
+ "learning_rate": 3.580952380952381e-05,
40
+ "loss": 1.272,
41
+ "step": 150
42
+ },
43
+ {
44
+ "epoch": 1.9047619047619047,
45
+ "grad_norm": 6.955718517303467,
46
+ "learning_rate": 3.104761904761905e-05,
47
+ "loss": 1.2448,
48
+ "step": 200
49
+ },
50
+ {
51
+ "epoch": 2.0,
52
+ "eval_accuracy": 0.5238095238095238,
53
+ "eval_f1": 0.48246327929496247,
54
+ "eval_loss": 1.3625617027282715,
55
+ "eval_runtime": 0.6094,
56
+ "eval_samples_per_second": 172.306,
57
+ "eval_steps_per_second": 22.974,
58
+ "step": 210
59
+ },
60
+ {
61
+ "epoch": 2.380952380952381,
62
+ "grad_norm": 6.055703639984131,
63
+ "learning_rate": 2.6285714285714286e-05,
64
+ "loss": 1.0137,
65
+ "step": 250
66
+ },
67
+ {
68
+ "epoch": 2.857142857142857,
69
+ "grad_norm": 8.498906135559082,
70
+ "learning_rate": 2.1523809523809525e-05,
71
+ "loss": 0.8427,
72
+ "step": 300
73
+ },
74
+ {
75
+ "epoch": 3.0,
76
+ "eval_accuracy": 0.5333333333333333,
77
+ "eval_f1": 0.4981603538892402,
78
+ "eval_loss": 1.4221067428588867,
79
+ "eval_runtime": 0.6125,
80
+ "eval_samples_per_second": 171.431,
81
+ "eval_steps_per_second": 22.857,
82
+ "step": 315
83
+ },
84
+ {
85
+ "epoch": 3.3333333333333335,
86
+ "grad_norm": 6.134776592254639,
87
+ "learning_rate": 1.676190476190476e-05,
88
+ "loss": 0.7736,
89
+ "step": 350
90
+ },
91
+ {
92
+ "epoch": 3.8095238095238093,
93
+ "grad_norm": 5.722315788269043,
94
+ "learning_rate": 1.2e-05,
95
+ "loss": 0.5872,
96
+ "step": 400
97
+ },
98
+ {
99
+ "epoch": 4.0,
100
+ "eval_accuracy": 0.47619047619047616,
101
+ "eval_f1": 0.46248745780765493,
102
+ "eval_loss": 1.577803373336792,
103
+ "eval_runtime": 0.6116,
104
+ "eval_samples_per_second": 171.683,
105
+ "eval_steps_per_second": 22.891,
106
+ "step": 420
107
+ },
108
+ {
109
+ "epoch": 4.285714285714286,
110
+ "grad_norm": 13.850353240966797,
111
+ "learning_rate": 7.238095238095238e-06,
112
+ "loss": 0.4943,
113
+ "step": 450
114
+ },
115
+ {
116
+ "epoch": 4.761904761904762,
117
+ "grad_norm": 4.647997856140137,
118
+ "learning_rate": 2.4761904761904764e-06,
119
+ "loss": 0.3847,
120
+ "step": 500
121
+ },
122
+ {
123
+ "epoch": 5.0,
124
+ "eval_accuracy": 0.4666666666666667,
125
+ "eval_f1": 0.4557905130135145,
126
+ "eval_loss": 1.6667375564575195,
127
+ "eval_runtime": 0.6136,
128
+ "eval_samples_per_second": 171.12,
129
+ "eval_steps_per_second": 22.816,
130
+ "step": 525
131
+ }
132
+ ],
133
+ "logging_steps": 50,
134
+ "max_steps": 525,
135
+ "num_input_tokens_seen": 0,
136
+ "num_train_epochs": 5,
137
+ "save_steps": 500,
138
+ "stateful_callbacks": {
139
+ "TrainerControl": {
140
+ "args": {
141
+ "should_epoch_stop": false,
142
+ "should_evaluate": false,
143
+ "should_log": false,
144
+ "should_save": true,
145
+ "should_training_stop": true
146
+ },
147
+ "attributes": {}
148
+ }
149
+ },
150
+ "total_flos": 139103171020800.0,
151
+ "train_batch_size": 8,
152
+ "trial_name": null,
153
+ "trial_params": null
154
+ }
checkpoint-525/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a49a894af2c442b2f9e672d65a2bd022e494e122da2ed91ab9df1e3b6223efc4
3
+ size 5240
config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "dim": 768,
8
+ "dropout": 0.1,
9
+ "hidden_dim": 3072,
10
+ "id2label": {
11
+ "0": "LABEL_0",
12
+ "1": "LABEL_1",
13
+ "2": "LABEL_2",
14
+ "3": "LABEL_3",
15
+ "4": "LABEL_4",
16
+ "5": "LABEL_5",
17
+ "6": "LABEL_6"
18
+ },
19
+ "initializer_range": 0.02,
20
+ "label2id": {
21
+ "LABEL_0": 0,
22
+ "LABEL_1": 1,
23
+ "LABEL_2": 2,
24
+ "LABEL_3": 3,
25
+ "LABEL_4": 4,
26
+ "LABEL_5": 5,
27
+ "LABEL_6": 6
28
+ },
29
+ "max_position_embeddings": 512,
30
+ "model_type": "distilbert",
31
+ "n_heads": 12,
32
+ "n_layers": 6,
33
+ "pad_token_id": 0,
34
+ "problem_type": "single_label_classification",
35
+ "qa_dropout": 0.1,
36
+ "seq_classif_dropout": 0.2,
37
+ "sinusoidal_pos_embds": false,
38
+ "tie_weights_": true,
39
+ "torch_dtype": "float32",
40
+ "transformers_version": "4.51.3",
41
+ "vocab_size": 30522
42
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0251a64f98594d73df6c25e207523f85b989e8038dbeb3edfa7dbadf4233a58
3
+ size 267847948
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "DistilBertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff