YureiYuri commited on
Commit
d72b347
·
verified ·
1 Parent(s): 6a332ec

theraphy_model_empathy

Browse files
checkpoint-880/config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "dim": 768,
8
+ "dropout": 0.1,
9
+ "dtype": "float32",
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3",
16
+ "4": "LABEL_4",
17
+ "5": "LABEL_5",
18
+ "6": "LABEL_6",
19
+ "7": "LABEL_7",
20
+ "8": "LABEL_8",
21
+ "9": "LABEL_9",
22
+ "10": "LABEL_10"
23
+ },
24
+ "initializer_range": 0.02,
25
+ "label2id": {
26
+ "LABEL_0": 0,
27
+ "LABEL_1": 1,
28
+ "LABEL_10": 10,
29
+ "LABEL_2": 2,
30
+ "LABEL_3": 3,
31
+ "LABEL_4": 4,
32
+ "LABEL_5": 5,
33
+ "LABEL_6": 6,
34
+ "LABEL_7": 7,
35
+ "LABEL_8": 8,
36
+ "LABEL_9": 9
37
+ },
38
+ "max_position_embeddings": 512,
39
+ "model_type": "distilbert",
40
+ "n_heads": 12,
41
+ "n_layers": 6,
42
+ "pad_token_id": 0,
43
+ "problem_type": "single_label_classification",
44
+ "qa_dropout": 0.1,
45
+ "seq_classif_dropout": 0.2,
46
+ "sinusoidal_pos_embds": false,
47
+ "tie_weights_": true,
48
+ "transformers_version": "4.57.3",
49
+ "vocab_size": 30522
50
+ }
checkpoint-880/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:178976a03cb0b4cbd4708206436f36f258f6dc5969333cd3cad933d363406ec4
3
+ size 267860252
checkpoint-880/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8f6931ced400f6aa9df9a811dc7fe7fb5383e55efcd2916861057f9fa86f5ee
3
+ size 535780171
checkpoint-880/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdf20b536b841ae00302a87a5554a163da7c3a74cb0f30f8e412080b027440fe
3
+ size 14455
checkpoint-880/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b6305b66c3dba715b1debc5f22becac5db8f36abba019141c6d03f654e2d14e
3
+ size 1465
checkpoint-880/trainer_state.json ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 880,
3
+ "best_metric": 0.03167559579014778,
4
+ "best_model_checkpoint": "therapy_model_v2/checkpoint-880",
5
+ "epoch": 5.0,
6
+ "eval_steps": 500,
7
+ "global_step": 880,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.2840909090909091,
14
+ "grad_norm": 3.655754804611206,
15
+ "learning_rate": 2.94e-05,
16
+ "loss": 2.1685,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.5681818181818182,
21
+ "grad_norm": 7.086261749267578,
22
+ "learning_rate": 2.8538767395626242e-05,
23
+ "loss": 1.6208,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.8522727272727273,
28
+ "grad_norm": 3.383866310119629,
29
+ "learning_rate": 2.704771371769384e-05,
30
+ "loss": 1.0512,
31
+ "step": 150
32
+ },
33
+ {
34
+ "epoch": 1.0,
35
+ "eval_loss": 0.6480007767677307,
36
+ "eval_runtime": 117.4999,
37
+ "eval_samples_per_second": 5.974,
38
+ "eval_steps_per_second": 0.374,
39
+ "step": 176
40
+ },
41
+ {
42
+ "epoch": 1.1363636363636362,
43
+ "grad_norm": 2.4574246406555176,
44
+ "learning_rate": 2.5556660039761434e-05,
45
+ "loss": 0.6298,
46
+ "step": 200
47
+ },
48
+ {
49
+ "epoch": 1.4204545454545454,
50
+ "grad_norm": 6.754581928253174,
51
+ "learning_rate": 2.4065606361829024e-05,
52
+ "loss": 0.3986,
53
+ "step": 250
54
+ },
55
+ {
56
+ "epoch": 1.7045454545454546,
57
+ "grad_norm": 2.3031885623931885,
58
+ "learning_rate": 2.2574552683896622e-05,
59
+ "loss": 0.3178,
60
+ "step": 300
61
+ },
62
+ {
63
+ "epoch": 1.9886363636363638,
64
+ "grad_norm": 7.972731113433838,
65
+ "learning_rate": 2.1083499005964216e-05,
66
+ "loss": 0.2452,
67
+ "step": 350
68
+ },
69
+ {
70
+ "epoch": 2.0,
71
+ "eval_loss": 0.18218687176704407,
72
+ "eval_runtime": 117.6397,
73
+ "eval_samples_per_second": 5.967,
74
+ "eval_steps_per_second": 0.374,
75
+ "step": 352
76
+ },
77
+ {
78
+ "epoch": 2.2727272727272725,
79
+ "grad_norm": 1.7412598133087158,
80
+ "learning_rate": 1.9592445328031807e-05,
81
+ "loss": 0.1585,
82
+ "step": 400
83
+ },
84
+ {
85
+ "epoch": 2.5568181818181817,
86
+ "grad_norm": 6.392980098724365,
87
+ "learning_rate": 1.8101391650099404e-05,
88
+ "loss": 0.1024,
89
+ "step": 450
90
+ },
91
+ {
92
+ "epoch": 2.840909090909091,
93
+ "grad_norm": 0.96819669008255,
94
+ "learning_rate": 1.6610337972167e-05,
95
+ "loss": 0.0486,
96
+ "step": 500
97
+ },
98
+ {
99
+ "epoch": 3.0,
100
+ "eval_loss": 0.07200995832681656,
101
+ "eval_runtime": 117.2831,
102
+ "eval_samples_per_second": 5.986,
103
+ "eval_steps_per_second": 0.375,
104
+ "step": 528
105
+ },
106
+ {
107
+ "epoch": 3.125,
108
+ "grad_norm": 0.18693216145038605,
109
+ "learning_rate": 1.5119284294234594e-05,
110
+ "loss": 0.0462,
111
+ "step": 550
112
+ },
113
+ {
114
+ "epoch": 3.409090909090909,
115
+ "grad_norm": 0.16053710877895355,
116
+ "learning_rate": 1.3628230616302189e-05,
117
+ "loss": 0.039,
118
+ "step": 600
119
+ },
120
+ {
121
+ "epoch": 3.6931818181818183,
122
+ "grad_norm": 2.7669754028320312,
123
+ "learning_rate": 1.2137176938369781e-05,
124
+ "loss": 0.0297,
125
+ "step": 650
126
+ },
127
+ {
128
+ "epoch": 3.9772727272727275,
129
+ "grad_norm": 0.07028041779994965,
130
+ "learning_rate": 1.0646123260437375e-05,
131
+ "loss": 0.0254,
132
+ "step": 700
133
+ },
134
+ {
135
+ "epoch": 4.0,
136
+ "eval_loss": 0.04889064282178879,
137
+ "eval_runtime": 117.1195,
138
+ "eval_samples_per_second": 5.994,
139
+ "eval_steps_per_second": 0.376,
140
+ "step": 704
141
+ },
142
+ {
143
+ "epoch": 4.261363636363637,
144
+ "grad_norm": 0.05612948164343834,
145
+ "learning_rate": 9.155069582504971e-06,
146
+ "loss": 0.021,
147
+ "step": 750
148
+ },
149
+ {
150
+ "epoch": 4.545454545454545,
151
+ "grad_norm": 0.3926457464694977,
152
+ "learning_rate": 7.664015904572564e-06,
153
+ "loss": 0.0114,
154
+ "step": 800
155
+ },
156
+ {
157
+ "epoch": 4.829545454545455,
158
+ "grad_norm": 5.097391128540039,
159
+ "learning_rate": 6.172962226640159e-06,
160
+ "loss": 0.0146,
161
+ "step": 850
162
+ },
163
+ {
164
+ "epoch": 5.0,
165
+ "eval_loss": 0.03167559579014778,
166
+ "eval_runtime": 117.8556,
167
+ "eval_samples_per_second": 5.956,
168
+ "eval_steps_per_second": 0.373,
169
+ "step": 880
170
+ }
171
+ ],
172
+ "logging_steps": 50,
173
+ "max_steps": 1056,
174
+ "num_input_tokens_seen": 0,
175
+ "num_train_epochs": 6,
176
+ "save_steps": 500,
177
+ "stateful_callbacks": {
178
+ "TrainerControl": {
179
+ "args": {
180
+ "should_epoch_stop": false,
181
+ "should_evaluate": false,
182
+ "should_log": false,
183
+ "should_save": true,
184
+ "should_training_stop": false
185
+ },
186
+ "attributes": {}
187
+ }
188
+ },
189
+ "total_flos": 464703974837760.0,
190
+ "train_batch_size": 16,
191
+ "trial_name": null,
192
+ "trial_params": null
193
+ }
checkpoint-880/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8268ebe96b7e7766c6c4c4a35e9afc12ad5c8915a17b02e6ae7af1c736e75297
3
+ size 5777
config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "dim": 768,
8
+ "dropout": 0.1,
9
+ "dtype": "float32",
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3",
16
+ "4": "LABEL_4",
17
+ "5": "LABEL_5",
18
+ "6": "LABEL_6",
19
+ "7": "LABEL_7",
20
+ "8": "LABEL_8",
21
+ "9": "LABEL_9",
22
+ "10": "LABEL_10"
23
+ },
24
+ "initializer_range": 0.02,
25
+ "label2id": {
26
+ "LABEL_0": 0,
27
+ "LABEL_1": 1,
28
+ "LABEL_10": 10,
29
+ "LABEL_2": 2,
30
+ "LABEL_3": 3,
31
+ "LABEL_4": 4,
32
+ "LABEL_5": 5,
33
+ "LABEL_6": 6,
34
+ "LABEL_7": 7,
35
+ "LABEL_8": 8,
36
+ "LABEL_9": 9
37
+ },
38
+ "max_position_embeddings": 512,
39
+ "model_type": "distilbert",
40
+ "n_heads": 12,
41
+ "n_layers": 6,
42
+ "pad_token_id": 0,
43
+ "problem_type": "single_label_classification",
44
+ "qa_dropout": 0.1,
45
+ "seq_classif_dropout": 0.2,
46
+ "sinusoidal_pos_embds": false,
47
+ "tie_weights_": true,
48
+ "transformers_version": "4.57.3",
49
+ "vocab_size": 30522
50
+ }
label_map.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "anger": 0,
3
+ "anxiety": 1,
4
+ "depression": 2,
5
+ "family": 3,
6
+ "general_support": 4,
7
+ "grief": 5,
8
+ "relationship": 6,
9
+ "self_esteem": 7,
10
+ "sleep_issues": 8,
11
+ "suicide": 9,
12
+ "trauma": 10
13
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:178976a03cb0b4cbd4708206436f36f258f6dc5969333cd3cad933d363406ec4
3
+ size 267860252
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "DistilBertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
training_data.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab.txt ADDED
The diff for this file is too large to render. See raw diff