rendchevi committed on
Commit
99284f0
·
verified ·
1 Parent(s): 2742400

End of training

Browse files
Files changed (4) hide show
  1. README.md +66 -0
  2. all_results.json +7 -0
  3. train_results.json +7 -0
  4. trainer_state.json +126 -0
README.md ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ base_model: FacebookAI/roberta-base
4
+ tags:
5
+ - generated_from_trainer
6
+ metrics:
7
+ - accuracy
8
+ - f1
9
+ - precision
10
+ - recall
11
+ model-index:
12
+ - name: roberta-base-CD_baseline
13
+ results: []
14
+ ---
15
+
16
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
17
+ should probably proofread and complete it, then remove this comment. -->
18
+
19
+ # roberta-base-CD_baseline
20
+
21
+ This model is a fine-tuned version of [FacebookAI/roberta-base](https://huggingface.co/FacebookAI/roberta-base) on an unknown dataset.
22
+ It achieves the following results on the evaluation set:
23
+ - Loss: 1.6700
24
+ - Accuracy: 0.4
25
+ - F1: 0.3310
26
+ - Precision: 0.3202
27
+ - Recall: 0.4
28
+
29
+ ## Model description
30
+
31
+ More information needed
32
+
33
+ ## Intended uses & limitations
34
+
35
+ More information needed
36
+
37
+ ## Training and evaluation data
38
+
39
+ More information needed
40
+
41
+ ## Training procedure
42
+
43
+ ### Training hyperparameters
44
+
45
+ The following hyperparameters were used during training:
46
+ - learning_rate: 2e-05
47
+ - train_batch_size: 16
48
+ - eval_batch_size: 16
49
+ - seed: 42
50
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
51
+ - lr_scheduler_type: linear
52
+ - num_epochs: 1
53
+
54
+ ### Training results
55
+
56
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 | Precision | Recall |
57
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|
58
+ | 1.6396 | 1.0 | 125 | 1.6700 | 0.4 | 0.3310 | 0.3202 | 0.4 |
59
+
60
+
61
+ ### Framework versions
62
+
63
+ - Transformers 4.38.0
64
+ - PyTorch 2.8.0+cu128
65
+ - Datasets 4.2.0
66
+ - Tokenizers 0.15.2
all_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 1.857890853881836,
4
+ "train_runtime": 33.8699,
5
+ "train_samples_per_second": 58.931,
6
+ "train_steps_per_second": 3.691
7
+ }
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 1.857890853881836,
4
+ "train_runtime": 33.8699,
5
+ "train_samples_per_second": 58.931,
6
+ "train_steps_per_second": 3.691
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.6699775457382202,
3
+ "best_model_checkpoint": "roberta-base-CD_baseline/checkpoint-125",
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 125,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 3.975125312805176,
14
+ "learning_rate": 1.8400000000000003e-05,
15
+ "loss": 2.2918,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 6.948821544647217,
21
+ "learning_rate": 1.6800000000000002e-05,
22
+ "loss": 2.1418,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 7.936761379241943,
28
+ "learning_rate": 1.5200000000000002e-05,
29
+ "loss": 1.9934,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 13.492217063903809,
35
+ "learning_rate": 1.3600000000000002e-05,
36
+ "loss": 1.9768,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 9.684530258178711,
42
+ "learning_rate": 1.2e-05,
43
+ "loss": 1.7049,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 21.087621688842773,
49
+ "learning_rate": 1.04e-05,
50
+ "loss": 1.887,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 2.634740114212036,
56
+ "learning_rate": 8.8e-06,
57
+ "loss": 1.8188,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 2.7339694499969482,
63
+ "learning_rate": 7.2000000000000005e-06,
64
+ "loss": 1.7238,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 16.557479858398438,
70
+ "learning_rate": 5.600000000000001e-06,
71
+ "loss": 1.5953,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 21.172969818115234,
77
+ "learning_rate": 4.000000000000001e-06,
78
+ "loss": 1.7577,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 3.875187635421753,
84
+ "learning_rate": 2.4000000000000003e-06,
85
+ "loss": 1.7686,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 13.141642570495605,
91
+ "learning_rate": 8.000000000000001e-07,
92
+ "loss": 1.6396,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 1.0,
97
+ "eval_accuracy": 0.4,
98
+ "eval_f1": 0.33096034764074916,
99
+ "eval_loss": 1.6699775457382202,
100
+ "eval_precision": 0.3202175398971738,
101
+ "eval_recall": 0.4,
102
+ "eval_runtime": 1.3009,
103
+ "eval_samples_per_second": 176.8,
104
+ "eval_steps_per_second": 11.53,
105
+ "step": 125
106
+ },
107
+ {
108
+ "epoch": 1.0,
109
+ "step": 125,
110
+ "total_flos": 284581711923216.0,
111
+ "train_loss": 1.857890853881836,
112
+ "train_runtime": 33.8699,
113
+ "train_samples_per_second": 58.931,
114
+ "train_steps_per_second": 3.691
115
+ }
116
+ ],
117
+ "logging_steps": 10,
118
+ "max_steps": 125,
119
+ "num_input_tokens_seen": 0,
120
+ "num_train_epochs": 1,
121
+ "save_steps": 500,
122
+ "total_flos": 284581711923216.0,
123
+ "train_batch_size": 16,
124
+ "trial_name": null,
125
+ "trial_params": null
126
+ }