File size: 5,446 Bytes
939c6d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
{
  "best_metric": 0.7460203642621539,
  "best_model_checkpoint": "/nfs/production/literature/amina-mardiyyah/new_data/Ner_Pipeline/src/ner_pipeline/model_outputs/ner/CeLLaTe_no_vague_1.0/cellate2.0_tapt_base_LR_5e/base/STANDARD/BaseTrainer/no_data_aug/checkpoint-600",
  "epoch": 4.455445544554456,
  "eval_steps": 100,
  "global_step": 900,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.49504950495049505,
      "grad_norm": 0.5439660549163818,
      "learning_rate": 1.9724931232808205e-05,
      "loss": 0.7697,
      "step": 100
    },
    {
      "epoch": 0.49504950495049505,
      "eval_accuracy": 0.9607623318385651,
      "eval_f1": 0.25471864526371496,
      "eval_loss": 0.15020258724689484,
      "eval_precision": 0.30788912579957356,
      "eval_recall": 0.21720818291215402,
      "step": 100
    },
    {
      "epoch": 0.9900990099009901,
      "grad_norm": 2.310349464416504,
      "learning_rate": 1.922480620155039e-05,
      "loss": 0.1727,
      "step": 200
    },
    {
      "epoch": 0.9900990099009901,
      "eval_accuracy": 0.9619623571022548,
      "eval_f1": 0.5057967719936349,
      "eval_loss": 0.11981263011693954,
      "eval_precision": 0.40646693459992694,
      "eval_recall": 0.6693742478941035,
      "step": 200
    },
    {
      "epoch": 1.4851485148514851,
      "grad_norm": 1.5374152660369873,
      "learning_rate": 1.8724681170292574e-05,
      "loss": 0.1057,
      "step": 300
    },
    {
      "epoch": 1.4851485148514851,
      "eval_accuracy": 0.9804364302406366,
      "eval_f1": 0.6964094728800612,
      "eval_loss": 0.08184882998466492,
      "eval_precision": 0.7075442409189693,
      "eval_recall": 0.6856197352587244,
      "step": 300
    },
    {
      "epoch": 1.9801980198019802,
      "grad_norm": 0.818098783493042,
      "learning_rate": 1.822455613903476e-05,
      "loss": 0.0753,
      "step": 400
    },
    {
      "epoch": 1.9801980198019802,
      "eval_accuracy": 0.9807364365565591,
      "eval_f1": 0.720526630760024,
      "eval_loss": 0.07648279517889023,
      "eval_precision": 0.7166666666666667,
      "eval_recall": 0.7244283995186522,
      "step": 400
    },
    {
      "epoch": 2.4752475247524752,
      "grad_norm": 0.49367237091064453,
      "learning_rate": 1.7724431107776944e-05,
      "loss": 0.0555,
      "step": 500
    },
    {
      "epoch": 2.4752475247524752,
      "eval_accuracy": 0.9471357291732457,
      "eval_f1": 0.5116742081447964,
      "eval_loss": 0.1018502488732338,
      "eval_precision": 0.36590732591250325,
      "eval_recall": 0.8504813477737665,
      "step": 500
    },
    {
      "epoch": 2.9702970297029703,
      "grad_norm": 0.568962037563324,
      "learning_rate": 1.722430607651913e-05,
      "loss": 0.0511,
      "step": 600
    },
    {
      "epoch": 2.9702970297029703,
      "eval_accuracy": 0.9815417166677193,
      "eval_f1": 0.7460203642621539,
      "eval_loss": 0.0740918517112732,
      "eval_precision": 0.712798026856673,
      "eval_recall": 0.7824909747292419,
      "step": 600
    },
    {
      "epoch": 3.4653465346534653,
      "grad_norm": 0.7195326089859009,
      "learning_rate": 1.6724181045261317e-05,
      "loss": 0.0381,
      "step": 700
    },
    {
      "epoch": 3.4653465346534653,
      "eval_accuracy": 0.9810601275816333,
      "eval_f1": 0.7280469897209986,
      "eval_loss": 0.08975373208522797,
      "eval_precision": 0.7111302352266208,
      "eval_recall": 0.7457882069795427,
      "step": 700
    },
    {
      "epoch": 3.9603960396039604,
      "grad_norm": 0.6837287545204163,
      "learning_rate": 1.6224056014003503e-05,
      "loss": 0.0369,
      "step": 800
    },
    {
      "epoch": 3.9603960396039604,
      "eval_accuracy": 0.9817706688561865,
      "eval_f1": 0.7423093432536844,
      "eval_loss": 0.08456307649612427,
      "eval_precision": 0.7077762619372442,
      "eval_recall": 0.7803850782190133,
      "step": 800
    },
    {
      "epoch": 4.455445544554456,
      "grad_norm": 1.966813564300537,
      "learning_rate": 1.5723930982745687e-05,
      "loss": 0.0295,
      "step": 900
    },
    {
      "epoch": 4.455445544554456,
      "eval_accuracy": 0.9808785448114697,
      "eval_f1": 0.7300910125142206,
      "eval_loss": 0.09194136410951614,
      "eval_precision": 0.6922869471413161,
      "eval_recall": 0.7722623345367028,
      "step": 900
    },
    {
      "epoch": 4.455445544554456,
      "step": 900,
      "total_flos": 760697313381126.0,
      "train_loss": 0.1482748039563497,
      "train_runtime": 320.0844,
      "train_samples_per_second": 403.206,
      "train_steps_per_second": 12.622
    }
  ],
  "logging_steps": 100,
  "max_steps": 4040,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 100,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 3
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 760697313381126.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}