File size: 10,739 Bytes
dd05793
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
{
  "best_metric": 0.9679633867276888,
  "best_model_checkpoint": "/content/drive/MyDrive/Papers/RAG_3GPP/models/checkpoints/embedding/bge-small-telecom_10e_256bs/checkpoint-150",
  "epoch": 6.857142857142857,
  "eval_steps": 15,
  "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 1.681250810623169,
      "learning_rate": 3.571428571428572e-05,
      "loss": 0.824,
      "step": 15
    },
    {
      "epoch": 0.7142857142857143,
      "eval_loss": 0.13330750167369843,
      "eval_runtime": 3.6814,
      "eval_samples_per_second": 356.115,
      "eval_steps_per_second": 1.63,
      "eval_telecom-ir-eval_cosine_accuracy@1": 0.9397406559877955,
      "eval_telecom-ir-eval_cosine_accuracy@10": 0.9931350114416476,
      "eval_telecom-ir-eval_cosine_accuracy@3": 0.9839816933638444,
      "eval_telecom-ir-eval_cosine_accuracy@5": 0.9893211289092296,
      "eval_telecom-ir-eval_cosine_map@100": 0.9625163452108533,
      "eval_telecom-ir-eval_cosine_mrr@10": 0.9623769568849659,
      "eval_telecom-ir-eval_cosine_ndcg@10": 0.9701258981216676,
      "eval_telecom-ir-eval_cosine_precision@1": 0.9397406559877955,
      "eval_telecom-ir-eval_cosine_recall@1": 0.9397406559877955,
      "step": 15
    },
    {
      "epoch": 1.380952380952381,
      "grad_norm": 0.8189207315444946,
      "learning_rate": 4.972077065562821e-05,
      "loss": 0.1731,
      "step": 30
    },
    {
      "epoch": 1.380952380952381,
      "eval_loss": 0.07593704760074615,
      "eval_runtime": 4.0688,
      "eval_samples_per_second": 322.209,
      "eval_steps_per_second": 1.475,
      "eval_telecom-ir-eval_cosine_accuracy@1": 0.9565217391304348,
      "eval_telecom-ir-eval_cosine_accuracy@10": 0.9938977879481312,
      "eval_telecom-ir-eval_cosine_accuracy@3": 0.9877955758962624,
      "eval_telecom-ir-eval_cosine_accuracy@5": 0.9908466819221968,
      "eval_telecom-ir-eval_cosine_map@100": 0.9723266300874301,
      "eval_telecom-ir-eval_cosine_mrr@10": 0.9721883210441564,
      "eval_telecom-ir-eval_cosine_ndcg@10": 0.9776352051817517,
      "eval_telecom-ir-eval_cosine_precision@1": 0.9565217391304348,
      "eval_telecom-ir-eval_cosine_recall@1": 0.9565217391304348,
      "step": 30
    },
    {
      "epoch": 2.0476190476190474,
      "grad_norm": 0.7057574391365051,
      "learning_rate": 4.803690529676019e-05,
      "loss": 0.0917,
      "step": 45
    },
    {
      "epoch": 2.0476190476190474,
      "eval_loss": 0.06566686183214188,
      "eval_runtime": 3.7186,
      "eval_samples_per_second": 352.553,
      "eval_steps_per_second": 1.614,
      "eval_telecom-ir-eval_cosine_accuracy@1": 0.9649122807017544,
      "eval_telecom-ir-eval_cosine_accuracy@10": 0.9931350114416476,
      "eval_telecom-ir-eval_cosine_accuracy@3": 0.9900839054157132,
      "eval_telecom-ir-eval_cosine_accuracy@5": 0.9908466819221968,
      "eval_telecom-ir-eval_cosine_map@100": 0.9768047979761636,
      "eval_telecom-ir-eval_cosine_mrr@10": 0.9765700483091787,
      "eval_telecom-ir-eval_cosine_ndcg@10": 0.9807364362901521,
      "eval_telecom-ir-eval_cosine_precision@1": 0.9649122807017544,
      "eval_telecom-ir-eval_cosine_recall@1": 0.9649122807017544,
      "step": 45
    },
    {
      "epoch": 2.761904761904762,
      "grad_norm": 0.7498806118965149,
      "learning_rate": 4.4928312680573064e-05,
      "loss": 0.0676,
      "step": 60
    },
    {
      "epoch": 2.761904761904762,
      "eval_loss": 0.06091764196753502,
      "eval_runtime": 3.7927,
      "eval_samples_per_second": 345.667,
      "eval_steps_per_second": 1.582,
      "eval_telecom-ir-eval_cosine_accuracy@1": 0.9641495041952708,
      "eval_telecom-ir-eval_cosine_accuracy@10": 0.9931350114416476,
      "eval_telecom-ir-eval_cosine_accuracy@3": 0.9916094584286804,
      "eval_telecom-ir-eval_cosine_accuracy@5": 0.9916094584286804,
      "eval_telecom-ir-eval_cosine_map@100": 0.977428148947981,
      "eval_telecom-ir-eval_cosine_mrr@10": 0.9771802695143658,
      "eval_telecom-ir-eval_cosine_ndcg@10": 0.9812569737659373,
      "eval_telecom-ir-eval_cosine_precision@1": 0.9641495041952708,
      "eval_telecom-ir-eval_cosine_recall@1": 0.9641495041952708,
      "step": 60
    },
    {
      "epoch": 3.4285714285714284,
      "grad_norm": 0.48658156394958496,
      "learning_rate": 4.058724504646834e-05,
      "loss": 0.0435,
      "step": 75
    },
    {
      "epoch": 3.4285714285714284,
      "eval_loss": 0.05956002324819565,
      "eval_runtime": 4.2667,
      "eval_samples_per_second": 307.261,
      "eval_steps_per_second": 1.406,
      "eval_telecom-ir-eval_cosine_accuracy@1": 0.9649122807017544,
      "eval_telecom-ir-eval_cosine_accuracy@10": 0.9931350114416476,
      "eval_telecom-ir-eval_cosine_accuracy@3": 0.9916094584286804,
      "eval_telecom-ir-eval_cosine_accuracy@5": 0.9916094584286804,
      "eval_telecom-ir-eval_cosine_map@100": 0.978052610298987,
      "eval_telecom-ir-eval_cosine_mrr@10": 0.9778295376121463,
      "eval_telecom-ir-eval_cosine_ndcg@10": 0.9817518617980646,
      "eval_telecom-ir-eval_cosine_precision@1": 0.9649122807017544,
      "eval_telecom-ir-eval_cosine_recall@1": 0.9649122807017544,
      "step": 75
    },
    {
      "epoch": 4.095238095238095,
      "grad_norm": 0.4985809624195099,
      "learning_rate": 3.5282177578265296e-05,
      "loss": 0.038,
      "step": 90
    },
    {
      "epoch": 4.095238095238095,
      "eval_loss": 0.060632411390542984,
      "eval_runtime": 4.6488,
      "eval_samples_per_second": 282.008,
      "eval_steps_per_second": 1.291,
      "eval_telecom-ir-eval_cosine_accuracy@1": 0.9649122807017544,
      "eval_telecom-ir-eval_cosine_accuracy@10": 0.9931350114416476,
      "eval_telecom-ir-eval_cosine_accuracy@3": 0.9916094584286804,
      "eval_telecom-ir-eval_cosine_accuracy@5": 0.992372234935164,
      "eval_telecom-ir-eval_cosine_map@100": 0.9775869566334031,
      "eval_telecom-ir-eval_cosine_mrr@10": 0.9773646071700992,
      "eval_telecom-ir-eval_cosine_ndcg@10": 0.9813932046352999,
      "eval_telecom-ir-eval_cosine_precision@1": 0.9649122807017544,
      "eval_telecom-ir-eval_cosine_recall@1": 0.9649122807017544,
      "step": 90
    },
    {
      "epoch": 4.809523809523809,
      "grad_norm": 0.4105435609817505,
      "learning_rate": 2.9341204441673266e-05,
      "loss": 0.0332,
      "step": 105
    },
    {
      "epoch": 4.809523809523809,
      "eval_loss": 0.05935605987906456,
      "eval_runtime": 4.0644,
      "eval_samples_per_second": 322.554,
      "eval_steps_per_second": 1.476,
      "eval_telecom-ir-eval_cosine_accuracy@1": 0.965675057208238,
      "eval_telecom-ir-eval_cosine_accuracy@10": 0.9931350114416476,
      "eval_telecom-ir-eval_cosine_accuracy@3": 0.9916094584286804,
      "eval_telecom-ir-eval_cosine_accuracy@5": 0.992372234935164,
      "eval_telecom-ir-eval_cosine_map@100": 0.9783638236659703,
      "eval_telecom-ir-eval_cosine_mrr@10": 0.9781273836765828,
      "eval_telecom-ir-eval_cosine_ndcg@10": 0.9819743331685896,
      "eval_telecom-ir-eval_cosine_precision@1": 0.965675057208238,
      "eval_telecom-ir-eval_cosine_recall@1": 0.965675057208238,
      "step": 105
    },
    {
      "epoch": 5.476190476190476,
      "grad_norm": 0.468258261680603,
      "learning_rate": 2.3131747660339394e-05,
      "loss": 0.0269,
      "step": 120
    },
    {
      "epoch": 5.476190476190476,
      "eval_loss": 0.060672808438539505,
      "eval_runtime": 4.0797,
      "eval_samples_per_second": 321.343,
      "eval_steps_per_second": 1.471,
      "eval_telecom-ir-eval_cosine_accuracy@1": 0.9664378337147216,
      "eval_telecom-ir-eval_cosine_accuracy@10": 0.9931350114416476,
      "eval_telecom-ir-eval_cosine_accuracy@3": 0.9908466819221968,
      "eval_telecom-ir-eval_cosine_accuracy@5": 0.9916094584286804,
      "eval_telecom-ir-eval_cosine_map@100": 0.9780891289133677,
      "eval_telecom-ir-eval_cosine_mrr@10": 0.9778688871938299,
      "eval_telecom-ir-eval_cosine_ndcg@10": 0.9817380288044749,
      "eval_telecom-ir-eval_cosine_precision@1": 0.9664378337147216,
      "eval_telecom-ir-eval_cosine_recall@1": 0.9664378337147216,
      "step": 120
    },
    {
      "epoch": 6.142857142857143,
      "grad_norm": 0.192308709025383,
      "learning_rate": 1.7037833743707892e-05,
      "loss": 0.0219,
      "step": 135
    },
    {
      "epoch": 6.142857142857143,
      "eval_loss": 0.06004022806882858,
      "eval_runtime": 3.6988,
      "eval_samples_per_second": 354.443,
      "eval_steps_per_second": 1.622,
      "eval_telecom-ir-eval_cosine_accuracy@1": 0.965675057208238,
      "eval_telecom-ir-eval_cosine_accuracy@10": 0.9938977879481312,
      "eval_telecom-ir-eval_cosine_accuracy@3": 0.9908466819221968,
      "eval_telecom-ir-eval_cosine_accuracy@5": 0.9916094584286804,
      "eval_telecom-ir-eval_cosine_map@100": 0.9779666698415427,
      "eval_telecom-ir-eval_cosine_mrr@10": 0.9778095601322145,
      "eval_telecom-ir-eval_cosine_ndcg@10": 0.9818676160795978,
      "eval_telecom-ir-eval_cosine_precision@1": 0.965675057208238,
      "eval_telecom-ir-eval_cosine_recall@1": 0.965675057208238,
      "step": 135
    },
    {
      "epoch": 6.857142857142857,
      "grad_norm": 0.3330775499343872,
      "learning_rate": 1.1436343403356017e-05,
      "loss": 0.0244,
      "step": 150
    },
    {
      "epoch": 6.857142857142857,
      "eval_loss": 0.05985964834690094,
      "eval_runtime": 3.8386,
      "eval_samples_per_second": 341.53,
      "eval_steps_per_second": 1.563,
      "eval_telecom-ir-eval_cosine_accuracy@1": 0.9679633867276888,
      "eval_telecom-ir-eval_cosine_accuracy@10": 0.992372234935164,
      "eval_telecom-ir-eval_cosine_accuracy@3": 0.9916094584286804,
      "eval_telecom-ir-eval_cosine_accuracy@5": 0.9916094584286804,
      "eval_telecom-ir-eval_cosine_map@100": 0.9791402442094453,
      "eval_telecom-ir-eval_cosine_mrr@10": 0.9788647342995168,
      "eval_telecom-ir-eval_cosine_ndcg@10": 0.9823240649953693,
      "eval_telecom-ir-eval_cosine_precision@1": 0.9679633867276888,
      "eval_telecom-ir-eval_cosine_recall@1": 0.9679633867276888,
      "step": 150
    }
  ],
  "logging_steps": 15,
  "max_steps": 210,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 15,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 256,
  "trial_name": null,
  "trial_params": null
}