FredericFan committed on
Commit
5a08389
·
verified ·
1 Parent(s): fc4cf84

Training in progress, step 2500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ef30dc9a100b240120af62823ca4707e4a35c361f060e8d6c15efa77b5e60f1
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a37da1bec04a539e9083a0690c020dd6b5a85ca6ba96130597ced0a592b992f
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12c3c123305a27e77f9affa5e1e0fa48210446cf570c456de0079a60956cc284
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b600d17da29b382922c79850abe38cc3a0b9b7af51d7d358a1ba2bfc872d80d7
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29621a933fe39840d93ef11565a36bb6ba3b5a377ed5e55e63eb1777b2373cd7
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d49a79be8c359f422cb59c77ee0154f081dfd7e588f93c61b503afdc15d8e88d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b929b010a2bf1a7268bbc9d5744f2ae71afa768419f9ef267d54626a2e8ef40d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d33bcf6e84bd960ce66a36a7bd45e4c58615ca69233e24115c69f6a6b57693ba
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.08910445868968964,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-2000",
4
- "epoch": 0.16,
5
  "eval_steps": 500,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -319,6 +319,84 @@
319
  "eval_samples_per_second": 22.763,
320
  "eval_steps_per_second": 5.691,
321
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
  }
323
  ],
324
  "logging_steps": 50,
@@ -338,7 +416,7 @@
338
  "attributes": {}
339
  }
340
  },
341
- "total_flos": 4871663124480000.0,
342
  "train_batch_size": 4,
343
  "trial_name": null,
344
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.08841572701931,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-2500",
4
+ "epoch": 0.2,
5
  "eval_steps": 500,
6
+ "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
319
  "eval_samples_per_second": 22.763,
320
  "eval_steps_per_second": 5.691,
321
  "step": 2000
322
+ },
323
+ {
324
+ "epoch": 0.164,
325
+ "grad_norm": 0.2411368191242218,
326
+ "learning_rate": 2.75412e-05,
327
+ "loss": 0.0786,
328
+ "step": 2050
329
+ },
330
+ {
331
+ "epoch": 0.168,
332
+ "grad_norm": 0.16663742065429688,
333
+ "learning_rate": 2.74812e-05,
334
+ "loss": 0.0724,
335
+ "step": 2100
336
+ },
337
+ {
338
+ "epoch": 0.172,
339
+ "grad_norm": 0.23420193791389465,
340
+ "learning_rate": 2.74212e-05,
341
+ "loss": 0.0653,
342
+ "step": 2150
343
+ },
344
+ {
345
+ "epoch": 0.176,
346
+ "grad_norm": 0.1807372272014618,
347
+ "learning_rate": 2.7361199999999998e-05,
348
+ "loss": 0.0676,
349
+ "step": 2200
350
+ },
351
+ {
352
+ "epoch": 0.18,
353
+ "grad_norm": 0.16474364697933197,
354
+ "learning_rate": 2.73012e-05,
355
+ "loss": 0.0767,
356
+ "step": 2250
357
+ },
358
+ {
359
+ "epoch": 0.184,
360
+ "grad_norm": 0.17184095084667206,
361
+ "learning_rate": 2.72412e-05,
362
+ "loss": 0.0658,
363
+ "step": 2300
364
+ },
365
+ {
366
+ "epoch": 0.188,
367
+ "grad_norm": 0.16993258893489838,
368
+ "learning_rate": 2.71812e-05,
369
+ "loss": 0.0755,
370
+ "step": 2350
371
+ },
372
+ {
373
+ "epoch": 0.192,
374
+ "grad_norm": 0.1555277407169342,
375
+ "learning_rate": 2.71212e-05,
376
+ "loss": 0.0698,
377
+ "step": 2400
378
+ },
379
+ {
380
+ "epoch": 0.196,
381
+ "grad_norm": 0.09040562808513641,
382
+ "learning_rate": 2.7061199999999998e-05,
383
+ "loss": 0.0757,
384
+ "step": 2450
385
+ },
386
+ {
387
+ "epoch": 0.2,
388
+ "grad_norm": 0.12910398840904236,
389
+ "learning_rate": 2.7001199999999998e-05,
390
+ "loss": 0.0688,
391
+ "step": 2500
392
+ },
393
+ {
394
+ "epoch": 0.2,
395
+ "eval_loss": 0.08841572701931,
396
+ "eval_runtime": 87.7555,
397
+ "eval_samples_per_second": 22.791,
398
+ "eval_steps_per_second": 5.698,
399
+ "step": 2500
400
  }
401
  ],
402
  "logging_steps": 50,
 
416
  "attributes": {}
417
  }
418
  },
419
+ "total_flos": 6089578905600000.0,
420
  "train_batch_size": 4,
421
  "trial_name": null,
422
  "trial_params": null