robertou2 commited on
Commit
ffbe7b6
·
verified ·
1 Parent(s): c70dcd9

Upload folder using huggingface_hub

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f59f1a583963d421ac23948e79a9a6d7a43d73f7425dc3f81a1c3fba0234b14
3
  size 738232680
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90f012c442accfa6f38a4282c517cdcc4287c8d382b12c8202d20a6d21141576
3
  size 738232680
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62dee84323e49bce2894834a449efb717fb4f5c0e43b0fc53908d8314f0fea2d
3
  size 1476611275
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d92483e7ef82d62e4cefd6d7ae69e21b950d46ba883869dbfcced86267a001e
3
  size 1476611275
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:adb5558d81e37e22ad7ef00a5b98b4bbabdb72c5cf774602127ab6ff043ccfad
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d1bc75e2fb0afb4cdce6286143bee936f37febf68cc744d52dbaaafbfa7bcf7
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3105ebe8471f9890c3eb1f20cc0f2520fa5fdb0128474bbc87e607b2ec7c53dc
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7db8e7bffe73040ea5cd93ece4d9c142345880217e710213ff0948759dd1246e
3
  size 1465
trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 500,
3
- "best_metric": 0.0006410721107386053,
4
- "best_model_checkpoint": "/content/drive/MyDrive/lora_model/outputs/task15_microsoft/Phi-4-mini-instruct/checkpoint-500",
5
- "epoch": 33.333333333333336,
6
  "eval_steps": 500,
7
- "global_step": 500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -413,111 +413,6 @@
413
  "eval_samples_per_second": 8.795,
414
  "eval_steps_per_second": 1.173,
415
  "step": 405
416
- },
417
- {
418
- "epoch": 28.0,
419
- "grad_norm": 0.015260938555002213,
420
- "learning_rate": 3.3628723335213885e-06,
421
- "loss": 0.0007,
422
- "step": 420
423
- },
424
- {
425
- "epoch": 28.0,
426
- "eval_loss": 0.0006676352932117879,
427
- "eval_runtime": 3.4094,
428
- "eval_samples_per_second": 8.799,
429
- "eval_steps_per_second": 1.173,
430
- "step": 420
431
- },
432
- {
433
- "epoch": 29.0,
434
- "grad_norm": 0.01491440087556839,
435
- "learning_rate": 2.250035618801241e-06,
436
- "loss": 0.0007,
437
- "step": 435
438
- },
439
- {
440
- "epoch": 29.0,
441
- "eval_loss": 0.0006499449955299497,
442
- "eval_runtime": 3.409,
443
- "eval_samples_per_second": 8.8,
444
- "eval_steps_per_second": 1.173,
445
- "step": 435
446
- },
447
- {
448
- "epoch": 30.0,
449
- "grad_norm": 0.014446167275309563,
450
- "learning_rate": 1.351802905002386e-06,
451
- "loss": 0.0006,
452
- "step": 450
453
- },
454
- {
455
- "epoch": 30.0,
456
- "eval_loss": 0.0006467067869380116,
457
- "eval_runtime": 3.4096,
458
- "eval_samples_per_second": 8.799,
459
- "eval_steps_per_second": 1.173,
460
- "step": 450
461
- },
462
- {
463
- "epoch": 31.0,
464
- "grad_norm": 0.013305812142789364,
465
- "learning_rate": 6.766473629355452e-07,
466
- "loss": 0.0006,
467
- "step": 465
468
- },
469
- {
470
- "epoch": 31.0,
471
- "eval_loss": 0.0006454469985328615,
472
- "eval_runtime": 3.4011,
473
- "eval_samples_per_second": 8.821,
474
- "eval_steps_per_second": 1.176,
475
- "step": 465
476
- },
477
- {
478
- "epoch": 32.0,
479
- "grad_norm": 0.015049861744046211,
480
- "learning_rate": 2.3093784127863062e-07,
481
- "loss": 0.0006,
482
- "step": 480
483
- },
484
- {
485
- "epoch": 32.0,
486
- "eval_loss": 0.0006429204368032515,
487
- "eval_runtime": 3.404,
488
- "eval_samples_per_second": 8.813,
489
- "eval_steps_per_second": 1.175,
490
- "step": 480
491
- },
492
- {
493
- "epoch": 33.0,
494
- "grad_norm": 0.014723357744514942,
495
- "learning_rate": 1.8878788223009036e-08,
496
- "loss": 0.0006,
497
- "step": 495
498
- },
499
- {
500
- "epoch": 33.0,
501
- "eval_loss": 0.0006475438713096082,
502
- "eval_runtime": 3.4074,
503
- "eval_samples_per_second": 8.804,
504
- "eval_steps_per_second": 1.174,
505
- "step": 495
506
- },
507
- {
508
- "epoch": 33.333333333333336,
509
- "grad_norm": 0.014316793531179428,
510
- "learning_rate": 5.244749650301639e-10,
511
- "loss": 0.0007,
512
- "step": 500
513
- },
514
- {
515
- "epoch": 33.333333333333336,
516
- "eval_loss": 0.0006410721107386053,
517
- "eval_runtime": 3.4172,
518
- "eval_samples_per_second": 8.779,
519
- "eval_steps_per_second": 1.171,
520
- "step": 500
521
  }
522
  ],
523
  "logging_steps": 10,
@@ -532,12 +427,12 @@
532
  "should_evaluate": false,
533
  "should_log": false,
534
  "should_save": true,
535
- "should_training_stop": true
536
  },
537
  "attributes": {}
538
  }
539
  },
540
- "total_flos": 2.279189598799872e+16,
541
  "train_batch_size": 2,
542
  "trial_name": null,
543
  "trial_params": null
 
1
  {
2
+ "best_global_step": 405,
3
+ "best_metric": 0.0006800881819799542,
4
+ "best_model_checkpoint": "/content/drive/MyDrive/lora_model/outputs/task15_microsoft/Phi-4-mini-instruct/checkpoint-405",
5
+ "epoch": 27.0,
6
  "eval_steps": 500,
7
+ "global_step": 405,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
413
  "eval_samples_per_second": 8.795,
414
  "eval_steps_per_second": 1.173,
415
  "step": 405
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416
  }
417
  ],
418
  "logging_steps": 10,
 
427
  "should_evaluate": false,
428
  "should_log": false,
429
  "should_save": true,
430
+ "should_training_stop": false
431
  },
432
  "attributes": {}
433
  }
434
  },
435
+ "total_flos": 1.846419663323136e+16,
436
  "train_batch_size": 2,
437
  "trial_name": null,
438
  "trial_params": null