Training in progress, step 380, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 838906392
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ee5fba5b390fc66cb1e3f32ce959e31a91eb2761e2b006e4624b977adaa6779
|
| 3 |
size 838906392
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 639365221
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a54a6c36243501fd43d201d1a8f3acc70b557a919ecd49d532133252e27fc165
|
| 3 |
size 639365221
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d2a40a2f0b1c0334b5def0fdf5c80618a0bea05d5e20573d789bb7328ef2eae4
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d56bff5d768bcbc6b1abc460eaac5a172bfe59f1beb9abb05741a0af1e0d811
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 0.
|
| 4 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 20,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2680,6 +2680,154 @@
|
|
| 2680 |
"eval_samples_per_second": 3.474,
|
| 2681 |
"eval_steps_per_second": 0.906,
|
| 2682 |
"step": 360
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2683 |
}
|
| 2684 |
],
|
| 2685 |
"logging_steps": 1,
|
|
@@ -2708,7 +2856,7 @@
|
|
| 2708 |
"attributes": {}
|
| 2709 |
}
|
| 2710 |
},
|
| 2711 |
-
"total_flos": 9.
|
| 2712 |
"train_batch_size": 4,
|
| 2713 |
"trial_name": null,
|
| 2714 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 380,
|
| 3 |
+
"best_metric": 0.3527662754058838,
|
| 4 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-380",
|
| 5 |
+
"epoch": 0.6734603455914931,
|
| 6 |
"eval_steps": 20,
|
| 7 |
+
"global_step": 380,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2680 |
"eval_samples_per_second": 3.474,
|
| 2681 |
"eval_steps_per_second": 0.906,
|
| 2682 |
"step": 360
|
| 2683 |
+
},
|
| 2684 |
+
{
|
| 2685 |
+
"epoch": 0.6397873283119184,
|
| 2686 |
+
"grad_norm": 0.19102731347084045,
|
| 2687 |
+
"learning_rate": 0.00017938268614966994,
|
| 2688 |
+
"loss": 0.2565,
|
| 2689 |
+
"step": 361
|
| 2690 |
+
},
|
| 2691 |
+
{
|
| 2692 |
+
"epoch": 0.6415595923792645,
|
| 2693 |
+
"grad_norm": 0.27359429001808167,
|
| 2694 |
+
"learning_rate": 0.00017926896034306332,
|
| 2695 |
+
"loss": 0.3371,
|
| 2696 |
+
"step": 362
|
| 2697 |
+
},
|
| 2698 |
+
{
|
| 2699 |
+
"epoch": 0.6433318564466105,
|
| 2700 |
+
"grad_norm": 0.1932365894317627,
|
| 2701 |
+
"learning_rate": 0.00017915495800101594,
|
| 2702 |
+
"loss": 0.2547,
|
| 2703 |
+
"step": 363
|
| 2704 |
+
},
|
| 2705 |
+
{
|
| 2706 |
+
"epoch": 0.6451041205139566,
|
| 2707 |
+
"grad_norm": 0.19281119108200073,
|
| 2708 |
+
"learning_rate": 0.00017904067952123303,
|
| 2709 |
+
"loss": 0.218,
|
| 2710 |
+
"step": 364
|
| 2711 |
+
},
|
| 2712 |
+
{
|
| 2713 |
+
"epoch": 0.6468763845813026,
|
| 2714 |
+
"grad_norm": 0.3079637289047241,
|
| 2715 |
+
"learning_rate": 0.00017892612530238334,
|
| 2716 |
+
"loss": 0.4615,
|
| 2717 |
+
"step": 365
|
| 2718 |
+
},
|
| 2719 |
+
{
|
| 2720 |
+
"epoch": 0.6486486486486487,
|
| 2721 |
+
"grad_norm": 0.23658387362957,
|
| 2722 |
+
"learning_rate": 0.0001788112957440974,
|
| 2723 |
+
"loss": 0.4055,
|
| 2724 |
+
"step": 366
|
| 2725 |
+
},
|
| 2726 |
+
{
|
| 2727 |
+
"epoch": 0.6504209127159947,
|
| 2728 |
+
"grad_norm": 0.27093634009361267,
|
| 2729 |
+
"learning_rate": 0.00017869619124696634,
|
| 2730 |
+
"loss": 0.409,
|
| 2731 |
+
"step": 367
|
| 2732 |
+
},
|
| 2733 |
+
{
|
| 2734 |
+
"epoch": 0.6521931767833408,
|
| 2735 |
+
"grad_norm": 0.23031426966190338,
|
| 2736 |
+
"learning_rate": 0.00017858081221254048,
|
| 2737 |
+
"loss": 0.3424,
|
| 2738 |
+
"step": 368
|
| 2739 |
+
},
|
| 2740 |
+
{
|
| 2741 |
+
"epoch": 0.6539654408506868,
|
| 2742 |
+
"grad_norm": 0.22337500751018524,
|
| 2743 |
+
"learning_rate": 0.00017846515904332782,
|
| 2744 |
+
"loss": 0.3182,
|
| 2745 |
+
"step": 369
|
| 2746 |
+
},
|
| 2747 |
+
{
|
| 2748 |
+
"epoch": 0.6557377049180327,
|
| 2749 |
+
"grad_norm": 0.2885172963142395,
|
| 2750 |
+
"learning_rate": 0.00017834923214279268,
|
| 2751 |
+
"loss": 0.3405,
|
| 2752 |
+
"step": 370
|
| 2753 |
+
},
|
| 2754 |
+
{
|
| 2755 |
+
"epoch": 0.6575099689853788,
|
| 2756 |
+
"grad_norm": 0.26560068130493164,
|
| 2757 |
+
"learning_rate": 0.00017823303191535442,
|
| 2758 |
+
"loss": 0.3328,
|
| 2759 |
+
"step": 371
|
| 2760 |
+
},
|
| 2761 |
+
{
|
| 2762 |
+
"epoch": 0.6592822330527248,
|
| 2763 |
+
"grad_norm": 0.18252065777778625,
|
| 2764 |
+
"learning_rate": 0.00017811655876638578,
|
| 2765 |
+
"loss": 0.2438,
|
| 2766 |
+
"step": 372
|
| 2767 |
+
},
|
| 2768 |
+
{
|
| 2769 |
+
"epoch": 0.6610544971200709,
|
| 2770 |
+
"grad_norm": 0.19787971675395966,
|
| 2771 |
+
"learning_rate": 0.00017799981310221173,
|
| 2772 |
+
"loss": 0.2515,
|
| 2773 |
+
"step": 373
|
| 2774 |
+
},
|
| 2775 |
+
{
|
| 2776 |
+
"epoch": 0.6628267611874169,
|
| 2777 |
+
"grad_norm": 0.2631565034389496,
|
| 2778 |
+
"learning_rate": 0.00017788279533010785,
|
| 2779 |
+
"loss": 0.3755,
|
| 2780 |
+
"step": 374
|
| 2781 |
+
},
|
| 2782 |
+
{
|
| 2783 |
+
"epoch": 0.664599025254763,
|
| 2784 |
+
"grad_norm": 0.24500946700572968,
|
| 2785 |
+
"learning_rate": 0.00017776550585829896,
|
| 2786 |
+
"loss": 0.3366,
|
| 2787 |
+
"step": 375
|
| 2788 |
+
},
|
| 2789 |
+
{
|
| 2790 |
+
"epoch": 0.666371289322109,
|
| 2791 |
+
"grad_norm": 0.3467278778553009,
|
| 2792 |
+
"learning_rate": 0.00017764794509595786,
|
| 2793 |
+
"loss": 0.5403,
|
| 2794 |
+
"step": 376
|
| 2795 |
+
},
|
| 2796 |
+
{
|
| 2797 |
+
"epoch": 0.6681435533894551,
|
| 2798 |
+
"grad_norm": 0.2552179992198944,
|
| 2799 |
+
"learning_rate": 0.00017753011345320366,
|
| 2800 |
+
"loss": 0.3533,
|
| 2801 |
+
"step": 377
|
| 2802 |
+
},
|
| 2803 |
+
{
|
| 2804 |
+
"epoch": 0.6699158174568011,
|
| 2805 |
+
"grad_norm": 0.3037780225276947,
|
| 2806 |
+
"learning_rate": 0.00017741201134110042,
|
| 2807 |
+
"loss": 0.5212,
|
| 2808 |
+
"step": 378
|
| 2809 |
+
},
|
| 2810 |
+
{
|
| 2811 |
+
"epoch": 0.671688081524147,
|
| 2812 |
+
"grad_norm": 0.29784807562828064,
|
| 2813 |
+
"learning_rate": 0.0001772936391716559,
|
| 2814 |
+
"loss": 0.4664,
|
| 2815 |
+
"step": 379
|
| 2816 |
+
},
|
| 2817 |
+
{
|
| 2818 |
+
"epoch": 0.6734603455914931,
|
| 2819 |
+
"grad_norm": 0.22430609166622162,
|
| 2820 |
+
"learning_rate": 0.00017717499735781983,
|
| 2821 |
+
"loss": 0.2937,
|
| 2822 |
+
"step": 380
|
| 2823 |
+
},
|
| 2824 |
+
{
|
| 2825 |
+
"epoch": 0.6734603455914931,
|
| 2826 |
+
"eval_loss": 0.3527662754058838,
|
| 2827 |
+
"eval_runtime": 13.1985,
|
| 2828 |
+
"eval_samples_per_second": 3.485,
|
| 2829 |
+
"eval_steps_per_second": 0.909,
|
| 2830 |
+
"step": 380
|
| 2831 |
}
|
| 2832 |
],
|
| 2833 |
"logging_steps": 1,
|
|
|
|
| 2856 |
"attributes": {}
|
| 2857 |
}
|
| 2858 |
},
|
| 2859 |
+
"total_flos": 9.674563755442176e+17,
|
| 2860 |
"train_batch_size": 4,
|
| 2861 |
"trial_name": null,
|
| 2862 |
"trial_params": null
|