Training in progress, step 40000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3555504
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33f0f667a5193eb4d35b243c5c1df790f53411abe8add627f8801dc7a6e453fb
|
| 3 |
size 3555504
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 7141515
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:338d656c8c08afe1535666116a30ec6c8bc16d2218bd3572b0ad6095b0a6fc86
|
| 3 |
size 7141515
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f75ef4c1eb025b45e99753eb3086841a7a2849ad71d5bd6afa88ad76ffcffe8a
|
| 3 |
size 14645
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1383
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00b9e9e803d002d2b860a18a7caf54803f3c31024a6925df1ba4a7df5d623e98
|
| 3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90b69642bdda2390c9b2aaa786137796ae481637fe31199c160a1cb107e6720e
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 36000,
|
| 3 |
"best_metric": 0.9893807849919393,
|
| 4 |
"best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-36000",
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 4000,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2636,6 +2636,298 @@
|
|
| 2636 |
"eval_samples_per_second": 128.405,
|
| 2637 |
"eval_steps_per_second": 8.025,
|
| 2638 |
"step": 36000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2639 |
}
|
| 2640 |
],
|
| 2641 |
"logging_steps": 100,
|
|
@@ -2650,7 +2942,7 @@
|
|
| 2650 |
"early_stopping_threshold": 0.0
|
| 2651 |
},
|
| 2652 |
"attributes": {
|
| 2653 |
-
"early_stopping_patience_counter":
|
| 2654 |
}
|
| 2655 |
},
|
| 2656 |
"TrainerControl": {
|
|
@@ -2664,7 +2956,7 @@
|
|
| 2664 |
"attributes": {}
|
| 2665 |
}
|
| 2666 |
},
|
| 2667 |
-
"total_flos": 1.
|
| 2668 |
"train_batch_size": 16,
|
| 2669 |
"trial_name": null,
|
| 2670 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": 36000,
|
| 3 |
"best_metric": 0.9893807849919393,
|
| 4 |
"best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-36000",
|
| 5 |
+
"epoch": 1.28,
|
| 6 |
"eval_steps": 4000,
|
| 7 |
+
"global_step": 40000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2636 |
"eval_samples_per_second": 128.405,
|
| 2637 |
"eval_steps_per_second": 8.025,
|
| 2638 |
"step": 36000
|
| 2639 |
+
},
|
| 2640 |
+
{
|
| 2641 |
+
"epoch": 1.1552,
|
| 2642 |
+
"grad_norm": 7.563354969024658,
|
| 2643 |
+
"learning_rate": 1.542869983948636e-05,
|
| 2644 |
+
"loss": 0.0446,
|
| 2645 |
+
"step": 36100
|
| 2646 |
+
},
|
| 2647 |
+
{
|
| 2648 |
+
"epoch": 1.1584,
|
| 2649 |
+
"grad_norm": 0.017443129792809486,
|
| 2650 |
+
"learning_rate": 1.541585874799358e-05,
|
| 2651 |
+
"loss": 0.0472,
|
| 2652 |
+
"step": 36200
|
| 2653 |
+
},
|
| 2654 |
+
{
|
| 2655 |
+
"epoch": 1.1616,
|
| 2656 |
+
"grad_norm": 0.007690173573791981,
|
| 2657 |
+
"learning_rate": 1.5403017656500805e-05,
|
| 2658 |
+
"loss": 0.0368,
|
| 2659 |
+
"step": 36300
|
| 2660 |
+
},
|
| 2661 |
+
{
|
| 2662 |
+
"epoch": 1.1648,
|
| 2663 |
+
"grad_norm": 0.024819310754537582,
|
| 2664 |
+
"learning_rate": 1.5390176565008027e-05,
|
| 2665 |
+
"loss": 0.0466,
|
| 2666 |
+
"step": 36400
|
| 2667 |
+
},
|
| 2668 |
+
{
|
| 2669 |
+
"epoch": 1.168,
|
| 2670 |
+
"grad_norm": 1.0305715799331665,
|
| 2671 |
+
"learning_rate": 1.5377335473515248e-05,
|
| 2672 |
+
"loss": 0.0396,
|
| 2673 |
+
"step": 36500
|
| 2674 |
+
},
|
| 2675 |
+
{
|
| 2676 |
+
"epoch": 1.1712,
|
| 2677 |
+
"grad_norm": 13.792978286743164,
|
| 2678 |
+
"learning_rate": 1.5364494382022473e-05,
|
| 2679 |
+
"loss": 0.0382,
|
| 2680 |
+
"step": 36600
|
| 2681 |
+
},
|
| 2682 |
+
{
|
| 2683 |
+
"epoch": 1.1743999999999999,
|
| 2684 |
+
"grad_norm": 4.012645721435547,
|
| 2685 |
+
"learning_rate": 1.5351653290529694e-05,
|
| 2686 |
+
"loss": 0.0519,
|
| 2687 |
+
"step": 36700
|
| 2688 |
+
},
|
| 2689 |
+
{
|
| 2690 |
+
"epoch": 1.1776,
|
| 2691 |
+
"grad_norm": 0.015255521982908249,
|
| 2692 |
+
"learning_rate": 1.533881219903692e-05,
|
| 2693 |
+
"loss": 0.0331,
|
| 2694 |
+
"step": 36800
|
| 2695 |
+
},
|
| 2696 |
+
{
|
| 2697 |
+
"epoch": 1.1808,
|
| 2698 |
+
"grad_norm": 0.03518729284405708,
|
| 2699 |
+
"learning_rate": 1.532597110754414e-05,
|
| 2700 |
+
"loss": 0.0442,
|
| 2701 |
+
"step": 36900
|
| 2702 |
+
},
|
| 2703 |
+
{
|
| 2704 |
+
"epoch": 1.184,
|
| 2705 |
+
"grad_norm": 0.12012261152267456,
|
| 2706 |
+
"learning_rate": 1.5313130016051365e-05,
|
| 2707 |
+
"loss": 0.0392,
|
| 2708 |
+
"step": 37000
|
| 2709 |
+
},
|
| 2710 |
+
{
|
| 2711 |
+
"epoch": 1.1872,
|
| 2712 |
+
"grad_norm": 0.03485884144902229,
|
| 2713 |
+
"learning_rate": 1.5300288924558587e-05,
|
| 2714 |
+
"loss": 0.0214,
|
| 2715 |
+
"step": 37100
|
| 2716 |
+
},
|
| 2717 |
+
{
|
| 2718 |
+
"epoch": 1.1904,
|
| 2719 |
+
"grad_norm": 10.218805313110352,
|
| 2720 |
+
"learning_rate": 1.528744783306581e-05,
|
| 2721 |
+
"loss": 0.0528,
|
| 2722 |
+
"step": 37200
|
| 2723 |
+
},
|
| 2724 |
+
{
|
| 2725 |
+
"epoch": 1.1936,
|
| 2726 |
+
"grad_norm": 0.0384359173476696,
|
| 2727 |
+
"learning_rate": 1.5274606741573036e-05,
|
| 2728 |
+
"loss": 0.0625,
|
| 2729 |
+
"step": 37300
|
| 2730 |
+
},
|
| 2731 |
+
{
|
| 2732 |
+
"epoch": 1.1968,
|
| 2733 |
+
"grad_norm": 0.3779418170452118,
|
| 2734 |
+
"learning_rate": 1.5261765650080258e-05,
|
| 2735 |
+
"loss": 0.0328,
|
| 2736 |
+
"step": 37400
|
| 2737 |
+
},
|
| 2738 |
+
{
|
| 2739 |
+
"epoch": 1.2,
|
| 2740 |
+
"grad_norm": 5.618625640869141,
|
| 2741 |
+
"learning_rate": 1.5248924558587481e-05,
|
| 2742 |
+
"loss": 0.065,
|
| 2743 |
+
"step": 37500
|
| 2744 |
+
},
|
| 2745 |
+
{
|
| 2746 |
+
"epoch": 1.2032,
|
| 2747 |
+
"grad_norm": 2.0705819129943848,
|
| 2748 |
+
"learning_rate": 1.5236083467094704e-05,
|
| 2749 |
+
"loss": 0.0594,
|
| 2750 |
+
"step": 37600
|
| 2751 |
+
},
|
| 2752 |
+
{
|
| 2753 |
+
"epoch": 1.2064,
|
| 2754 |
+
"grad_norm": 0.14193743467330933,
|
| 2755 |
+
"learning_rate": 1.5223242375601927e-05,
|
| 2756 |
+
"loss": 0.0417,
|
| 2757 |
+
"step": 37700
|
| 2758 |
+
},
|
| 2759 |
+
{
|
| 2760 |
+
"epoch": 1.2096,
|
| 2761 |
+
"grad_norm": 0.034703925251960754,
|
| 2762 |
+
"learning_rate": 1.521040128410915e-05,
|
| 2763 |
+
"loss": 0.0389,
|
| 2764 |
+
"step": 37800
|
| 2765 |
+
},
|
| 2766 |
+
{
|
| 2767 |
+
"epoch": 1.2128,
|
| 2768 |
+
"grad_norm": 0.011800256557762623,
|
| 2769 |
+
"learning_rate": 1.5197560192616374e-05,
|
| 2770 |
+
"loss": 0.0393,
|
| 2771 |
+
"step": 37900
|
| 2772 |
+
},
|
| 2773 |
+
{
|
| 2774 |
+
"epoch": 1.216,
|
| 2775 |
+
"grad_norm": 0.28774189949035645,
|
| 2776 |
+
"learning_rate": 1.5184719101123597e-05,
|
| 2777 |
+
"loss": 0.0504,
|
| 2778 |
+
"step": 38000
|
| 2779 |
+
},
|
| 2780 |
+
{
|
| 2781 |
+
"epoch": 1.2192,
|
| 2782 |
+
"grad_norm": 0.035763729363679886,
|
| 2783 |
+
"learning_rate": 1.517187800963082e-05,
|
| 2784 |
+
"loss": 0.0534,
|
| 2785 |
+
"step": 38100
|
| 2786 |
+
},
|
| 2787 |
+
{
|
| 2788 |
+
"epoch": 1.2224,
|
| 2789 |
+
"grad_norm": 0.158742755651474,
|
| 2790 |
+
"learning_rate": 1.5159036918138043e-05,
|
| 2791 |
+
"loss": 0.0436,
|
| 2792 |
+
"step": 38200
|
| 2793 |
+
},
|
| 2794 |
+
{
|
| 2795 |
+
"epoch": 1.2256,
|
| 2796 |
+
"grad_norm": 9.794978141784668,
|
| 2797 |
+
"learning_rate": 1.5146195826645266e-05,
|
| 2798 |
+
"loss": 0.0502,
|
| 2799 |
+
"step": 38300
|
| 2800 |
+
},
|
| 2801 |
+
{
|
| 2802 |
+
"epoch": 1.2288000000000001,
|
| 2803 |
+
"grad_norm": 0.021368766203522682,
|
| 2804 |
+
"learning_rate": 1.513335473515249e-05,
|
| 2805 |
+
"loss": 0.0286,
|
| 2806 |
+
"step": 38400
|
| 2807 |
+
},
|
| 2808 |
+
{
|
| 2809 |
+
"epoch": 1.232,
|
| 2810 |
+
"grad_norm": 0.5888408422470093,
|
| 2811 |
+
"learning_rate": 1.5120513643659714e-05,
|
| 2812 |
+
"loss": 0.0674,
|
| 2813 |
+
"step": 38500
|
| 2814 |
+
},
|
| 2815 |
+
{
|
| 2816 |
+
"epoch": 1.2352,
|
| 2817 |
+
"grad_norm": 0.005416017957031727,
|
| 2818 |
+
"learning_rate": 1.5107672552166937e-05,
|
| 2819 |
+
"loss": 0.0381,
|
| 2820 |
+
"step": 38600
|
| 2821 |
+
},
|
| 2822 |
+
{
|
| 2823 |
+
"epoch": 1.2384,
|
| 2824 |
+
"grad_norm": 0.03922798112034798,
|
| 2825 |
+
"learning_rate": 1.5094831460674157e-05,
|
| 2826 |
+
"loss": 0.0747,
|
| 2827 |
+
"step": 38700
|
| 2828 |
+
},
|
| 2829 |
+
{
|
| 2830 |
+
"epoch": 1.2416,
|
| 2831 |
+
"grad_norm": 0.030901480466127396,
|
| 2832 |
+
"learning_rate": 1.508199036918138e-05,
|
| 2833 |
+
"loss": 0.0491,
|
| 2834 |
+
"step": 38800
|
| 2835 |
+
},
|
| 2836 |
+
{
|
| 2837 |
+
"epoch": 1.2448,
|
| 2838 |
+
"grad_norm": 0.02417912147939205,
|
| 2839 |
+
"learning_rate": 1.5069149277688603e-05,
|
| 2840 |
+
"loss": 0.0465,
|
| 2841 |
+
"step": 38900
|
| 2842 |
+
},
|
| 2843 |
+
{
|
| 2844 |
+
"epoch": 1.248,
|
| 2845 |
+
"grad_norm": 15.668951988220215,
|
| 2846 |
+
"learning_rate": 1.5056308186195826e-05,
|
| 2847 |
+
"loss": 0.0535,
|
| 2848 |
+
"step": 39000
|
| 2849 |
+
},
|
| 2850 |
+
{
|
| 2851 |
+
"epoch": 1.2511999999999999,
|
| 2852 |
+
"grad_norm": 0.21102702617645264,
|
| 2853 |
+
"learning_rate": 1.504346709470305e-05,
|
| 2854 |
+
"loss": 0.0517,
|
| 2855 |
+
"step": 39100
|
| 2856 |
+
},
|
| 2857 |
+
{
|
| 2858 |
+
"epoch": 1.2544,
|
| 2859 |
+
"grad_norm": 0.06641241163015366,
|
| 2860 |
+
"learning_rate": 1.5030626003210274e-05,
|
| 2861 |
+
"loss": 0.0416,
|
| 2862 |
+
"step": 39200
|
| 2863 |
+
},
|
| 2864 |
+
{
|
| 2865 |
+
"epoch": 1.2576,
|
| 2866 |
+
"grad_norm": 0.5495890974998474,
|
| 2867 |
+
"learning_rate": 1.5017784911717497e-05,
|
| 2868 |
+
"loss": 0.0357,
|
| 2869 |
+
"step": 39300
|
| 2870 |
+
},
|
| 2871 |
+
{
|
| 2872 |
+
"epoch": 1.2608,
|
| 2873 |
+
"grad_norm": 0.035381533205509186,
|
| 2874 |
+
"learning_rate": 1.500494382022472e-05,
|
| 2875 |
+
"loss": 0.0577,
|
| 2876 |
+
"step": 39400
|
| 2877 |
+
},
|
| 2878 |
+
{
|
| 2879 |
+
"epoch": 1.264,
|
| 2880 |
+
"grad_norm": 0.03879441320896149,
|
| 2881 |
+
"learning_rate": 1.4992102728731944e-05,
|
| 2882 |
+
"loss": 0.0191,
|
| 2883 |
+
"step": 39500
|
| 2884 |
+
},
|
| 2885 |
+
{
|
| 2886 |
+
"epoch": 1.2671999999999999,
|
| 2887 |
+
"grad_norm": 0.014720222912728786,
|
| 2888 |
+
"learning_rate": 1.4979261637239167e-05,
|
| 2889 |
+
"loss": 0.0423,
|
| 2890 |
+
"step": 39600
|
| 2891 |
+
},
|
| 2892 |
+
{
|
| 2893 |
+
"epoch": 1.2704,
|
| 2894 |
+
"grad_norm": 3.2292592525482178,
|
| 2895 |
+
"learning_rate": 1.496642054574639e-05,
|
| 2896 |
+
"loss": 0.0602,
|
| 2897 |
+
"step": 39700
|
| 2898 |
+
},
|
| 2899 |
+
{
|
| 2900 |
+
"epoch": 1.2736,
|
| 2901 |
+
"grad_norm": 1.6030577421188354,
|
| 2902 |
+
"learning_rate": 1.4953579454253613e-05,
|
| 2903 |
+
"loss": 0.0543,
|
| 2904 |
+
"step": 39800
|
| 2905 |
+
},
|
| 2906 |
+
{
|
| 2907 |
+
"epoch": 1.2768,
|
| 2908 |
+
"grad_norm": 0.031688716262578964,
|
| 2909 |
+
"learning_rate": 1.4940738362760836e-05,
|
| 2910 |
+
"loss": 0.0341,
|
| 2911 |
+
"step": 39900
|
| 2912 |
+
},
|
| 2913 |
+
{
|
| 2914 |
+
"epoch": 1.28,
|
| 2915 |
+
"grad_norm": 9.190576553344727,
|
| 2916 |
+
"learning_rate": 1.492789727126806e-05,
|
| 2917 |
+
"loss": 0.0381,
|
| 2918 |
+
"step": 40000
|
| 2919 |
+
},
|
| 2920 |
+
{
|
| 2921 |
+
"epoch": 1.28,
|
| 2922 |
+
"eval_accuracy": 0.98796,
|
| 2923 |
+
"eval_f1": 0.9879625821520611,
|
| 2924 |
+
"eval_loss": 0.04839452728629112,
|
| 2925 |
+
"eval_precision": 0.9880194851769686,
|
| 2926 |
+
"eval_recall": 0.98796,
|
| 2927 |
+
"eval_runtime": 777.527,
|
| 2928 |
+
"eval_samples_per_second": 128.613,
|
| 2929 |
+
"eval_steps_per_second": 8.038,
|
| 2930 |
+
"step": 40000
|
| 2931 |
}
|
| 2932 |
],
|
| 2933 |
"logging_steps": 100,
|
|
|
|
| 2942 |
"early_stopping_threshold": 0.0
|
| 2943 |
},
|
| 2944 |
"attributes": {
|
| 2945 |
+
"early_stopping_patience_counter": 1
|
| 2946 |
}
|
| 2947 |
},
|
| 2948 |
"TrainerControl": {
|
|
|
|
| 2956 |
"attributes": {}
|
| 2957 |
}
|
| 2958 |
},
|
| 2959 |
+
"total_flos": 1.698520718775022e+17,
|
| 2960 |
"train_batch_size": 16,
|
| 2961 |
"trial_name": null,
|
| 2962 |
"trial_params": null
|