Instructions to use moos124/code-reasoning-0.5b with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use moos124/code-reasoning-0.5b with Transformers:
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("moos124/code-reasoning-0.5b", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
Training in progress, step 3120, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 70430032
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6707292d7f654e5124c3e926150bc642c498945f51878660f225650de5246c50
|
| 3 |
size 70430032
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 141058579
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:086d9369ed7b9b1b0db7b13e9ce72ff9f192de08d450f900a44752b156fb06a4
|
| 3 |
size 141058579
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:671dc5d364c5724905180db7a8f088b1689fd04a21018fd65eb0b930b5fd8447
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5be2b28db77843da54a5469ae9097a28157a8cf17202b01284ef63e0481acf8e
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2858,6 +2858,286 @@
|
|
| 2858 |
"mean_token_accuracy": 0.7862283095717431,
|
| 2859 |
"num_tokens": 13204391.0,
|
| 2860 |
"step": 2840
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2861 |
}
|
| 2862 |
],
|
| 2863 |
"logging_steps": 10,
|
|
@@ -2877,7 +3157,7 @@
|
|
| 2877 |
"attributes": {}
|
| 2878 |
}
|
| 2879 |
},
|
| 2880 |
-
"total_flos": 6.
|
| 2881 |
"train_batch_size": 4,
|
| 2882 |
"trial_name": null,
|
| 2883 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.6656,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 3120,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2858 |
"mean_token_accuracy": 0.7862283095717431,
|
| 2859 |
"num_tokens": 13204391.0,
|
| 2860 |
"step": 2840
|
| 2861 |
+
},
|
| 2862 |
+
{
|
| 2863 |
+
"entropy": 0.829298897087574,
|
| 2864 |
+
"epoch": 0.608,
|
| 2865 |
+
"grad_norm": 0.2556048631668091,
|
| 2866 |
+
"learning_rate": 8.176813665984053e-05,
|
| 2867 |
+
"loss": 0.8883259773254395,
|
| 2868 |
+
"mean_token_accuracy": 0.789163002371788,
|
| 2869 |
+
"num_tokens": 13244838.0,
|
| 2870 |
+
"step": 2850
|
| 2871 |
+
},
|
| 2872 |
+
{
|
| 2873 |
+
"entropy": 0.9395963847637177,
|
| 2874 |
+
"epoch": 0.6101333333333333,
|
| 2875 |
+
"grad_norm": 0.19703006744384766,
|
| 2876 |
+
"learning_rate": 8.163429845813997e-05,
|
| 2877 |
+
"loss": 1.0494510650634765,
|
| 2878 |
+
"mean_token_accuracy": 0.7710079193115235,
|
| 2879 |
+
"num_tokens": 13290932.0,
|
| 2880 |
+
"step": 2860
|
| 2881 |
+
},
|
| 2882 |
+
{
|
| 2883 |
+
"entropy": 0.9920587949454784,
|
| 2884 |
+
"epoch": 0.6122666666666666,
|
| 2885 |
+
"grad_norm": 0.2381218671798706,
|
| 2886 |
+
"learning_rate": 8.150008123083838e-05,
|
| 2887 |
+
"loss": 1.0494998931884765,
|
| 2888 |
+
"mean_token_accuracy": 0.7526131421327591,
|
| 2889 |
+
"num_tokens": 13333787.0,
|
| 2890 |
+
"step": 2870
|
| 2891 |
+
},
|
| 2892 |
+
{
|
| 2893 |
+
"entropy": 0.9984497465193272,
|
| 2894 |
+
"epoch": 0.6144,
|
| 2895 |
+
"grad_norm": 0.25819751620292664,
|
| 2896 |
+
"learning_rate": 8.136548658605635e-05,
|
| 2897 |
+
"loss": 1.1107137680053711,
|
| 2898 |
+
"mean_token_accuracy": 0.7557663440704345,
|
| 2899 |
+
"num_tokens": 13382126.0,
|
| 2900 |
+
"step": 2880
|
| 2901 |
+
},
|
| 2902 |
+
{
|
| 2903 |
+
"entropy": 0.9907154351472854,
|
| 2904 |
+
"epoch": 0.6165333333333334,
|
| 2905 |
+
"grad_norm": 0.2328466922044754,
|
| 2906 |
+
"learning_rate": 8.123051613643641e-05,
|
| 2907 |
+
"loss": 1.1184075355529786,
|
| 2908 |
+
"mean_token_accuracy": 0.7595549002289772,
|
| 2909 |
+
"num_tokens": 13430083.0,
|
| 2910 |
+
"step": 2890
|
| 2911 |
+
},
|
| 2912 |
+
{
|
| 2913 |
+
"entropy": 0.9244011230766773,
|
| 2914 |
+
"epoch": 0.6186666666666667,
|
| 2915 |
+
"grad_norm": 0.24781359732151031,
|
| 2916 |
+
"learning_rate": 8.109517149912386e-05,
|
| 2917 |
+
"loss": 1.017502498626709,
|
| 2918 |
+
"mean_token_accuracy": 0.7722871780395508,
|
| 2919 |
+
"num_tokens": 13478876.0,
|
| 2920 |
+
"step": 2900
|
| 2921 |
+
},
|
| 2922 |
+
{
|
| 2923 |
+
"entropy": 0.8886970773339271,
|
| 2924 |
+
"epoch": 0.6208,
|
| 2925 |
+
"grad_norm": 0.2412341833114624,
|
| 2926 |
+
"learning_rate": 8.095945429574724e-05,
|
| 2927 |
+
"loss": 0.9119473457336426,
|
| 2928 |
+
"mean_token_accuracy": 0.7751852914690971,
|
| 2929 |
+
"num_tokens": 13527978.0,
|
| 2930 |
+
"step": 2910
|
| 2931 |
+
},
|
| 2932 |
+
{
|
| 2933 |
+
"entropy": 1.040999775379896,
|
| 2934 |
+
"epoch": 0.6229333333333333,
|
| 2935 |
+
"grad_norm": 0.2708323895931244,
|
| 2936 |
+
"learning_rate": 8.082336615239903e-05,
|
| 2937 |
+
"loss": 1.1017963409423828,
|
| 2938 |
+
"mean_token_accuracy": 0.7445731669664383,
|
| 2939 |
+
"num_tokens": 13579308.0,
|
| 2940 |
+
"step": 2920
|
| 2941 |
+
},
|
| 2942 |
+
{
|
| 2943 |
+
"entropy": 1.0086095616221429,
|
| 2944 |
+
"epoch": 0.6250666666666667,
|
| 2945 |
+
"grad_norm": 0.2506955564022064,
|
| 2946 |
+
"learning_rate": 8.068690869961613e-05,
|
| 2947 |
+
"loss": 1.1194355964660645,
|
| 2948 |
+
"mean_token_accuracy": 0.7530581071972847,
|
| 2949 |
+
"num_tokens": 13632480.0,
|
| 2950 |
+
"step": 2930
|
| 2951 |
+
},
|
| 2952 |
+
{
|
| 2953 |
+
"entropy": 0.9920367047190666,
|
| 2954 |
+
"epoch": 0.6272,
|
| 2955 |
+
"grad_norm": 0.28143101930618286,
|
| 2956 |
+
"learning_rate": 8.055008357236027e-05,
|
| 2957 |
+
"loss": 1.0880350112915038,
|
| 2958 |
+
"mean_token_accuracy": 0.7523079156875611,
|
| 2959 |
+
"num_tokens": 13683250.0,
|
| 2960 |
+
"step": 2940
|
| 2961 |
+
},
|
| 2962 |
+
{
|
| 2963 |
+
"entropy": 0.947841040790081,
|
| 2964 |
+
"epoch": 0.6293333333333333,
|
| 2965 |
+
"grad_norm": 0.34841635823249817,
|
| 2966 |
+
"learning_rate": 8.04128924099985e-05,
|
| 2967 |
+
"loss": 1.013569164276123,
|
| 2968 |
+
"mean_token_accuracy": 0.7690569952130317,
|
| 2969 |
+
"num_tokens": 13724761.0,
|
| 2970 |
+
"step": 2950
|
| 2971 |
+
},
|
| 2972 |
+
{
|
| 2973 |
+
"entropy": 0.8923015877604484,
|
| 2974 |
+
"epoch": 0.6314666666666666,
|
| 2975 |
+
"grad_norm": 0.24537858366966248,
|
| 2976 |
+
"learning_rate": 8.027533685628348e-05,
|
| 2977 |
+
"loss": 0.9606434822082519,
|
| 2978 |
+
"mean_token_accuracy": 0.7777309969067574,
|
| 2979 |
+
"num_tokens": 13771701.0,
|
| 2980 |
+
"step": 2960
|
| 2981 |
+
},
|
| 2982 |
+
{
|
| 2983 |
+
"entropy": 1.082998887449503,
|
| 2984 |
+
"epoch": 0.6336,
|
| 2985 |
+
"grad_norm": 0.2772109806537628,
|
| 2986 |
+
"learning_rate": 8.013741855933386e-05,
|
| 2987 |
+
"loss": 1.155489444732666,
|
| 2988 |
+
"mean_token_accuracy": 0.7356668919324875,
|
| 2989 |
+
"num_tokens": 13824969.0,
|
| 2990 |
+
"step": 2970
|
| 2991 |
+
},
|
| 2992 |
+
{
|
| 2993 |
+
"entropy": 1.0548067845404148,
|
| 2994 |
+
"epoch": 0.6357333333333334,
|
| 2995 |
+
"grad_norm": 0.2706131041049957,
|
| 2996 |
+
"learning_rate": 7.999913917161446e-05,
|
| 2997 |
+
"loss": 1.1606884002685547,
|
| 2998 |
+
"mean_token_accuracy": 0.7461161836981773,
|
| 2999 |
+
"num_tokens": 13879673.0,
|
| 3000 |
+
"step": 2980
|
| 3001 |
+
},
|
| 3002 |
+
{
|
| 3003 |
+
"entropy": 0.9122042678296566,
|
| 3004 |
+
"epoch": 0.6378666666666667,
|
| 3005 |
+
"grad_norm": 0.28579071164131165,
|
| 3006 |
+
"learning_rate": 7.986050034991646e-05,
|
| 3007 |
+
"loss": 1.0014433860778809,
|
| 3008 |
+
"mean_token_accuracy": 0.7702639386057853,
|
| 3009 |
+
"num_tokens": 13923893.0,
|
| 3010 |
+
"step": 2990
|
| 3011 |
+
},
|
| 3012 |
+
{
|
| 3013 |
+
"entropy": 0.856528140604496,
|
| 3014 |
+
"epoch": 0.64,
|
| 3015 |
+
"grad_norm": 0.2646186351776123,
|
| 3016 |
+
"learning_rate": 7.972150375533767e-05,
|
| 3017 |
+
"loss": 0.9789193153381348,
|
| 3018 |
+
"mean_token_accuracy": 0.7824795439839363,
|
| 3019 |
+
"num_tokens": 13967914.0,
|
| 3020 |
+
"step": 3000
|
| 3021 |
+
},
|
| 3022 |
+
{
|
| 3023 |
+
"entropy": 1.013469608873129,
|
| 3024 |
+
"epoch": 0.6421333333333333,
|
| 3025 |
+
"grad_norm": 0.2540909945964813,
|
| 3026 |
+
"learning_rate": 7.958215105326252e-05,
|
| 3027 |
+
"loss": 1.1425801277160645,
|
| 3028 |
+
"mean_token_accuracy": 0.7503237001597881,
|
| 3029 |
+
"num_tokens": 14016335.0,
|
| 3030 |
+
"step": 3010
|
| 3031 |
+
},
|
| 3032 |
+
{
|
| 3033 |
+
"entropy": 0.9561307951807976,
|
| 3034 |
+
"epoch": 0.6442666666666667,
|
| 3035 |
+
"grad_norm": 0.2495027333498001,
|
| 3036 |
+
"learning_rate": 7.94424439133421e-05,
|
| 3037 |
+
"loss": 1.0421770095825196,
|
| 3038 |
+
"mean_token_accuracy": 0.7604482308030128,
|
| 3039 |
+
"num_tokens": 14060745.0,
|
| 3040 |
+
"step": 3020
|
| 3041 |
+
},
|
| 3042 |
+
{
|
| 3043 |
+
"entropy": 0.9330584339797496,
|
| 3044 |
+
"epoch": 0.6464,
|
| 3045 |
+
"grad_norm": 0.26480352878570557,
|
| 3046 |
+
"learning_rate": 7.930238400947422e-05,
|
| 3047 |
+
"loss": 1.0622355461120605,
|
| 3048 |
+
"mean_token_accuracy": 0.7683120101690293,
|
| 3049 |
+
"num_tokens": 14108255.0,
|
| 3050 |
+
"step": 3030
|
| 3051 |
+
},
|
| 3052 |
+
{
|
| 3053 |
+
"entropy": 0.8226673573255538,
|
| 3054 |
+
"epoch": 0.6485333333333333,
|
| 3055 |
+
"grad_norm": 0.2883199453353882,
|
| 3056 |
+
"learning_rate": 7.916197301978331e-05,
|
| 3057 |
+
"loss": 0.8736177444458008,
|
| 3058 |
+
"mean_token_accuracy": 0.7835568472743034,
|
| 3059 |
+
"num_tokens": 14151595.0,
|
| 3060 |
+
"step": 3040
|
| 3061 |
+
},
|
| 3062 |
+
{
|
| 3063 |
+
"entropy": 1.0103112280368804,
|
| 3064 |
+
"epoch": 0.6506666666666666,
|
| 3065 |
+
"grad_norm": 0.2573588788509369,
|
| 3066 |
+
"learning_rate": 7.902121262660036e-05,
|
| 3067 |
+
"loss": 1.1782626152038573,
|
| 3068 |
+
"mean_token_accuracy": 0.7547322385013103,
|
| 3069 |
+
"num_tokens": 14198658.0,
|
| 3070 |
+
"step": 3050
|
| 3071 |
+
},
|
| 3072 |
+
{
|
| 3073 |
+
"entropy": 0.9194101721048356,
|
| 3074 |
+
"epoch": 0.6528,
|
| 3075 |
+
"grad_norm": 0.22869926691055298,
|
| 3076 |
+
"learning_rate": 7.888010451644265e-05,
|
| 3077 |
+
"loss": 0.96375732421875,
|
| 3078 |
+
"mean_token_accuracy": 0.7731851547956466,
|
| 3079 |
+
"num_tokens": 14243252.0,
|
| 3080 |
+
"step": 3060
|
| 3081 |
+
},
|
| 3082 |
+
{
|
| 3083 |
+
"entropy": 0.927897697687149,
|
| 3084 |
+
"epoch": 0.6549333333333334,
|
| 3085 |
+
"grad_norm": 0.32361456751823425,
|
| 3086 |
+
"learning_rate": 7.873865037999373e-05,
|
| 3087 |
+
"loss": 1.0542486190795899,
|
| 3088 |
+
"mean_token_accuracy": 0.7636147439479828,
|
| 3089 |
+
"num_tokens": 14290318.0,
|
| 3090 |
+
"step": 3070
|
| 3091 |
+
},
|
| 3092 |
+
{
|
| 3093 |
+
"entropy": 0.8857385322451592,
|
| 3094 |
+
"epoch": 0.6570666666666667,
|
| 3095 |
+
"grad_norm": 0.25951746106147766,
|
| 3096 |
+
"learning_rate": 7.859685191208297e-05,
|
| 3097 |
+
"loss": 0.9199460983276367,
|
| 3098 |
+
"mean_token_accuracy": 0.7751095175743103,
|
| 3099 |
+
"num_tokens": 14341937.0,
|
| 3100 |
+
"step": 3080
|
| 3101 |
+
},
|
| 3102 |
+
{
|
| 3103 |
+
"entropy": 0.9319920368492604,
|
| 3104 |
+
"epoch": 0.6592,
|
| 3105 |
+
"grad_norm": 0.22098122537136078,
|
| 3106 |
+
"learning_rate": 7.845471081166535e-05,
|
| 3107 |
+
"loss": 1.057561206817627,
|
| 3108 |
+
"mean_token_accuracy": 0.763427771627903,
|
| 3109 |
+
"num_tokens": 14388811.0,
|
| 3110 |
+
"step": 3090
|
| 3111 |
+
},
|
| 3112 |
+
{
|
| 3113 |
+
"entropy": 0.9401551052927971,
|
| 3114 |
+
"epoch": 0.6613333333333333,
|
| 3115 |
+
"grad_norm": 0.25181668996810913,
|
| 3116 |
+
"learning_rate": 7.831222878180115e-05,
|
| 3117 |
+
"loss": 1.0170879364013672,
|
| 3118 |
+
"mean_token_accuracy": 0.7671449035406113,
|
| 3119 |
+
"num_tokens": 14432608.0,
|
| 3120 |
+
"step": 3100
|
| 3121 |
+
},
|
| 3122 |
+
{
|
| 3123 |
+
"entropy": 0.9817736372351646,
|
| 3124 |
+
"epoch": 0.6634666666666666,
|
| 3125 |
+
"grad_norm": 0.25245943665504456,
|
| 3126 |
+
"learning_rate": 7.816940752963543e-05,
|
| 3127 |
+
"loss": 1.1231375694274903,
|
| 3128 |
+
"mean_token_accuracy": 0.7525465905666351,
|
| 3129 |
+
"num_tokens": 14483062.0,
|
| 3130 |
+
"step": 3110
|
| 3131 |
+
},
|
| 3132 |
+
{
|
| 3133 |
+
"entropy": 1.032941934466362,
|
| 3134 |
+
"epoch": 0.6656,
|
| 3135 |
+
"grad_norm": 0.255884051322937,
|
| 3136 |
+
"learning_rate": 7.80262487663777e-05,
|
| 3137 |
+
"loss": 1.1379814147949219,
|
| 3138 |
+
"mean_token_accuracy": 0.7467011958360672,
|
| 3139 |
+
"num_tokens": 14526227.0,
|
| 3140 |
+
"step": 3120
|
| 3141 |
}
|
| 3142 |
],
|
| 3143 |
"logging_steps": 10,
|
|
|
|
| 3157 |
"attributes": {}
|
| 3158 |
}
|
| 3159 |
},
|
| 3160 |
+
"total_flos": 6.879296464710451e+16,
|
| 3161 |
"train_batch_size": 4,
|
| 3162 |
"trial_name": null,
|
| 3163 |
"trial_params": null
|