3N3G commited on
Commit
5bc6431
·
verified ·
1 Parent(s): 50f167a

Training in progress, step 400, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e518767161c6b3fd6ef8ef726dc7bf3af87f7bb1519146c47dd7d0c283186f5b
3
  size 4969539560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67708b17aa8ae31fb85a04177716cbfbb8b7425f052006d954bae0522c6f8bee
3
  size 4969539560
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20237dae51bbcf7eaa43919889030f143f9e8a15e84b1f5cf442d4a825a7f70b
3
  size 1912795688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee66416d8bfe4367c4aeb4cf6600df9a1ca0e261e23a542692948c4fb197ec85
3
  size 1912795688
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 96.0,
6
  "eval_steps": 16,
7
- "global_step": 384,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2888,6 +2888,126 @@
2888
  "eval_samples_per_second": 17.094,
2889
  "eval_steps_per_second": 17.094,
2890
  "step": 384
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2891
  }
2892
  ],
2893
  "logging_steps": 1,
@@ -2902,12 +3022,12 @@
2902
  "should_evaluate": false,
2903
  "should_log": false,
2904
  "should_save": true,
2905
- "should_training_stop": false
2906
  },
2907
  "attributes": {}
2908
  }
2909
  },
2910
- "total_flos": 1.2878715456847872e+17,
2911
  "train_batch_size": 1,
2912
  "trial_name": null,
2913
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 100.0,
6
  "eval_steps": 16,
7
+ "global_step": 400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2888
  "eval_samples_per_second": 17.094,
2889
  "eval_steps_per_second": 17.094,
2890
  "step": 384
2891
+ },
2892
+ {
2893
+ "epoch": 96.2909090909091,
2894
+ "grad_norm": 3.1493375301361084,
2895
+ "learning_rate": 1.0437936906629333e-08,
2896
+ "loss": 0.5571,
2897
+ "step": 385
2898
+ },
2899
+ {
2900
+ "epoch": 96.58181818181818,
2901
+ "grad_norm": 3.1544456481933594,
2902
+ "learning_rate": 1.0384981238178533e-08,
2903
+ "loss": 0.7043,
2904
+ "step": 386
2905
+ },
2906
+ {
2907
+ "epoch": 96.87272727272727,
2908
+ "grad_norm": 3.5196638107299805,
2909
+ "learning_rate": 1.033542317614051e-08,
2910
+ "loss": 0.6956,
2911
+ "step": 387
2912
+ },
2913
+ {
2914
+ "epoch": 97.0,
2915
+ "grad_norm": 2.829664707183838,
2916
+ "learning_rate": 1.0289266494553564e-08,
2917
+ "loss": 0.5839,
2918
+ "step": 388
2919
+ },
2920
+ {
2921
+ "epoch": 97.2909090909091,
2922
+ "grad_norm": 3.240220308303833,
2923
+ "learning_rate": 1.0246514708427701e-08,
2924
+ "loss": 0.629,
2925
+ "step": 389
2926
+ },
2927
+ {
2928
+ "epoch": 97.58181818181818,
2929
+ "grad_norm": 3.419234275817871,
2930
+ "learning_rate": 1.0207171073476952e-08,
2931
+ "loss": 0.7125,
2932
+ "step": 390
2933
+ },
2934
+ {
2935
+ "epoch": 97.87272727272727,
2936
+ "grad_norm": 3.266242742538452,
2937
+ "learning_rate": 1.017123858587145e-08,
2938
+ "loss": 0.7004,
2939
+ "step": 391
2940
+ },
2941
+ {
2942
+ "epoch": 98.0,
2943
+ "grad_norm": 2.8885867595672607,
2944
+ "learning_rate": 1.0138719982009241e-08,
2945
+ "loss": 0.4986,
2946
+ "step": 392
2947
+ },
2948
+ {
2949
+ "epoch": 98.2909090909091,
2950
+ "grad_norm": 3.4574053287506104,
2951
+ "learning_rate": 1.0109617738307912e-08,
2952
+ "loss": 0.7095,
2953
+ "step": 393
2954
+ },
2955
+ {
2956
+ "epoch": 98.58181818181818,
2957
+ "grad_norm": 3.2674267292022705,
2958
+ "learning_rate": 1.0083934071015988e-08,
2959
+ "loss": 0.5806,
2960
+ "step": 394
2961
+ },
2962
+ {
2963
+ "epoch": 98.87272727272727,
2964
+ "grad_norm": 2.897749423980713,
2965
+ "learning_rate": 1.0061670936044179e-08,
2966
+ "loss": 0.6434,
2967
+ "step": 395
2968
+ },
2969
+ {
2970
+ "epoch": 99.0,
2971
+ "grad_norm": 3.9228150844573975,
2972
+ "learning_rate": 1.0042830028816398e-08,
2973
+ "loss": 0.7094,
2974
+ "step": 396
2975
+ },
2976
+ {
2977
+ "epoch": 99.2909090909091,
2978
+ "grad_norm": 2.946876287460327,
2979
+ "learning_rate": 1.002741278414069e-08,
2980
+ "loss": 0.5678,
2981
+ "step": 397
2982
+ },
2983
+ {
2984
+ "epoch": 99.58181818181818,
2985
+ "grad_norm": 2.9825222492218018,
2986
+ "learning_rate": 1.0015420376099922e-08,
2987
+ "loss": 0.6347,
2988
+ "step": 398
2989
+ },
2990
+ {
2991
+ "epoch": 99.87272727272727,
2992
+ "grad_norm": 3.46803879737854,
2993
+ "learning_rate": 1.0006853717962394e-08,
2994
+ "loss": 0.7428,
2995
+ "step": 399
2996
+ },
2997
+ {
2998
+ "epoch": 100.0,
2999
+ "grad_norm": 4.149415969848633,
3000
+ "learning_rate": 1.0001713462112291e-08,
3001
+ "loss": 0.6465,
3002
+ "step": 400
3003
+ },
3004
+ {
3005
+ "epoch": 100.0,
3006
+ "eval_loss": 0.6363555192947388,
3007
+ "eval_runtime": 0.7511,
3008
+ "eval_samples_per_second": 17.308,
3009
+ "eval_steps_per_second": 17.308,
3010
+ "step": 400
3011
  }
3012
  ],
3013
  "logging_steps": 1,
 
3022
  "should_evaluate": false,
3023
  "should_log": false,
3024
  "should_save": true,
3025
+ "should_training_stop": true
3026
  },
3027
  "attributes": {}
3028
  }
3029
  },
3030
+ "total_flos": 1.34153286008832e+17,
3031
  "train_batch_size": 1,
3032
  "trial_name": null,
3033
  "trial_params": null