irishprancer commited on
Commit
c993cca
·
verified ·
1 Parent(s): d3bbe2e

Training in progress, step 4200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c703da2264e055a297790ac7d288cc98953e35f33bdc95a5685265382fb890e
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:880fd56a3f8de1ea8da94daf1c2a4ae51100a00332912dd6360718788d991f3e
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3222e360dfa7b35a1163b70615c3f1dede1049c9c6a85199690290550db33722
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b0f4a093ed328add3e959161df7d638c8c11def0b0aa41283d1549f7fc07bb9
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12c3bba0118a53d6f1dc342017f2ea922f8815570aacf4ca8fc91241ca3c4a16
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8fde804448728c0f3c5740097a588b9bc938edec8f5ff4ab3791e696a0e04dd
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63e5c116adbf0f8029c8a52c2390c4234cb418c18a9d2241c9b1d212add8b091
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65b74f49a3daa98af42d6e544ec1a2f4a5627b7dc9aa14dada3f91ea7451360b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 176.08695652173913,
5
  "eval_steps": 150,
6
- "global_step": 4050,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3922,6 +3922,151 @@
3922
  "EMA_steps_per_second": 25.545,
3923
  "epoch": 176.08695652173913,
3924
  "step": 4050
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3925
  }
3926
  ],
3927
  "logging_steps": 10,
@@ -3941,7 +4086,7 @@
3941
  "attributes": {}
3942
  }
3943
  },
3944
- "total_flos": 1.0440404596622131e+17,
3945
  "train_batch_size": 4,
3946
  "trial_name": null,
3947
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 182.6086956521739,
5
  "eval_steps": 150,
6
+ "global_step": 4200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3922
  "EMA_steps_per_second": 25.545,
3923
  "epoch": 176.08695652173913,
3924
  "step": 4050
3925
+ },
3926
+ {
3927
+ "epoch": 176.52173913043478,
3928
+ "grad_norm": 3.616029977798462,
3929
+ "learning_rate": 2.525382632276528e-06,
3930
+ "loss": 0.2307,
3931
+ "step": 4060
3932
+ },
3933
+ {
3934
+ "epoch": 176.95652173913044,
3935
+ "grad_norm": 2.267972946166992,
3936
+ "learning_rate": 2.5253568548882678e-06,
3937
+ "loss": 0.2305,
3938
+ "step": 4070
3939
+ },
3940
+ {
3941
+ "epoch": 177.3913043478261,
3942
+ "grad_norm": 2.406853437423706,
3943
+ "learning_rate": 2.5253305771487484e-06,
3944
+ "loss": 0.2039,
3945
+ "step": 4080
3946
+ },
3947
+ {
3948
+ "epoch": 177.82608695652175,
3949
+ "grad_norm": 3.070425033569336,
3950
+ "learning_rate": 2.5253037990683853e-06,
3951
+ "loss": 0.225,
3952
+ "step": 4090
3953
+ },
3954
+ {
3955
+ "epoch": 178.2608695652174,
3956
+ "grad_norm": 1.9626798629760742,
3957
+ "learning_rate": 2.525276520657793e-06,
3958
+ "loss": 0.2597,
3959
+ "step": 4100
3960
+ },
3961
+ {
3962
+ "epoch": 178.69565217391303,
3963
+ "grad_norm": 2.3126184940338135,
3964
+ "learning_rate": 2.525248741927783e-06,
3965
+ "loss": 0.2189,
3966
+ "step": 4110
3967
+ },
3968
+ {
3969
+ "epoch": 179.1304347826087,
3970
+ "grad_norm": 1.8034698963165283,
3971
+ "learning_rate": 2.525220462889366e-06,
3972
+ "loss": 0.2083,
3973
+ "step": 4120
3974
+ },
3975
+ {
3976
+ "epoch": 179.56521739130434,
3977
+ "grad_norm": 1.5343788862228394,
3978
+ "learning_rate": 2.5251916835537516e-06,
3979
+ "loss": 0.2335,
3980
+ "step": 4130
3981
+ },
3982
+ {
3983
+ "epoch": 180.0,
3984
+ "grad_norm": 2.8505375385284424,
3985
+ "learning_rate": 2.525162403932346e-06,
3986
+ "loss": 0.2243,
3987
+ "step": 4140
3988
+ },
3989
+ {
3990
+ "epoch": 180.43478260869566,
3991
+ "grad_norm": 1.8230454921722412,
3992
+ "learning_rate": 2.5251326240367557e-06,
3993
+ "loss": 0.239,
3994
+ "step": 4150
3995
+ },
3996
+ {
3997
+ "epoch": 180.8695652173913,
3998
+ "grad_norm": 2.1887192726135254,
3999
+ "learning_rate": 2.5251023438787834e-06,
4000
+ "loss": 0.2246,
4001
+ "step": 4160
4002
+ },
4003
+ {
4004
+ "epoch": 181.30434782608697,
4005
+ "grad_norm": 1.9359138011932373,
4006
+ "learning_rate": 2.5250715634704324e-06,
4007
+ "loss": 0.2131,
4008
+ "step": 4170
4009
+ },
4010
+ {
4011
+ "epoch": 181.7391304347826,
4012
+ "grad_norm": 1.9063074588775635,
4013
+ "learning_rate": 2.5250402828239028e-06,
4014
+ "loss": 0.2413,
4015
+ "step": 4180
4016
+ },
4017
+ {
4018
+ "epoch": 182.17391304347825,
4019
+ "grad_norm": 1.9552299976348877,
4020
+ "learning_rate": 2.525008501951592e-06,
4021
+ "loss": 0.223,
4022
+ "step": 4190
4023
+ },
4024
+ {
4025
+ "epoch": 182.6086956521739,
4026
+ "grad_norm": 1.7993073463439941,
4027
+ "learning_rate": 2.5249762208660985e-06,
4028
+ "loss": 0.2381,
4029
+ "step": 4200
4030
+ },
4031
+ {
4032
+ "epoch": 182.6086956521739,
4033
+ "eval_loss": 0.978575587272644,
4034
+ "eval_runtime": 0.5175,
4035
+ "eval_samples_per_second": 19.326,
4036
+ "eval_steps_per_second": 19.326,
4037
+ "step": 4200
4038
+ },
4039
+ {
4040
+ "Start_State_loss": 0.8609819412231445,
4041
+ "Start_State_runtime": 0.5378,
4042
+ "Start_State_samples_per_second": 18.593,
4043
+ "Start_State_steps_per_second": 18.593,
4044
+ "epoch": 182.6086956521739,
4045
+ "step": 4200
4046
+ },
4047
+ {
4048
+ "Raw_Model_loss": 0.978575587272644,
4049
+ "Raw_Model_runtime": 0.522,
4050
+ "Raw_Model_samples_per_second": 19.156,
4051
+ "Raw_Model_steps_per_second": 19.156,
4052
+ "epoch": 182.6086956521739,
4053
+ "step": 4200
4054
+ },
4055
+ {
4056
+ "SWA_loss": 0.8046241998672485,
4057
+ "SWA_runtime": 0.5454,
4058
+ "SWA_samples_per_second": 18.334,
4059
+ "SWA_steps_per_second": 18.334,
4060
+ "epoch": 182.6086956521739,
4061
+ "step": 4200
4062
+ },
4063
+ {
4064
+ "EMA_loss": 0.8599117398262024,
4065
+ "EMA_runtime": 0.5113,
4066
+ "EMA_samples_per_second": 19.558,
4067
+ "EMA_steps_per_second": 19.558,
4068
+ "epoch": 182.6086956521739,
4069
+ "step": 4200
4070
  }
4071
  ],
4072
  "logging_steps": 10,
 
4086
  "attributes": {}
4087
  }
4088
  },
4089
+ "total_flos": 1.0822098760143667e+17,
4090
  "train_batch_size": 4,
4091
  "trial_name": null,
4092
  "trial_params": null