sedrickkeh commited on
Commit
3cf737f
·
verified ·
1 Parent(s): 3830f84

Training in progress, epoch 2

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6d425a1a52946d8537dccfd7d3c79538f086e65ddeb956035954eb215e26291
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7dbb8add0484033dd1b505da3909700b7cff58581098bec6285f34305f89ac9
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6456894bd7feb1e3a947da189faf7e58401cd7d998c9fcfaab69a2f20f6f730
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc827cb767a276c5050e2522f59f4d7bb37e9bc5324347f3db3e077d33ac1625
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f78c240a7396b348534f3895f56d66ae7ab7cd60b8afcb46581bfeea11e0551d
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad05c8cbefab896c7c49fe6ca8b5a30980e3906085bf6086591f7ff197677860
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed4a298c2aa9c52fa3155f4e6e5fa49a96afe9bdde0864e1f5ce258d821b321d
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f44b8c028a726801e6964ce48026cbcb0bc5897784f765572cd353b9d4357df7
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -67,3 +67,64 @@
67
  {"current_steps": 67, "total_steps": 186, "loss": 0.7176, "lr": 8.096392994005177e-06, "epoch": 1.072, "percentage": 36.02, "elapsed_time": "0:57:29", "remaining_time": "1:42:06"}
68
  {"current_steps": 68, "total_steps": 186, "loss": 0.7277, "lr": 8.021996528115335e-06, "epoch": 1.088, "percentage": 36.56, "elapsed_time": "0:58:18", "remaining_time": "1:41:10"}
69
  {"current_steps": 69, "total_steps": 186, "loss": 0.7373, "lr": 7.946530643076138e-06, "epoch": 1.104, "percentage": 37.1, "elapsed_time": "0:59:03", "remaining_time": "1:40:08"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  {"current_steps": 67, "total_steps": 186, "loss": 0.7176, "lr": 8.096392994005177e-06, "epoch": 1.072, "percentage": 36.02, "elapsed_time": "0:57:29", "remaining_time": "1:42:06"}
68
  {"current_steps": 68, "total_steps": 186, "loss": 0.7277, "lr": 8.021996528115335e-06, "epoch": 1.088, "percentage": 36.56, "elapsed_time": "0:58:18", "remaining_time": "1:41:10"}
69
  {"current_steps": 69, "total_steps": 186, "loss": 0.7373, "lr": 7.946530643076138e-06, "epoch": 1.104, "percentage": 37.1, "elapsed_time": "0:59:03", "remaining_time": "1:40:08"}
70
+ {"current_steps": 70, "total_steps": 186, "loss": 0.7057, "lr": 7.870022044630569e-06, "epoch": 1.12, "percentage": 37.63, "elapsed_time": "0:59:56", "remaining_time": "1:39:20"}
71
+ {"current_steps": 71, "total_steps": 186, "loss": 0.7334, "lr": 7.792497807515317e-06, "epoch": 1.1360000000000001, "percentage": 38.17, "elapsed_time": "1:00:44", "remaining_time": "1:38:23"}
72
+ {"current_steps": 72, "total_steps": 186, "loss": 0.7679, "lr": 7.713985365879607e-06, "epoch": 1.152, "percentage": 38.71, "elapsed_time": "1:01:34", "remaining_time": "1:37:29"}
73
+ {"current_steps": 73, "total_steps": 186, "loss": 0.7183, "lr": 7.63451250357685e-06, "epoch": 1.168, "percentage": 39.25, "elapsed_time": "1:02:17", "remaining_time": "1:36:25"}
74
+ {"current_steps": 74, "total_steps": 186, "loss": 0.7163, "lr": 7.55410734433254e-06, "epoch": 1.184, "percentage": 39.78, "elapsed_time": "1:03:13", "remaining_time": "1:35:41"}
75
+ {"current_steps": 75, "total_steps": 186, "loss": 0.8224, "lr": 7.472798341791877e-06, "epoch": 1.2, "percentage": 40.32, "elapsed_time": "1:04:06", "remaining_time": "1:34:53"}
76
+ {"current_steps": 76, "total_steps": 186, "loss": 0.748, "lr": 7.390614269450633e-06, "epoch": 1.216, "percentage": 40.86, "elapsed_time": "1:04:59", "remaining_time": "1:34:04"}
77
+ {"current_steps": 77, "total_steps": 186, "loss": 0.798, "lr": 7.3075842104728445e-06, "epoch": 1.232, "percentage": 41.4, "elapsed_time": "1:05:58", "remaining_time": "1:33:24"}
78
+ {"current_steps": 78, "total_steps": 186, "loss": 0.7786, "lr": 7.223737547398898e-06, "epoch": 1.248, "percentage": 41.94, "elapsed_time": "1:06:55", "remaining_time": "1:32:39"}
79
+ {"current_steps": 79, "total_steps": 186, "loss": 0.7259, "lr": 7.139103951747694e-06, "epoch": 1.264, "percentage": 42.47, "elapsed_time": "1:07:51", "remaining_time": "1:31:54"}
80
+ {"current_steps": 80, "total_steps": 186, "loss": 0.7555, "lr": 7.053713373516538e-06, "epoch": 1.28, "percentage": 43.01, "elapsed_time": "1:08:42", "remaining_time": "1:31:01"}
81
+ {"current_steps": 81, "total_steps": 186, "loss": 0.7324, "lr": 6.9675960305824785e-06, "epoch": 1.296, "percentage": 43.55, "elapsed_time": "1:09:31", "remaining_time": "1:30:06"}
82
+ {"current_steps": 82, "total_steps": 186, "loss": 0.6775, "lr": 6.880782398008862e-06, "epoch": 1.312, "percentage": 44.09, "elapsed_time": "1:10:25", "remaining_time": "1:29:19"}
83
+ {"current_steps": 83, "total_steps": 186, "loss": 0.8298, "lr": 6.7933031972608644e-06, "epoch": 1.328, "percentage": 44.62, "elapsed_time": "1:11:25", "remaining_time": "1:28:38"}
84
+ {"current_steps": 84, "total_steps": 186, "loss": 0.6214, "lr": 6.70518938533383e-06, "epoch": 1.3439999999999999, "percentage": 45.16, "elapsed_time": "1:12:06", "remaining_time": "1:27:33"}
85
+ {"current_steps": 85, "total_steps": 186, "loss": 0.8824, "lr": 6.61647214379826e-06, "epoch": 1.3599999999999999, "percentage": 45.7, "elapsed_time": "1:13:10", "remaining_time": "1:26:56"}
86
+ {"current_steps": 86, "total_steps": 186, "loss": 0.7861, "lr": 6.527182867765333e-06, "epoch": 1.376, "percentage": 46.24, "elapsed_time": "1:14:02", "remaining_time": "1:26:05"}
87
+ {"current_steps": 87, "total_steps": 186, "loss": 0.5916, "lr": 6.437353154776848e-06, "epoch": 1.392, "percentage": 46.77, "elapsed_time": "1:14:49", "remaining_time": "1:25:08"}
88
+ {"current_steps": 88, "total_steps": 186, "loss": 0.7394, "lr": 6.3470147936235485e-06, "epoch": 1.408, "percentage": 47.31, "elapsed_time": "1:15:35", "remaining_time": "1:24:10"}
89
+ {"current_steps": 89, "total_steps": 186, "loss": 0.7603, "lr": 6.256199753095745e-06, "epoch": 1.424, "percentage": 47.85, "elapsed_time": "1:16:24", "remaining_time": "1:23:16"}
90
+ {"current_steps": 90, "total_steps": 186, "loss": 0.783, "lr": 6.164940170670266e-06, "epoch": 1.44, "percentage": 48.39, "elapsed_time": "1:17:18", "remaining_time": "1:22:27"}
91
+ {"current_steps": 91, "total_steps": 186, "loss": 0.6773, "lr": 6.073268341137694e-06, "epoch": 1.456, "percentage": 48.92, "elapsed_time": "1:18:08", "remaining_time": "1:21:34"}
92
+ {"current_steps": 92, "total_steps": 186, "loss": 0.6969, "lr": 5.98121670517393e-06, "epoch": 1.472, "percentage": 49.46, "elapsed_time": "1:18:54", "remaining_time": "1:20:37"}
93
+ {"current_steps": 93, "total_steps": 186, "loss": 0.7354, "lr": 5.8888178378601565e-06, "epoch": 1.488, "percentage": 50.0, "elapsed_time": "1:19:52", "remaining_time": "1:19:52"}
94
+ {"current_steps": 94, "total_steps": 186, "loss": 0.6885, "lr": 5.796104437155213e-06, "epoch": 1.504, "percentage": 50.54, "elapsed_time": "1:20:49", "remaining_time": "1:19:06"}
95
+ {"current_steps": 95, "total_steps": 186, "loss": 0.791, "lr": 5.703109312324493e-06, "epoch": 1.52, "percentage": 51.08, "elapsed_time": "1:21:49", "remaining_time": "1:18:22"}
96
+ {"current_steps": 96, "total_steps": 186, "loss": 0.8243, "lr": 5.609865372329461e-06, "epoch": 1.536, "percentage": 51.61, "elapsed_time": "1:22:39", "remaining_time": "1:17:29"}
97
+ {"current_steps": 97, "total_steps": 186, "loss": 0.7718, "lr": 5.516405614181883e-06, "epoch": 1.552, "percentage": 52.15, "elapsed_time": "1:23:26", "remaining_time": "1:16:33"}
98
+ {"current_steps": 98, "total_steps": 186, "loss": 0.6252, "lr": 5.4227631112668955e-06, "epoch": 1.568, "percentage": 52.69, "elapsed_time": "1:24:08", "remaining_time": "1:15:33"}
99
+ {"current_steps": 99, "total_steps": 186, "loss": 0.7551, "lr": 5.328971001639054e-06, "epoch": 1.584, "percentage": 53.23, "elapsed_time": "1:25:01", "remaining_time": "1:14:42"}
100
+ {"current_steps": 100, "total_steps": 186, "loss": 0.8162, "lr": 5.235062476295488e-06, "epoch": 1.6, "percentage": 53.76, "elapsed_time": "1:25:56", "remaining_time": "1:13:54"}
101
+ {"current_steps": 101, "total_steps": 186, "loss": 0.6622, "lr": 5.141070767430331e-06, "epoch": 1.616, "percentage": 54.3, "elapsed_time": "1:26:49", "remaining_time": "1:13:03"}
102
+ {"current_steps": 102, "total_steps": 186, "loss": 0.8376, "lr": 5.047029136674563e-06, "epoch": 1.6320000000000001, "percentage": 54.84, "elapsed_time": "1:27:41", "remaining_time": "1:12:13"}
103
+ {"current_steps": 103, "total_steps": 186, "loss": 0.7099, "lr": 4.95297086332544e-06, "epoch": 1.6480000000000001, "percentage": 55.38, "elapsed_time": "1:28:25", "remaining_time": "1:11:15"}
104
+ {"current_steps": 104, "total_steps": 186, "loss": 0.8082, "lr": 4.858929232569671e-06, "epoch": 1.6640000000000001, "percentage": 55.91, "elapsed_time": "1:29:16", "remaining_time": "1:10:23"}
105
+ {"current_steps": 105, "total_steps": 186, "loss": 0.7456, "lr": 4.7649375237045135e-06, "epoch": 1.6800000000000002, "percentage": 56.45, "elapsed_time": "1:30:03", "remaining_time": "1:09:28"}
106
+ {"current_steps": 106, "total_steps": 186, "loss": 0.7856, "lr": 4.671028998360947e-06, "epoch": 1.696, "percentage": 56.99, "elapsed_time": "1:30:46", "remaining_time": "1:08:30"}
107
+ {"current_steps": 107, "total_steps": 186, "loss": 0.7818, "lr": 4.5772368887331044e-06, "epoch": 1.712, "percentage": 57.53, "elapsed_time": "1:31:38", "remaining_time": "1:07:39"}
108
+ {"current_steps": 108, "total_steps": 186, "loss": 0.7014, "lr": 4.483594385818119e-06, "epoch": 1.728, "percentage": 58.06, "elapsed_time": "1:32:27", "remaining_time": "1:06:46"}
109
+ {"current_steps": 109, "total_steps": 186, "loss": 0.702, "lr": 4.39013462767054e-06, "epoch": 1.744, "percentage": 58.6, "elapsed_time": "1:33:17", "remaining_time": "1:05:54"}
110
+ {"current_steps": 110, "total_steps": 186, "loss": 0.6881, "lr": 4.29689068767551e-06, "epoch": 1.76, "percentage": 59.14, "elapsed_time": "1:34:13", "remaining_time": "1:05:05"}
111
+ {"current_steps": 111, "total_steps": 186, "loss": 0.8086, "lr": 4.203895562844789e-06, "epoch": 1.776, "percentage": 59.68, "elapsed_time": "1:35:04", "remaining_time": "1:04:14"}
112
+ {"current_steps": 112, "total_steps": 186, "loss": 0.8712, "lr": 4.111182162139844e-06, "epoch": 1.792, "percentage": 60.22, "elapsed_time": "1:36:04", "remaining_time": "1:03:28"}
113
+ {"current_steps": 113, "total_steps": 186, "loss": 0.6912, "lr": 4.018783294826071e-06, "epoch": 1.808, "percentage": 60.75, "elapsed_time": "1:36:55", "remaining_time": "1:02:37"}
114
+ {"current_steps": 114, "total_steps": 186, "loss": 0.8044, "lr": 3.926731658862307e-06, "epoch": 1.8239999999999998, "percentage": 61.29, "elapsed_time": "1:37:47", "remaining_time": "1:01:45"}
115
+ {"current_steps": 115, "total_steps": 186, "loss": 0.6886, "lr": 3.8350598293297345e-06, "epoch": 1.8399999999999999, "percentage": 61.83, "elapsed_time": "1:38:25", "remaining_time": "1:00:45"}
116
+ {"current_steps": 116, "total_steps": 186, "loss": 0.6942, "lr": 3.7438002469042567e-06, "epoch": 1.8559999999999999, "percentage": 62.37, "elapsed_time": "1:39:05", "remaining_time": "0:59:47"}
117
+ {"current_steps": 117, "total_steps": 186, "loss": 0.7744, "lr": 3.652985206376455e-06, "epoch": 1.8719999999999999, "percentage": 62.9, "elapsed_time": "1:39:55", "remaining_time": "0:58:55"}
118
+ {"current_steps": 118, "total_steps": 186, "loss": 0.7281, "lr": 3.5626468452231534e-06, "epoch": 1.888, "percentage": 63.44, "elapsed_time": "1:40:49", "remaining_time": "0:58:05"}
119
+ {"current_steps": 119, "total_steps": 186, "loss": 0.7454, "lr": 3.472817132234669e-06, "epoch": 1.904, "percentage": 63.98, "elapsed_time": "1:41:49", "remaining_time": "0:57:19"}
120
+ {"current_steps": 120, "total_steps": 186, "loss": 0.8402, "lr": 3.3835278562017405e-06, "epoch": 1.92, "percentage": 64.52, "elapsed_time": "1:42:41", "remaining_time": "0:56:28"}
121
+ {"current_steps": 121, "total_steps": 186, "loss": 0.7245, "lr": 3.29481061466617e-06, "epoch": 1.936, "percentage": 65.05, "elapsed_time": "1:43:29", "remaining_time": "0:55:35"}
122
+ {"current_steps": 122, "total_steps": 186, "loss": 0.7821, "lr": 3.2066968027391377e-06, "epoch": 1.952, "percentage": 65.59, "elapsed_time": "1:44:19", "remaining_time": "0:54:43"}
123
+ {"current_steps": 123, "total_steps": 186, "loss": 0.7493, "lr": 3.119217601991139e-06, "epoch": 1.968, "percentage": 66.13, "elapsed_time": "1:45:05", "remaining_time": "0:53:49"}
124
+ {"current_steps": 124, "total_steps": 186, "loss": 0.6969, "lr": 3.032403969417523e-06, "epoch": 1.984, "percentage": 66.67, "elapsed_time": "1:45:46", "remaining_time": "0:52:53"}
125
+ {"current_steps": 125, "total_steps": 186, "loss": 0.9684, "lr": 2.946286626483463e-06, "epoch": 2.0, "percentage": 67.2, "elapsed_time": "1:46:33", "remaining_time": "0:51:59"}
126
+ {"current_steps": 126, "total_steps": 186, "loss": 0.7107, "lr": 2.8608960482523058e-06, "epoch": 2.016, "percentage": 67.74, "elapsed_time": "1:48:33", "remaining_time": "0:51:41"}
127
+ {"current_steps": 127, "total_steps": 186, "loss": 0.7462, "lr": 2.776262452601104e-06, "epoch": 2.032, "percentage": 68.28, "elapsed_time": "1:49:27", "remaining_time": "0:50:51"}
128
+ {"current_steps": 128, "total_steps": 186, "loss": 0.6588, "lr": 2.6924157895271563e-06, "epoch": 2.048, "percentage": 68.82, "elapsed_time": "1:50:10", "remaining_time": "0:49:55"}
129
+ {"current_steps": 129, "total_steps": 186, "loss": 0.7366, "lr": 2.6093857305493666e-06, "epoch": 2.064, "percentage": 69.35, "elapsed_time": "1:51:12", "remaining_time": "0:49:08"}
130
+ {"current_steps": 130, "total_steps": 186, "loss": 0.634, "lr": 2.5272016582081236e-06, "epoch": 2.08, "percentage": 69.89, "elapsed_time": "1:51:53", "remaining_time": "0:48:11"}