pere committed
Commit de243ef · 1 Parent(s): 56d2e7f

Saving weights and logs of step 10000
README.md CHANGED
@@ -1 +1,5 @@
  Just for performing some experiments. Do not use.
+
+ Since the loss seemed to start going up, I had to restore this from 9e945cb0636bde60bec30bd7df5db30f80401cc7 (2 step 600k/200). I am then restarting with warmup, decaying from 1e-4.
+
+
events.out.tfevents.1641666808.t1v-n-ccbf3e94-w-0.792149.3.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d234277a4579128e139aa241541106f29e30dcc392c9c20d1bd8d82562b59f51
+ size 1470136
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:df383e5d2aa55f3e13fcc3dbd076bce1372f2e42325790161c80e6f2d5ff7ac3
+ oid sha256:ea8be818d6a6d1fed62fd1200f35b6f2dc8f7d297717334b26e5a3e2a0c25aca
  size 498796983
run_step3.sh CHANGED
@@ -13,7 +13,7 @@
  --per_device_eval_batch_size="40" \
  --learning_rate="1e-4" \
  --end_learning_rate="5e-3" \
- --warmup_steps="0" \
+ --warmup_steps="10000" \
  --overwrite_output_dir \
  --num_train_epochs="2" \
  --adam_beta1="0.9" \
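The flag change above turns on a 10,000-step warmup before the learning rate moves from `--learning_rate` toward `--end_learning_rate`. A minimal sketch of such a warmup-then-linear-decay schedule is below; the rates and warmup length mirror the flags in run_step3.sh, but `total_steps` is a placeholder assumption (the actual training length is not stated in this commit), and the function name is hypothetical, not part of the training script.

```python
def lr_schedule(step: int,
                peak_lr: float = 1e-4,       # --learning_rate
                end_lr: float = 5e-3,        # --end_learning_rate
                warmup_steps: int = 10_000,  # --warmup_steps
                total_steps: int = 600_000): # assumed, not from the script
    """Linear warmup from 0 to peak_lr, then linear interpolation to end_lr."""
    if step < warmup_steps:
        # Ramp up proportionally during warmup.
        return peak_lr * step / warmup_steps
    # After warmup, interpolate linearly toward the end learning rate.
    frac = min(1.0, (step - warmup_steps) / (total_steps - warmup_steps))
    return peak_lr + frac * (end_lr - peak_lr)
```

In a Flax training loop this kind of schedule is usually built with optax helpers (e.g. `optax.linear_schedule` joined via `optax.join_schedules`); the plain function above just makes the shape of the curve explicit.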