Saving weights and logs of step 10000
Browse files
events.out.tfevents.1674903183.t1v-n-9758a16f-w-0.1492005.0.v2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a2dd3ffce9745507951439d1be4c76dcbd0d7c80c9cd1d81a071364326618079
|
| 3 |
+
size 1471521
|
flax_model.msgpack
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef6b3c0c304a555e4c58d229764b95edf5329663cf82770abb303ed3691bffa4
|
| 3 |
+
size 283917642
|
start_train.sh
CHANGED
|
@@ -7,7 +7,7 @@ python run_bart_dlm_flax.py \
|
|
| 7 |
--max_seq_length="1024" \
|
| 8 |
--per_device_train_batch_size="16" \
|
| 9 |
--per_device_eval_batch_size="16" \
|
| 10 |
-
--learning_rate="
|
| 11 |
--weight_decay="0.01" \
|
| 12 |
--warmup_steps="10000" \
|
| 13 |
--overwrite_output_dir \
|
|
|
|
| 7 |
--max_seq_length="1024" \
|
| 8 |
--per_device_train_batch_size="16" \
|
| 9 |
--per_device_eval_batch_size="16" \
|
| 10 |
+
--learning_rate="1e-4" \
|
| 11 |
--weight_decay="0.01" \
|
| 12 |
--warmup_steps="10000" \
|
| 13 |
--overwrite_output_dir \
|