mt5 and from scratch
Browse files
base_scandvoc_511_scratch.gin
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
include 't5x/examples/t5/t5_1_1/base.gin'
|
| 2 |
+
include 't5x/configs/runs/pretrain.gin'
|
| 3 |
+
# include 't5x/configs/runs/finetune.gin'
|
| 4 |
+
|
| 5 |
+
# Register necessary SeqIO Tasks/Mixtures.
|
| 6 |
+
import t5.data.mixtures
|
| 7 |
+
import tasks
|
| 8 |
+
|
| 9 |
+
include 'base.gin'
|
| 10 |
+
|
| 11 |
+
VOCABULARY = @seqio.SentencePieceVocabulary()
|
| 12 |
+
seqio.SentencePieceVocabulary.sentencepiece_model_file = "gs://nb-t5/t5/vocabs/wikipedia/no-da-en-sv-nn-is_32000_unigram.sp.model"
|
| 13 |
+
seqio.SentencePieceVocabulary.extra_ids = 100
|
msmall_scratch.gin
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
include 't5x/examples/t5/mt5/small.gin'
|
| 2 |
+
include 't5x/configs/runs/pretrain.gin'
|
| 3 |
+
# include 't5x/configs/runs/finetune.gin'
|
| 4 |
+
|
| 5 |
+
# Register necessary SeqIO Tasks/Mixtures.
|
| 6 |
+
import t5.data.mixtures
|
| 7 |
+
import tasks
|
| 8 |
+
|
| 9 |
+
MIXTURE_OR_TASK_NAME = %gin.REQUIRED
|
| 10 |
+
TASK_FEATURE_LENGTHS = {"inputs": 512, "targets": 512}
|
| 11 |
+
TRAIN_STEPS = 1_500_000
|
| 12 |
+
DROPOUT_RATE = 0.0 # Changed from the default since T5-1.1 recommends this.
|
| 13 |
+
#INITIAL_CHECKPOINT_PATH = "gs://nb-t5x-us-central2/norwegian_NCC_plus_English_t5x_base/checkpoint_1500000"
|
| 14 |
+
#INITIAL_CHECKPOINT_PATH = "gs://t5-data/pretrained_models/t5x/t5_1_1_base/checkpoint_1000000"
|
| 15 |
+
INITIAL_CHECKPOINT_PATH = "gs://t5-data/pretrained_models/t5x/mt5_base/checkpoint_1000000"
|
| 16 |
+
|
| 17 |
+
PjitPartitioner.num_partitions = 1
|
| 18 |
+
|
| 19 |
+
|
train_exp15_base_ul2_511_scandvoc_full_scratch.sh
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
PROJECT_DIR=${HOME}"/models/ul2-t5x"
|
| 2 |
+
T5X_DIR="../../t5x" # Directory where t5x is cloned.
|
| 3 |
+
MODEL_DIR="gs://nb-t5x-us-central2/exp15-t5-base-ul2-511-scandvoc-full-scratch"
|
| 4 |
+
export PYTHONPATH=${PROJECT_DIR}
|
| 5 |
+
MIXTURE_OR_TASK_NAME="scandinavian_ul2_scandvoc"
|
| 6 |
+
|
| 7 |
+
python3 ${T5X_DIR}/t5x/train.py \
|
| 8 |
+
--gin_search_paths=${PROJECT_DIR} \
|
| 9 |
+
--gin_file="base_scandvoc_511_full_scratch.gin" \
|
| 10 |
+
--gin.MODEL_DIR="'${MODEL_DIR}'" \
|
| 11 |
+
--gin.MIXTURE_OR_TASK_NAME="'${MIXTURE_OR_TASK_NAME}'" \
|
| 12 |
+
|
train_exp16_base_span_511_scandvoc_full_scratch.sh
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
PROJECT_DIR=${HOME}"/models/ul2-t5x"
|
| 2 |
+
T5X_DIR="../../t5x" # Directory where t5x is cloned.
|
| 3 |
+
MODEL_DIR="gs://nb-t5x-us-central2/exp16-t5-base-span-511-scandvoc-full-scratch"
|
| 4 |
+
export PYTHONPATH=${PROJECT_DIR}
|
| 5 |
+
MIXTURE_OR_TASK_NAME="scandinavian_span_scandvoc"
|
| 6 |
+
|
| 7 |
+
python3 ${T5X_DIR}/t5x/train.py \
|
| 8 |
+
--gin_search_paths=${PROJECT_DIR} \
|
| 9 |
+
--gin_file="base_scandvoc_511_full_scratch.gin" \
|
| 10 |
+
--gin.MODEL_DIR="'${MODEL_DIR}'" \
|
| 11 |
+
--gin.MIXTURE_OR_TASK_NAME="'${MIXTURE_OR_TASK_NAME}'" \
|
| 12 |
+
|
train_exp17_small_ul2_mt5voc_scratch.sh
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
PROJECT_DIR=${HOME}"/models/ul2-t5x"
|
| 2 |
+
T5X_DIR="../../t5x" # Directory where t5x is cloned.
|
| 3 |
+
MODEL_DIR="gs://nb-t5x-us-central2/exp17-t5-small-ul2-mt5voc-scratch"
|
| 4 |
+
export PYTHONPATH=${PROJECT_DIR}
|
| 5 |
+
MIXTURE_OR_TASK_NAME="scandinavian_ul2_mt5voc"
|
| 6 |
+
|
| 7 |
+
python3 ${T5X_DIR}/t5x/train.py \
|
| 8 |
+
--gin_search_paths=${PROJECT_DIR} \
|
| 9 |
+
--gin_file="msmall_scratch.gin" \
|
| 10 |
+
--gin.MODEL_DIR="'${MODEL_DIR}'" \
|
| 11 |
+
--gin.MIXTURE_OR_TASK_NAME="'${MIXTURE_OR_TASK_NAME}'" \
|
| 12 |
+
|
train_exp18_small_span_mt5voc_scratch.sh
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
PROJECT_DIR=${HOME}"/models/ul2-t5x"
|
| 2 |
+
T5X_DIR="../../t5x" # Directory where t5x is cloned.
|
| 3 |
+
MODEL_DIR="gs://nb-t5x-us-central2/exp18-t5-small-span-mt5voc-scratch"
|
| 4 |
+
export PYTHONPATH=${PROJECT_DIR}
|
| 5 |
+
MIXTURE_OR_TASK_NAME="scandinavian_span_mt5voc"
|
| 6 |
+
|
| 7 |
+
python3 ${T5X_DIR}/t5x/train.py \
|
| 8 |
+
--gin_search_paths=${PROJECT_DIR} \
|
| 9 |
+
--gin_file="msmall_scratch.gin" \
|
| 10 |
+
--gin.MODEL_DIR="'${MODEL_DIR}'" \
|
| 11 |
+
--gin.MIXTURE_OR_TASK_NAME="'${MIXTURE_OR_TASK_NAME}'" \
|
| 12 |
+
|