test
Browse files
run_mlm_flax_stream_tpunew.py
CHANGED
|
@@ -180,7 +180,7 @@ class DataTrainingArguments:
|
|
| 180 |
default=10000, metadata={"help": "The number of examples to pre-load for shuffling."}
|
| 181 |
)
|
| 182 |
num_train_steps: int = field(default=50000, metadata={"help": "The number of training steps."})
|
| 183 |
-
num_eval_samples: int = field(default=
|
| 184 |
|
| 185 |
def __post_init__(self):
|
| 186 |
if self.dataset_name is None and self.train_file is None and self.validation_file is None:
|
|
@@ -269,11 +269,12 @@ class FlaxDataCollatorForLanguageModeling:
|
|
| 269 |
def generate_batch_splits(samples_idx: np.ndarray, batch_size: int) -> np.ndarray:
|
| 270 |
num_samples = len(samples_idx)
|
| 271 |
samples_to_remove = num_samples % batch_size
|
| 272 |
-
|
| 273 |
if samples_to_remove != 0:
|
| 274 |
samples_idx = samples_idx[:-samples_to_remove]
|
| 275 |
sections_split = num_samples // batch_size
|
| 276 |
batch_idx = np.split(samples_idx, sections_split)
|
|
|
|
| 277 |
return batch_idx
|
| 278 |
|
| 279 |
def advance_iter_and_group_samples(train_iterator, num_samples, max_seq_length):
|
|
@@ -561,13 +562,9 @@ if __name__ == "__main__":
|
|
| 561 |
train_metrics = []
|
| 562 |
eval_metrics = []
|
| 563 |
|
| 564 |
-
|
| 565 |
-
training_iter = iter(torch.utils.data.DataLoader(tokenized_datasets.with_format("torch"), batch_size=1, shuffle=False, num_workers=dataset.n_shards, collate_fn=lambda x: x))
|
| 566 |
-
else:
|
| 567 |
-
training_iter = iter(tokenized_datasets)
|
| 568 |
|
| 569 |
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
|
| 570 |
-
|
| 571 |
eval_samples = advance_iter_and_group_samples(training_iter, data_args.num_eval_samples, max_seq_length)
|
| 572 |
|
| 573 |
|
|
|
|
| 180 |
default=10000, metadata={"help": "The number of examples to pre-load for shuffling."}
|
| 181 |
)
|
| 182 |
num_train_steps: int = field(default=50000, metadata={"help": "The number of training steps."})
|
| 183 |
+
num_eval_samples: int = field(default=10000, metadata={"help": "The number of samples to be used for evaluation"})
|
| 184 |
|
| 185 |
def __post_init__(self):
|
| 186 |
if self.dataset_name is None and self.train_file is None and self.validation_file is None:
|
|
|
|
| 269 |
def generate_batch_splits(samples_idx: np.ndarray, batch_size: int) -> np.ndarray:
    """Partition shuffled sample indices into full batches of ``batch_size``.

    A ragged final batch is dropped so that every split has exactly
    ``batch_size`` elements.

    Args:
        samples_idx: 1-D array of dataset sample indices.
        batch_size: number of samples per batch; must be > 0 and no larger
            than ``len(samples_idx)`` (otherwise ``np.split`` raises, as in
            the original implementation).

    Returns:
        A list of ``np.ndarray`` chunks, each of length ``batch_size``.
        (NOTE: despite the annotation, ``np.split`` returns a Python list;
        annotation kept as-is for interface compatibility.)
    """
    num_samples = len(samples_idx)
    samples_to_remove = num_samples % batch_size
    if samples_to_remove != 0:
        # Drop the ragged tail so np.split can produce equal-sized sections.
        samples_idx = samples_idx[:-samples_to_remove]
    # Compute the section count from the TRIMMED array rather than the stale
    # pre-trim length. The floor division made the old value coincidentally
    # equal, but this form stays correct under future edits.
    sections_split = len(samples_idx) // batch_size
    batch_idx = np.split(samples_idx, sections_split)
    return batch_idx
|
| 279 |
|
| 280 |
def advance_iter_and_group_samples(train_iterator, num_samples, max_seq_length):
|
|
|
|
| 562 |
train_metrics = []
|
| 563 |
eval_metrics = []
|
| 564 |
|
| 565 |
+
training_iter = iter(torch.utils.data.DataLoader(tokenized_datasets.with_format("torch"), batch_size=1, shuffle=False, num_workers=dataset.n_shards, collate_fn=lambda x: x))
|
|
|
|
|
|
|
|
|
|
| 566 |
|
| 567 |
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
|
|
|
|
| 568 |
eval_samples = advance_iter_and_group_samples(training_iter, data_args.num_eval_samples, max_seq_length)
|
| 569 |
|
| 570 |
|
run_nb_roberta_base_scandi_tpunew8.sh
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Launch streaming masked-LM pre-training with the TPU-adapted Flax script.
# Trains a RoBERTa-base-sized model (config_base.json, tokenizer from the
# current directory, xlm-roberta-base weights as the starting point) on the
# streamed NbAiLab/scandinavian dataset, and pushes checkpoints to the
# private Hub repo NbAiLab/nb-roberta-base-scandi-tpunew8.
# NOTE(review): save/eval/logging every 50 steps is very frequent for a
# 10000-step run — presumably a debugging configuration; confirm before a
# full training run.
python run_mlm_flax_stream_tpunew.py \
    --output_dir="../nb-roberta-base-scandi-tpunew8" \
    --hub_model_id="NbAiLab/nb-roberta-base-scandi-tpunew8" \
    --hub_private_repo=True \
    --model_name_or_path="xlm-roberta-base" \
    --config_name="./config_base.json" \
    --tokenizer_name="./" \
    --dataset_name="NbAiLab/scandinavian" \
    --max_seq_length="512" \
    --weight_decay="0.01" \
    --per_device_train_batch_size="62" \
    --per_device_eval_batch_size="62" \
    --learning_rate="4e-4" \
    --warmup_steps="1000" \
    --overwrite_output_dir \
    --num_train_steps="10000" \
    --adam_beta1="0.9" \
    --adam_beta2="0.98" \
    --logging_steps="50" \
    --save_steps="50" \
    --eval_steps="50" \
    --dtype="bfloat16" \
    --push_to_hub