Model save
Browse files- README.md +5 -5
- all_results.json +8 -0
- breeze-listen-w2v2-kn-GF.log +6 -6
- config.json +1 -1
- model.safetensors +1 -1
- train-ctc-model.sh +3 -2
- train_results.json +8 -0
- trainer_state.json +30 -0
- training_args.bin +1 -1
README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
---
|
| 2 |
license: cc-by-nc-4.0
|
| 3 |
-
base_model: facebook/mms-1b-
|
| 4 |
tags:
|
| 5 |
- generated_from_trainer
|
| 6 |
datasets:
|
|
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
|
|
| 15 |
|
| 16 |
# breeze-listen-w2v2-kn-GF
|
| 17 |
|
| 18 |
-
This model is a fine-tuned version of [facebook/mms-1b-
|
| 19 |
|
| 20 |
## Model description
|
| 21 |
|
|
@@ -35,12 +35,12 @@ More information needed
|
|
| 35 |
|
| 36 |
The following hyperparameters were used during training:
|
| 37 |
- learning_rate: 0.001
|
| 38 |
-
- train_batch_size:
|
| 39 |
- eval_batch_size: 8
|
| 40 |
- seed: 42
|
| 41 |
- distributed_type: multi-GPU
|
| 42 |
-
- gradient_accumulation_steps:
|
| 43 |
-
- total_train_batch_size:
|
| 44 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
| 45 |
- lr_scheduler_type: linear
|
| 46 |
- lr_scheduler_warmup_steps: 100
|
|
|
|
| 1 |
---
|
| 2 |
license: cc-by-nc-4.0
|
| 3 |
+
base_model: facebook/mms-1b-fl102
|
| 4 |
tags:
|
| 5 |
- generated_from_trainer
|
| 6 |
datasets:
|
|
|
|
| 15 |
|
| 16 |
# breeze-listen-w2v2-kn-GF
|
| 17 |
|
| 18 |
+
This model is a fine-tuned version of [facebook/mms-1b-fl102](https://huggingface.co/facebook/mms-1b-fl102) on the fleurs dataset.
|
| 19 |
|
| 20 |
## Model description
|
| 21 |
|
|
|
|
| 35 |
|
| 36 |
The following hyperparameters were used during training:
|
| 37 |
- learning_rate: 0.001
|
| 38 |
+
- train_batch_size: 4
|
| 39 |
- eval_batch_size: 8
|
| 40 |
- seed: 42
|
| 41 |
- distributed_type: multi-GPU
|
| 42 |
+
- gradient_accumulation_steps: 16
|
| 43 |
+
- total_train_batch_size: 64
|
| 44 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
| 45 |
- lr_scheduler_type: linear
|
| 46 |
- lr_scheduler_warmup_steps: 100
|
all_results.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 3.99,
|
| 3 |
+
"train_loss": 3.364711216517857,
|
| 4 |
+
"train_runtime": 13678.922,
|
| 5 |
+
"train_samples": 2471,
|
| 6 |
+
"train_samples_per_second": 0.723,
|
| 7 |
+
"train_steps_per_second": 0.023
|
| 8 |
+
}
|
breeze-listen-w2v2-kn-GF.log
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
-
02/04/2024
|
| 2 |
-
02/04/2024
|
| 3 |
_n_gpu=1,
|
| 4 |
adafactor=False,
|
| 5 |
adam_beta1=0.9,
|
|
@@ -39,7 +39,7 @@ fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},
|
|
| 39 |
fsdp_min_num_params=0,
|
| 40 |
fsdp_transformer_layer_cls_to_wrap=None,
|
| 41 |
full_determinism=False,
|
| 42 |
-
gradient_accumulation_steps=
|
| 43 |
gradient_checkpointing=True,
|
| 44 |
gradient_checkpointing_kwargs=None,
|
| 45 |
greater_is_better=None,
|
|
@@ -64,7 +64,7 @@ local_rank=0,
|
|
| 64 |
log_level=passive,
|
| 65 |
log_level_replica=warning,
|
| 66 |
log_on_each_node=True,
|
| 67 |
-
logging_dir=/cosmos/home/sp-operator/ai/training/models/simpragma/breeze-listen-w2v2-kn-GF/runs/
|
| 68 |
logging_first_step=False,
|
| 69 |
logging_nan_inf_filter=True,
|
| 70 |
logging_steps=500,
|
|
@@ -84,7 +84,7 @@ output_dir=/cosmos/home/sp-operator/ai/training/models/simpragma/breeze-listen-w
|
|
| 84 |
overwrite_output_dir=True,
|
| 85 |
past_index=-1,
|
| 86 |
per_device_eval_batch_size=8,
|
| 87 |
-
per_device_train_batch_size=
|
| 88 |
prediction_loss_only=False,
|
| 89 |
push_to_hub=True,
|
| 90 |
push_to_hub_model_id=None,
|
|
@@ -119,4 +119,4 @@ warmup_ratio=0.0,
|
|
| 119 |
warmup_steps=100,
|
| 120 |
weight_decay=0.0,
|
| 121 |
)
|
| 122 |
-
{'train_runtime':
|
|
|
|
| 1 |
+
02/04/2024 18:56:48 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, 16-bits training: True
|
| 2 |
+
02/04/2024 18:56:48 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
|
| 3 |
_n_gpu=1,
|
| 4 |
adafactor=False,
|
| 5 |
adam_beta1=0.9,
|
|
|
|
| 39 |
fsdp_min_num_params=0,
|
| 40 |
fsdp_transformer_layer_cls_to_wrap=None,
|
| 41 |
full_determinism=False,
|
| 42 |
+
gradient_accumulation_steps=16,
|
| 43 |
gradient_checkpointing=True,
|
| 44 |
gradient_checkpointing_kwargs=None,
|
| 45 |
greater_is_better=None,
|
|
|
|
| 64 |
log_level=passive,
|
| 65 |
log_level_replica=warning,
|
| 66 |
log_on_each_node=True,
|
| 67 |
+
logging_dir=/cosmos/home/sp-operator/ai/training/models/simpragma/breeze-listen-w2v2-kn-GF/runs/Feb04_18-56-48_knight,
|
| 68 |
logging_first_step=False,
|
| 69 |
logging_nan_inf_filter=True,
|
| 70 |
logging_steps=500,
|
|
|
|
| 84 |
overwrite_output_dir=True,
|
| 85 |
past_index=-1,
|
| 86 |
per_device_eval_batch_size=8,
|
| 87 |
+
per_device_train_batch_size=4,
|
| 88 |
prediction_loss_only=False,
|
| 89 |
push_to_hub=True,
|
| 90 |
push_to_hub_model_id=None,
|
|
|
|
| 119 |
warmup_steps=100,
|
| 120 |
weight_decay=0.0,
|
| 121 |
)
|
| 122 |
+
{'train_runtime': 12199.309, 'train_samples_per_second': 0.81, 'train_steps_per_second': 0.012, 'train_loss': 3.1379870364540503, 'epoch': 3.94}
|
config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "facebook/mms-1b-
|
| 3 |
"activation_dropout": 0.05,
|
| 4 |
"adapter_attn_dim": 16,
|
| 5 |
"adapter_kernel_size": 3,
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "facebook/mms-1b-fl102",
|
| 3 |
"activation_dropout": 0.05,
|
| 4 |
"adapter_attn_dim": 16,
|
| 5 |
"adapter_kernel_size": 3,
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3859264976
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5019ad31159ea5df95db8779bd8072d6f2dfcaa73fce699f4d1ef7fdd5b3043
|
| 3 |
size 3859264976
|
train-ctc-model.sh
CHANGED
|
@@ -55,8 +55,8 @@ echo "OUTDIR: ${OUTDIR}"
|
|
| 55 |
# Training parameters you can tweak. Feel free to directly change any of the parameters below.
|
| 56 |
|
| 57 |
MAX_EPOCHS=4
|
| 58 |
-
TRAIN_BATCH_SIZE=
|
| 59 |
-
EVAL_BATCH_SIZE=
|
| 60 |
LEARNING_RATE="1e-3"
|
| 61 |
|
| 62 |
EVAL_STEPS="1000"
|
|
@@ -80,6 +80,7 @@ python ${SCRIPT_DIR}/run_speech_recognition_ctc_adapter.py \
|
|
| 80 |
--output_dir="${OUTDIR}" \
|
| 81 |
--num_train_epochs="${MAX_EPOCHS}" \
|
| 82 |
--per_device_train_batch_size="${TRAIN_BATCH_SIZE}" \
|
|
|
|
| 83 |
--learning_rate="${LEARNING_RATE}" \
|
| 84 |
--warmup_steps="100" \
|
| 85 |
--evaluation_strategy="steps" \
|
|
|
|
| 55 |
# Training parameters you can tweak. Feel free to directly change any of the parameters below.
|
| 56 |
|
| 57 |
MAX_EPOCHS=4
|
| 58 |
+
TRAIN_BATCH_SIZE=1
|
| 59 |
+
EVAL_BATCH_SIZE=1
|
| 60 |
LEARNING_RATE="1e-3"
|
| 61 |
|
| 62 |
EVAL_STEPS="1000"
|
|
|
|
| 80 |
--output_dir="${OUTDIR}" \
|
| 81 |
--num_train_epochs="${MAX_EPOCHS}" \
|
| 82 |
--per_device_train_batch_size="${TRAIN_BATCH_SIZE}" \
|
| 83 |
+
--gradient_accumulation_steps="32" \
|
| 84 |
--learning_rate="${LEARNING_RATE}" \
|
| 85 |
--warmup_steps="100" \
|
| 86 |
--evaluation_strategy="steps" \
|
train_results.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 3.99,
|
| 3 |
+
"train_loss": 3.364711216517857,
|
| 4 |
+
"train_runtime": 13678.922,
|
| 5 |
+
"train_samples": 2471,
|
| 6 |
+
"train_samples_per_second": 0.723,
|
| 7 |
+
"train_steps_per_second": 0.023
|
| 8 |
+
}
|
trainer_state.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 3.9886685552407934,
|
| 5 |
+
"eval_steps": 1000,
|
| 6 |
+
"global_step": 308,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 3.99,
|
| 13 |
+
"step": 308,
|
| 14 |
+
"total_flos": 1.1188723782736937e+19,
|
| 15 |
+
"train_loss": 3.364711216517857,
|
| 16 |
+
"train_runtime": 13678.922,
|
| 17 |
+
"train_samples_per_second": 0.723,
|
| 18 |
+
"train_steps_per_second": 0.023
|
| 19 |
+
}
|
| 20 |
+
],
|
| 21 |
+
"logging_steps": 500,
|
| 22 |
+
"max_steps": 308,
|
| 23 |
+
"num_input_tokens_seen": 0,
|
| 24 |
+
"num_train_epochs": 4,
|
| 25 |
+
"save_steps": 1000,
|
| 26 |
+
"total_flos": 1.1188723782736937e+19,
|
| 27 |
+
"train_batch_size": 1,
|
| 28 |
+
"trial_name": null,
|
| 29 |
+
"trial_params": null
|
| 30 |
+
}
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4856
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4aaa9085f71e66c7740edfe87fb1bf5ecbe420672688ea0bc427245f20a7e66a
|
| 3 |
size 4856
|