# DSTK/semantic_tokenizer/f40ms/config/hubert_config.yaml
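# fairseq (Hydra) training configuration for HuBERT pretraining
# (task: hubert_pretraining, model: hubert, criterion: hubert).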
_name: null
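# common: fp16 training via fairseq's memory-efficient optimizer wrapper, which
# avoids keeping an fp32 master copy of the weights; dynamic loss scaling
# starts at fp16_init_scale 128 and floors at min_loss_scale 1e-4.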
common:
  _name: null
  no_progress_bar: false
  log_interval: 100
  log_format: json
  log_file: null
  aim_repo: null
  aim_run_hash: null
  tensorboard_logdir: tblog
  wandb_project: null
  azureml_logging: false
  seed: 1337
  cpu: false
  tpu: false
  bf16: false
  memory_efficient_bf16: false
  fp16: true
  memory_efficient_fp16: true
  fp16_no_flatten_grads: false
  fp16_init_scale: 128
  fp16_scale_window: null
  fp16_scale_tolerance: 0.0
  on_cpu_convert_precision: false
  min_loss_scale: 0.0001
  threshold_loss_scale: null
  amp: false
  amp_batch_retries: 2
  amp_init_scale: 128
  amp_scale_window: null
  user_dir: null
  empty_cache_freq: 0
  all_gather_list_size: 16384
  model_parallel_size: 1
  quantization_config_path: null
  profile: false
  reset_logging: false
  suppress_crashes: false
  use_plasma_view: false
  plasma_path: /tmp/plasma
common_eval:
  _name: null
  path: null
  post_process: null
  quiet: false
  model_overrides: '{}'
  results_path: null
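# 64-way data parallelism over NCCL (8 processes per node), using fairseq's
# legacy no_c10d DDP backend with find_unused_parameters enabled.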
distributed_training:
  _name: null
  distributed_world_size: 64
  distributed_num_procs: 8
  distributed_rank: 0
  distributed_backend: nccl
  distributed_init_method: tcp://modelarts-job-4e3029c5-de6d-4973-85ce-9be855ccbfcf:6000
  distributed_port: -1
  device_id: 0
  distributed_no_spawn: false
  ddp_backend: no_c10d
  ddp_comm_hook: none
  bucket_cap_mb: 25
  fix_batches_to_gpus: false
  find_unused_parameters: true
  gradient_as_bucket_view: false
  fast_stat_sync: false
  heartbeat_timeout: -1
  broadcast_buffers: false
  slowmo_momentum: null
  slowmo_base_algorithm: localsgd
  localsgd_frequency: 3
  nprocs_per_node: 8
  pipeline_model_parallel: false
  pipeline_balance: null
  pipeline_devices: null
  pipeline_chunks: 0
  pipeline_encoder_balance: null
  pipeline_encoder_devices: null
  pipeline_decoder_balance: null
  pipeline_decoder_devices: null
  pipeline_checkpoint: never
  zero_sharding: none
  fp16: true
  memory_efficient_fp16: true
  tpu: false
  no_reshard_after_forward: false
  fp32_reduce_scatter: false
  cpu_offload: false
  use_sharded_state: false
  not_fsdp_flatten_parameters: false
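# Batching is by "tokens", which for this raw-audio task are waveform samples:
# max_tokens 450000 at a 16 kHz sample rate is ~28.1 s of audio per GPU batch.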
dataset:
  _name: null
  num_workers: 1
  skip_invalid_size_inputs_valid_test: true
  max_tokens: 450000
  batch_size: null
  required_batch_size_multiple: 8
  required_seq_len_multiple: 1
  dataset_impl: null
  data_buffer_size: 10
  train_subset: train
  valid_subset: valid
  combine_valid_subsets: null
  ignore_unused_valid_subsets: false
  validate_interval: 1
  validate_interval_updates: 1000000
  validate_after_updates: 0
  fixed_validation_seed: null
  disable_validation: false
  max_tokens_valid: 450000
  batch_size_valid: null
  max_valid_steps: null
  curriculum: 0
  gen_subset: test
  num_shards: 1
  shard_id: 0
  grouped_shuffling: false
  update_epoch_batch_itr: false
  update_ordered_indices_seed: false
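# With update_freq 4, gradients accumulate over 4 batches, so one optimizer
# step sees roughly 450000 x 64 x 4 = 115.2M samples, about 2 hours of 16 kHz
# audio (ignoring cropping and padding).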
optimization:
  _name: null
  max_epoch: 0
  max_update: 700000
  stop_time_hours: 0.0
  clip_norm: 1.0
  sentence_avg: false
  update_freq:
  - 4
  lr:
  - 0.0015
  stop_min_lr: -1.0
  use_bmuf: false
  skip_remainder_batch: false
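# save_interval_updates (1,000,000) exceeds max_update (700,000), so only the
# per-epoch, last and best checkpoints are written in practice.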
checkpoint:
  _name: null
  save_dir: checkpoints
  restore_file: checkpoint_last.pt
  continue_once: null
  finetune_from_model: null
  reset_dataloader: false
  reset_lr_scheduler: false
  reset_meters: false
  reset_optimizer: false
  optimizer_overrides: '{}'
  save_interval: 1
  save_interval_updates: 1000000
  keep_interval_updates: 100
  keep_interval_updates_pattern: -1
  keep_last_epochs: -1
  keep_best_checkpoints: -1
  no_save: false
  no_epoch_checkpoints: false
  no_last_checkpoints: false
  no_save_optimizer_state: false
  best_checkpoint_metric: loss
  maximize_best_checkpoint_metric: false
  patience: -1
  checkpoint_suffix: ''
  checkpoint_shard_count: 1
  load_checkpoint_on_all_dp_ranks: false
  write_checkpoints_asynchronously: false
  model_parallel_size: 1
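# use_bmuf is false, so the bmuf block below is inert; the generation, eval_lm
# and interactive blocks are likewise fairseq defaults that pretraining does
# not use.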
bmuf:
  _name: null
  block_lr: 1.0
  block_momentum: 0.875
  global_sync_iter: 50
  warmup_iterations: 500
  use_nbm: false
  average_sync: false
  distributed_world_size: 64
generation:
  _name: null
  beam: 5
  nbest: 1
  max_len_a: 0.0
  max_len_b: 200
  min_len: 1
  match_source_len: false
  unnormalized: false
  no_early_stop: false
  no_beamable_mm: false
  lenpen: 1.0
  unkpen: 0.0
  replace_unk: null
  sacrebleu: false
  score_reference: false
  prefix_size: 0
  no_repeat_ngram_size: 0
  sampling: false
  sampling_topk: -1
  sampling_topp: -1.0
  constraints: null
  temperature: 1.0
  diverse_beam_groups: -1
  diverse_beam_strength: 0.5
  diversity_rate: -1.0
  print_alignment: null
  print_step: false
  lm_path: null
  lm_weight: 0.0
  iter_decode_eos_penalty: 0.0
  iter_decode_max_iter: 10
  iter_decode_force_max_iter: false
  iter_decode_with_beam: 1
  iter_decode_with_external_reranker: false
  retain_iter_history: false
  retain_dropout: false
  retain_dropout_modules: null
  decoding_format: null
  no_seed_provided: false
  eos_token: null
eval_lm:
  _name: null
  output_word_probs: false
  output_word_stats: false
  context_window: 0
  softmax_batch: 9223372036854775807
interactive:
  _name: null
  buffer_size: 0
  input: '-'
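# Model: 24 transformer layers, 1024-dim embeddings, 4096-dim FFN, 16 attention
# heads, pre-LayerNorm (layer_norm_first) and a layer-normalized conv feature
# extractor; these dimensions match the HuBERT LARGE configuration.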
model:
  _name: hubert
  label_rate: 50.0
  extractor_mode: layer_norm
  encoder_layers: 24
  encoder_embed_dim: 1024
  encoder_ffn_embed_dim: 4096
  encoder_attention_heads: 16
  activation_fn: gelu
  layer_type: transformer
  dropout: 0.0
  attention_dropout: 0.0
  activation_dropout: 0.0
  encoder_layerdrop: 0.0
  dropout_input: 0.0
  dropout_features: 0.0
  final_dim: 768
  untie_final_proj: true
  layer_norm_first: true
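  # The extractor below downsamples by 5*2*2*2*2*2*2 = 320, so 16 kHz input
  # gives 16000/320 = 50 feature frames per second, matching label_rate: 50.0
  # (one label per 20 ms frame).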
  conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2'
  conv_bias: false
  logit_temp: 0.1
  target_glu: false
  feature_grad_mult: 1.0
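  # Span masking: mask_prob 0.8 with mask_length 10 draws roughly
  # 0.8/10 = 8% of frames as span starts, so (with overlaps allowed) up to
  # ~80% of frames can land inside a 10-frame (200 ms) masked span.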
  mask_length: 10
  mask_prob: 0.8
  mask_selection: static
  mask_other: 0.0
  no_mask_overlap: false
  mask_min_space: 1
  mask_channel_length: 10
  mask_channel_prob: 0.0
  mask_channel_selection: static
  mask_channel_other: 0.0
  no_mask_channel_overlap: false
  mask_channel_min_space: 1
  conv_pos: 128
  conv_pos_groups: 16
  latent_temp:
  - 2.0
  - 0.5
  - 0.999995
  skip_masked: false
  skip_nomask: false
  checkpoint_activations: false
  required_seq_len_multiple: 2
  depthwise_conv_kernel_size: 31
  attn_type: ''
  pos_enc_type: abs
  fp16: false
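# Pretraining task: frame-level "km" (k-means cluster) labels at 50 Hz, read
# from label_dir, are the masked-prediction targets. Waveforms are randomly
# cropped to at most 320000 samples (20 s); utterances shorter than 16000
# samples (1 s) are skipped.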
task:
  _name: hubert_pretraining
  data: data
  fine_tuning: false
  labels:
  - km
  label_dir: config
  label_rate: 50.0
  sample_rate: 16000
  normalize: true
  enable_padding: false
  max_keep_size: 320000
  max_sample_size: 320000
  min_sample_size: 16000
  single_target: false
  random_crop: true
  pad_audio: false
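# The HuBERT criterion scores only masked frames here (pred_nomask_weight 0.0)
# and scales the model's auxiliary feature-penalty loss by 10 via loss_weights.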
criterion:
  _name: hubert
  pred_masked_weight: 1.0
  pred_nomask_weight: 0.0
  loss_weights:
  - 10.0
  log_keys: []
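# Adam with betas (0.9, 0.98), eps 1e-6 and weight decay 0.01, as in the
# published wav2vec 2.0 / HuBERT pretraining recipes.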
optimizer:
  _name: adam
  adam_betas: (0.9,0.98)
  adam_eps: 1.0e-06
  weight_decay: 0.01
  use_old_adam: false
  fp16_adam_stats: false
  tpu: false
  lr:
  - 0.0015
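# Learning rate schedule: linear warmup to the 1.5e-3 peak over the first
# 32000 updates, then polynomial decay with power 1.0 (i.e. linear) to 0 at
# total_num_update 700000, matching optimization.max_update.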
lr_scheduler:
  _name: polynomial_decay
  warmup_updates: 32000
  force_anneal: null
  end_learning_rate: 0.0
  power: 1.0
  total_num_update: 700000.0
  lr:
  - 0.0015
scoring: null
bpe: null
tokenizer: null
ema:
  _name: null
  store_ema: false
  ema_decay: 0.9999
  ema_start_update: 0
  ema_seed_model: null
  ema_update_freq: 1
  ema_fp32: false
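# Hydra job logging: INFO-level messages go to stdout and to hydra_train.log.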
job_logging_cfg:
  version: 1
  formatters:
    simple:
      format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
  handlers:
    console:
      class: logging.StreamHandler
      formatter: simple
      stream: ext://sys.stdout
    file:
      class: logging.FileHandler
      formatter: simple
      filename: hydra_train.log
  root:
    level: INFO
    handlers:
    - console
    - file
  disable_existing_loggers: false