# DSTK/semantic_tokenizer/f40ms/config/hubert_config.yaml
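# fairseq (Hydra) training configuration for HuBERT pretraining
# (task: hubert_pretraining, model: hubert, criterion: hubert).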
_name: null
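# common: fp16 training via fairseq's memory-efficient optimizer wrapper, which
# avoids keeping an fp32 master copy of the weights; dynamic loss scaling
# starts at fp16_init_scale 128 and floors at min_loss_scale 1e-4.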
common:
  _name: null
  no_progress_bar: false
  log_interval: 100
  log_format: json
  log_file: null
  aim_repo: null
  aim_run_hash: null
  tensorboard_logdir: tblog
  wandb_project: null
  azureml_logging: false
  seed: 1337
  cpu: false
  tpu: false
  bf16: false
  memory_efficient_bf16: false
  fp16: true
  memory_efficient_fp16: true
  fp16_no_flatten_grads: false
  fp16_init_scale: 128
  fp16_scale_window: null
  fp16_scale_tolerance: 0.0
  on_cpu_convert_precision: false
  min_loss_scale: 0.0001
  threshold_loss_scale: null
  amp: false
  amp_batch_retries: 2
  amp_init_scale: 128
  amp_scale_window: null
  user_dir: null
  empty_cache_freq: 0
  all_gather_list_size: 16384
  model_parallel_size: 1
  quantization_config_path: null
  profile: false
  reset_logging: false
  suppress_crashes: false
  use_plasma_view: false
  plasma_path: /tmp/plasma
common_eval:
  _name: null
  path: null
  post_process: null
  quiet: false
  model_overrides: '{}'
  results_path: null
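# 64-way data parallelism over NCCL (8 processes per node), using fairseq's
# legacy no_c10d DDP backend with find_unused_parameters enabled.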
distributed_training:
  _name: null
  distributed_world_size: 64
  distributed_num_procs: 8
  distributed_rank: 0
  distributed_backend: nccl
  distributed_init_method: tcp://modelarts-job-4e3029c5-de6d-4973-85ce-9be855ccbfcf:6000
  distributed_port: -1
  device_id: 0
  distributed_no_spawn: false
  ddp_backend: no_c10d
  ddp_comm_hook: none
  bucket_cap_mb: 25
  fix_batches_to_gpus: false
  find_unused_parameters: true
  gradient_as_bucket_view: false
  fast_stat_sync: false
  heartbeat_timeout: -1
  broadcast_buffers: false
  slowmo_momentum: null
  slowmo_base_algorithm: localsgd
  localsgd_frequency: 3
  nprocs_per_node: 8
  pipeline_model_parallel: false
  pipeline_balance: null
  pipeline_devices: null
  pipeline_chunks: 0
  pipeline_encoder_balance: null
  pipeline_encoder_devices: null
  pipeline_decoder_balance: null
  pipeline_decoder_devices: null
  pipeline_checkpoint: never
  zero_sharding: none
  fp16: true
  memory_efficient_fp16: true
  tpu: false
  no_reshard_after_forward: false
  fp32_reduce_scatter: false
  cpu_offload: false
  use_sharded_state: false
  not_fsdp_flatten_parameters: false
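# Batching is by "tokens", which for this raw-audio task are waveform samples:
# max_tokens 450000 at a 16 kHz sample rate is ~28.1 s of audio per GPU batch.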
dataset:
  _name: null
  num_workers: 1
  skip_invalid_size_inputs_valid_test: true
  max_tokens: 450000
  batch_size: null
  required_batch_size_multiple: 8
  required_seq_len_multiple: 1
  dataset_impl: null
  data_buffer_size: 10
  train_subset: train
  valid_subset: valid
  combine_valid_subsets: null
  ignore_unused_valid_subsets: false
  validate_interval: 1
  validate_interval_updates: 1000000
  validate_after_updates: 0
  fixed_validation_seed: null
  disable_validation: false
  max_tokens_valid: 450000
  batch_size_valid: null
  max_valid_steps: null
  curriculum: 0
  gen_subset: test
  num_shards: 1
  shard_id: 0
  grouped_shuffling: false
  update_epoch_batch_itr: false
  update_ordered_indices_seed: false
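# With update_freq 4, gradients accumulate over 4 batches, so one optimizer
# step sees roughly 450000 x 64 x 4 = 115.2M samples, about 2 hours of 16 kHz
# audio (ignoring cropping and padding).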
optimization:
  _name: null
  max_epoch: 0
  max_update: 700000
  stop_time_hours: 0.0
  clip_norm: 1.0
  sentence_avg: false
  update_freq:
  - 4
  lr:
  - 0.0015
  stop_min_lr: -1.0
  use_bmuf: false
  skip_remainder_batch: false
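# save_interval_updates (1,000,000) exceeds max_update (700,000), so only the
# per-epoch, last and best checkpoints are written in practice.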
checkpoint:
  _name: null
  save_dir: checkpoints
  restore_file: checkpoint_last.pt
  continue_once: null
  finetune_from_model: null
  reset_dataloader: false
  reset_lr_scheduler: false
  reset_meters: false
  reset_optimizer: false
  optimizer_overrides: '{}'
  save_interval: 1
  save_interval_updates: 1000000
  keep_interval_updates: 100
  keep_interval_updates_pattern: -1
  keep_last_epochs: -1
  keep_best_checkpoints: -1
  no_save: false
  no_epoch_checkpoints: false
  no_last_checkpoints: false
  no_save_optimizer_state: false
  best_checkpoint_metric: loss
  maximize_best_checkpoint_metric: false
  patience: -1
  checkpoint_suffix: ''
  checkpoint_shard_count: 1
  load_checkpoint_on_all_dp_ranks: false
  write_checkpoints_asynchronously: false
  model_parallel_size: 1
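# use_bmuf is false, so the bmuf block below is inert; the generation, eval_lm
# and interactive blocks are likewise fairseq defaults that pretraining does
# not use.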
bmuf:
  _name: null
  block_lr: 1.0
  block_momentum: 0.875
  global_sync_iter: 50
  warmup_iterations: 500
  use_nbm: false
  average_sync: false
  distributed_world_size: 64
generation:
  _name: null
  beam: 5
  nbest: 1
  max_len_a: 0.0
  max_len_b: 200
  min_len: 1
  match_source_len: false
  unnormalized: false
  no_early_stop: false
  no_beamable_mm: false
  lenpen: 1.0
  unkpen: 0.0
  replace_unk: null
  sacrebleu: false
  score_reference: false
  prefix_size: 0
  no_repeat_ngram_size: 0
  sampling: false
  sampling_topk: -1
  sampling_topp: -1.0
  constraints: null
  temperature: 1.0
  diverse_beam_groups: -1
  diverse_beam_strength: 0.5
  diversity_rate: -1.0
  print_alignment: null
  print_step: false
  lm_path: null
  lm_weight: 0.0
  iter_decode_eos_penalty: 0.0
  iter_decode_max_iter: 10
  iter_decode_force_max_iter: false
  iter_decode_with_beam: 1
  iter_decode_with_external_reranker: false
  retain_iter_history: false
  retain_dropout: false
  retain_dropout_modules: null
  decoding_format: null
  no_seed_provided: false
  eos_token: null
eval_lm:
  _name: null
  output_word_probs: false
  output_word_stats: false
  context_window: 0
  softmax_batch: 9223372036854775807
interactive:
  _name: null
  buffer_size: 0
  input: '-'
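# Model: 24 transformer layers, 1024-dim embeddings, 4096-dim FFN, 16 attention
# heads, pre-LayerNorm (layer_norm_first) and a layer-normalized conv feature
# extractor; these dimensions match the HuBERT LARGE configuration.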
model:
  _name: hubert
  label_rate: 50.0
  extractor_mode: layer_norm
  encoder_layers: 24
  encoder_embed_dim: 1024
  encoder_ffn_embed_dim: 4096
  encoder_attention_heads: 16
  activation_fn: gelu
  layer_type: transformer
  dropout: 0.0
  attention_dropout: 0.0
  activation_dropout: 0.0
  encoder_layerdrop: 0.0
  dropout_input: 0.0
  dropout_features: 0.0
  final_dim: 768
  untie_final_proj: true
  layer_norm_first: true
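  # The extractor below downsamples by 5*2*2*2*2*2*2 = 320, so 16 kHz input
  # gives 16000/320 = 50 feature frames per second, matching label_rate: 50.0
  # (one label per 20 ms frame).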
  conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2'
  conv_bias: false
  logit_temp: 0.1
  target_glu: false
  feature_grad_mult: 1.0
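  # Span masking: mask_prob 0.8 with mask_length 10 draws roughly
  # 0.8/10 = 8% of frames as span starts, so (with overlaps allowed) up to
  # ~80% of frames can land inside a 10-frame (200 ms) masked span.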
  mask_length: 10
  mask_prob: 0.8
  mask_selection: static
  mask_other: 0.0
  no_mask_overlap: false
  mask_min_space: 1
  mask_channel_length: 10
  mask_channel_prob: 0.0
  mask_channel_selection: static
  mask_channel_other: 0.0
  no_mask_channel_overlap: false
  mask_channel_min_space: 1
  conv_pos: 128
  conv_pos_groups: 16
  latent_temp:
  - 2.0
  - 0.5
  - 0.999995
  skip_masked: false
  skip_nomask: false
  checkpoint_activations: false
  required_seq_len_multiple: 2
  depthwise_conv_kernel_size: 31
  attn_type: ''
  pos_enc_type: abs
  fp16: false
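# Pretraining task: frame-level "km" (k-means cluster) labels at 50 Hz, read
# from label_dir, are the masked-prediction targets. Waveforms are randomly
# cropped to at most 320000 samples (20 s); utterances shorter than 16000
# samples (1 s) are skipped.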
task:
  _name: hubert_pretraining
  data: data
  fine_tuning: false
  labels:
  - km
  label_dir: config
  label_rate: 50.0
  sample_rate: 16000
  normalize: true
  enable_padding: false
  max_keep_size: 320000
  max_sample_size: 320000
  min_sample_size: 16000
  single_target: false
  random_crop: true
  pad_audio: false
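# The HuBERT criterion scores only masked frames here (pred_nomask_weight 0.0)
# and scales the model's auxiliary feature-penalty loss by 10 via loss_weights.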
criterion:
  _name: hubert
  pred_masked_weight: 1.0
  pred_nomask_weight: 0.0
  loss_weights:
  - 10.0
  log_keys: []
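# Adam with betas (0.9, 0.98), eps 1e-6 and weight decay 0.01, as in the
# published wav2vec 2.0 / HuBERT pretraining recipes.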
optimizer:
  _name: adam
  adam_betas: (0.9,0.98)
  adam_eps: 1.0e-06
  weight_decay: 0.01
  use_old_adam: false
  fp16_adam_stats: false
  tpu: false
  lr:
  - 0.0015
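# Learning rate schedule: linear warmup to the 1.5e-3 peak over the first
# 32000 updates, then polynomial decay with power 1.0 (i.e. linear) to 0 at
# total_num_update 700000, matching optimization.max_update.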
lr_scheduler:
  _name: polynomial_decay
  warmup_updates: 32000
  force_anneal: null
  end_learning_rate: 0.0
  power: 1.0
  total_num_update: 700000.0
  lr:
  - 0.0015
scoring: null
bpe: null
tokenizer: null
ema:
  _name: null
  store_ema: false
  ema_decay: 0.9999
  ema_start_update: 0
  ema_seed_model: null
  ema_update_freq: 1
  ema_fp32: false
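# Hydra job logging: INFO-level messages go to stdout and to hydra_train.log.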
job_logging_cfg:
  version: 1
  formatters:
    simple:
      format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
  handlers:
    console:
      class: logging.StreamHandler
      formatter: simple
      stream: ext://sys.stdout
    file:
      class: logging.FileHandler
      formatter: simple
      filename: hydra_train.log
  root:
    level: INFO
    handlers:
    - console
    - file
  disable_existing_loggers: false