diff --git a/assamese/female/model/config.yaml b/assamese/female/model/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a14c0196345f5701294146a49986cd0b96f99948 --- /dev/null +++ b/assamese/female/model/config.yaml @@ -0,0 +1,272 @@ +config: conf/tuning/train_fastspeech2.yaml +print_config: false +log_level: INFO +dry_run: false +iterator_type: sequence +output_dir: exp/tts_train_fastspeech2_raw_char_None +ngpu: 1 +seed: 0 +num_workers: 1 +num_att_plot: 3 +dist_backend: nccl +dist_init_method: env:// +dist_world_size: 4 +dist_rank: 0 +local_rank: 0 +dist_master_addr: localhost +dist_master_port: 60699 +dist_launcher: null +multiprocessing_distributed: true +unused_parameters: false +sharded_ddp: false +cudnn_enabled: true +cudnn_benchmark: false +cudnn_deterministic: true +collect_stats: false +write_collected_feats: false +max_epoch: 1000 +patience: null +val_scheduler_criterion: +- valid +- loss +early_stopping_criterion: +- valid +- loss +- min +best_model_criterion: +- - valid + - loss + - min +- - train + - loss + - min +keep_nbest_models: 5 +grad_clip: 1.0 +grad_clip_type: 2.0 +grad_noise: false +accum_grad: 8 +no_forward_run: false +resume: true +train_dtype: float32 +use_amp: false +log_interval: null +use_tensorboard: true +use_wandb: false +wandb_project: null +wandb_id: null +wandb_entity: null +wandb_name: null +wandb_model_log_interval: -1 +detect_anomaly: false +pretrain_path: null +init_param: [] +ignore_init_mismatch: false +freeze_param: [] +num_iters_per_epoch: 800 +batch_size: 20 +valid_batch_size: null +batch_bins: 3000000 +valid_batch_bins: null +train_shape_file: +- exp/tts_stats_raw_char_None/train/text_shape.char +- exp/tts_stats_raw_char_None/train/speech_shape +valid_shape_file: +- exp/tts_stats_raw_char_None/valid/text_shape.char +- exp/tts_stats_raw_char_None/valid/speech_shape +batch_type: numel +valid_batch_type: null +fold_length: +- 150 +- 204800 +sort_in_batch: descending +sort_batch: descending +multiple_iterator: false +chunk_length: 500 +chunk_shift_ratio: 0.5 +num_cache_chunks: 1024 +train_data_path_and_name_and_type: +- - dump/raw/tr_no_dev/text + - text + - text +- - duration_info/tr_no_dev/durations + - durations + - text_int +- - dump/raw/tr_no_dev/wav.scp + - speech + - sound +- - exp/tts_stats_raw_char_None/train/collect_feats/pitch.scp + - pitch + - npy +- - exp/tts_stats_raw_char_None/train/collect_feats/energy.scp + - energy + - npy +valid_data_path_and_name_and_type: +- - dump/raw/dev/text + - text + - text +- - duration_info/dev/durations + - durations + - text_int +- - dump/raw/dev/wav.scp + - speech + - sound +- - exp/tts_stats_raw_char_None/valid/collect_feats/pitch.scp + - pitch + - npy +- - exp/tts_stats_raw_char_None/valid/collect_feats/energy.scp + - energy + - npy +allow_variable_data_keys: false +max_cache_size: 0.0 +max_cache_fd: 32 +valid_max_cache_size: null +optim: adam +optim_conf: + lr: 1.0 +scheduler: noamlr +scheduler_conf: + model_size: 384 + warmup_steps: 4000 +token_list: +- +- +- A +- a +- i +- ',' +- r +- E +- k +- t +- n +- l +- o +- b +- u +- m +- y +- $ +- . +- p +- h +- d +- s +- I +- g +- j +- ऐ +- c +- ट +- ख +- C +- w +- श +- M +- B +- थ +- ध +- ण +- ष +- ठ +- घ +- U +- P +- q +- ड +- ङ +- R +- औ +- ञ +- D +- ढ +- +odim: null +model_conf: {} +use_preprocessor: true +token_type: char +bpemodel: null +non_linguistic_symbols: null +cleaner: null +g2p: g2p_en_no_space +feats_extract: fbank +feats_extract_conf: + n_fft: 1024 + hop_length: 256 + win_length: 1024 + fs: 22050 + fmin: 0 + fmax: 8000 + n_mels: 80 +normalize: global_mvn +normalize_conf: + stats_file: /speech/arun/released_models/tts/female/assamese/fastspeech2_hs/feats_stats.npz +tts: fastspeech2 +tts_conf: + adim: 384 + aheads: 2 + elayers: 4 + eunits: 1536 + dlayers: 4 + dunits: 1536 + positionwise_layer_type: conv1d + positionwise_conv_kernel_size: 3 + duration_predictor_layers: 2 + duration_predictor_chans: 256 + duration_predictor_kernel_size: 3 + postnet_layers: 5 + postnet_filts: 5 + postnet_chans: 256 + use_masking: true + use_scaled_pos_enc: true + encoder_normalize_before: true + decoder_normalize_before: true + reduction_factor: 1 + init_type: xavier_uniform + init_enc_alpha: 1.0 + init_dec_alpha: 1.0 + transformer_enc_dropout_rate: 0.2 + transformer_enc_positional_dropout_rate: 0.2 + transformer_enc_attn_dropout_rate: 0.2 + transformer_dec_dropout_rate: 0.2 + transformer_dec_positional_dropout_rate: 0.2 + transformer_dec_attn_dropout_rate: 0.2 + pitch_predictor_layers: 5 + pitch_predictor_chans: 256 + pitch_predictor_kernel_size: 5 + pitch_predictor_dropout: 0.5 + pitch_embed_kernel_size: 1 + pitch_embed_dropout: 0.0 + stop_gradient_from_pitch_predictor: true + energy_predictor_layers: 2 + energy_predictor_chans: 256 + energy_predictor_kernel_size: 3 + energy_predictor_dropout: 0.5 + energy_embed_kernel_size: 1 + energy_embed_dropout: 0.0 + stop_gradient_from_energy_predictor: false +pitch_extract: dio +pitch_extract_conf: + fs: 22050 + n_fft: 1024 + hop_length: 256 + f0max: 400 + f0min: 80 + reduction_factor: 1 +pitch_normalize: global_mvn +pitch_normalize_conf: + stats_file: /speech/arun/released_models/tts/female/assamese/fastspeech2_hs/pitch_stats.npz +energy_extract: energy +energy_extract_conf: + fs: 22050 + n_fft: 1024 + hop_length: 256 + win_length: 1024 + reduction_factor: 1 +energy_normalize: global_mvn +energy_normalize_conf: + stats_file: /speech/arun/released_models/tts/female/assamese/fastspeech2_hs/energy_stats.npz +required: +- output_dir +- token_list +version: 0.10.3a3 +distributed: true diff --git a/assamese/female/model/energy_stats.npz b/assamese/female/model/energy_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..02ccb730d77ac4356cabc9997ac78c408b125c5f --- /dev/null +++ b/assamese/female/model/energy_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80879ba3818e253e2cbfb78176b51de0c247fdb17f07a6b2db730c9d0026f31e +size 770 diff --git a/assamese/female/model/feats_stats.npz b/assamese/female/model/feats_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..fa1cb9ec44ea84345616079bcf5c430db53ae3b0 --- /dev/null +++ b/assamese/female/model/feats_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5be4c43d477c15c04189297ad17d1e1c436f31aed3caff14cacebe4fc13308ab +size 1402 diff --git a/assamese/female/model/feats_type b/assamese/female/model/feats_type new file mode 100644 index 0000000000000000000000000000000000000000..16b9d46ca2ab51e9b5f8a9e5ba31f3ef5a906ab6 --- /dev/null +++ b/assamese/female/model/feats_type @@ -0,0 +1 @@ +raw diff --git a/assamese/female/model/model.pth b/assamese/female/model/model.pth new file mode 100644 index 0000000000000000000000000000000000000000..b7254e202a592917550537758d81d05a1775db6e --- /dev/null +++ b/assamese/female/model/model.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff9efd82aacade9fcd3fc17e350147d3a272356b1f419872ac9fa525b991b578 +size 148685818 diff --git a/assamese/female/model/pitch_stats.npz b/assamese/female/model/pitch_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..042b24e734362ef8411a3fb083ae1d79c109f259 --- /dev/null +++ b/assamese/female/model/pitch_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0453690cf54a373b7a39ccbc4557f8487496bca2290c32d684369fe05f4ed4fe +size 770 diff --git a/assamese/male/model/config.yaml b/assamese/male/model/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..435c8d8c6544f22dca8570feb69981a732dac7fe --- /dev/null +++ b/assamese/male/model/config.yaml @@ -0,0 +1,272 @@ +config: conf/tuning/train_fastspeech2.yaml +print_config: false +log_level: INFO +dry_run: false +iterator_type: sequence +output_dir: exp/tts_train_fastspeech2_raw_char_None +ngpu: 1 +seed: 0 +num_workers: 1 +num_att_plot: 3 +dist_backend: nccl +dist_init_method: env:// +dist_world_size: 4 +dist_rank: 0 +local_rank: 0 +dist_master_addr: localhost +dist_master_port: 33317 +dist_launcher: null +multiprocessing_distributed: true +unused_parameters: false +sharded_ddp: false +cudnn_enabled: true +cudnn_benchmark: false +cudnn_deterministic: true +collect_stats: false +write_collected_feats: false +max_epoch: 1000 +patience: null +val_scheduler_criterion: +- valid +- loss +early_stopping_criterion: +- valid +- loss +- min +best_model_criterion: +- - valid + - loss + - min +- - train + - loss + - min +keep_nbest_models: 5 +grad_clip: 1.0 +grad_clip_type: 2.0 +grad_noise: false +accum_grad: 8 +no_forward_run: false +resume: true +train_dtype: float32 +use_amp: false +log_interval: null +use_tensorboard: true +use_wandb: false +wandb_project: null +wandb_id: null +wandb_entity: null +wandb_name: null +wandb_model_log_interval: -1 +detect_anomaly: false +pretrain_path: null +init_param: [] +ignore_init_mismatch: false +freeze_param: [] +num_iters_per_epoch: 800 +batch_size: 20 +valid_batch_size: null +batch_bins: 3000000 +valid_batch_bins: null +train_shape_file: +- exp/tts_stats_raw_char_None/train/text_shape.char +- exp/tts_stats_raw_char_None/train/speech_shape +valid_shape_file: +- exp/tts_stats_raw_char_None/valid/text_shape.char +- exp/tts_stats_raw_char_None/valid/speech_shape +batch_type: numel +valid_batch_type: null +fold_length: +- 150 +- 204800 +sort_in_batch: descending +sort_batch: descending +multiple_iterator: false +chunk_length: 500 +chunk_shift_ratio: 0.5 +num_cache_chunks: 1024 +train_data_path_and_name_and_type: +- - dump/raw/tr_no_dev/text + - text + - text +- - duration_info/tr_no_dev/durations + - durations + - text_int +- - dump/raw/tr_no_dev/wav.scp + - speech + - sound +- - exp/tts_stats_raw_char_None/train/collect_feats/pitch.scp + - pitch + - npy +- - exp/tts_stats_raw_char_None/train/collect_feats/energy.scp + - energy + - npy +valid_data_path_and_name_and_type: +- - dump/raw/dev/text + - text + - text +- - duration_info/dev/durations + - durations + - text_int +- - dump/raw/dev/wav.scp + - speech + - sound +- - exp/tts_stats_raw_char_None/valid/collect_feats/pitch.scp + - pitch + - npy +- - exp/tts_stats_raw_char_None/valid/collect_feats/energy.scp + - energy + - npy +allow_variable_data_keys: false +max_cache_size: 0.0 +max_cache_fd: 32 +valid_max_cache_size: null +optim: adam +optim_conf: + lr: 1.0 +scheduler: noamlr +scheduler_conf: + model_size: 384 + warmup_steps: 4000 +token_list: +- +- +- A +- a +- i +- ',' +- r +- E +- k +- n +- t +- l +- b +- o +- u +- m +- y +- $ +- . +- p +- h +- d +- s +- I +- g +- j +- ट +- c +- ऐ +- C +- ख +- w +- श +- B +- थ +- M +- ध +- ष +- ण +- ठ +- घ +- q +- P +- U +- ड +- ङ +- R +- औ +- ञ +- D +- ढ +- +odim: null +model_conf: {} +use_preprocessor: true +token_type: char +bpemodel: null +non_linguistic_symbols: null +cleaner: null +g2p: g2p_en_no_space +feats_extract: fbank +feats_extract_conf: + n_fft: 1024 + hop_length: 256 + win_length: 1024 + fs: 22050 + fmin: 0 + fmax: 8000 + n_mels: 80 +normalize: global_mvn +normalize_conf: + stats_file: /speech/arun/released_models/tts/male/assamese/fastspeech2_hs/feats_stats.npz +tts: fastspeech2 +tts_conf: + adim: 384 + aheads: 2 + elayers: 4 + eunits: 1536 + dlayers: 4 + dunits: 1536 + positionwise_layer_type: conv1d + positionwise_conv_kernel_size: 3 + duration_predictor_layers: 2 + duration_predictor_chans: 256 + duration_predictor_kernel_size: 3 + postnet_layers: 5 + postnet_filts: 5 + postnet_chans: 256 + use_masking: true + use_scaled_pos_enc: true + encoder_normalize_before: true + decoder_normalize_before: true + reduction_factor: 1 + init_type: xavier_uniform + init_enc_alpha: 1.0 + init_dec_alpha: 1.0 + transformer_enc_dropout_rate: 0.2 + transformer_enc_positional_dropout_rate: 0.2 + transformer_enc_attn_dropout_rate: 0.2 + transformer_dec_dropout_rate: 0.2 + transformer_dec_positional_dropout_rate: 0.2 + transformer_dec_attn_dropout_rate: 0.2 + pitch_predictor_layers: 5 + pitch_predictor_chans: 256 + pitch_predictor_kernel_size: 5 + pitch_predictor_dropout: 0.5 + pitch_embed_kernel_size: 1 + pitch_embed_dropout: 0.0 + stop_gradient_from_pitch_predictor: true + energy_predictor_layers: 2 + energy_predictor_chans: 256 + energy_predictor_kernel_size: 3 + energy_predictor_dropout: 0.5 + energy_embed_kernel_size: 1 + energy_embed_dropout: 0.0 + stop_gradient_from_energy_predictor: false +pitch_extract: dio +pitch_extract_conf: + fs: 22050 + n_fft: 1024 + hop_length: 256 + f0max: 350 + f0min: 40 + reduction_factor: 1 +pitch_normalize: global_mvn +pitch_normalize_conf: + stats_file: /speech/arun/released_models/tts/male/assamese/fastspeech2_hs/pitch_stats.npz +energy_extract: energy +energy_extract_conf: + fs: 22050 + n_fft: 1024 + hop_length: 256 + win_length: 1024 + reduction_factor: 1 +energy_normalize: global_mvn +energy_normalize_conf: + stats_file: /speech/arun/released_models/tts/male/assamese/fastspeech2_hs/energy_stats.npz +required: +- output_dir +- token_list +version: 0.10.3a3 +distributed: true diff --git a/assamese/male/model/energy_stats.npz b/assamese/male/model/energy_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..ae5bff509f6c89f504376f42aba694df730756f2 --- /dev/null +++ b/assamese/male/model/energy_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66cf788e284146143ddc0d81738e2a4cece116bcfa0ca105a518717fee76f681 +size 770 diff --git a/assamese/male/model/feats_stats.npz b/assamese/male/model/feats_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..06adb8b0dd90a307062de061de9ac5f5e159ea3a --- /dev/null +++ b/assamese/male/model/feats_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69d6c9208c60aa8fbfce176574858e5fa363ba048fb2418bae339a990592f2c4 +size 1402 diff --git a/assamese/male/model/feats_type b/assamese/male/model/feats_type new file mode 100644 index 0000000000000000000000000000000000000000..16b9d46ca2ab51e9b5f8a9e5ba31f3ef5a906ab6 --- /dev/null +++ b/assamese/male/model/feats_type @@ -0,0 +1 @@ +raw diff --git a/assamese/male/model/model.pth b/assamese/male/model/model.pth new file mode 100644 index 0000000000000000000000000000000000000000..a110dbc445ea3132489f92903bbeedb5c7bb6c51 --- /dev/null +++ b/assamese/male/model/model.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61e7dd206b37613770973d840f31906e19e51bb6ac771a40bbdd41afa3bce78a +size 148685818 diff --git a/assamese/male/model/pitch_stats.npz b/assamese/male/model/pitch_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..f2f467655cce74f655070df9ad5935022c20e5f4 --- /dev/null +++ b/assamese/male/model/pitch_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:216a0a305e14a24fb5b1a90dc2dd44e31fe4f33efc022d1298bdd7d3890c7f2e +size 770 diff --git a/bengali/female/model/config.yaml b/bengali/female/model/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..54878f7cc75f23c67781f6cfefd7d0590ea7f74b --- /dev/null +++ b/bengali/female/model/config.yaml @@ -0,0 +1,274 @@ +config: conf/tuning/train_fastspeech2.yaml +print_config: false +log_level: INFO +dry_run: false +iterator_type: sequence +output_dir: exp/tts_train_fastspeech2_raw_char_None +ngpu: 1 +seed: 0 +num_workers: 1 +num_att_plot: 3 +dist_backend: nccl +dist_init_method: env:// +dist_world_size: 2 +dist_rank: 0 +local_rank: 0 +dist_master_addr: localhost +dist_master_port: 51317 +dist_launcher: null +multiprocessing_distributed: true +unused_parameters: false +sharded_ddp: false +cudnn_enabled: true +cudnn_benchmark: false +cudnn_deterministic: true +collect_stats: false +write_collected_feats: false +max_epoch: 1000 +patience: null +val_scheduler_criterion: +- valid +- loss +early_stopping_criterion: +- valid +- loss +- min +best_model_criterion: +- - valid + - loss + - min +- - train + - loss + - min +keep_nbest_models: 5 +nbest_averaging_interval: 0 +grad_clip: 1.0 +grad_clip_type: 2.0 +grad_noise: false +accum_grad: 8 +no_forward_run: false +resume: true +train_dtype: float32 +use_amp: false +log_interval: null +use_matplotlib: true +use_tensorboard: true +use_wandb: false +wandb_project: null +wandb_id: null +wandb_entity: null +wandb_name: null +wandb_model_log_interval: -1 +detect_anomaly: false +pretrain_path: null +init_param: [] +ignore_init_mismatch: false +freeze_param: [] +num_iters_per_epoch: 800 +batch_size: 20 +valid_batch_size: null +batch_bins: 3000000 +valid_batch_bins: null +train_shape_file: +- exp/tts_stats_raw_char_None/train/text_shape.char +- exp/tts_stats_raw_char_None/train/speech_shape +valid_shape_file: +- exp/tts_stats_raw_char_None/valid/text_shape.char +- exp/tts_stats_raw_char_None/valid/speech_shape +batch_type: numel +valid_batch_type: null +fold_length: +- 150 +- 204800 +sort_in_batch: descending +sort_batch: descending +multiple_iterator: false +chunk_length: 500 +chunk_shift_ratio: 0.5 +num_cache_chunks: 1024 +train_data_path_and_name_and_type: +- - dump/raw/tr_no_dev/text + - text + - text +- - duration_info/tr_no_dev/durations + - durations + - text_int +- - dump/raw/tr_no_dev/wav.scp + - speech + - sound +- - exp/tts_stats_raw_char_None/train/collect_feats/pitch.scp + - pitch + - npy +- - exp/tts_stats_raw_char_None/train/collect_feats/energy.scp + - energy + - npy +valid_data_path_and_name_and_type: +- - dump/raw/dev/text + - text + - text +- - duration_info/dev/durations + - durations + - text_int +- - dump/raw/dev/wav.scp + - speech + - sound +- - exp/tts_stats_raw_char_None/valid/collect_feats/pitch.scp + - pitch + - npy +- - exp/tts_stats_raw_char_None/valid/collect_feats/energy.scp + - energy + - npy +allow_variable_data_keys: false +max_cache_size: 0.0 +max_cache_fd: 32 +valid_max_cache_size: null +optim: adam +optim_conf: + lr: 1.0 +scheduler: noamlr +scheduler_conf: + model_size: 384 + warmup_steps: 4000 +token_list: +- +- +- A +- a +- E +- r +- i +- n +- k +- y +- t +- b +- ',' +- s +- l +- m +- p +- u +- o +- d +- $ +- . +- ट +- j +- g +- h +- श +- C +- c +- I +- B +- थ +- ष +- ध +- ड +- ख +- ण +- D +- P +- q +- M +- ङ +- U +- ठ +- R +- घ +- ञ +- ऐ +- औ +- J +- ढ +- +odim: null +model_conf: {} +use_preprocessor: true +token_type: char +bpemodel: null +non_linguistic_symbols: null +cleaner: null +g2p: g2p_en_no_space +feats_extract: fbank +feats_extract_conf: + n_fft: 1024 + hop_length: 256 + win_length: 1024 + fs: 22050 + fmin: 0 + fmax: 8000 + n_mels: 80 +normalize: global_mvn +normalize_conf: + stats_file: /speech/arun/released_models/tts/female/bengali/fastspeech2_hs/feats_stats.npz +tts: fastspeech2 +tts_conf: + adim: 384 + aheads: 2 + elayers: 4 + eunits: 1536 + dlayers: 4 + dunits: 1536 + positionwise_layer_type: conv1d + positionwise_conv_kernel_size: 3 + duration_predictor_layers: 2 + duration_predictor_chans: 256 + duration_predictor_kernel_size: 3 + postnet_layers: 5 + postnet_filts: 5 + postnet_chans: 256 + use_masking: true + use_scaled_pos_enc: true + encoder_normalize_before: true + decoder_normalize_before: true + reduction_factor: 1 + init_type: xavier_uniform + init_enc_alpha: 1.0 + init_dec_alpha: 1.0 + transformer_enc_dropout_rate: 0.2 + transformer_enc_positional_dropout_rate: 0.2 + transformer_enc_attn_dropout_rate: 0.2 + transformer_dec_dropout_rate: 0.2 + transformer_dec_positional_dropout_rate: 0.2 + transformer_dec_attn_dropout_rate: 0.2 + pitch_predictor_layers: 5 + pitch_predictor_chans: 256 + pitch_predictor_kernel_size: 5 + pitch_predictor_dropout: 0.5 + pitch_embed_kernel_size: 1 + pitch_embed_dropout: 0.0 + stop_gradient_from_pitch_predictor: true + energy_predictor_layers: 2 + energy_predictor_chans: 256 + energy_predictor_kernel_size: 3 + energy_predictor_dropout: 0.5 + energy_embed_kernel_size: 1 + energy_embed_dropout: 0.0 + stop_gradient_from_energy_predictor: false +pitch_extract: dio +pitch_extract_conf: + fs: 22050 + n_fft: 1024 + hop_length: 256 + f0max: 400 + f0min: 80 + reduction_factor: 1 +pitch_normalize: global_mvn +pitch_normalize_conf: + stats_file: /speech/arun/released_models/tts/female/bengali/fastspeech2_hs/pitch_stats.npz +energy_extract: energy +energy_extract_conf: + fs: 22050 + n_fft: 1024 + hop_length: 256 + win_length: 1024 + reduction_factor: 1 +energy_normalize: global_mvn +energy_normalize_conf: + stats_file: /speech/arun/released_models/tts/female/bengali/fastspeech2_hs/energy_stats.npz +required: +- output_dir +- token_list +version: 0.10.7a1 +distributed: true diff --git a/bengali/female/model/energy_stats.npz b/bengali/female/model/energy_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..bfbc311ad7edb8ad2a509fc26b84a008cb5c8237 --- /dev/null +++ b/bengali/female/model/energy_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fb3a5505b1549cb04e05687fcd81f0ef06361f67c61592097f9628d6fd6aa06 +size 770 diff --git a/bengali/female/model/feats_stats.npz b/bengali/female/model/feats_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..374a6b7c041653bc67a1449cc5366067833893fa --- /dev/null +++ b/bengali/female/model/feats_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6299d70bc5b2a185c786cb678d8e526bf41464a32bee9ccf85d7aef27205224b +size 1402 diff --git a/bengali/female/model/feats_type b/bengali/female/model/feats_type new file mode 100644 index 0000000000000000000000000000000000000000..16b9d46ca2ab51e9b5f8a9e5ba31f3ef5a906ab6 --- /dev/null +++ b/bengali/female/model/feats_type @@ -0,0 +1 @@ +raw diff --git a/bengali/female/model/model.pth b/bengali/female/model/model.pth new file mode 100644 index 0000000000000000000000000000000000000000..1b966d4846af91bd5334f70278f0b687458da685 --- /dev/null +++ b/bengali/female/model/model.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eb26e66d94ff5ee854f8f809f7ff8eb9761a3e7afe2742cb00cdd8b5ee4b098 +size 148681929 diff --git a/bengali/female/model/pitch_stats.npz b/bengali/female/model/pitch_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..c40aa1371e216e7a92356c4ec18acf5bc396f5c7 --- /dev/null +++ b/bengali/female/model/pitch_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90624660ddb2568a44ee68b7887ab4335df6f73a57f38b6d11912ce38a349caf +size 770 diff --git a/bengali/male/model/config.yaml b/bengali/male/model/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..346a6c585b29a44bf1504d6603029cee27606063 --- /dev/null +++ b/bengali/male/model/config.yaml @@ -0,0 +1,272 @@ +config: conf/tuning/train_fastspeech2.yaml +print_config: false +log_level: INFO +dry_run: false +iterator_type: sequence +output_dir: exp/tts_train_fastspeech2_raw_char_None +ngpu: 1 +seed: 0 +num_workers: 1 +num_att_plot: 3 +dist_backend: nccl +dist_init_method: env:// +dist_world_size: 4 +dist_rank: 0 +local_rank: 0 +dist_master_addr: localhost +dist_master_port: 40623 +dist_launcher: null +multiprocessing_distributed: true +unused_parameters: false +sharded_ddp: false +cudnn_enabled: true +cudnn_benchmark: false +cudnn_deterministic: true +collect_stats: false +write_collected_feats: false +max_epoch: 1000 +patience: null +val_scheduler_criterion: +- valid +- loss +early_stopping_criterion: +- valid +- loss +- min +best_model_criterion: +- - valid + - loss + - min +- - train + - loss + - min +keep_nbest_models: 5 +grad_clip: 1.0 +grad_clip_type: 2.0 +grad_noise: false +accum_grad: 8 +no_forward_run: false +resume: true +train_dtype: float32 +use_amp: false +log_interval: null +use_tensorboard: true +use_wandb: false +wandb_project: null +wandb_id: null +wandb_entity: null +wandb_name: null +wandb_model_log_interval: -1 +detect_anomaly: false +pretrain_path: null +init_param: [] +ignore_init_mismatch: false +freeze_param: [] +num_iters_per_epoch: 800 +batch_size: 20 +valid_batch_size: null +batch_bins: 3000000 +valid_batch_bins: null +train_shape_file: +- exp/tts_stats_raw_char_None/train/text_shape.char +- exp/tts_stats_raw_char_None/train/speech_shape +valid_shape_file: +- exp/tts_stats_raw_char_None/valid/text_shape.char +- exp/tts_stats_raw_char_None/valid/speech_shape +batch_type: numel +valid_batch_type: null +fold_length: +- 150 +- 204800 +sort_in_batch: descending +sort_batch: descending +multiple_iterator: false +chunk_length: 500 +chunk_shift_ratio: 0.5 +num_cache_chunks: 1024 +train_data_path_and_name_and_type: +- - dump/raw/tr_no_dev/text + - text + - text +- - duration_info/tr_no_dev/durations + - durations + - text_int +- - dump/raw/tr_no_dev/wav.scp + - speech + - sound +- - exp/tts_stats_raw_char_None/train/collect_feats/pitch.scp + - pitch + - npy +- - exp/tts_stats_raw_char_None/train/collect_feats/energy.scp + - energy + - npy +valid_data_path_and_name_and_type: +- - dump/raw/dev/text + - text + - text +- - duration_info/dev/durations + - durations + - text_int +- - dump/raw/dev/wav.scp + - speech + - sound +- - exp/tts_stats_raw_char_None/valid/collect_feats/pitch.scp + - pitch + - npy +- - exp/tts_stats_raw_char_None/valid/collect_feats/energy.scp + - energy + - npy +allow_variable_data_keys: false +max_cache_size: 0.0 +max_cache_fd: 32 +valid_max_cache_size: null +optim: adam +optim_conf: + lr: 1.0 +scheduler: noamlr +scheduler_conf: + model_size: 384 + warmup_steps: 4000 +token_list: +- +- +- A +- a +- E +- r +- i +- n +- k +- y +- t +- b +- s +- l +- m +- p +- u +- o +- d +- $ +- . +- ',' +- ट +- j +- g +- h +- श +- C +- c +- I +- B +- ष +- थ +- ध +- ख +- ड +- ण +- D +- P +- q +- M +- ङ +- U +- ठ +- R +- घ +- ञ +- ऐ +- औ +- J +- ढ +- +odim: null +model_conf: {} +use_preprocessor: true +token_type: char +bpemodel: null +non_linguistic_symbols: null +cleaner: null +g2p: g2p_en_no_space +feats_extract: fbank +feats_extract_conf: + n_fft: 1024 + hop_length: 256 + win_length: 1024 + fs: 22050 + fmin: 0 + fmax: 8000 + n_mels: 80 +normalize: global_mvn +normalize_conf: + stats_file: /speech/arun/released_models/tts/male/bengali/fastspeech2_hs/feats_stats.npz +tts: fastspeech2 +tts_conf: + adim: 384 + aheads: 2 + elayers: 4 + eunits: 1536 + dlayers: 4 + dunits: 1536 + positionwise_layer_type: conv1d + positionwise_conv_kernel_size: 3 + duration_predictor_layers: 2 + duration_predictor_chans: 256 + duration_predictor_kernel_size: 3 + postnet_layers: 5 + postnet_filts: 5 + postnet_chans: 256 + use_masking: true + use_scaled_pos_enc: true + encoder_normalize_before: true + decoder_normalize_before: true + reduction_factor: 1 + init_type: xavier_uniform + init_enc_alpha: 1.0 + init_dec_alpha: 1.0 + transformer_enc_dropout_rate: 0.2 + transformer_enc_positional_dropout_rate: 0.2 + transformer_enc_attn_dropout_rate: 0.2 + transformer_dec_dropout_rate: 0.2 + transformer_dec_positional_dropout_rate: 0.2 + transformer_dec_attn_dropout_rate: 0.2 + pitch_predictor_layers: 5 + pitch_predictor_chans: 256 + pitch_predictor_kernel_size: 5 + pitch_predictor_dropout: 0.5 + pitch_embed_kernel_size: 1 + pitch_embed_dropout: 0.0 + stop_gradient_from_pitch_predictor: true + energy_predictor_layers: 2 + energy_predictor_chans: 256 + energy_predictor_kernel_size: 3 + energy_predictor_dropout: 0.5 + energy_embed_kernel_size: 1 + energy_embed_dropout: 0.0 + stop_gradient_from_energy_predictor: false +pitch_extract: dio +pitch_extract_conf: + fs: 22050 + n_fft: 1024 + hop_length: 256 + f0max: 350 + f0min: 40 + reduction_factor: 1 +pitch_normalize: global_mvn +pitch_normalize_conf: + stats_file: /speech/arun/released_models/tts/male/bengali/fastspeech2_hs/pitch_stats.npz +energy_extract: energy +energy_extract_conf: + fs: 22050 + n_fft: 1024 + hop_length: 256 + win_length: 1024 + reduction_factor: 1 +energy_normalize: global_mvn +energy_normalize_conf: + stats_file: /speech/arun/released_models/tts/male/bengali/fastspeech2_hs/energy_stats.npz +required: +- output_dir +- token_list +version: 0.10.3a3 +distributed: true diff --git a/bengali/male/model/energy_stats.npz b/bengali/male/model/energy_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..466480b9fd12fa99ea8a10737fee8f2c66cbca05 --- /dev/null +++ b/bengali/male/model/energy_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57e315fb35f6df786f93b85135af1d33d3536a963cafc6a253379014bacd86af +size 770 diff --git a/bengali/male/model/feats_stats.npz b/bengali/male/model/feats_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..118b311919dcff23c75a8cd20e543c43a4d43010 --- /dev/null +++ b/bengali/male/model/feats_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc564e7dbf6feb83ce81a81ddf1e5b88b86f7de313c5b864820db0d35a639f3a +size 1402 diff --git a/bengali/male/model/feats_type b/bengali/male/model/feats_type new file mode 100644 index 0000000000000000000000000000000000000000..16b9d46ca2ab51e9b5f8a9e5ba31f3ef5a906ab6 --- /dev/null +++ b/bengali/male/model/feats_type @@ -0,0 +1 @@ +raw diff --git a/bengali/male/model/model.pth b/bengali/male/model/model.pth new file mode 100644 index 0000000000000000000000000000000000000000..44bbd14cd787ad0e631a357026e1f7d3250d24df --- /dev/null +++ b/bengali/male/model/model.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:465c2a5b13bcb4d0b0c33ef947810dade8f264acfe3d673d3582a5ac86e6aff5 +size 148685817 diff --git a/bengali/male/model/pitch_stats.npz b/bengali/male/model/pitch_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..9c994dcad98dca60c1c9ae5e17dfaf28960fc637 --- /dev/null +++ b/bengali/male/model/pitch_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd07268ce13cb540cb0a8ed76dcd9ae7df5251e82940c170a662aad471227dfa +size 770 diff --git a/bodo/female/model/config.yaml b/bodo/female/model/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb04a5fd7abc356dfd277a9404349bbde34de9a2 --- /dev/null +++ b/bodo/female/model/config.yaml @@ -0,0 +1,280 @@ +accum_grad: 8 +allow_variable_data_keys: false +batch_bins: 3000000 +batch_size: 20 +batch_type: numel +best_model_criterion: +- - valid + - loss + - min +- - train + - loss + - min +bpemodel: null +chunk_length: 500 +chunk_shift_ratio: 0.5 +cleaner: null +collect_stats: false +config: conf/tuning/train_fastspeech2.yaml +cudnn_benchmark: false +cudnn_deterministic: true +cudnn_enabled: true +detect_anomaly: false +dist_backend: nccl +dist_init_method: env:// +dist_launcher: null +dist_master_addr: localhost +dist_master_port: 37083 +dist_rank: 0 +dist_world_size: 2 +distributed: true +dry_run: false +early_stopping_criterion: +- valid +- loss +- min +energy_extract: energy +energy_extract_conf: + fs: 22050 + hop_length: 256 + n_fft: 1024 + reduction_factor: 1 + win_length: 1024 +energy_normalize: global_mvn +energy_normalize_conf: + stats_file: /home/speech/Fastspeech2_HS/bodo/female/model/energy_stats.npz +feats_extract: fbank +feats_extract_conf: + fmax: 8000 + fmin: 0 + fs: 22050 + hop_length: 256 + n_fft: 1024 + n_mels: 80 + win_length: 1024 +fold_length: +- 150 +- 204800 +freeze_param: [] +g2p: g2p_en_no_space +grad_clip: 1.0 +grad_clip_type: 2.0 +grad_noise: false +ignore_init_mismatch: false +init_param: [] +iterator_type: sequence +keep_nbest_models: 5 +local_rank: 0 +log_interval: null +log_level: INFO +max_cache_fd: 32 +max_cache_size: 0.0 +max_epoch: 1000 +model_conf: {} +multiple_iterator: false +multiprocessing_distributed: true +nbest_averaging_interval: 0 +ngpu: 1 +no_forward_run: false +non_linguistic_symbols: null +normalize: global_mvn +normalize_conf: + stats_file: /home/speech/Fastspeech2_HS/bodo/female/model/feats_stats.npz +num_att_plot: 3 +num_cache_chunks: 1024 +num_iters_per_epoch: 800 +num_workers: 1 +odim: null +optim: adam +optim_conf: + lr: 1.0 +output_dir: exp/tts_train_fastspeech2_raw_char_None +patience: null +pitch_extract: dio +pitch_extract_conf: + f0max: 400 + f0min: 80 + fs: 22050 + hop_length: 256 + n_fft: 1024 + reduction_factor: 1 +pitch_normalize: global_mvn +pitch_normalize_conf: + stats_file: /home/speech/Fastspeech2_HS/bodo/female/model/pitch_stats.npz +pretrain_path: null +print_config: false +required: +- output_dir +- token_list +resume: true +scheduler: noamlr +scheduler_conf: + model_size: 384 + warmup_steps: 4000 +seed: 0 +sharded_ddp: false +sort_batch: descending +sort_in_batch: descending +token_list: +- +- +- A +- n +- o +- i +- b +- r +- y +- q +- s +- a +- m +- ',' +- g +- j +- "\u0916" +- l +- d +- E +- "\u0925" +- "\u0910" +- h +- u +- $ +- . +- w +- P +- "\u0914" +- "\u0919" +- t +- k +- "\u091F" +- p +- I +- "\u0921" +- U +- B +- "\u0927" +- "\u0937" +- c +- "\u0936" +- "\u0923" +- H +- R +- C +- "\u0918" +- "\u0920" +- "\u0D7D" +- "\u090D" +- Y +- D +- "\u0911" +- "\u0928" +- J +- z +- "\u091E" +- +token_type: char +train_data_path_and_name_and_type: +- - dump/raw/tr_no_dev/text + - text + - text +- - duration_info/tr_no_dev/durations + - durations + - text_int +- - dump/raw/tr_no_dev/wav.scp + - speech + - sound +- - exp/tts_stats_raw_char_None/train/collect_feats/pitch.scp + - pitch + - npy +- - exp/tts_stats_raw_char_None/train/collect_feats/energy.scp + - energy + - npy +train_dtype: float32 +train_shape_file: +- exp/tts_stats_raw_char_None/train/text_shape.char +- exp/tts_stats_raw_char_None/train/speech_shape +tts: fastspeech2 +tts_conf: + adim: 384 + aheads: 2 + decoder_normalize_before: true + dlayers: 4 + dunits: 1536 + duration_predictor_chans: 256 + duration_predictor_kernel_size: 3 + duration_predictor_layers: 2 + elayers: 4 + encoder_normalize_before: true + energy_embed_dropout: 0.0 + energy_embed_kernel_size: 1 + energy_predictor_chans: 256 + energy_predictor_dropout: 0.5 + energy_predictor_kernel_size: 3 + energy_predictor_layers: 2 + eunits: 1536 + init_dec_alpha: 1.0 + init_enc_alpha: 1.0 + init_type: xavier_uniform + pitch_embed_dropout: 0.0 + pitch_embed_kernel_size: 1 + pitch_predictor_chans: 256 + pitch_predictor_dropout: 0.5 + pitch_predictor_kernel_size: 5 + pitch_predictor_layers: 5 + positionwise_conv_kernel_size: 3 + positionwise_layer_type: conv1d + postnet_chans: 256 + postnet_filts: 5 + postnet_layers: 5 + reduction_factor: 1 + stop_gradient_from_energy_predictor: false + stop_gradient_from_pitch_predictor: true + transformer_dec_attn_dropout_rate: 0.2 + transformer_dec_dropout_rate: 0.2 + transformer_dec_positional_dropout_rate: 0.2 + transformer_enc_attn_dropout_rate: 0.2 + transformer_enc_dropout_rate: 0.2 + transformer_enc_positional_dropout_rate: 0.2 + use_masking: true + use_scaled_pos_enc: true +unused_parameters: false +use_amp: false +use_matplotlib: true +use_preprocessor: true +use_tensorboard: true +use_wandb: false +val_scheduler_criterion: +- valid +- loss +valid_batch_bins: null +valid_batch_size: null +valid_batch_type: null +valid_data_path_and_name_and_type: +- - dump/raw/dev/text + - text + - text +- - duration_info/dev/durations + - durations + - text_int +- - dump/raw/dev/wav.scp + - speech + - sound +- - exp/tts_stats_raw_char_None/valid/collect_feats/pitch.scp + - pitch + - npy +- - exp/tts_stats_raw_char_None/valid/collect_feats/energy.scp + - energy + - npy +valid_max_cache_size: null +valid_shape_file: +- exp/tts_stats_raw_char_None/valid/text_shape.char +- exp/tts_stats_raw_char_None/valid/speech_shape +version: 0.10.7a1 +wandb_entity: null +wandb_id: null +wandb_model_log_interval: -1 +wandb_name: null +wandb_project: null +write_collected_feats: false diff --git a/bodo/female/model/energy_stats.npz b/bodo/female/model/energy_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..c695a65dd431d2187c0a54d81cd9f41ff0aeb267 --- /dev/null +++ b/bodo/female/model/energy_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c61fc80f7e22eb170fefaa3efc4ca1fc347b92153873135cc49f768f9f767983 +size 770 diff --git a/bodo/female/model/feats_stats.npz b/bodo/female/model/feats_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..71d22ca81c821fede7538c82c2f5b08ab8c3bc5a --- /dev/null +++ b/bodo/female/model/feats_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5d4aa3b2ba214d6b6674f42fbe6ce06da97c36dcc7249914b2689faffcb0dba +size 1402 diff --git a/bodo/female/model/feats_type b/bodo/female/model/feats_type new file mode 100644 index 0000000000000000000000000000000000000000..16b9d46ca2ab51e9b5f8a9e5ba31f3ef5a906ab6 --- /dev/null +++ b/bodo/female/model/feats_type @@ -0,0 +1 @@ +raw diff --git a/bodo/female/model/model.pth b/bodo/female/model/model.pth new file mode 100644 index 0000000000000000000000000000000000000000..df5eca91836aa0801063e8fce979a8b931b904ea --- /dev/null +++ b/bodo/female/model/model.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:666bdd54c1ed4551ca7be9f919db21938d783c7bebd7939cd893727282a05d0c +size 148691145 diff --git a/bodo/female/model/pitch_stats.npz b/bodo/female/model/pitch_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..64cf9626dbc2567789b703bd8b23a1a760a4c9f6 --- /dev/null +++ b/bodo/female/model/pitch_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f9dcb33f0d679fb28039ab78883d34e10b5b22a8699d1ac35c2b48a2a69578b +size 770 diff --git a/charmap/Text_Cleaning.ipynb b/charmap/Text_Cleaning.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..1d728683e3fbe0b5f19aa81a2a316bda36aec96c --- /dev/null +++ b/charmap/Text_Cleaning.ipynb @@ -0,0 +1,332 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "############################################################\n", + "#Author : Bhagyashree\n", + "#Date : 1st Sept, 2020\n", + "#Purpose : Text Cleaning\n", + "#Input : Text file after timestamp removal\n", + "#Output : Text file after cleaning data\n", + "############################################################" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import nltk\n", + "import numpy\n", + "import xlrd\n", + "import openpyxl \n", + "import re" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "अब हम ऑलट्रेशन ऑफ मेमोरेंडम के बारे में बात करेंगे।\n", + " मेमोरेंडम के विषयों को बदल दिया जा सकता है।\n", + "कंपनी अधिनियम 1956 के तहत\n", + "निम्नलिखित प्रक्रिया के अनुसार\n", + "यदि कोई कंपनी अपना नाम बदलना चाहती है,\n", + "तो वे विशेष संकल्प में ऐसा कर सकती है।\n", + "और केंद्र सरकार की मंजूरी के साथ\n", + "लिखित में\n", + "हालाँकि, ऐसी किसी भी मंजूरी की आवश्यकता नहीं होती है\n", + "केवल कंपनी के नाम में परिवर्तन के लिए\n", + "बल्कि यह प्रक्रिया\n", + "पूरी तरह से निजी शब्द को हटाने के लिए होती है।\n", + "तो इस मामले में\n", + "एक सार्वजनिक कंपनी को एक निजी कंपनी के संबंध में,\n", + "या एक निजी कंपनी को एक सार्वजनिक कंपनी में परिवर्तित करने\n", + "दूसरी जगह पर पंजीकृत कार्यालय का दोबारा परिवर्तन किया जाना है\n", + "अगर एक ही शहर, कस्बे\n", + "या गाँव में एक जगह से\n", + "30 दिनों का नोटिस दिया जाना होता है।\n", + "यह रजिस्ट्रार द्वारा परिवर्तन की तिथि के बाद\n", + "होगा जो उसी को दर्ज करेगा।\n", + "पंजीकृत कार्यालय के परिवर्तन के मामले में\n", + "जबकि एक ही राज्य में 1 शहर से दूसरे शहर में,\n", + "विशेष प्रस्ताव पारित किया जाना आवश्यक है।\n", + "शेयरधारकों की आम बैठक में\n", + "और इसकी एक प्रति रजिस्ट्रार के पास कार्यालय को\n", + "30 दिनों के भीतर बदलने के दाखिल करनी होगी।\n", + "नोटिस देना होगा।\n", + "रजिस्ट्रार को कार्यालय के नये पते का\n", + "तब तक परिवर्तन प्रभावी नहीं होगा।\n", + "जब तक केंद्र सरकार द्वारा प्रस्ताव की पुष्टि नहीं हो जाती\n", + "कंपनी किसी भी आधार पर\n", + "उपधारा 1 से\n", + "उपधारा 7 में धारा 17 तक उल्लेखित है।\n", + "जैसा कि अधिनियम के बाद ही परिवर्तन प्रभावी होगा।\n", + "अपने उद्देश्यों में बदलाव कर सकती है\n", + "सामान्य बैठक में सदस्यों द्वारा\n", + "कंपनी संशोधन अधिनियम 1996 को मेमोरेंडम ऑफ एसोसिएशन के\n", + "केंद्र सरकार के प्रतिबंध के साथ समाप्त कर दिया गया है।\n", + "विशेष संकल्प के अनुरूप अनुमोदित किए जाने\n", + "ऑब्जेक्ट क्लॉज में परिवर्तन के प्रति शेयर पूंजी में\n", + "परिवर्तन की प्रक्रिया और ऐसे परिवर्तन करने के अधिकार।\n", + "समान्य पहलु को आर्टीकल ऑफ एसोसिएशन में रखा गया है।\n", + "यदि संबंधित विषय के\n", + " आर्टीकल ऑफ एसोसिएशन में पीछे की प्रक्रिया नहीं दी गई है।\n", + "कंपनी को पारित करके आर्टीकल ऑफ एसोसिएशन को बदलना होगा।\n", + "एक विशेष प्रस्ताव\n", + "अब, हम ऑलट्रेशन ऑफ पर चर्चा करेंगे।\n", + "कंपनी अधिनियम 2013 के तहत मेमोरेंडम\n", + "अधिनियम की धारा 16, 17, 18, 19,\n", + "1956 के कंपनी 21, 23 और 37 के अनुरूप।\n", + "कंपनी अधिनियम, 2013 की धारा 13 ने\n", + " ऑलट्रेशन ऑफ मेमोरेंडम के लिए मानदंड निर्धारित किए हैं।\n", + "यह वही वर्णन करता है कि जैसा कि धारा 61 में दिया गया है।\n", + "एक कंपनी विशेष संकल्प द्वारा\n", + "और प्रावधानों को बदल सकती है।\n", + "निर्दिष्ट प्रक्रिया के अनुपालन से मेमोरेंडम\n", + "नाम खंड के परिवर्तन के संबंध में,\n", + "कंपनी अपना नाम बदल सकती है।\n", + "अनुमोदन के बाद लिखित रूप में केंद्र सरकार द्वारा\n", + "इसलिए जब कंपनी के नाम में कोई बदलाव किया जाता है,\n", + "रजिस्टर में दर्ज करेगा।\n", + " तो रजिस्ट्रार कंपनी के नया नाम\n", + "पुराने नाम के स्थान पर\n", + "निगमन का एक नया प्रमाणपत्र जारी करेगा।\n", + "कंपनी के पंजीकृत कार्यालय के बारे में\n", + "जैसा कि पहले उल्लेख किया गया है,\n", + "तब तक कोई प्रभाव नहीं पड़ेगा जब तक कि वह केंद्र सरकार द्वारा अनुमोदित नहीं हो जाता।\n", + "ऑब्जेक्ट क्लॉज के किसी भी परिवर्तन के संबंध में,\n", + "किसी कंपनी के मेमोरेंडम के\n", + "रजिस्ट्रार को दाखिल करने की\n", + "30 दिनों की अवधि के भीतर\n", + "तारीख से पंजीकरण को प्रमाणित करना होता है।\n", + "इस विशेष प्रस्ताव खंड के उप खंड 6 के खंड A के अनुसार\n", + "चलिए फिर से संक्षेप में\n", + " ऑलट्रेशन ऑफ मेमोरेंडम के महत्व के बारे में चर्चा करते हैं\n", + " क्योंकि ये इस मॉड्यूल के अध्ययन का एक बहुत महत्वपूर्ण हिस्सा है।\n", + "पहला महत्व यह है\n", + "कि यह एक बहुत ही आवश्यक दस्तावेज है।\n", + "एक कंपनी के निगमन के लिए\n", + "दूसरा, यह कंपनी के पंजीकृत कार्यालय को निर्दिष्ट करते हुए\n", + "रजिस्ट्रार और अदालत के अधिकार क्षेत्र को निर्धारित करता है।\n", + "तीसरा, यह कंपनी के अधिकारों में उद्देश्यों को\n", + "जनता की जानकारी के लिए दर्ज करता है।\n", + "अगला, यह कंपनी को केवल उन कृत्यों को करने के लिए\n", + "बाध्य करता है जो कंपनी के ऑब्जेक्ट क्लॉज में शामिल हैं।\n", + "यह कंपनी की अधिकृत कैपिटल\n", + "और उसके विभाजन को निश्चित राशि के शेयरों में निर्दिष्ट करता है।\n", + "पर प्रकाश डालता है।\n", + "यह कंपनी के सदस्यों के लायबिलिटी\n", + "अंत में, संघ के नियमों को\n", + "यह एक कंपनी के भी नियंत्रित करता है।\n" + ] + } + ], + "source": [ + "file1 = open(\"recent_deliverables_dec2020/Corporate_Law/Hindi/ankita objects 02_Hindi_new.txt\",\"r+\",encoding='utf-8') \n", + "data = file1.read()\n", + "print(data)\n", + "file1.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "अब हम ऑलट्रेशन ऑफ मेमोरेंडम के बारे में बात करेंगे\n", + " मेमोरेंडम के विषयों को बदल दिया जा सकता है\n", + "कंपनी अधिनियम वन नाइन फाइव सिक्स के तहत\n", + "निम्नलिखित प्रक्रिया के अनुसार\n", + "यदि कोई कंपनी अपना नाम बदलना चाहती है\n", + "तो वे विशेष संकल्प में ऐसा कर सकती है\n", + "और केंद्र सरकार की मंजूरी के साथ\n", + "लिखित में\n", + "हालाँकि ऐसी किसी भी मंजूरी की आवश्यकता नहीं होती है\n", + "केवल कंपनी के नाम में परिवर्तन के लिए\n", + "बल्कि यह प्रक्रिया\n", + "पूरी तरह से निजी शब्द को हटाने के लिए होती है\n", + "तो इस मामले में\n", + "एक सार्वजनिक कंपनी को एक निजी कंपनी के संबंध में\n", + "या एक निजी कंपनी को एक सार्वजनिक कंपनी में परिवर्तित करने\n", + "दूसरी जगह पर पंजीकृत कार्यालय का दोबारा परिवर्तन किया जाना है\n", + "अगर एक ही शहर कस्बे\n", + "या गाँव में एक जगह से\n", + " थ्री ज़ीरो दिनों का नोटिस दिया जाना होता है\n", + "यह रजिस्ट्रार द्वारा परिवर्तन की तिथि के बाद\n", + "होगा जो उसी को दर्ज करेगा\n", + "पंजीकृत कार्यालय के परिवर्तन के मामले में\n", + "जबकि एक ही राज्य में वन शहर से दूसरे शहर में\n", + "विशेष प्रस्ताव पारित किया जाना आवश्यक है\n", + "शेयरधारकों की आम बैठक में\n", + "और इसकी एक प्रति रजिस्ट्रार के पास कार्यालय को\n", + " थ्री ज़ीरो दिनों के भीतर बदलने के दाखिल करनी होगी\n", + "नोटिस देना होगा\n", + "रजिस्ट्रार को कार्यालय के नये पते का\n", + "तब तक परिवर्तन प्रभावी नहीं होगा\n", + "जब तक केंद्र सरकार द्वारा प्रस्ताव की पुष्टि नहीं हो जाती\n", + "कंपनी किसी भी आधार पर\n", + "उपधारा वन से\n", + "उपधारा सेवेन में धारा वन सेवेन तक उल्लेखित है\n", + "जैसा कि अधिनियम के बाद ही परिवर्तन प्रभावी होगा\n", + "अपने उद्देश्यों में बदलाव कर सकती है\n", + "सामान्य बैठक में सदस्यों द्वारा\n", + "कंपनी संशोधन अधिनियम वन नाइन नाइन सिक्स को मेमोरेंडम ऑफ एसोसिएशन के\n", + "केंद्र सरकार के प्रतिबंध के साथ समाप्त कर दिया गया है\n", + "विशेष संकल्प के अनुरूप अनुमोदित किए जाने\n", + "ऑब्जेक्ट क्लॉज में परिवर्तन के प्रति शेयर पूंजी में\n", + "परिवर्तन की प्रक्रिया और ऐसे परिवर्तन करने के अधिकार\n", + "समान्य पहलु को आर्टीकल ऑफ एसोसिएशन में रखा गया है\n", + "यदि संबंधित विषय के\n", + " आर्टीकल ऑफ एसोसिएशन में पीछे की प्रक्रिया नहीं दी गई है\n", + "कंपनी को पारित करके आर्टीकल ऑफ एसोसिएशन को बदलना होगा\n", + "एक विशेष प्रस्ताव\n", + "अब हम ऑलट्रेशन ऑफ पर चर्चा करेंगे\n", + "कंपनी अधिनियम टू ज़ीरो वन थ्री के तहत मेमोरेंडम\n", + "अधिनियम की धारा वन सिक्स वन सेवेन वन ऐइट वन नाइन \n", + " वन नाइन फाइव सिक्स के कंपनी टू वन टू थ्री और थ्री सेवेन के अनुरूप\n", + "कंपनी अधिनियम टू ज़ीरो वन थ्री की धारा वन थ्री ने\n", + " ऑलट्रेशन ऑफ मेमोरेंडम के लिए मानदंड निर्धारित किए हैं\n", + "यह वही वर्णन करता है कि जैसा कि धारा सिक्स वन में दिया गया है\n", + "एक कंपनी विशेष संकल्प द्वारा\n", + "और प्रावधानों को बदल सकती है\n", + "निर्दिष्ट प्रक्रिया के अनुपालन से मेमोरेंडम\n", + "नाम खंड के परिवर्तन के संबंध में\n", + "कंपनी अपना नाम बदल सकती है\n", + "अनुमोदन के बाद लिखित रूप में केंद्र सरकार द्वारा\n", + "इसलिए जब कंपनी के नाम में कोई बदलाव किया जाता है\n", + "रजिस्टर में दर्ज करेगा\n", + " तो रजिस्ट्रार कंपनी के नया नाम\n", + "पुराने नाम के स्थान पर\n", + "निगमन का एक नया प्रमाणपत्र जारी करेगा\n", + "कंपनी के पंजीकृत कार्यालय के बारे में\n", + "जैसा कि पहले उल्लेख किया गया है\n", + "तब तक कोई प्रभाव नहीं पड़ेगा जब तक कि वह केंद्र सरकार द्वारा अनुमोदित नहीं हो जाता\n", + "ऑब्जेक्ट क्लॉज के किसी भी परिवर्तन के संबंध में\n", + "किसी कंपनी के मेमोरेंडम के\n", + "रजिस्ट्रार को दाखिल करने की\n", + " थ्री ज़ीरो दिनों की अवधि के भीतर\n", + "तारीख से पंजीकरण को प्रमाणित करना होता है\n", + "इस विशेष प्रस्ताव खंड के उप खंड सिक्स के खंड ए के अनुसार\n", + "चलिए फिर से संक्षेप में\n", + " ऑलट्रेशन ऑफ मेमोरेंडम के महत्व के बारे में चर्चा करते हैं\n", + " क्योंकि ये इस मॉड्यूल के अध्ययन का एक बहुत महत्वपूर्ण हिस्सा है\n", + "पहला महत्व यह है\n", + "कि यह एक बहुत ही आवश्यक दस्तावेज है\n", + "एक कंपनी के निगमन के लिए\n", + "दूसरा यह कंपनी के पंजीकृत कार्यालय को निर्दिष्ट करते हुए\n", + "रजिस्ट्रार और अदालत के अधिकार क्षेत्र को निर्धारित करता है\n", + "तीसरा यह कंपनी के अधिकारों में उद्देश्यों को\n", + "जनता की जानकारी के लिए दर्ज करता है\n", + "अगला यह कंपनी को केवल उन कृत्यों को करने के लिए\n", + "बाध्य करता है जो कंपनी के ऑब्जेक्ट क्लॉज में शामिल हैं\n", + "यह कंपनी की अधिकृत कैपिटल\n", + "और उसके विभाजन को निश्चित राशि के शेयरों में निर्दिष्ट करता है\n", + "पर प्रकाश डालता है\n", + "यह कंपनी के सदस्यों के लायबिलिटी\n", + "अंत में संघ के नियमों को\n", + "यह एक कंपनी के भी नियंत्रित करता है\n" + ] + } + ], + "source": [ + "\n", + "wb_obj = openpyxl.load_workbook(\"charmap_v2_new.xlsx\") \n", + "sheet_obj = wb_obj.active \n", + "\n", + "#data = re.sub('[A-Z]*', '',data)\n", + "#print(data)\n", + "data = data.replace('?','')\n", + "data = data.replace(' ',' ')\n", + "data = data.replace(';','')\n", + "data = data.replace(')','')\n", + "data = data.replace('(','')\n", + "data = data.replace('!','')\n", + "data = data.replace(' – ',' ')\n", + "data = data.replace('-',' ')\n", + "data = data.replace('।','')\n", + "data = data.replace('&','')\n", + "data = data.replace('’','')\n", + "data = data.replace('‘','')\n", + "data = data.replace(':','')\n", + "data = data.replace(',','')\n", + "data = data.replace('/','')\n", + "data = data.replace(',','')\n", + "data = data.replace('.','')\n", + "data = data.replace('|','')\n", + "m_row = sheet_obj.max_row \n", + "line = data\n", + "\n", + "for i in range(1,m_row+1):\n", + " num = sheet_obj.cell(row = i, column = 1).value \n", + " word = sheet_obj.cell(row = i, column = 2).value\n", + " #print(num)\n", + " #print(word)\n", + " line = line.replace(str(num), word)\n", + "#print(line)\n", + "#' '.join(line.split())\n", + "print(line) \n", + "file1 = open(\"recent_deliverables_dec2020/Corporate_Law/Hindi/ankita objects 02_Hindi.txt\",\"w+\",encoding='utf-8') \n", + "file1.write(line)\n", + "file1.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/charmap/Text_Cleaning.py b/charmap/Text_Cleaning.py new file mode 100644 index 0000000000000000000000000000000000000000..c3c8bf47fcb82f2c3234e5a07480fe1b46bdd4a1 --- /dev/null +++ b/charmap/Text_Cleaning.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[2]: + + +############################################################ +#Author : Bhagyashree +#Date : 1st Sept, 2020 +#Purpose : Text Cleaning +#Input : Text file after timestamp removal +#Output : Text file after cleaning data +############################################################ + + +# In[3]: + + +import nltk +import numpy +import xlrd +import openpyxl +import re +import sys + +# In[21]: + +#file1 = open("recent_deliverables_dec2020/Corporate_Law/Hindi/ankita objects 02_Hindi_new.txt","r+",encoding='utf-8') +file1 = open(sys.argv[1],"r+",encoding='utf-8') +data = file1.read() +#print(data) +file1.close() + + +# In[22]: + +wb_obj = openpyxl.load_workbook(sys.argv[2]) +sheet_obj = wb_obj.active + +#data = re.sub('[A-Z]*', '',data) +#print(data) +data = data.replace('?','') +data = data.replace(' ',' ') +data = data.replace(';','') +data = data.replace(')','') +data = data.replace('(','') +data = data.replace('!','') +data = data.replace(' – ',' ') +data = data.replace('-',' ') +data = data.replace('।','') +data = data.replace('&','') +data = data.replace('’','') +data = data.replace('‘','') +data = data.replace(':','') +data = data.replace(',','') +data = data.replace('/','') +data = data.replace(',','') +data = data.replace('.','') +data = data.replace('|','') +m_row = sheet_obj.max_row +line = data + +for i in range(1,m_row+1): + num = sheet_obj.cell(row = i, column = 1).value + word = sheet_obj.cell(row = i, column = 2).value + #print(num) + #print(word) + line = line.replace(str(num), word) +#print(line) +#' '.join(line.split()) +#print(line) +file1 = open(sys.argv[3],"w+",encoding='utf-8') +#file1 = open("recent_deliverables_dec2020/Corporate_Law/Hindi/ankita objects 02_Hindi.txt","w+",encoding='utf-8') +file1.write(line) +file1.close() + diff --git a/charmap/charmap_Bengali.txt b/charmap/charmap_Bengali.txt new file mode 100644 index 0000000000000000000000000000000000000000..ce4ea7bf623db562008f5b78d13efe26dc670051 --- /dev/null +++ b/charmap/charmap_Bengali.txt @@ -0,0 +1,62 @@ +0 জিরো +1 ওয়ান +2 টু +3 থ্রী +4 ফোর +5 ফাইভ +6 সিক্স +7 সেবন +8 এইট +9 নাইন +A এ +a এ +B বী +b বী +C সী +c সী +D ডি +d ডি +E ই +e ই +F এফ +f এফ +G জি +g জি +H এছ +h এছ +I আই +i আই +J জে +j জে +K কে +k কে +L এল +l এল +M এম +m এম +N এন +n এন +O ও +o ও +P পি +p পি +Q ক্যু +q ক্যু +R আর +r আর +S এস +s এস +T টি +t টি +U ইউ +u ইউ +V ভি +v ভি +W ডাবলু +w ডাবলু +X এক্স +x এক্স +Y উহাই +y উহাই +Z জেডঃ +z জেডঃ \ No newline at end of file diff --git a/charmap/charmap_Hindi.txt b/charmap/charmap_Hindi.txt new file mode 100644 index 0000000000000000000000000000000000000000..57e4da1d7558995157dc694138b6ac8b4a0ff236 --- /dev/null +++ b/charmap/charmap_Hindi.txt @@ -0,0 +1,81 @@ +0 ज़ीरो +1 वन +2 टू +3 थ्री +4 फोर +5 फाइव +6 सिक्स +7 सेवेन +8 ऐइट +9 नाइन +० ज़ीरो +१ वन +२ टू +३ थ्री +४ फोर +५ फाइव +६ सिक्स +७ सेवेन +८ ऐइट +९ नाइन +A ए +a ए +B बी +b बी +C सी +c सी +D डी +d डी +E इ +e इ +F एफ +f एफ +G जी +g जी +H एच +h एच +I आई +i आई +J जे +j जे +K के +k के +L एल +l एल +M एम +m एम +N एन +n एन +O ओ +o ओ +P पी +p पी +Q क्यू +q क्यू +R आर +r आर +S एस +s एस +T टी +t टी +U यू +u यू +V वी +v वी +W डबलु +w डबलु +X एक्स +x एक्स +Y व्हाई +y व्हाई +Z ज़ेड +z ज़ेड +α अल्फा +φ फ़ाई ++ प्लस += इक्वल +λ लैम्ब्डा +∅ नॉट +ϕ फ़ाई +𝝅 पाई +⇒ इम्प्लइज \ No newline at end of file diff --git a/charmap/charmap_Hindi.xlsx b/charmap/charmap_Hindi.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..f87acaf78cd84ff1e386285f4d7d7796672f35fe Binary files /dev/null and b/charmap/charmap_Hindi.xlsx differ diff --git a/charmap/charmap_Malayalam.txt b/charmap/charmap_Malayalam.txt new file mode 100644 index 0000000000000000000000000000000000000000..df0f2356ccab3ae2a23395e6f0667b84bfa1d654 --- /dev/null +++ b/charmap/charmap_Malayalam.txt @@ -0,0 +1,81 @@ +0 സീറോ +1 വൺ +2 ടു +3 ത്രീ +4 ഫോർ +5 ഫൈവ് +6 സിക്സ് +7 സെവൻ +8 എയ്റ്റ് +9 നൈൻ +0 സീറോ +1 വൺ +2 ടു +3 ത്രീ +4 ഫോർ +5 ഫൈവ് +6 സിക്സ് +7 സെവൻ +8 എയ്റ്റ് +9 നൈൻ +A എ +a എ +B ബി +b ബി +C സി +c സി +D ഡി +d ഡി +E ഇ +e ഇ +F എഫ് +f എഫ് +G ജി +g ജി +H എഛ് +h എഛ് +I ഐ +i ഐ +J ജെ +j ജെ +K കെ +k കെ +L എൽ +l എൽ +M എം +m എം +N എൻ +n എൻ +O ഒ +o ഒ +P പി +p പി +Q ക്യു +q ക്യു +R ആർ +r ആർ +S എസ് +s എസ് +T ടി +t ടി +U യു +u യു +V വി +v വി +W ഡബ്ല്യൂ +w ഡബ്ല്യൂ +X എക്സ് +x എക്സ് +Y വൈ +y വൈ +Z സെഡ് +z സെഡ് +α ആൽഫ +φ ഫി ++ പ്ലസ് += ഈക്വൽ +λ ലാംബ്ടാ +∅ നോട്ട് +ϕ സൈ +𝝅 പൈ +⇒ ഇമ്പ്ലെയ്സ് \ No newline at end of file diff --git a/charmap/charmap_Malayalam.xlsx b/charmap/charmap_Malayalam.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..cf8ab68ec1417fa4286b9073d76af6e7e71be60e Binary files /dev/null and b/charmap/charmap_Malayalam.xlsx differ diff --git a/charmap/charmap_Marathi.txt b/charmap/charmap_Marathi.txt new file mode 100644 index 0000000000000000000000000000000000000000..57e4da1d7558995157dc694138b6ac8b4a0ff236 --- /dev/null +++ b/charmap/charmap_Marathi.txt @@ -0,0 +1,81 @@ +0 ज़ीरो +1 वन +2 टू +3 थ्री +4 फोर +5 फाइव +6 सिक्स +7 सेवेन +8 ऐइट +9 नाइन +० ज़ीरो +१ वन +२ टू +३ थ्री +४ फोर +५ फाइव +६ सिक्स +७ सेवेन +८ ऐइट +९ नाइन +A ए +a ए +B बी +b बी +C सी +c सी +D डी +d डी +E इ +e इ +F एफ +f एफ +G जी +g जी +H एच +h एच +I आई +i आई +J जे +j जे +K के +k के +L एल +l एल +M एम +m एम +N एन +n एन +O ओ +o ओ +P पी +p पी +Q क्यू +q क्यू +R आर +r आर +S एस +s एस +T टी +t टी +U यू +u यू +V वी +v वी +W डबलु +w डबलु +X एक्स +x एक्स +Y व्हाई +y व्हाई +Z ज़ेड +z ज़ेड +α अल्फा +φ फ़ाई ++ प्लस += इक्वल +λ लैम्ब्डा +∅ नॉट +ϕ फ़ाई +𝝅 पाई +⇒ इम्प्लइज \ No newline at end of file diff --git a/charmap/charmap_Marathi.xlsx b/charmap/charmap_Marathi.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..f87acaf78cd84ff1e386285f4d7d7796672f35fe Binary files /dev/null and b/charmap/charmap_Marathi.xlsx differ diff --git a/charmap/charmap_Tamil.disabled b/charmap/charmap_Tamil.disabled new file mode 100644 index 0000000000000000000000000000000000000000..b8d41ada80d3618531d471284e99632df691e12e --- /dev/null +++ b/charmap/charmap_Tamil.disabled @@ -0,0 +1,76 @@ +0 ஸிரோ +1 ஒன்னு +2 டூ +3 த்ரீ +4 போர் +5 பைவ் +6 சீஸ் +7 செவென் +8 எயிட் +9 நயன் +A எ +a எ +B பி +b பி +C சி +c சி +D டி +d டி +E இ +e இ +F எப் +f எப் +G ஜி +g ஜி +H ஹ் +h ஹ் +I ஐ +i ஐ +J ஜே +j ஜே +K கே +k கே +L ல் +l ல் +M ம் +m ம் +N ன் +n ன் +O ஓ +o ஓ +P பி +p பி +Q கியூ +q கியூ +R ர் +r ர் +S ஸ் +s ஸ் +T டீ +t டீ +U யூ +u யூ +V வி +v வி +W டௌபிள்யூ +w டௌபிள்யூ +X எஸ் +x எஸ் +Y யை +y யை +Z செட் +z செட் +α ஆல்பா +φ பிய் ++ பிளஸ் += ஏகுவாள் +λ லாம்டா +∅ நோட் +ϕ பிய் +𝝅 பை +⇒ இம்ப்ளிஸ் +. பாயிண்ட் +% பெர்ஸண்டாஜ் +°C டிகிரிசெல்சியஸ் +θ தீட்டா +* இண்டூ \ No newline at end of file diff --git a/charmap/charmap_Tamil.xlsx b/charmap/charmap_Tamil.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..cbd0edeb694baec2412d084e49b5e56f07bfd6a3 Binary files /dev/null and b/charmap/charmap_Tamil.xlsx differ diff --git a/charmap/charmap_Telugu.txt b/charmap/charmap_Telugu.txt new file mode 100644 index 0000000000000000000000000000000000000000..9bbb2c127522006561a2341b40fdd05b9f72d522 --- /dev/null +++ b/charmap/charmap_Telugu.txt @@ -0,0 +1,81 @@ +0 జీరొ +1 వన్ +2 టు +3 త్రీ +4 ఫోర్ +5 ఫైవ్ +6 సిక్స్ +7 సెవెన్ +8 ఎఇట్ +9 నైన్ +० జీరొ +१ వన్ +२ టు +३ త్రీ +४ ఫోర్ +५ ఫైవ్ +६ సిక్స్ +७ సెవెన్ +८ ఎఇట్ +९ నైన్ +A ఎ +a ఎ +B బి +b బి +C సి +c సి +D డి +d డి +E ఇ +e ఇ +F ఎఫ్ +f ఎఫ్ +G జి +g జి +H హెచ్ +h హెచ్ +I ఐ +i ఐ +J జె +j జె +K కె +k కె +L ఎల్ +l ఎల్ +M ఎమ్ +m ఎమ్ +N ఎన్ +n ఎన్ +O ఒ +o ఒ +P పి +p పి +Q క్యు +q క్యు +R ఆర్ +r ఆర్ +S ఎస్ +s ఎస్ +T టి +t టి +U యు +u యు +V వి +v వి +W డబ్లు +w డబ్లు +X ఎక్స్ +x ఎక్స్ +Y వై +y వై +Z జెడ్ +z జెడ్ +α ఆల్ఫ +φ ఫై ++ ప్లస్ += ఈక్వల్ +λ లామ్బ్డ +∅ నల్ +ϕ ఫై +𝝅 పై +⇒ ఇమ్ప్లైస్ \ No newline at end of file diff --git a/charmap/charmap_Telugu.xlsx b/charmap/charmap_Telugu.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..15314a139cdb5ec6b4e1d13384b75afbaa5f4449 Binary files /dev/null and b/charmap/charmap_Telugu.xlsx differ diff --git a/english/female/model/config.yaml b/english/female/model/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd5dd8d786136052438b8d9bd2e9c8e76ee2b65a --- /dev/null +++ b/english/female/model/config.yaml @@ -0,0 +1,266 @@ +accum_grad: 8 +allow_variable_data_keys: false +batch_bins: 3000000 +batch_size: 20 +batch_type: numel +best_model_criterion: +- - valid + - loss + - min +- - train + - loss + - min +bpemodel: null +chunk_length: 500 +chunk_shift_ratio: 0.5 +cleaner: null +collect_stats: false +config: conf/tuning/train_fastspeech2.yaml +cudnn_benchmark: false +cudnn_deterministic: true +cudnn_enabled: true +detect_anomaly: false +dist_backend: nccl +dist_init_method: env:// +dist_launcher: null +dist_master_addr: localhost +dist_master_port: 44035 +dist_rank: 0 +dist_world_size: 8 +distributed: true +dry_run: false +early_stopping_criterion: +- valid +- loss +- min +energy_extract: energy +energy_extract_conf: + fs: 22050 + hop_length: 256 + n_fft: 1024 + reduction_factor: 1 + win_length: null +energy_normalize: global_mvn +energy_normalize_conf: + stats_file: /home/speech/Fastspeech2_HS/english/female/model/energy_stats.npz +feats_extract: fbank +feats_extract_conf: + fmax: 8000 + fmin: 0 + fs: 22050 + hop_length: 256 + n_fft: 1024 + n_mels: 80 + win_length: null +fold_length: +- 150 +- 204800 +freeze_param: [] +g2p: g2p_en_no_space +grad_clip: 1.0 +grad_clip_type: 2.0 +grad_noise: false +ignore_init_mismatch: false +init_param: [] +iterator_type: sequence +keep_nbest_models: 5 +local_rank: 0 +log_interval: null +log_level: INFO +max_cache_fd: 32 +max_cache_size: 0.0 +max_epoch: 1000 +model_conf: {} +multiple_iterator: false +multiprocessing_distributed: true +ngpu: 1 +no_forward_run: false +non_linguistic_symbols: null +normalize: global_mvn +normalize_conf: + stats_file: /home/speech/Fastspeech2_HS/english/female/model/feats_stats.npz +num_att_plot: 3 +num_cache_chunks: 1024 +num_iters_per_epoch: 800 +num_workers: 1 +odim: null +optim: adam +optim_conf: + lr: 1.0 +output_dir: exp/tts_train_fastspeech2_raw_char_None +patience: null +pitch_extract: dio +pitch_extract_conf: + f0max: 400 + f0min: 80 + fs: 22050 + hop_length: 256 + n_fft: 1024 + reduction_factor: 1 +pitch_normalize: global_mvn +pitch_normalize_conf: + stats_file: /home/speech/Fastspeech2_HS/english/female/model/pitch_stats.npz +pretrain_path: null +print_config: false +required: +- output_dir +- token_list +resume: true +scheduler: noamlr +scheduler_conf: + model_size: 384 + warmup_steps: 4000 +seed: 0 +sharded_ddp: false +sort_batch: descending +sort_in_batch: descending +token_list: +- +- +- +- a +- r +- n +- "\u091F" +- i +- "\u0921" +- E +- s +- l +- d +- w +- I +- m +- k +- z +- "\u0905" +- f +- h +- "\u0911" +- U +- A +- . +- "\u0910" +- b +- p +- ',' +- "\u0919" +- o +- g +- y +- "\u0936" +- "\u0914" +- t +- u +- j +- c +- '?' +- '!' +- q +- "\u0923" +- "\u0925" +- "\u0937" +- "\u0927" +- B +- H +- P +- D +- M +- C +- R +- "\u0918" +- "\u0916" +- O +- v +- +token_type: char +train_data_path_and_name_and_type: +- - dump/raw/tr_no_dev/text + - text + - text +- - duration_info_from_teacher/decode_use_teacher_forcingtrue_train.loss.ave/tr_no_dev/durations + - durations + - text_int +- - dump/raw/tr_no_dev/wav.scp + - speech + - sound +train_dtype: float32 +train_shape_file: +- exp/tts_stats_raw_char_None/train/text_shape.char +- exp/tts_stats_raw_char_None/train/speech_shape +tts: fastspeech2 +tts_conf: + adim: 384 + aheads: 2 + decoder_normalize_before: true + dlayers: 4 + dunits: 1536 + duration_predictor_chans: 256 + duration_predictor_kernel_size: 3 + duration_predictor_layers: 2 + elayers: 4 + encoder_normalize_before: true + energy_embed_dropout: 0.0 + energy_embed_kernel_size: 1 + energy_predictor_chans: 256 + energy_predictor_dropout: 0.5 + energy_predictor_kernel_size: 3 + energy_predictor_layers: 2 + eunits: 1536 + init_dec_alpha: 1.0 + init_enc_alpha: 1.0 + init_type: xavier_uniform + pitch_embed_dropout: 0.0 + pitch_embed_kernel_size: 1 + pitch_predictor_chans: 256 + pitch_predictor_dropout: 0.5 + pitch_predictor_kernel_size: 5 + pitch_predictor_layers: 5 + positionwise_conv_kernel_size: 3 + positionwise_layer_type: conv1d + postnet_chans: 256 + postnet_filts: 5 + postnet_layers: 5 + reduction_factor: 1 + stop_gradient_from_energy_predictor: false + stop_gradient_from_pitch_predictor: true + transformer_dec_attn_dropout_rate: 0.2 + transformer_dec_dropout_rate: 0.2 + transformer_dec_positional_dropout_rate: 0.2 + transformer_enc_attn_dropout_rate: 0.2 + transformer_enc_dropout_rate: 0.2 + transformer_enc_positional_dropout_rate: 0.2 + use_masking: true + use_scaled_pos_enc: true +unused_parameters: false +use_amp: false +use_preprocessor: true +use_tensorboard: true +use_wandb: false +val_scheduler_criterion: +- valid +- loss +valid_batch_bins: null +valid_batch_size: null +valid_batch_type: null +valid_data_path_and_name_and_type: +- - dump/raw/dev/text + - text + - text +- - duration_info_from_teacher/decode_use_teacher_forcingtrue_train.loss.ave/dev/durations + - durations + - text_int +- - dump/raw/dev/wav.scp + - speech + - sound +valid_max_cache_size: null +valid_shape_file: +- exp/tts_stats_raw_char_None/valid/text_shape.char +- exp/tts_stats_raw_char_None/valid/speech_shape +version: 0.10.3a3 +wandb_entity: null +wandb_id: null +wandb_model_log_interval: -1 +wandb_name: null +wandb_project: null +write_collected_feats: false diff --git a/english/female/model/energy_stats.npz b/english/female/model/energy_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..3a23c0289eccc0249a60372c7d3a02b9f40abb7e --- /dev/null +++ b/english/female/model/energy_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fbd6cac3a0ffa58d825b5107c7272137a33045619b4766222b3dab6ad34290f +size 770 diff --git a/english/female/model/feats_stats.npz b/english/female/model/feats_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..87e4250e495ff4752307db671f3c8148fba7d295 --- /dev/null +++ b/english/female/model/feats_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64edf39613daf282ca50b3738f0667a51f3bdda9feadc3c39cc9bdeb347e5959 +size 1402 diff --git a/english/female/model/feats_type b/english/female/model/feats_type new file mode 100644 index 0000000000000000000000000000000000000000..16b9d46ca2ab51e9b5f8a9e5ba31f3ef5a906ab6 --- /dev/null +++ b/english/female/model/feats_type @@ -0,0 +1 @@ +raw diff --git a/english/female/model/model.pth b/english/female/model/model.pth new file mode 100644 index 0000000000000000000000000000000000000000..b6240808ef79ee6c762e469623e2f2e294ba8633 --- /dev/null +++ b/english/female/model/model.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c637a04f92dab17317a404e6dacf19e4d02c32556a275b5cc2f0eec376392e16 +size 148695034 diff --git a/english/female/model/pitch_stats.npz b/english/female/model/pitch_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..9e688734b5d586d1cc13217819700d8bbd880f25 --- /dev/null +++ b/english/female/model/pitch_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:434a06aa750fd55b106aca27d829f654a1822635f42e3d51e8e231ef3f5a4e50 +size 770 diff --git a/english/male/model/config.yaml b/english/male/model/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e098dc01b13c3d5c0d253e177997c12bc1622618 --- /dev/null +++ b/english/male/model/config.yaml @@ -0,0 +1,265 @@ +accum_grad: 8 +allow_variable_data_keys: false +batch_bins: 3000000 +batch_size: 20 +batch_type: numel +best_model_criterion: +- - valid + - loss + - min +- - train + - loss + - min +bpemodel: null +chunk_length: 500 +chunk_shift_ratio: 0.5 +cleaner: null +collect_stats: false +config: conf/tuning/train_fastspeech2.yaml +cudnn_benchmark: false +cudnn_deterministic: true +cudnn_enabled: true +detect_anomaly: false +dist_backend: nccl +dist_init_method: env:// +dist_launcher: null +dist_master_addr: localhost +dist_master_port: 59485 +dist_rank: 0 +dist_world_size: 8 +distributed: true +dry_run: false +early_stopping_criterion: +- valid +- loss +- min +energy_extract: energy +energy_extract_conf: + fs: 22050 + hop_length: 256 + n_fft: 1024 + reduction_factor: 1 + win_length: null +energy_normalize: global_mvn +energy_normalize_conf: + stats_file: /home/speech/Fastspeech2_HS/english/male/model/energy_stats.npz +feats_extract: fbank +feats_extract_conf: + fmax: 8000 + fmin: 0 + fs: 22050 + hop_length: 256 + n_fft: 1024 + n_mels: 80 + win_length: null +fold_length: +- 150 +- 204800 +freeze_param: [] +g2p: g2p_en_no_space +grad_clip: 1.0 +grad_clip_type: 2.0 +grad_noise: false +ignore_init_mismatch: false +init_param: [] +iterator_type: sequence +keep_nbest_models: 5 +local_rank: 0 +log_interval: null +log_level: INFO +max_cache_fd: 32 +max_cache_size: 0.0 +max_epoch: 1000 +model_conf: {} +multiple_iterator: false +multiprocessing_distributed: true +ngpu: 1 +no_forward_run: false +non_linguistic_symbols: null +normalize: global_mvn +normalize_conf: + stats_file: /home/speech/Fastspeech2_HS/english/male/model/feats_stats.npz +num_att_plot: 3 +num_cache_chunks: 1024 +num_iters_per_epoch: 800 +num_workers: 1 +odim: null +optim: adam +optim_conf: + lr: 1.0 +output_dir: exp/tts_train_fastspeech2_raw_char_None +patience: null +pitch_extract: dio +pitch_extract_conf: + f0max: 400 + f0min: 40 + fs: 22050 + hop_length: 256 + n_fft: 1024 + reduction_factor: 1 +pitch_normalize: global_mvn +pitch_normalize_conf: + stats_file: /home/speech/Fastspeech2_HS/english/male/model/pitch_stats.npz +pretrain_path: null +print_config: false +required: +- output_dir +- token_list +resume: true +scheduler: noamlr +scheduler_conf: + model_size: 384 + warmup_steps: 4000 +seed: 0 +sharded_ddp: false +sort_batch: descending +sort_in_batch: descending +token_list: +- +- +- +- a +- r +- n +- "\u091F" +- i +- "\u0921" +- E +- s +- l +- d +- w +- I +- m +- k +- z +- f +- "\u0905" +- h +- "\u0911" +- U +- A +- . +- "\u0910" +- ',' +- p +- b +- "\u0919" +- o +- g +- y +- "\u0936" +- "\u0914" +- t +- u +- c +- j +- '?' +- '!' +- q +- "\u0923" +- "\u0927" +- "\u0925" +- "\u0937" +- B +- H +- P +- D +- M +- v +- C +- R +- "\u0918" +- "\u0916" +- +token_type: char +train_data_path_and_name_and_type: +- - dump/raw/tr_no_dev/text + - text + - text +- - duration_info_from_teacher/decode_use_teacher_forcingtrue_train.loss.ave/tr_no_dev/durations + - durations + - text_int +- - dump/raw/tr_no_dev/wav.scp + - speech + - sound +train_dtype: float32 +train_shape_file: +- exp/tts_stats_raw_char_None/train/text_shape.char +- exp/tts_stats_raw_char_None/train/speech_shape +tts: fastspeech2 +tts_conf: + adim: 384 + aheads: 2 + decoder_normalize_before: true + dlayers: 4 + dunits: 1536 + duration_predictor_chans: 256 + duration_predictor_kernel_size: 3 + duration_predictor_layers: 2 + elayers: 4 + encoder_normalize_before: true + energy_embed_dropout: 0.0 + energy_embed_kernel_size: 1 + energy_predictor_chans: 256 + energy_predictor_dropout: 0.5 + energy_predictor_kernel_size: 3 + energy_predictor_layers: 2 + eunits: 1536 + init_dec_alpha: 1.0 + init_enc_alpha: 1.0 + init_type: xavier_uniform + pitch_embed_dropout: 0.0 + pitch_embed_kernel_size: 1 + pitch_predictor_chans: 256 + pitch_predictor_dropout: 0.5 + pitch_predictor_kernel_size: 5 + pitch_predictor_layers: 5 + positionwise_conv_kernel_size: 3 + positionwise_layer_type: conv1d + postnet_chans: 256 + postnet_filts: 5 + postnet_layers: 5 + reduction_factor: 1 + stop_gradient_from_energy_predictor: false + stop_gradient_from_pitch_predictor: true + transformer_dec_attn_dropout_rate: 0.2 + transformer_dec_dropout_rate: 0.2 + transformer_dec_positional_dropout_rate: 0.2 + transformer_enc_attn_dropout_rate: 0.2 + transformer_enc_dropout_rate: 0.2 + transformer_enc_positional_dropout_rate: 0.2 + use_masking: true + use_scaled_pos_enc: true +unused_parameters: false +use_amp: false +use_preprocessor: true +use_tensorboard: true +use_wandb: false +val_scheduler_criterion: +- valid +- loss +valid_batch_bins: null +valid_batch_size: null +valid_batch_type: null +valid_data_path_and_name_and_type: +- - dump/raw/dev/text + - text + - text +- - duration_info_from_teacher/decode_use_teacher_forcingtrue_train.loss.ave/dev/durations + - durations + - text_int +- - dump/raw/dev/wav.scp + - speech + - sound +valid_max_cache_size: null +valid_shape_file: +- exp/tts_stats_raw_char_None/valid/text_shape.char +- exp/tts_stats_raw_char_None/valid/speech_shape +version: 0.10.3a3 +wandb_entity: null +wandb_id: null +wandb_model_log_interval: -1 +wandb_name: null +wandb_project: null +write_collected_feats: false diff --git a/english/male/model/energy_stats.npz b/english/male/model/energy_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..572470063a6e575542c21f2b2c0f06ee881be7e5 --- /dev/null +++ b/english/male/model/energy_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c657f3938e31a80940c0a4397176e9b649afeea1c6ef8215fd2406e1bdffdbe2 +size 770 diff --git a/english/male/model/feats_stats.npz b/english/male/model/feats_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..58fb73b2d7b2c0667a5497c8fc0f5e2868a1bf4a --- /dev/null +++ b/english/male/model/feats_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c165dda3cdbe1f8daed5d6ce7db61d3940b2fae0aa8e673d1bdacd3f0eff05 +size 1402 diff --git a/english/male/model/feats_type b/english/male/model/feats_type new file mode 100644 index 0000000000000000000000000000000000000000..16b9d46ca2ab51e9b5f8a9e5ba31f3ef5a906ab6 --- /dev/null +++ b/english/male/model/feats_type @@ -0,0 +1 @@ +raw diff --git a/english/male/model/model.pth b/english/male/model/model.pth new file mode 100644 index 0000000000000000000000000000000000000000..73a64673bfc10b95fd6aaa0f48677157db5a83dd --- /dev/null +++ b/english/male/model/model.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed1dd6aac7d5733bc25cc8a4cea7637609a44d0246617d2670e6924f0c863dba +size 148693495 diff --git a/english/male/model/pitch_stats.npz b/english/male/model/pitch_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..0735c1645439f47b9463af4b30ee7aef53699826 --- /dev/null +++ b/english/male/model/pitch_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc23505bb528648b20f5ad9aa380cee8b00cfd1113af56a966abc5092c26a641 +size 770