Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

ckpt_00360000.pth +3 -0
config.yaml +159 -0
semantic_ckpts/hf_1_325000/config.json +71 -0
semantic_ckpts/hf_1_325000/preprocessor_config.json +10 -0
semantic_ckpts/hf_1_325000/pytorch_model.bin +3 -0

ckpt_00360000.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c8c379ea2d3cbde1c8ba1b9717975220e79ba3f556bb161766fd5e4585dcd59c
+size 1360444883

config.yaml ADDED Viewed

	@@ -0,0 +1,159 @@

+generator:
+  name: SoundStream
+  config:
+    n_filters: 32
+    D: 256
+    target_bandwidths:
+    - 0.5
+    - 1
+    - 1.5
+    - 2
+    - 4
+    - 6
+    ratios:
+    - 8
+    - 5
+    - 4
+    - 2
+    sample_rate: 16000
+    bins: 1024
+d_list:
+- mfd
+mfd:
+  name: MultiFrequencyDiscriminator
+  config:
+    hop_lengths:
+    - 32
+    - 64
+    - 128
+    - 256
+    - 512
+    - 1024
+    hidden_channels:
+    - 64
+    - 128
+    - 256
+    - 512
+    - 512
+    - 512
+    domain: double
+    mel_scale: true
+    sample_rate: 16000
+mpd:
+  name: MultiPeriodDiscriminator
+  config:
+    period_sizes:
+    - 2
+    - 3
+    - 5
+    - 7
+    - 11
+    period_kernel_size: 5
+msd:
+  name: MultiScaleDiscriminator
+  config:
+    num_scales: 3
+    pool_kernel_size: 4
+    pool_stride: 2
+optimizer:
+  g:
+    name: AdamW
+    config:
+      lr: 0.0002
+      betas:
+      - 0.8
+      - 0.99
+      eps: 1.0e-06
+  d:
+    name: AdamW
+    config:
+      lr: 0.0002
+      betas:
+      - 0.8
+      - 0.99
+      eps: 1.0e-06
+lr_scheduler:
+  g:
+    name: ExponentialLR
+    config:
+      gamma: 0.999
+  d:
+    name: ExponentialLR
+    config:
+      gamma: 0.999
+criterion:
+  g_criterion:
+    name: losses.generator_loss.GeneratorSTFTLoss
+    config:
+      use_mel_loss: false
+      adv_criterion: MSEGLoss
+      mel_loss_weight: 45
+      use_feature_match: true
+      feat_match_loss_weight: 20
+      use_full_stft_loss: true
+      use_sub_stft_loss: true
+      full_stft_loss_weight: 1
+      sub_stft_loss_weight: 1
+      mel_scale_loss:
+        sampling_rate: 16000
+        n_fft: 1024
+        num_mels: 80
+        hop_size: 160
+        win_size: 800
+        fmin: 0
+      full_multi_scale_stft_loss:
+        fft_sizes:
+        - 512
+        - 1024
+        - 2048
+        win_sizes:
+        - 480
+        - 960
+        - 1200
+        hop_sizes:
+        - 120
+        - 240
+        - 300
+      sub_multi_scale_stft_loss:
+        num_bands: 6
+        fft_sizes:
+        - 128
+        - 256
+        - 256
+        win_sizes:
+        - 80
+        - 120
+        - 200
+        hop_sizes:
+        - 20
+        - 40
+        - 50
+  d_criterion:
+    name: losses.discriminator_loss.MSEDiscriminatorLoss
+    config: null
+  commit_loss_weight: 1.0
+  codebook_loss_weight: 75
+training_file: /aifs4su/data/zheny/fairseq/vae_v2/codec_final/list/train.txt
+validation_file: /aifs4su/data/zheny/fairseq/vae_v2/codec_final/list/valid.txt
+seed: 2333
+cudnn_deterministic: false
+tensorboard: true
+checkpoint_interval: 5000
+summary_interval: 100
+validation_interval: 500
+num_epoches: 20
+print_freq: 10
+discriminator_iter_start: 0
+num_ckpt_keep: 10
+segment_size: 16000
+audio_norm_scale: 0.95
+batch_size: 48
+num_workers: 8
+num_plots: 8
+local_rank: 1000000
+basic_model_config: config/codec_16k_6kbps_v3_vqdp.yaml
+exp_model_config: null
+log_dir: 0518_20w_ckpts
+ngpus_per_node: 8
+sample_rate: 16000
+model_ckpt_dir: 0518_20w_ckpts/model_ckpts

semantic_ckpts/hf_1_325000/config.json ADDED Viewed

	@@ -0,0 +1,71 @@

+{
+  "activation_dropout": 0.1,
+  "apply_spec_augment": true,
+  "architectures": [
+    "HubertModel"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "sum",
+  "ctc_zero_infinity": false,
+  "do_stable_layer_norm": false,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_norm": "group",
+  "feat_proj_dropout": 0.0,
+  "feat_proj_layer_norm": true,
+  "final_dropout": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.1,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "model_type": "hubert",
+  "num_attention_heads": 12,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "torch_dtype": "float32",
+  "transformers_version": "4.27.3",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 32
+}

semantic_ckpts/hf_1_325000/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "do_normalize": true,
+    "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+    "feature_size": 1,
+    "padding_side": "right",
+    "padding_value": 0,
+    "return_attention_mask": false,
+    "sampling_rate": 16000
+  }

semantic_ckpts/hf_1_325000/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c5ddbd7fa2468483cb9b2aa53117813471543dd278e65870333a56c54305f527
+size 377555286