---
# lightning.pytorch==2.2.5
seed_everything: 0
trainer:
  accelerator: gpu
  strategy: ddp_find_unused_parameters_true
  devices: 8
  num_nodes: 1
  precision: 32
  logger:
    class_path: lightning.pytorch.loggers.WandbLogger
    init_args:
      name: mel_big
      save_dir: vq_audio_simvq_bert_mel/8k_ration_20_loss
      version: null
      offline: false
      dir: null
      id: null
      anonymous: null
      project: endresult_tmp
      log_model: false
      experiment: null
      prefix: ''
      checkpoint_name: null
      job_type: null
      config: null
      entity: null
      reinit: null
      tags: null
      group: null
      notes: null
      magic: null
      config_exclude_keys: null
      config_include_keys: null
      mode: null
      allow_val_change: null
      resume: null
      force: null
      tensorboard: null
      sync_tensorboard: null
      monitor_gym: null
      save_code: null
      fork_from: null
      resume_from: null
      settings: null
  callbacks:
  - class_path: lightning.pytorch.callbacks.ModelCheckpoint
    init_args:
      dirpath: vq_audio_simvq_bert_mel/8k_ration_20_loss
      filename: null
      monitor: null
      verbose: false
      save_last: null
      save_top_k: -1
      save_weights_only: false
      mode: min
      auto_insert_metric_name: true
      every_n_train_steps: null
      train_time_interval: null
      every_n_epochs: null
      save_on_train_epoch_end: null
      enable_version_counter: true
  - class_path: lightning.pytorch.callbacks.LearningRateMonitor
    init_args:
      logging_interval: step
      log_momentum: false
      log_weight_decay: false
  fast_dev_run: false
  max_epochs: 50
  min_epochs: null
  max_steps: -1
  min_steps: null
  max_time: null
  limit_train_batches: null
  limit_val_batches: null
  limit_test_batches: null
  limit_predict_batches: null
  overfit_batches: 0.0
  val_check_interval: null
  check_val_every_n_epoch: 1
  num_sanity_val_steps: 0
  log_every_n_steps: 100
  enable_checkpointing: null
  enable_progress_bar: null
  enable_model_summary: null
  accumulate_grad_batches: 1
  gradient_clip_val: null
  gradient_clip_algorithm: null
  deterministic: null
  benchmark: null
  inference_mode: true
  use_distributed_sampler: true
  profiler: null
  detect_anomaly: false
  barebones: false
  plugins: null
  sync_batchnorm: false
  reload_dataloaders_every_n_epochs: 0
  default_root_dir: null
model:
  class_path: taming.models.vq_audio_simvq_mel.VQModel
  init_args:
    ddconfig:
      causal: true
      dimension: 512
      ratios:
      - 8
      - 8
      - 4
      - 4
    lossconfig:
      target: taming.modules.losses.stft_simvq_mel.VQSTFTWithDiscriminator
      params:
        disc_conditional: false
        disc_in_channels: 1
        disc_start: 0
        codebook_enlarge_ratio: 0
        codebook_enlarge_steps: 2000
        sample_rate: 24000
        commit_weight: 1000.0
        gen_loss_weight: 1.0
        mel_loss_coeff: 45.0
        mrd_loss_coeff: 1.0
    quantconfig: null
    sample_rate: 24000
    target_bandwidths: null
    audio_normalize: false
    # NOTE(review): `None` is not YAML null — it parses as the string "None".
    # Assuming a Python None was intended (Optional segment length); confirm
    # the consumer does not compare against the literal string "None".
    segment: null
    ckpt_path: null
    ignore_keys: []
    colorize_nlabels: null
    monitor: null
    learning_rate: 0.0001
    warmup_epochs: 1.0
    # NOTE(review): `None` parses as the string "None", not YAML null.
    # Assuming "no scheduler" was intended; verify the model code does not
    # check for the literal string "None" before relying on this.
    scheduler_type: null
    min_learning_rate: 0
    use_ema: true
    stage: null
data:
  class_path: taming.data.speechtokenizer_24k.SpeechTokenizerDataModule
  init_args:
    batch_size: 6
    num_workers: 8
    train_path:
    - /mnt/nfs3/zhangjinouwen/dataset/rep/rep_small_mel_hubert_train.txt
    - /mnt/nfs3/zhangjinouwen/dataset/rep/rep_middle_mel_hubert_train.txt
    - /mnt/nfs3/zhangjinouwen/dataset/rep/rep_Emila1_mel_hubert_train.txt
    - /mnt/nfs3/zhangjinouwen/dataset/rep/rep_vc_mel_hubert_train.txt
    val_path: /mnt/nfs3/zhangjinouwen/dataset/rep/rep_small_wav_eval.txt
optimizer: null
lr_scheduler: null
ckpt_path: null