# Dataset settings: the dataset name and where its audio, vertex and template
# files live.
# NOTE(review): wav_path, vertices_path and template_file look like locations
# relative to data_root — confirm against the data loader.
DATA:
  dataset: multi
  data_root: sample_dataset
  wav_path: wav
  vertices_path: npy
  template_file: templates.pkl
  # Twelve language names written as ONE space-separated plain string, not a
  # YAML list. Presumably the consumer splits it on whitespace — verify before
  # converting it to a sequence.
  train_subjects: Arabic English French German Greek Italian Portuguese Russian Spanish Korean Mandarin Japanese

# Network settings for the architecture selected by `arch` (stage2 here).
NETWORK:
  arch: stage2
  # Same value as PREDICTOR.vertice_dim; presumably the two must stay in sync.
  # 15069 = 5023 * 3, which looks like 5023 mesh vertices with xyz coordinates
  # — TODO confirm.
  in_dim: 15069
  # Same value as PREDICTOR.feature_dim (1024).
  hidden_size: 1024
  num_hidden_layers: 6
  num_attention_heads: 8
  intermediate_size: 1536
  window_size: 1
  quant_factor: 0
  face_quan_num: 16
  # NOTE(review): possibly a LeakyReLU negative slope — confirm in the model code.
  neg: 0.2
  # NOTE(review): looks like the name of the stage-1 autoencoder configuration
  # this stage builds on — confirm.
  autoencoder: stage1_vocaset
  INaffine: False
  style_emb_method: nnemb  # accepted values: onehot or nnemb

# Vector-quantizer settings.
# NOTE(review): n_embed looks like the number of codebook entries and
# zquant_dim the dimension of each entry — confirm against the quantizer code.
VQuantizer:
  n_embed: 256
  zquant_dim: 64

# Predictor settings, including the pretrained models it depends on.
PREDICTOR:
  # Same value as NETWORK.hidden_size (1024).
  feature_dim: 1024
  # Same value as NETWORK.in_dim (15069).
  vertice_dim: 15069
  device: cuda
  # Same value as DEMO.fps (25); presumably tied to the frame rate — TODO confirm.
  period: 25
  # NOTE(review): the file name suggests this is the stage-1 checkpoint; the
  # path is relative, presumably to the working directory — confirm.
  vqvae_pretrained_path: checkpoints/stage1.pth.tar
  # NOTE(review): looks like a Hugging Face model identifier rather than a
  # local filesystem path — confirm how the loader resolves it.
  wav2vec2model_path: facebook/wav2vec2-large-xlsr-53
  teacher_forcing: True
  num_layers: 6
  # Marked as unused by the original author; note that it differs from
  # NETWORK.num_attention_heads (8).
  n_head: 4  # not used

# Demo settings: which checkpoint to load, where to read input audio and
# where to write the output.
DEMO:
  # NOTE(review): the file name suggests this is the stage-2 checkpoint — confirm.
  model_path: checkpoints/stage2.pth.tar
  # Alternative setting, kept from the original file:
  #   condition: False
  # Per the original note: if false, the waveform file itself carries the cue
  # for the type of language.
  # The active value below matches one of the names in DATA.train_subjects.
  condition: English
  # NOTE(review): `id` may be a placeholder or a literal subject key — confirm.
  subject: id
  demo_wav_dir_path: demo/input/
  demo_output_path: demo/output/
  # Same value as PREDICTOR.period (25).
  fps: 25
  background_black: True  # choose the background color of your rendered video