File size: 1,050 Bytes

{
  "ext_weights": {
    "cav_mae_st": {
      "path": "ext_weights/cav_mae_st.pth",
      "size": "727M",
      "description": "CAV-MAE-ST pretrained weights"
    },
    "empty_string": {
      "path": "ext_weights/empty_string.pth",
      "size": "310K",
      "description": "Empty string embedding for classifier-free guidance"
    },
    "music_speech_audioset_clap": {
      "path": "ext_weights/music_speech_audioset_epoch_15_esc_89.98.pt",
      "size": "2.2G",
      "description": "CLAP model trained on music/speech/AudioSet, ESC-50 acc=89.98%"
    },
    "synchformer": {
      "path": "ext_weights/synchformer_state_dict.pth",
      "size": "907M",
      "description": "Synchformer audio-visual synchronization model weights"
    },
    "v1-44": {
      "path": "ext_weights/v1-44.pth",
      "size": "1.2G",
      "description": "VAE checkpoint (44kHz)"
    }
  },
  "weights": {
    "controlfoley": {
      "path": "weights/controlfoley.pth",
      "size": "11G",
      "description": "ControlFoley main model checkpoint"
    }
  }
}