voiceblock / voicebox /src /constants.py
ALeLacheur's picture
Voiceblock demo: Attempt 8
957e2dc
from pathlib import Path
################################################################################
# Project-wide constants
################################################################################
# Static directories. Each one sits directly under the parent of the directory
# that contains this file.
_PROJECT_ROOT = Path(__file__).parent.parent

CACHE_DIR = _PROJECT_ROOT / 'cache'
DATA_DIR = _PROJECT_ROOT / 'data'
RUNS_DIR = _PROJECT_ROOT / 'runs'
TEST_DIR = _PROJECT_ROOT / 'test'
CONFIGS_DIR = _PROJECT_ROOT / 'configs'
MODELS_DIR = _PROJECT_ROOT / 'pretrained'
# Fixed signal properties shared by all streaming operations.
# NOTE(review): SAMPLE_RATE is presumably in Hz and the window/hop lengths
# presumably in samples -- confirm against the code that consumes them.
SAMPLE_RATE = 16_000
WIN_LENGTH = 256
HOP_LENGTH = 128
# VoxCeleb1 dataset: file extension and location under the data directory
VOXCELEB1_EXT = 'wav'
VOXCELEB1_DATA_DIR = DATA_DIR.joinpath('VoxCeleb1')
# VoxCeleb2 dataset: location under the data directory
VOXCELEB2_DATA_DIR = DATA_DIR.joinpath('VoxCeleb2')
# Checkpoint paths, each in its own sub-folder of the pretrained-models
# directory.
# Phoneme prediction model
PPG_PRETRAINED_PATH = MODELS_DIR.joinpath('phoneme', 'causal_ppg_256_hidden.pt')
# VoiceBox attack
VOICEBOX_PRETRAINED_PATH = MODELS_DIR.joinpath('voicebox', 'voicebox_final.pt')
# Universal additive attack
UNIVERSAL_PRETRAINED_PATH = MODELS_DIR.joinpath('universal', 'universal_final.pt')
# LibriSpeech dataset: one folder under the cache directory and one under the
# data directory
LIBRISPEECH_CACHE_DIR = CACHE_DIR.joinpath('LibriSpeech')
LIBRISPEECH_DATA_DIR = DATA_DIR.joinpath('LibriSpeech')
# LibriSpeech signal length and file extensions.
# NOTE(review): LIBRISPEECH_SIG_LEN is presumably a duration in seconds --
# confirm against the loader that reads it.
LIBRISPEECH_SIG_LEN = 4.0
LIBRISPEECH_EXT = 'flac'
LIBRISPEECH_PHONEME_EXT = 'TextGrid'

# Labels treated as filler; the phoneme dictionary below maps each to class 0.
LIBRISPEECH_FILLER_PHONEMES = ['', 'sil', 'sp', 'spn']

# Number of distinct class indices (0 through 69); the first phoneme class,
# index 0, corresponds to silence.
LIBRISPEECH_NUM_PHONEMES = 70

# Maps each phoneme label to its integer class index.
LIBRISPEECH_PHONEME_DICT = {
    # Silence-like labels all share class 0.
    **dict.fromkeys(('sil', '', 'sp', 'spn'), 0),
    # Every other label is numbered consecutively from 1 in the order listed
    # here, so the position in this tuple fixes the class index.
    **{
        label: index
        for index, label in enumerate(
            (
                'AE1', 'P', 'T', 'ER0',
                'W', 'AH1', 'N', 'M',
                'IH1', 'S', 'IH0', 'Z',
                'R', 'EY1', 'AH0', 'L',
                'D', 'AY1', 'V', 'JH',
                'EH1', 'DH', 'IY0', 'IY2',
                'OW1', 'AW1', 'UW1', 'HH',
                'AA1', 'OW0', 'F', 'TH',
                'AO1', 'AA2', 'ER1', 'B',
                'UH1', 'K', 'Y', 'IY1',
                'AO2', 'NG', 'AE0', 'G',
                'SH', 'IH2', 'EH2', 'UW0',
                'AY2', 'EY2', 'AA0', 'OY1',
                'AE2', 'ZH', 'EH0', 'OW2',
                'AH2', 'UH2', 'AO0', 'UW2',
                'EY0', 'AW2', 'AY0', 'ER2',
                'OY0', 'OY2', 'UH0', 'AW0',
                'CH',
            ),
            start=1,
        )
    },
}
# Streamer conditioning: a folder under the data directory and the file inside
# it. Despite its name, CONDITIONING_FILENAME holds a full path, not a bare
# file name.
CONDITIONING_FOLDER = DATA_DIR.joinpath('streamer')
CONDITIONING_FILENAME = CONDITIONING_FOLDER.joinpath('conditioning.pt')