Spaces:
Sleeping
Sleeping
| from pathlib import Path | |
| ################################################################################ | |
| # Project-wide constants | |
| ################################################################################ | |
| # Static directories | |
| CACHE_DIR = Path(__file__).parent.parent / 'cache' | |
| DATA_DIR = Path(__file__).parent.parent / 'data' | |
| RUNS_DIR = Path(__file__).parent.parent / 'runs' | |
| TEST_DIR = Path(__file__).parent.parent / 'test' | |
| CONFIGS_DIR = Path(__file__).parent.parent / 'configs' | |
| MODELS_DIR = Path(__file__).parent.parent / 'pretrained' | |
| # Set constant properties for streaming operations | |
| WIN_LENGTH = 256 | |
| HOP_LENGTH = 128 | |
| SAMPLE_RATE = 16000 | |
| # VoxCeleb1 dataset | |
| VOXCELEB1_DATA_DIR = DATA_DIR / 'VoxCeleb1' | |
| VOXCELEB1_EXT = 'wav' | |
| # VoxCeleb2 dataset | |
| VOXCELEB2_DATA_DIR = DATA_DIR / 'VoxCeleb2' | |
| # Pretrained phoneme prediction model | |
| PPG_PRETRAINED_PATH = MODELS_DIR / 'phoneme' / 'causal_ppg_256_hidden.pt' | |
| # Pretrained VoiceBox attack | |
| VOICEBOX_PRETRAINED_PATH = MODELS_DIR / 'voicebox' / 'voicebox_final.pt' | |
| # Pretrained universal additive attack | |
| UNIVERSAL_PRETRAINED_PATH = MODELS_DIR / 'universal' / 'universal_final.pt' | |
| # LibriSpeech dataset | |
| LIBRISPEECH_DATA_DIR = DATA_DIR / 'LibriSpeech' | |
| LIBRISPEECH_CACHE_DIR = CACHE_DIR / 'LibriSpeech' | |
| LIBRISPEECH_SIG_LEN = 4.0 | |
| LIBRISPEECH_EXT = 'flac' | |
| LIBRISPEECH_PHONEME_EXT = 'TextGrid' | |
| LIBRISPEECH_NUM_PHONEMES = 70 # first phoneme corresponds to silence | |
| LIBRISPEECH_PHONEME_DICT = { | |
| 'sil': 0, '': 0, 'sp': 0, 'spn': 0, | |
| 'AE1': 1, 'P': 2, 'T': 3, 'ER0': 4, | |
| 'W': 5, 'AH1': 6, 'N': 7, 'M': 8, | |
| 'IH1': 9, 'S': 10, 'IH0': 11, 'Z': 12, | |
| 'R': 13, 'EY1': 14, 'AH0': 15, 'L': 16, | |
| 'D': 17, 'AY1': 18, 'V': 19, 'JH': 20, | |
| 'EH1': 21, 'DH': 22, 'IY0': 23, 'IY2': 24, | |
| 'OW1': 25, 'AW1': 26, 'UW1': 27, 'HH': 28, | |
| 'AA1': 29, 'OW0': 30, 'F': 31, 'TH': 32, | |
| 'AO1': 33, 'AA2': 34, 'ER1': 35, 'B': 36, | |
| 'UH1': 37, 'K': 38, 'Y': 39, 'IY1': 40, | |
| 'AO2': 41, 'NG': 42, 'AE0': 43, 'G': 44, | |
| 'SH': 45, 'IH2': 46, 'EH2': 47, 'UW0': 48, | |
| 'AY2': 49, 'EY2': 50, 'AA0': 51, 'OY1': 52, | |
| 'AE2': 53, 'ZH': 54, 'EH0': 55, 'OW2': 56, | |
| 'AH2': 57, 'UH2': 58, 'AO0': 59, 'UW2': 60, | |
| 'EY0': 61, 'AW2': 62, 'AY0': 63, 'ER2': 64, | |
| 'OY0': 65, 'OY2': 66, 'UH0': 67, 'AW0': 68, | |
| 'CH': 69} | |
| LIBRISPEECH_FILLER_PHONEMES = ['', 'sil', 'sp', 'spn'] | |
| # Streamer Conditioning | |
| CONDITIONING_FOLDER = DATA_DIR / 'streamer' | |
| CONDITIONING_FILENAME = CONDITIONING_FOLDER / 'conditioning.pt' | |