# Constants.

# Audio sample rate. Do not go higher than this, otherwise we may get errors
# when computing the fbanks.
SAMPLE_RATE = 16000

# Train/test sets share the same speakers but contain different utterances.
# 0.8 means 20% of the utterances of each speaker are held out and placed in
# the test set.
TRAIN_TEST_RATIO = 0.8

# Checkpoint directory names; judging by the names, presumably one for the
# softmax training stage and one for the triplet training stage (confirm
# against the code that uses them).
CHECKPOINTS_SOFTMAX_DIR = 'checkpoints-softmax'
CHECKPOINTS_TRIPLET_DIR = 'checkpoints-triplets'

# Has to be a multiple of 3; kept as an explicit product so that the
# constraint stays visible when the value is edited.
BATCH_SIZE = 32 * 3

# Input to the model will be a 4D image: (batch_size, num_frames, num_fbanks, 3),
# where the 3 channels are: FBANK, DIFF(FBANK), DIFF(DIFF(FBANK)).
# 1 second ~ 100 frames with default params winlen=0.025, winstep=0.01.
NUM_FRAMES = 160
NUM_FBANKS = 64