| | import unittest |
| | import wave |
| | import os |
| | from memory_profiler import memory_usage |
| |
|
| | from pocketsphinx import Vad |
| |
|
# Directory holding the audio fixtures read by the tests below, resolved
# relative to the location of this file.
DATADIR = os.path.join(
    os.path.dirname(__file__),
    "../../test/data/vad",
)
| |
|
| |
|
class VadTests(unittest.TestCase):
    """Tests for the ``Vad`` class imported from ``pocketsphinx``."""

    @staticmethod
    def _load_wave(file_name):
        """Read a WAV file and return its raw frames and frame rate.

        The file must be mono, use 2-byte samples and have a frame rate of
        8000, 16000 or 32000 Hz.

        Args:
            file_name: Path of the WAV file to read.

        Returns:
            A ``(sound_data, sampling_frequency)`` tuple: the bytes returned
            by ``readframes`` for the whole file, and the frame rate.

        Raises:
            AssertionError: If the file does not meet the format
                requirements above. The message names the offending file.
        """
        # The context manager closes the file even when a check fails.
        with wave.open(file_name, 'rb') as fp:
            assert fp.getnchannels() == 1, (
                '{0}: sound format is incorrect! Sound must be mono.'.format(
                    file_name))
            assert fp.getsampwidth() == 2, (
                '{0}: sound format is incorrect! '
                'Sample width of sound must be 2 bytes.').format(file_name)
            # The original message was never formatted, so a failure showed
            # a literal "{0}" instead of the file name.
            assert fp.getframerate() in (8000, 16000, 32000), (
                '{0}: sound format is incorrect! '
                'Sampling frequency must be 8000 Hz, 16000 Hz or 32000 Hz.'
            ).format(file_name)
            sampling_frequency = fp.getframerate()
            sound_data = fp.readframes(fp.getnframes())
        return sound_data, sampling_frequency

    def test_constructor(self):
        """``Vad`` can be constructed without arguments."""
        Vad()

    def test_set_mode(self):
        """Modes 0 through 3 are accepted; mode 4 raises ``ValueError``."""
        for mode in (0, 1, 2, 3):
            # subTest reports every rejected mode, not just the first one.
            with self.subTest(mode=mode):
                Vad(mode)
        with self.assertRaises(ValueError):
            Vad(4)

    def test_valid_rate_and_frame_length(self):
        """Known rate/frame-length pairs construct; a bogus pair raises."""
        accepted = (
            (8000, 0.01),
            (16000, 0.02),
            (32000, 0.01),
            (48000, 0.03),
        )
        for sample_rate, frame_length in accepted:
            with self.subTest(sample_rate=sample_rate,
                              frame_length=frame_length):
                Vad(sample_rate=sample_rate, frame_length=frame_length)
        with self.assertRaises(ValueError):
            Vad(sample_rate=283423, frame_length=1e-5)

    def test_process_zeroes(self):
        """A frame consisting only of zero bytes is not classified as speech."""
        frame_len = 160
        # 160 samples at 2 bytes each, i.e. 0.01 s at 16000 Hz.
        sample = b'\x00' * frame_len * 2
        vad = Vad(sample_rate=16000, frame_length=0.01)
        self.assertFalse(vad.is_speech(sample))

    def test_process_file(self):
        """Per-frame decisions on the raw fixture match the expected patterns.

        ``expecteds`` holds one string per mode (index == mode) with one
        character per frame: '1' where ``is_speech`` is truthy, '0' otherwise.
        """
        with open(os.path.join(DATADIR, 'test-audio.raw'), 'rb') as f:
            data = f.read()

        # Bytes per 30 ms frame at 8000 Hz, assuming 2-byte samples.
        n = int(8000 * 2 * 30 / 1000.0)
        # Slice whole frames only; a trailing partial frame is dropped. Unlike
        # indexing chunks[-1], this also copes with an empty file (the
        # comparison below then fails instead of raising IndexError).
        chunks = [data[pos:pos + n]
                  for pos in range(0, len(data) - n + 1, n)]
        expecteds = [
            '011110111111111111111111111100',
            '011110111111111111111111111100',
            '000000111111111111111111110000',
            '000000111111111111111100000000'
        ]
        for mode, expected in enumerate(expecteds):
            with self.subTest(mode=mode):
                vad = Vad(mode=mode, sample_rate=8000, frame_length=0.03)
                result = ''.join(
                    '1' if vad.is_speech(chunk) else '0' for chunk in chunks)
                self.assertEqual(expected, result)

    def test_leak(self):
        """Repeatedly classifying a file must not grow memory noticeably.

        The whole fixture is processed 1000 times with a single ``Vad``.
        The growth in the value reported by ``memory_usage(-1)[0]`` must not
        exceed one fifth of the value measured before the loop.
        """
        sound, fs = self._load_wave(os.path.join(DATADIR, 'leak-test.wav'))
        # Frame length in seconds (0.010 s); multiplied by the rate below to
        # get samples per frame.
        frame_seconds = 0.010
        frame_len = int(round(fs * frame_seconds))
        frame_bytes = 2 * frame_len  # 2-byte samples, checked by _load_wave
        n = len(sound) // frame_bytes
        nrepeats = 1000
        vad = Vad(mode=3, sample_rate=fs, frame_length=frame_seconds)
        used_memory_before = memory_usage(-1)[0]
        for _ in range(nrepeats):
            find_voice = False
            # Deliberately no early exit: every frame must be processed on
            # every pass so the leak check exercises the full workload.
            for slice_start in range(0, n * frame_bytes, frame_bytes):
                frame = sound[slice_start:slice_start + frame_bytes]
                if vad.is_speech(frame, fs):
                    find_voice = True
            self.assertTrue(find_voice)
        used_memory_after = memory_usage(-1)[0]
        self.assertGreaterEqual(
            used_memory_before / 5.0,
            used_memory_after - used_memory_before)
| |
|
| |
|
if __name__ == '__main__':
    # Run this module's tests with verbose output when executed as a script.
    unittest.main(verbosity=2)
| |
|