heyarmar / README.md

Create README.md

7a8d5af almost 3 years ago

5.15 kB

	```python
	"""the interface to interact with wakeword model"""
	import pyaudio
	import threading
	import time
	import torchaudio
	import torch
	import numpy as np
	import queue
	from transformers import WavLMForSequenceClassification
	from transformers import AutoFeatureExtractor


	def int2float(sound):
	    """Convert integer PCM samples to peak-normalised float32 samples.

	    The samples are scaled so that the largest absolute value becomes 1.0.
	    An all-zero (or empty) buffer is returned unscaled to avoid dividing
	    by zero.

	    Args:
	        sound: Array of audio samples (e.g. ``int16`` as produced by
	            ``np.frombuffer(..., np.int16)``). The input is not modified.

	    Returns:
	        A ``float32`` array with singleton dimensions squeezed out.
	    """
	    # Cast BEFORE taking the absolute value: np.abs on int16 overflows for
	    # -32768 (it stays negative), which would corrupt the peak estimate.
	    sound = np.asarray(sound).astype('float32')
	    if sound.size == 0:
	        # .max() on an empty array raises; nothing to normalise anyway.
	        return sound.squeeze()
	    abs_max = np.abs(sound).max()
	    if abs_max > 0:
	        sound /= abs_max
	    return sound.squeeze()  # depends on the use case

	class RealtimeDecoder():
	    """Microphone -> voice-activity gate -> wake-word classifier pipeline.

	    A recording thread pushes fixed-length waveform chunks into
	    ``audio_queue``; a decoding thread pops them, joins the two most recent
	    chunks, runs the Silero VAD on that window and, only when speech is
	    found, scores the window with ``model``. Class index 1 of the softmaxed
	    logits is treated as the wake-word probability (threshold 0.6).

	    Note on naming: ``continue_recording`` is a *stop* flag. Both worker
	    loops run while the event is NOT set and exit once it is set.
	    """

	    def __init__(self,
	                 model,
	                 feature_extractor=None,
	                 ) -> None:
	        """Load the VAD model and initialise queues and state.

	        Args:
	            model: Classifier called as ``model(**features)``; its result
	                must expose ``.logits``.
	            feature_extractor: Object providing
	                ``pad(list_of_dicts, padding=..., return_tensors=...)``.
	                When omitted, a module-level ``feature_extractor`` is
	                looked up at decode time (the original script behaviour).
	        """
	        self.model = model
	        self.feature_extractor = feature_extractor
	        self.vad_model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
	                                               model='silero_vad',
	                                               force_reload=False,
	                                               onnx=False)

	        # Only the first helper is needed; indexing avoids breaking if the
	        # number of helpers returned by the hub entry point changes.
	        self.get_speech_timestamps = utils[0]
	        self.SAMPLE_RATE = 16000
	        self.cache_output = self._empty_cache()
	        self.continue_recording = threading.Event()
	        self.frame_duration_ms = 1000
	        self.audio_queue = queue.SimpleQueue()
	        self.speech_queue = queue.SimpleQueue()

	    @staticmethod
	    def _empty_cache():
	        """Return a fresh, empty decoding cache."""
	        return {
	            "cache": torch.zeros(0, 0, 0, dtype=torch.float),
	            "wavchunks": [],
	        }

	    def start_recording(self, wait_enter_to_stop=True):
	        """Build (but do not start) the recording threads.

	        Args:
	            wait_enter_to_stop: When true, also build a thread that blocks
	                on ``input()`` and sets the stop flag once Enter is pressed.

	        Returns:
	            ``(stop_listener_thread, recording_thread)``; the first item is
	            ``None`` when ``wait_enter_to_stop`` is false.
	        """
	        def stop():
	            input("Press Enter to stop the recording:\n\n")
	            self.continue_recording.set()

	        def record():
	            audio = pyaudio.PyAudio()
	            try:
	                stream = audio.open(format=pyaudio.paInt16,
	                                    channels=1,
	                                    rate=self.SAMPLE_RATE,
	                                    input=True,
	                                    frames_per_buffer=int(self.SAMPLE_RATE / 10))
	                try:
	                    while not self.continue_recording.is_set():
	                        n_frames = int(self.SAMPLE_RATE * self.frame_duration_ms / 1000.0)
	                        audio_chunk = stream.read(n_frames, exception_on_overflow=False)
	                        audio_int16 = np.frombuffer(audio_chunk, np.int16)
	                        audio_float32 = int2float(audio_int16)
	                        self.audio_queue.put(torch.from_numpy(audio_float32))
	                    print("Finish record")
	                finally:
	                    # Release the device even if reading/conversion raised.
	                    stream.stop_stream()
	                    stream.close()
	            finally:
	                audio.terminate()

	        if wait_enter_to_stop:
	            stop_listener_thread = threading.Thread(target=stop, daemon=False)
	        else:
	            stop_listener_thread = None
	        recording_thread = threading.Thread(target=record, daemon=False)
	        return stop_listener_thread, recording_thread

	    def finish_realtime_decode(self):
	        """Reset the decoding cache to its initial empty state."""
	        self.cache_output = self._empty_cache()

	    def _classify(self, window):
	        """Return the softmaxed, squeezed class scores for one waveform."""
	        extractor = self.feature_extractor
	        if extractor is None:
	            # Backward compatibility: the original script relied on a
	            # module-level ``feature_extractor`` defined in ``__main__``.
	            try:
	                extractor = feature_extractor
	            except NameError:
	                raise RuntimeError(
	                    "No feature extractor available: pass feature_extractor= "
	                    "to RealtimeDecoder or define a module-level "
	                    "'feature_extractor'."
	                ) from None
	        input_features = extractor.pad([{"input_values": window}],
	                                       padding=True,
	                                       return_tensors="pt")
	        # Inference only: skip building the autograd graph.
	        with torch.no_grad():
	            return self.model(**input_features).logits.softmax(dim=-1).squeeze()

	    def _process_chunk(self, waveform):
	        """Add one chunk to the history and score the latest two-chunk window."""
	        history = self.cache_output['wavchunks']
	        history.append(waveform)
	        # At most four chunks are retained; only the last two are scored.
	        history = history[-4:]
	        self.cache_output['wavchunks'] = history

	        if len(history) < 2:
	            return

	        window = torch.cat(history[-2:], dim=-1)
	        speech_timestamps = self.get_speech_timestamps(
	            window, self.vad_model, sampling_rate=self.SAMPLE_RATE)
	        if speech_timestamps:
	            scores = self._classify(window)
	            if scores[1] > 0.6:
	                print("hey armar", scores, window.size(-1) / self.SAMPLE_RATE)
	                # Drop history so the same utterance is not reported twice.
	                self.cache_output['wavchunks'] = []
	                return
	        # Progress indicator: one dot plus one per chunk still queued.
	        print('.' + '.' * self.audio_queue.qsize())

	    def start_decoding(self):
	        """Build (but do not start) the wake-word decoding thread.

	        Returns:
	            A non-daemon ``threading.Thread`` that consumes ``audio_queue``
	            until the stop flag ``continue_recording`` is set.
	        """
	        def decode():
	            while not self.continue_recording.is_set():
	                try:
	                    # Short timeout keeps the loop responsive to the stop flag.
	                    current_waveform = self.audio_queue.get(timeout=0.01)
	                except queue.Empty:
	                    continue
	                if current_waveform is not None:
	                    self._process_chunk(current_waveform)
	            print("KWS thread finish")

	        kws_decode_thread = threading.Thread(target=decode, daemon=False)
	        return kws_decode_thread

	if __name__ == "__main__":
	    # Demo entry point: load the wake-word classifier and its feature
	    # extractor, then listen on the microphone until Enter is pressed.
	    print("Model loading....")

	    checkpoint = 'nguyenvulebinh/heyarmar'
	    kws_model = WavLMForSequenceClassification.from_pretrained(checkpoint)
	    # Must remain a module-level name called ``feature_extractor``: the
	    # decoder reads it as a global while scoring audio.
	    feature_extractor = AutoFeatureExtractor.from_pretrained(checkpoint)

	    print("Model loaded....")

	    # Offline alternative (score a wav file instead of the microphone):
	    # file_wave = './99.wav'
	    # wav, rate = torchaudio.load(file_wave)
	    # input_features = feature_extractor.pad([{"input_values": item} for item in wav], padding=True, return_tensors="pt")
	    # output = kws_model(**input_features)
	    # print(output.logits.softmax(dim=-1))

	    obj_decode = RealtimeDecoder(kws_model)
	    recording_threads = obj_decode.start_recording()
	    kws_decode_thread = obj_decode.start_decoding()

	    # Recording-side threads first, decoder last; the stop-listener slot
	    # may be None and is skipped.
	    workers = [
	        candidate
	        for candidate in (*recording_threads, kws_decode_thread)
	        if candidate is not None
	    ]
	    for worker in workers:
	        worker.start()
	    for worker in workers:
	        worker.join()

	```