Spaces:
Paused
Paused
| import pyaudio | |
| import numpy as np | |
| import tensorflow as tf | |
| import zipfile | |
| import wave | |
| import time | |
# --- Microphone capture settings -------------------------------------------
FORMAT = pyaudio.paInt16  # signed 16-bit PCM samples
CHANNELS = 1  # mono
RATE = 16000  # samples per second
CHUNK = 1024  # frames requested on each stream read
TARGET_LENGTH = 15600  # number of samples handed to the model per inference
SILENCE_THRESHOLD = 5000  # milliseconds; divided by 1000 where it is compared

# Rolling window holding the most recent TARGET_LENGTH samples as float32.
audio_buffer = np.zeros((TARGET_LENGTH,), dtype=np.float32)

# --- TFLite model -----------------------------------------------------------
model_path = '1.tflite'
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
waveform_input_index = input_details[0]['index']
scores_output_index = output_details[0]['index']

# The model file is also opened as a zip archive to read the class names,
# one label per line.
with zipfile.ZipFile(model_path) as z, z.open('yamnet_label_list.txt') as f:
    labels = [raw.decode('utf-8').strip() for raw in f]

# Fix the input tensor length to TARGET_LENGTH, then re-allocate so the
# interpreter picks up the new shape.
interpreter.resize_tensor_input(waveform_input_index, [TARGET_LENGTH], strict=False)
interpreter.allocate_tensors()

# --- Audio backend ----------------------------------------------------------
p = pyaudio.PyAudio()
def record_audio():
    """Record microphone audio to ``audio.wav`` while classifying it live.

    Each chunk read from the input stream is appended to the WAV file and
    shifted into the module-level rolling ``audio_buffer``; the buffer is then
    run through the TFLite ``interpreter`` and the top-scoring label is
    printed. Recording stops when the peak amplitude has stayed at or below
    0.01 for longer than ``SILENCE_THRESHOLD`` milliseconds, or when the user
    presses Ctrl+C.

    The module-level PyAudio instance ``p`` is terminated on exit, so this
    function can only be called once per process.

    Returns:
        str: The path of the written file, always ``"audio.wav"``.
    """
    # The rolling buffer is rebound below (np.roll returns a new array), so it
    # must be declared global; otherwise the name becomes local and the first
    # read raises UnboundLocalError.
    global audio_buffer

    # Pre-initialise so the ``finally`` clause can tell what was opened and
    # does not raise NameError when opening the stream or the file fails.
    stream = None
    wf = None
    silence_limit_s = SILENCE_THRESHOLD / 1000  # milliseconds -> seconds

    try:
        # Open the audio stream
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        print("Recording... Press Ctrl+C to stop.")

        # Open a .wav file to save the audio
        wf = wave.open("audio.wav", 'wb')
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(p.get_sample_size(FORMAT))
        wf.setframerate(RATE)

        # Monotonic clock: elapsed-time checks must not be affected by
        # wall-clock adjustments.
        last_speech_time = time.monotonic()

        while True:
            audio_data = stream.read(CHUNK)
            # Convert int16 PCM to float32 in [-1.0, 1.0).
            audio_chunk = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0

            # Slide the window left and place the new samples at the end.
            audio_buffer = np.roll(audio_buffer, -len(audio_chunk))
            audio_buffer[-len(audio_chunk):] = audio_chunk

            # Write the raw audio data to the .wav file
            wf.writeframes(audio_data)

            # Run the model on the current window
            interpreter.set_tensor(waveform_input_index, audio_buffer)
            interpreter.invoke()
            scores = interpreter.get_tensor(scores_output_index)

            # Average over any leading frame axis before picking the best
            # class, so the index always falls within the label list. With a
            # single output frame this equals a plain argmax.
            class_scores = scores.reshape(-1, scores.shape[-1]).mean(axis=0)
            top_class_index = class_scores.argmax()
            prediction = labels[top_class_index]
            print(prediction)

            # Check for silence
            if np.max(np.abs(audio_chunk)) > 0.01:
                last_speech_time = time.monotonic()
            elif time.monotonic() - last_speech_time > silence_limit_s:
                print("Silence detected. Stopping recording.")
                break
    except KeyboardInterrupt:
        # Handle the KeyboardInterrupt to stop recording
        print("\nRecording stopped by user.")
    finally:
        # Release only what was actually acquired.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()
        if wf is not None:
            wf.close()
        print("Stream closed and resources released.")
    return "audio.wav"
# Script entry point: start a recording session only when this file is run
# directly, not when it is imported.
if __name__ == "__main__":
    record_audio()