# Relay / accented.py
# FredyHoundayi's picture
# Initial commit: Whisper WebSocket API for Hugging Face
# da12a71
import numpy as np
import pyaudio
from faster_whisper import WhisperModel
import queue
import threading
import os
from dotenv import load_dotenv
load_dotenv()
# Hugging Face token read from the environment (after load_dotenv() has run).
# It is a credential, so it must never be printed or logged.
HF_TOKEN = os.getenv("HF_TOKEN")

# Load Whisper model.
# compute_type was changed from float16 to int8 for compatibility.
model = WhisperModel("small", compute_type="int8")

# Audio settings
RATE = 16000        # sample rate passed to the input stream
CHUNK = 1024        # frames requested per stream.read() call
RECORD_SECONDS = 2  # approximate length of each segment sent for transcription

# Hand-off buffer between the capture thread (producer) and the
# transcription thread (consumer).
audio_queue = queue.Queue()
print("βœ… Model loaded and ready to transcribe!")
def audio_capture():
    """Record microphone audio forever and enqueue fixed-size PCM segments.

    Opens a mono, 16-bit input stream at ``RATE`` Hz and reads it ``CHUNK``
    frames at a time. Once ``int(RATE / CHUNK * RECORD_SECONDS)`` chunks have
    been collected, they are joined into a single ``bytes`` object and put on
    ``audio_queue``; collection then starts over.

    The loop never ends on its own. If reading (or opening) the stream raises,
    the stream and the PyAudio instance are released before the exception
    propagates, so the audio device is not left held open.
    """
    # Loop-invariant: number of chunks that make up one queued segment.
    chunks_per_segment = int(RATE / CHUNK * RECORD_SECONDS)

    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            print("πŸŽ™οΈ Listening...")
            frames = []
            while True:
                frames.append(stream.read(CHUNK))
                if len(frames) >= chunks_per_segment:
                    audio_queue.put(b''.join(frames))
                    frames = []
        finally:
            # Always release the input stream, even when read() fails.
            stream.stop_stream()
            stream.close()
    finally:
        # Release PortAudio resources held by the PyAudio instance.
        p.terminate()
def transcribe():
    """Consume queued PCM segments forever and print the transcribed text.

    Each item taken from ``audio_queue`` is interpreted as 16-bit integer
    samples, converted to float32 and divided by 32768.0, then passed to
    ``model.transcribe`` with the language fixed to English. The text of
    every returned segment is printed on its own line.
    """
    int16_scale = 32768.0
    while True:
        # Blocks until the capture side has queued another segment.
        raw_pcm = audio_queue.get()
        samples = np.frombuffer(raw_pcm, np.int16)
        waveform = samples.astype(np.float32) / int16_scale
        segments, _info = model.transcribe(waveform, language="en")
        for piece in segments:
            print("πŸ“", piece.text)
# Launch the workers: capture first, then transcription, each in its own thread.
for worker in (audio_capture, transcribe):
    threading.Thread(target=worker).start()