STT-TTS-Chatbot / models /STTTTS.py
lI7Il
Demo
edb4d34
from . import *
# Settings
# Audio parameters — presumably the sample rate in Hz and the channel count
# (1 = mono) used when recording; NOTE(review): no function visible in this
# file reads them, confirm where they are consumed.
SAMPLE_RATE = 16000
CHANNELS = 1
# Path for the recorded input audio — presumably written by the recording
# code elsewhere; TODO confirm.
RECORD_FILE = './recording.wav'
# Path where synthesize() stores the generated speech before playing it back.
SPEECH_FILE = './speech.wav'
def transcribe(audio_data):
    """Transcribe an audio file and report the detected language.

    Args:
        audio_data: Path to the audio file to transcribe. Despite the name,
            this is a filesystem path (it is passed to ``open``), not raw
            audio samples.

    Returns:
        A ``(text, language)`` tuple with surrounding whitespace stripped
        from both values.

    Raises:
        FileNotFoundError: If ``audio_data`` does not point to an existing
            file (raised by ``open``).

    Note:
        ``client`` is not defined in this module; it is expected to come
        from the package-level star import.
    """
    # Use the parameter directly rather than rebinding a local named like
    # the module-level RECORD_FILE constant, which hid the constant.
    with open(audio_data, 'rb') as audio_file:
        response = client.audio.transcriptions.create(
            file=audio_file,
            model="whisper-large-v3",
            # Optional: context hint so the company name is spelled correctly.
            prompt="You are transcribing questions about a company called 'Summit' in English or 'سَمِت' in Arabic",
            # Optional: verbose_json is requested so that `language` is
            # available on the response in addition to `text`.
            response_format="verbose_json",
            # Optional: temperature 0.0 for the least random decoding.
            temperature=0.0,
        )

    text = response.text.strip()
    language = response.language.strip()
    print("Language:", language)
    print("Transcription:", text)
    return text, language
def synthesize(text, lang):
    """Synthesize ``text`` as speech, save it to SPEECH_FILE and play it.

    Args:
        text: The text to speak.
        lang: Language name selecting the model and voice. ``'English'`` and
            ``'Arabic'`` are supported; matching ignores case and
            surrounding whitespace.

    Returns:
        None. The function blocks until playback has finished.

    Raises:
        ValueError: If ``lang`` is not a supported language. This is raised
            before any request is sent or any file is written.

    Note:
        ``client``, ``sf`` and ``sd`` are not defined in this module; they
        are expected to come from the package-level star import.
    """
    # (model, voice) per supported language, keyed by lower-cased name.
    voices = {
        'english': ('playai-tts', 'Fritz-PlayAI'),
        'arabic': ('playai-tts-arabic', 'Ahmad-PlayAI'),
    }

    # Validate up front: previously an unsupported language left `model` and
    # `voice` unbound and failed later with an opaque UnboundLocalError.
    key = lang.strip().lower() if isinstance(lang, str) else None
    if key not in voices:
        supported = ', '.join(sorted(voices))
        raise ValueError(
            f"Unsupported language for speech synthesis: {lang!r} "
            f"(supported: {supported})"
        )
    model, voice = voices[key]

    # Synthesize
    response = client.audio.speech.create(
        model=model,
        voice=voice,
        input=text,
        response_format='wav',
    )

    # Save to file
    response.write_to_file(SPEECH_FILE)

    # Read the file back and play it
    data, samplerate = sf.read(SPEECH_FILE)
    sd.play(data, samplerate)
    sd.wait()  # Block until playback has finished