# app.py — speech-to-text → Groq LLM → text-to-speech Gradio demo
# (Hugging Face Space by Mtkhang90; last commit d08ab17, "Update app.py")
import gradio as gr
import speech_recognition as sr
import requests
from TTS.api import TTS
import os
import numpy as np
import soundfile as sf
# Load the Coqui TTS model once at startup (CPU-only, no progress bar) so
#每 request doesn't pay the model-load cost.
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
# Groq API key from the environment; None if the secret is not configured.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
def voice_chat(audio):
    """Transcribe spoken audio, send the text to a Groq LLM, and voice the reply.

    Parameters
    ----------
    audio : tuple[int, numpy.ndarray] | None
        Gradio ``type="numpy"`` audio input: ``(sample_rate, data)``.

    Returns
    -------
    tuple[str, str | None]
        The text to display (transcript error or LLM reply) and the path to
        the synthesized reply WAV, or ``None`` when no audio was produced.
    """
    if audio is None:
        return "No audio input detected.", None

    # Gradio type="numpy" yields (sample_rate, data), while soundfile's
    # signature is sf.write(path, data, samplerate). The original unpacked
    # the tuple in the wrong order, so the WAV write received the int as
    # audio data — fixed here.
    sample_rate, audio_array = audio
    temp_path = "temp.wav"
    sf.write(temp_path, audio_array, sample_rate)

    # --- Speech-to-text --------------------------------------------------
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(temp_path) as source:
            audio_data = recognizer.record(source)
        try:
            text = recognizer.recognize_google(audio_data)
        except sr.UnknownValueError:
            return "Could not understand audio.", None
        except sr.RequestError as e:
            return f"Speech Recognition error: {e}", None
    finally:
        # Don't leave the temp recording behind between requests.
        try:
            os.remove(temp_path)
        except OSError:
            pass

    # --- Groq LLM call ---------------------------------------------------
    if not GROQ_API_KEY:
        return "GROQ_API_KEY is not set.", None
    try:
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {GROQ_API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": "llama3-8b-8192",
                "messages": [{"role": "user", "content": text}],
            },
            timeout=60,  # avoid hanging the Gradio worker on a stalled request
        )
        response.raise_for_status()
        llm_text = response.json()["choices"][0]["message"]["content"]
    except (requests.RequestException, KeyError, IndexError, ValueError) as e:
        # Covers network failures, HTTP errors, and unexpected JSON shapes.
        return f"Groq API error: {e}", None

    # --- Text-to-speech --------------------------------------------------
    tts.tts_to_file(text=llm_text, file_path="response.wav")
    return llm_text, "response.wav"
# Wire the pipeline into a simple Gradio UI: one audio input (mic or file),
# the LLM's text reply plus a synthesized voice reply as outputs.
demo = gr.Interface(
    fn=voice_chat,
    inputs=gr.Audio(sources=["microphone", "upload"], type="numpy", label="🎀 Speak or upload"),
    outputs=[
        gr.Textbox(label="Groq Response"),
        gr.Audio(label="AI Voice"),
    ],
    title="πŸ“š Speech-to-Text-to-Speech with Groq LLM and TTS",
)

if __name__ == "__main__":
    demo.launch()