voice_chatbot / app.py
Dua Rajper
Update app.py
190bcb0 verified
raw
history blame
2.3 kB
import os
import streamlit as st
from groq import Groq
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq, pipeline # Import pipeline
from espnet2.bin.tts_inference import Text2Speech
import soundfile as sf
from pydub import AudioSegment
import io
from dotenv import load_dotenv
# Bring settings from a local .env file into the process environment.
load_dotenv()

# The Groq API key is mandatory: without it, report the problem in the UI
# and stop the app before any client is created.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if GROQ_API_KEY is None or GROQ_API_KEY == "":
    st.error("Groq API key not found. Please add it to the .env file.")
    st.stop()

# Client used for every chat-completion request made by this app.
groq_client = Groq(api_key=GROQ_API_KEY)
# Model loading (cached by Streamlit so it is not repeated on every rerun)
@st.cache_resource
def load_models():
    """Build the speech-to-text pipeline and the text-to-speech model.

    Returns:
        A ``(stt_pipe, tts_model)`` tuple: a Transformers
        "automatic-speech-recognition" pipeline backed by the
        ``openai/whisper-small`` checkpoint, and an ESPnet ``Text2Speech``
        instance loaded from a pretrained checkpoint.
    """
    whisper_checkpoint = "openai/whisper-small"

    # Speech-to-text: the processor supplies both the tokenizer and the
    # feature extractor that the pipeline needs alongside the model.
    whisper_processor = AutoProcessor.from_pretrained(whisper_checkpoint)
    whisper_model = AutoModelForSpeechSeq2Seq.from_pretrained(whisper_checkpoint)
    recognizer = pipeline(
        "automatic-speech-recognition",
        model=whisper_model,
        tokenizer=whisper_processor.tokenizer,
        feature_extractor=whisper_processor.feature_extractor,
    )

    # Text-to-speech
    synthesizer = Text2Speech.from_pretrained(
        "espnet/espnet_tts_vctk_espnet_spk_voxceleb12_rawnet"
    )

    return recognizer, synthesizer
stt_pipe, tts_model = load_models()

# Sample rate used for synthesized speech when the TTS model exposes no `fs`.
_FALLBACK_TTS_SAMPLE_RATE = 22050


def _transcribe(audio_bytes):
    """Return the transcription of an uploaded audio clip.

    The clip is downmixed to mono and resampled to the rate the recognizer's
    feature extractor declares, because the ASR pipeline treats a raw array
    as single-channel audio that is already at the model's sampling rate.
    Everything is done in memory so concurrent sessions never share a
    scratch file on disk.

    Args:
        audio_bytes: Raw bytes of the uploaded audio file.

    Returns:
        The recognized text (may be empty when no speech is found).
    """
    target_rate = stt_pipe.feature_extractor.sampling_rate
    clip = AudioSegment.from_file(io.BytesIO(audio_bytes))
    clip = clip.set_channels(1).set_frame_rate(target_rate)

    wav_buffer = io.BytesIO()
    clip.export(wav_buffer, format="wav")
    wav_buffer.seek(0)  # rewind so soundfile reads from the start
    samples, _ = sf.read(wav_buffer, dtype="float32")
    return stt_pipe(samples)["text"]


def _synthesize(text):
    """Return ``text`` rendered as speech, encoded as in-memory WAV bytes.

    Args:
        text: The sentence(s) to speak.

    Returns:
        ``bytes`` holding a complete WAV file.
    """
    tts_output = tts_model(text)
    # ESPnet's documented usage reads the waveform from the "wav" key of the
    # returned dict; a tuple-style return (which the previous unpacking
    # assumed) is still accepted by taking its first element.
    if isinstance(tts_output, dict):
        waveform = tts_output["wav"]
    else:
        waveform = tts_output[0]
    # Convert a tensor-like result to a NumPy array for soundfile.
    if hasattr(waveform, "detach"):
        waveform = waveform.detach().cpu().numpy()

    # Prefer the rate the model reports over a hard-coded guess.
    sample_rate = getattr(tts_model, "fs", None) or _FALLBACK_TTS_SAMPLE_RATE

    wav_buffer = io.BytesIO()
    # An explicit format is required when writing to a file-like object.
    sf.write(wav_buffer, waveform, int(sample_rate), format="WAV")
    return wav_buffer.getvalue()


# Streamlit app
st.title("Voice-Enabled Chatbot")

# Audio input
audio_file = st.file_uploader("Upload your voice input", type=['wav'])
if audio_file is not None:
    text = _transcribe(audio_file.read())
    st.write("Transcribed Text:", text)

    if not text.strip():
        # Nothing was recognized, so there is no prompt to send to the LLM.
        st.warning("No speech was detected in the uploaded audio.")
    else:
        # Generate response using Groq API
        try:
            # NOTE(review): confirm this model id is still served by Groq.
            chat_completion = groq_client.chat.completions.create(
                messages=[{"role": "user", "content": text}],
                model="mixtral-8x7b-32768",
                temperature=0.5,
                max_tokens=1024
            )
            response = chat_completion.choices[0].message.content
            st.write("Generated Response:", response)

            # Convert response to speech and play it straight from memory.
            st.audio(_synthesize(response), format="audio/wav")
        except Exception as e:
            st.error(f"Error generating response: {e}")