# app.py
"""Streamlit voice & text chat app that answers in the user's cloned voice.

Pipeline: (optional) clone-voice upload -> Whisper transcription of a voice
question (or typed text) -> Groq LLM reply steered by persona / emotion /
language -> voice-cloned TTS playback of the reply.
"""
import os

import streamlit as st
import torchaudio
import whisper
from groq import Groq

from groq_llm import get_groq_response
from utils import (
    get_voice_preset,
    list_chat_histories,
    load_chat_history,
    save_chat_history,
    save_uploaded_audio,
)
from voice_cloner import clone_and_generate_text


@st.cache_resource
def _load_whisper_model():
    """Load the Whisper ASR model once per server process.

    Streamlit re-runs this script on every widget interaction; without
    caching, the model would be reloaded from disk on each rerun.
    """
    return whisper.load_model("tiny")


@st.cache_resource
def _get_groq_client():
    """Create a single shared Groq client; the API key comes from the env."""
    return Groq(api_key=os.environ.get("GROQ_API_KEY"))


# Load Whisper model for transcription (cached across reruns)
whisper_model = _load_whisper_model()
# Initialize Groq LLM client (cached across reruns)
groq_client = _get_groq_client()

st.set_page_config(page_title="Voice Chat", layout="centered")
st.title("🎤💬 Voice & Text Chat using Your Cloned Voice")

# Session-state defaults: cloned-voice path plus the persona knobs used to
# build the LLM system prompt. Seeded once; the sidebar widgets below
# overwrite the live values on every rerun.
_SESSION_DEFAULTS = {
    "clone_path": None,
    "voice_gender": "Male",  # Default to Male
    "voice_name": "MyVoice",
    "emotion": "Neutral",
    "language": "English",
    "ai_persona": "Assistant",
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default

# Sidebar for voice cloning setup
st.sidebar.header("🧬 Setup Your Clone Voice")
voice_option = st.sidebar.radio("Choose how to provide clone voice", ["Upload Voice"])
st.session_state.voice_gender = st.sidebar.selectbox("Select Voice Gender", ["Male", "Female"])
st.session_state.voice_name = st.sidebar.text_input("Name your voice", value=st.session_state.voice_name)
st.session_state.emotion = st.sidebar.selectbox("Select Emotion", ["Neutral", "Happy", "Sad", "Angry", "Excited", "Calm"])
st.session_state.language = st.sidebar.selectbox("Select Language", ["English", "Urdu", "Hindi", "Arabic", "Spanish"])
st.session_state.ai_persona = st.sidebar.selectbox("Select AI Personality", ["Assistant", "Urdu Teacher", "Wise Mentor", "Chill Friend", "Formal Assistant"])

if voice_option == "Upload Voice":
    uploaded = st.sidebar.file_uploader("Upload a voice sample", type=["wav", "mp3", "m4a", "flac", "ogg"])
    if uploaded:
        path = save_uploaded_audio(uploaded, "reference_voice.wav")
        st.session_state.clone_path = path
        st.success(f"✅ Voice '{st.session_state.voice_name}' uploaded and saved as your clone voice.")


def _ask_llm(user_text):
    """Send *user_text* to the Groq LLM and return the assistant's reply.

    The system prompt is rebuilt from the current persona / language /
    emotion session-state values, so sidebar changes take effect immediately.
    Shows progress + the reply in the UI as a side effect.
    """
    st.info("Thinking...")
    persona_prompt = f"You are a {st.session_state.ai_persona}. Respond in {st.session_state.language} with a {st.session_state.emotion} tone."
    response = groq_client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[
            {"role": "system", "content": persona_prompt},
            {"role": "user", "content": user_text},
        ],
    )
    reply = response.choices[0].message.content
    st.success(f"🤖 AI says: {reply}")
    return reply


def _speak_reply(reply):
    """Synthesize *reply* in the cloned voice and play it, or warn if no
    clone voice has been uploaded yet."""
    if st.session_state.clone_path:
        st.info("Cloning voice reply...")
        voice_preset = get_voice_preset(st.session_state.voice_gender, st.session_state.emotion)
        voice_output_path = clone_and_generate_text(reply, st.session_state.clone_path, voice_preset)
        st.audio(voice_output_path)
    else:
        st.warning("Upload your voice clone first in the sidebar.")


# --- Conversation section ---
st.subheader("🗣️ Ask with voice or type text below")
tab1, tab2 = st.tabs(["🎤 Voice Input", "💬 Text Input"])

# --- VOICE INPUT TAB ---
with tab1:
    user_voice = st.file_uploader("Upload your voice question", type=["wav", "mp3", "m4a", "flac", "ogg"])
    if user_voice:
        user_voice_path = save_uploaded_audio(user_voice, "user_question.wav")
        st.audio(user_voice_path)

        # Step 1: Transcribe voice
        st.info("Transcribing...")
        result = whisper_model.transcribe(user_voice_path)
        user_text = result["text"]
        st.success(f"📝 You said: {user_text}")

        # Step 2: Get LLM response
        reply = _ask_llm(user_text)

        # Step 3: Voice reply
        _speak_reply(reply)

# --- TEXT INPUT TAB ---
with tab2:
    user_input = st.text_input("Type your question here:")
    if st.button("Send Text"):
        if user_input.strip() == "":
            st.warning("Please enter a message.")
        else:
            # Step 1: Get LLM response
            reply = _ask_llm(user_input)

            # Step 2: Voice reply
            _speak_reply(reply)