Spaces:
Sleeping
Sleeping
# app.py
#
# Streamlit voice/text chat app: clones the user's voice from an uploaded
# sample and answers questions (spoken or typed) in that cloned voice.
import os

import streamlit as st
import torchaudio  # NOTE(review): imported but unused in this file — confirm before removing
import whisper
from groq import Groq

from groq_llm import get_groq_response  # NOTE(review): unused; groq_client is called directly below
from utils import (
    get_voice_preset,
    list_chat_histories,
    load_chat_history,
    save_chat_history,
    save_uploaded_audio,
)
from voice_cloner import clone_and_generate_text


@st.cache_resource(show_spinner=False)
def _load_whisper_model(name: str = "tiny"):
    """Load the Whisper speech-to-text model once per process.

    Streamlit re-executes the whole script on every interaction; without
    caching, the model would be reloaded from disk on each rerun.
    """
    return whisper.load_model(name)


whisper_model = _load_whisper_model()

# Groq LLM client; expects GROQ_API_KEY in the environment (None if unset —
# the Groq client will fail on first call rather than at construction).
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

st.set_page_config(page_title="Voice Chat", layout="centered")
# Original title emoji were mojibake ("π€π¬"); restored to the intended glyphs.
st.title("🎤💬 Voice & Text Chat using Your Cloned Voice")
# Per-session defaults for the cloned-voice settings; only set when the key
# is not already present, so reruns keep the user's current choices.
_SESSION_DEFAULTS = {
    "clone_path": None,      # path to the saved reference voice sample
    "voice_gender": "Male",  # default to Male
    "voice_name": "MyVoice",
    "emotion": "Neutral",
    "language": "English",
    "ai_persona": "Assistant",
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
# --- Sidebar: voice-clone setup ---
# Header emoji restored from mojibake ("𧬠").
st.sidebar.header("🧬 Setup Your Clone Voice")

voice_option = st.sidebar.radio("Choose how to provide clone voice", ["Upload Voice"])
st.session_state.voice_gender = st.sidebar.selectbox("Select Voice Gender", ["Male", "Female"])
st.session_state.voice_name = st.sidebar.text_input(
    "Name your voice", value=st.session_state.voice_name
)
st.session_state.emotion = st.sidebar.selectbox(
    "Select Emotion", ["Neutral", "Happy", "Sad", "Angry", "Excited", "Calm"]
)
st.session_state.language = st.sidebar.selectbox(
    "Select Language", ["English", "Urdu", "Hindi", "Arabic", "Spanish"]
)
st.session_state.ai_persona = st.sidebar.selectbox(
    "Select AI Personality",
    ["Assistant", "Urdu Teacher", "Wise Mentor", "Chill Friend", "Formal Assistant"],
)

if voice_option == "Upload Voice":
    uploaded = st.sidebar.file_uploader(
        "Upload a voice sample", type=["wav", "mp3", "m4a", "flac", "ogg"]
    )
    if uploaded:
        # Persist the sample to disk; the voice cloner reads it from this path.
        path = save_uploaded_audio(uploaded, "reference_voice.wav")
        st.session_state.clone_path = path
        # Success-message emoji restored from mojibake ("β").
        st.success(
            f"✅ Voice '{st.session_state.voice_name}' uploaded and saved as your clone voice."
        )
# --- Conversation section ---
# Emoji in the labels below restored from mojibake ("π£οΈ", "π€", "π¬", …).
st.subheader("🗣️ Ask with voice or type text below")
tab1, tab2 = st.tabs(["🎤 Voice Input", "💬 Text Input"])

# --- VOICE INPUT TAB ---
with tab1:
    user_voice = st.file_uploader(
        "Upload your voice question", type=["wav", "mp3", "m4a", "flac", "ogg"]
    )
    if user_voice:
        user_voice_path = save_uploaded_audio(user_voice, "user_question.wav")
        st.audio(user_voice_path)

        # Step 1: speech -> text via Whisper.
        st.info("Transcribing...")
        result = whisper_model.transcribe(user_voice_path)
        user_text = result["text"]
        st.success(f"📝 You said: {user_text}")

        # Step 2: LLM response, steered by the sidebar persona/language/emotion.
        # NOTE(review): this prompt + call is duplicated in the text tab; a
        # shared helper would keep the two paths consistent.
        st.info("Thinking...")
        persona_prompt = (
            f"You are a {st.session_state.ai_persona}. "
            f"Respond in {st.session_state.language} with a {st.session_state.emotion} tone."
        )
        response = groq_client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[
                {"role": "system", "content": persona_prompt},
                {"role": "user", "content": user_text},
            ],
        )
        reply = response.choices[0].message.content
        st.success(f"🤖 AI says: {reply}")

        # Step 3: speak the reply in the user's cloned voice, if one is set.
        if st.session_state.clone_path:
            st.info("Cloning voice reply...")
            voice_preset = get_voice_preset(
                st.session_state.voice_gender, st.session_state.emotion
            )
            voice_output_path = clone_and_generate_text(
                reply, st.session_state.clone_path, voice_preset
            )
            st.audio(voice_output_path)
        else:
            st.warning("Upload your voice clone first in the sidebar.")
# --- TEXT INPUT TAB ---
with tab2:
    user_input = st.text_input("Type your question here:")
    if st.button("Send Text"):
        if user_input.strip() == "":
            st.warning("Please enter a message.")
        else:
            # Step 1: LLM response, steered by the sidebar persona/language/emotion.
            # NOTE(review): this prompt + call mirrors the voice tab; a shared
            # helper would keep the two paths consistent.
            st.info("Thinking...")
            persona_prompt = (
                f"You are a {st.session_state.ai_persona}. "
                f"Respond in {st.session_state.language} with a {st.session_state.emotion} tone."
            )
            response = groq_client.chat.completions.create(
                model="llama3-8b-8192",
                messages=[
                    {"role": "system", "content": persona_prompt},
                    {"role": "user", "content": user_input},
                ],
            )
            reply = response.choices[0].message.content
            # Emoji restored from mojibake ("π€").
            st.success(f"🤖 AI says: {reply}")

            # Step 2: speak the reply in the user's cloned voice, if one is set.
            if st.session_state.clone_path:
                st.info("Cloning voice reply...")
                voice_preset = get_voice_preset(
                    st.session_state.voice_gender, st.session_state.emotion
                )
                voice_output_path = clone_and_generate_text(
                    reply, st.session_state.clone_path, voice_preset
                )
                st.audio(voice_output_path)
            else:
                st.warning("Upload your voice clone first in the sidebar.")