# app.py
import streamlit as st
from utils import save_uploaded_audio, get_voice_preset, load_chat_history, list_chat_histories, save_chat_history
from voice_cloner import clone_and_generate_text
import os
import whisper
import torchaudio
from groq import Groq
from groq_llm import get_groq_response
# Load Whisper model for transcription.
# "tiny" is the smallest/fastest Whisper checkpoint; loaded once at module
# import so every Streamlit rerun reuses it via the module-level name.
whisper_model = whisper.load_model("tiny")
# Initialize Groq LLM client.
# NOTE(review): os.environ.get returns None when GROQ_API_KEY is unset; the
# Groq client will then fail at request time rather than here — confirm the
# deployment always provides the key.
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
st.set_page_config(page_title="Voice Chat", layout="centered")
# NOTE(review): the original title emoji were mojibake'd ("π€π¬"); restored
# to the most likely intended characters — confirm against the upstream repo.
st.title("🎤💬 Voice & Text Chat using Your Cloned Voice")

# Seed per-session settings once; Streamlit reruns the script on every
# interaction, so only missing keys are written and user choices survive.
_SESSION_DEFAULTS = {
    "clone_path": None,        # path to the uploaded reference voice sample
    "voice_gender": "Male",    # default to Male (matches original behavior)
    "voice_name": "MyVoice",
    "emotion": "Neutral",
    "language": "English",
    "ai_persona": "Assistant",
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
# --- Sidebar: configure and upload the clone voice ---
# NOTE(review): emoji below were mojibake'd in the scraped source ("𧬠", "β");
# restored to the most likely intended characters — confirm against upstream.
st.sidebar.header("🧬 Setup Your Clone Voice")
voice_option = st.sidebar.radio("Choose how to provide clone voice", ["Upload Voice"])
st.session_state.voice_gender = st.sidebar.selectbox("Select Voice Gender", ["Male", "Female"])
st.session_state.voice_name = st.sidebar.text_input("Name your voice", value=st.session_state.voice_name)
st.session_state.emotion = st.sidebar.selectbox("Select Emotion", ["Neutral", "Happy", "Sad", "Angry", "Excited", "Calm"])
st.session_state.language = st.sidebar.selectbox("Select Language", ["English", "Urdu", "Hindi", "Arabic", "Spanish"])
st.session_state.ai_persona = st.sidebar.selectbox("Select AI Personality", ["Assistant", "Urdu Teacher", "Wise Mentor", "Chill Friend", "Formal Assistant"])

if voice_option == "Upload Voice":
    uploaded = st.sidebar.file_uploader("Upload a voice sample", type=["wav", "mp3", "m4a", "flac", "ogg"])
    if uploaded:
        # Persist the sample to disk; the cloner reads it back by path.
        path = save_uploaded_audio(uploaded, "reference_voice.wav")
        st.session_state.clone_path = path
        # BUG FIX: this f-string was split across two physical lines in the
        # source, which is a SyntaxError for a single-quoted string; rejoined.
        st.success(f"✅ Voice '{st.session_state.voice_name}' uploaded and saved as your clone voice.")
# --- Conversation section ---
# The LLM-response and cloned-voice-reply steps were duplicated verbatim in
# both tabs; factored into helpers so the two input paths stay in sync.
# NOTE(review): emoji in the UI strings were mojibake'd in the scraped source
# ("π£οΈ", "π€", "π¬", "π"); restored to the most likely intended
# characters — confirm against the upstream repo.


def _get_ai_reply(user_text: str) -> str:
    """Send *user_text* to the Groq LLM with the sidebar persona settings and return the reply text."""
    persona_prompt = f"You are a {st.session_state.ai_persona}. Respond in {st.session_state.language} with a {st.session_state.emotion} tone."
    response = groq_client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[
            {"role": "system", "content": persona_prompt},
            {"role": "user", "content": user_text},
        ],
    )
    return response.choices[0].message.content


def _speak_reply(reply: str) -> None:
    """Render *reply* as audio in the user's cloned voice, or warn if no clone voice is set up yet."""
    if st.session_state.clone_path:
        st.info("Cloning voice reply...")
        voice_preset = get_voice_preset(st.session_state.voice_gender, st.session_state.emotion)
        voice_output_path = clone_and_generate_text(reply, st.session_state.clone_path, voice_preset)
        st.audio(voice_output_path)
    else:
        st.warning("Upload your voice clone first in the sidebar.")


st.subheader("🗣️ Ask with voice or type text below")
tab1, tab2 = st.tabs(["🎤 Voice Input", "💬 Text Input"])

# --- VOICE INPUT TAB ---
with tab1:
    user_voice = st.file_uploader("Upload your voice question", type=["wav", "mp3", "m4a", "flac", "ogg"])
    if user_voice:
        user_voice_path = save_uploaded_audio(user_voice, "user_question.wav")
        st.audio(user_voice_path)
        # Step 1: Transcribe voice
        st.info("Transcribing...")
        user_text = whisper_model.transcribe(user_voice_path)["text"]
        st.success(f"📝 You said: {user_text}")
        # Step 2: Get LLM response
        st.info("Thinking...")
        reply = _get_ai_reply(user_text)
        st.success(f"🤖 AI says: {reply}")
        # Step 3: Voice reply
        _speak_reply(reply)

# --- TEXT INPUT TAB ---
with tab2:
    user_input = st.text_input("Type your question here:")
    if st.button("Send Text"):
        if not user_input.strip():
            st.warning("Please enter a message.")
        else:
            # Step 1: Get LLM response
            st.info("Thinking...")
            reply = _get_ai_reply(user_input)
            st.success(f"🤖 AI says: {reply}")
            # Step 2: Voice reply
            _speak_reply(reply)
|