# app.py
import streamlit as st
from utils import save_uploaded_audio, get_voice_preset, load_chat_history, list_chat_histories, save_chat_history
from voice_cloner import clone_and_generate_text
import os
import whisper
import torchaudio
from groq import Groq
from groq_llm import get_groq_response
# Load Whisper model for transcription.
# "tiny" is the smallest/fastest Whisper checkpoint; loaded once at module
# import so every Streamlit rerun reuses it via the module-level name.
whisper_model = whisper.load_model("tiny")
# Initialize Groq LLM client.
# NOTE(review): os.environ.get returns None when GROQ_API_KEY is unset; the
# Groq client will then fail at request time rather than here — confirm the
# deployment always provides the key.
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
st.set_page_config(page_title="Voice Chat", layout="centered")
# NOTE(review): the original title emoji were mojibake'd ("π€π¬"); restored
# to the most likely intended characters — confirm against the upstream repo.
st.title("🎤💬 Voice & Text Chat using Your Cloned Voice")

# Seed per-session settings once; Streamlit reruns the script on every
# interaction, so only missing keys are written and user choices survive.
_SESSION_DEFAULTS = {
    "clone_path": None,        # path to the uploaded reference voice sample
    "voice_gender": "Male",    # default to Male (matches original behavior)
    "voice_name": "MyVoice",
    "emotion": "Neutral",
    "language": "English",
    "ai_persona": "Assistant",
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
# --- Sidebar: configure and upload the clone voice ---
# NOTE(review): emoji below were mojibake'd in the scraped source ("𧬠", "β");
# restored to the most likely intended characters — confirm against upstream.
st.sidebar.header("🧬 Setup Your Clone Voice")
voice_option = st.sidebar.radio("Choose how to provide clone voice", ["Upload Voice"])
st.session_state.voice_gender = st.sidebar.selectbox("Select Voice Gender", ["Male", "Female"])
st.session_state.voice_name = st.sidebar.text_input("Name your voice", value=st.session_state.voice_name)
st.session_state.emotion = st.sidebar.selectbox("Select Emotion", ["Neutral", "Happy", "Sad", "Angry", "Excited", "Calm"])
st.session_state.language = st.sidebar.selectbox("Select Language", ["English", "Urdu", "Hindi", "Arabic", "Spanish"])
st.session_state.ai_persona = st.sidebar.selectbox("Select AI Personality", ["Assistant", "Urdu Teacher", "Wise Mentor", "Chill Friend", "Formal Assistant"])

if voice_option == "Upload Voice":
    uploaded = st.sidebar.file_uploader("Upload a voice sample", type=["wav", "mp3", "m4a", "flac", "ogg"])
    if uploaded:
        # Persist the sample to disk; the cloner reads it back by path.
        path = save_uploaded_audio(uploaded, "reference_voice.wav")
        st.session_state.clone_path = path
        # BUG FIX: this f-string was split across two physical lines in the
        # source, which is a SyntaxError for a single-quoted string; rejoined.
        st.success(f"✅ Voice '{st.session_state.voice_name}' uploaded and saved as your clone voice.")
# --- Conversation section ---
# The LLM-response and cloned-voice-reply steps were duplicated verbatim in
# both tabs; factored into helpers so the two input paths stay in sync.
# NOTE(review): emoji in the UI strings were mojibake'd in the scraped source
# ("π£οΈ", "π€", "π¬", "π"); restored to the most likely intended
# characters — confirm against the upstream repo.


def _get_ai_reply(user_text: str) -> str:
    """Send *user_text* to the Groq LLM with the sidebar persona settings and return the reply text."""
    persona_prompt = f"You are a {st.session_state.ai_persona}. Respond in {st.session_state.language} with a {st.session_state.emotion} tone."
    response = groq_client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[
            {"role": "system", "content": persona_prompt},
            {"role": "user", "content": user_text},
        ],
    )
    return response.choices[0].message.content


def _speak_reply(reply: str) -> None:
    """Render *reply* as audio in the user's cloned voice, or warn if no clone voice is set up yet."""
    if st.session_state.clone_path:
        st.info("Cloning voice reply...")
        voice_preset = get_voice_preset(st.session_state.voice_gender, st.session_state.emotion)
        voice_output_path = clone_and_generate_text(reply, st.session_state.clone_path, voice_preset)
        st.audio(voice_output_path)
    else:
        st.warning("Upload your voice clone first in the sidebar.")


st.subheader("🗣️ Ask with voice or type text below")
tab1, tab2 = st.tabs(["🎤 Voice Input", "💬 Text Input"])

# --- VOICE INPUT TAB ---
with tab1:
    user_voice = st.file_uploader("Upload your voice question", type=["wav", "mp3", "m4a", "flac", "ogg"])
    if user_voice:
        user_voice_path = save_uploaded_audio(user_voice, "user_question.wav")
        st.audio(user_voice_path)
        # Step 1: Transcribe voice
        st.info("Transcribing...")
        user_text = whisper_model.transcribe(user_voice_path)["text"]
        st.success(f"📝 You said: {user_text}")
        # Step 2: Get LLM response
        st.info("Thinking...")
        reply = _get_ai_reply(user_text)
        st.success(f"🤖 AI says: {reply}")
        # Step 3: Voice reply
        _speak_reply(reply)

# --- TEXT INPUT TAB ---
with tab2:
    user_input = st.text_input("Type your question here:")
    if st.button("Send Text"):
        if not user_input.strip():
            st.warning("Please enter a message.")
        else:
            # Step 1: Get LLM response
            st.info("Thinking...")
            reply = _get_ai_reply(user_input)
            st.success(f"🤖 AI says: {reply}")
            # Step 2: Voice reply
            _speak_reply(reply)
|