| import speech_recognition as sr |
| import google.generativeai as genai |
| from dotenv import load_dotenv |
| import os |
| from gtts import gTTS |
| import streamlit as st |
|
|
| |
# Page configuration is issued before any other Streamlit call in this script.
st.set_page_config(page_title="Multilingual AI Assistant", layout="wide")

# Load environment variables, then hand the Google API key to the Gemini client.
load_dotenv()
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

# Every session starts with an empty conversation history.
if 'conversation_history' not in st.session_state:
    st.session_state['conversation_history'] = []
|
|
| |
# Display name offered in the language selectors -> language code passed to
# voice_input() and text_to_speech(). Insertion order is the selector order.
LANGUAGE_MAP = dict(
    English="en",
    Hindi="hi",
    Spanish="es",
    French="fr",
    German="de",
    Chinese="zh",
)
|
|
| |
def voice_input(language='en'):
    """Record one utterance from the microphone and transcribe it.

    Progress and failures are reported through Streamlit status widgets
    instead of being raised to the caller.

    Args:
        language: Language code forwarded to ``recognize_google``.

    Returns:
        The recognized text, or ``None`` when the microphone cannot be
        opened, the speech cannot be understood, or the recognition
        request fails.
    """
    # NOTE(review): the leading glyphs in these messages look like
    # mis-decoded emoji; confirm against the original file encoding.
    recognizer = sr.Recognizer()

    try:
        with sr.Microphone() as source:
            st.info("ποΈ Adjusting for background noise... Please wait.")
            recognizer.adjust_for_ambient_noise(source, duration=2)
            st.success("β Ready! Please speak now.")
            audio = recognizer.listen(source)
    except (OSError, AttributeError) as e:
        # SpeechRecognition documents AttributeError for a missing PyAudio
        # install; OSError is presumably what surfaces when no input device
        # can be opened (TODO confirm on the deployment target). Report it
        # like the other failures instead of crashing the page.
        st.error(f"Microphone is not available: {e}")
        return None

    # The microphone is released before the network round trip below.
    st.info("π Recognizing speech...")
    try:
        text = recognizer.recognize_google(audio, language=language)
    except sr.UnknownValueError:
        st.error("β Sorry, I couldn't understand what you said.")
        return None
    except sr.RequestError as e:
        st.error(f"β Error with speech recognition: {e}")
        return None

    st.success(f"π£οΈ You said: {text}")
    return text
|
|
| |
def text_to_speech(text, language='en'):
    """Synthesize *text* with gTTS and store the audio in ``speech.mp3``.

    Success and failure are reported through Streamlit status widgets; the
    function itself returns nothing.

    Args:
        text: The text to convert to speech.
        language: Language code passed to gTTS as ``lang``.
    """
    audio_path = "speech.mp3"
    try:
        tts = gTTS(text=text, lang=language)
        tts.save(audio_path)
    except Exception as e:
        # main() plays whatever is stored in speech.mp3, so a failed run must
        # not leave an older (or half-written) file behind: that would play
        # audio that does not match the current response.
        try:
            os.remove(audio_path)
        except OSError:
            # Nothing to clean up, or the file cannot be removed; the error
            # below is still shown to the user.
            pass
        st.error(f"β Error generating speech: {e}")
    else:
        st.success("π Speech generated successfully!")
|
|
| |
def llm_model_object(user_text):
    """Ask Gemini for a reply to *user_text*, replaying recent history.

    The prompt is the last six history entries followed by the new user
    line and an open ``AI:`` line. On success both the user line and the
    reply are appended to ``st.session_state.conversation_history``.

    Args:
        user_text: The user's message.

    Returns:
        The model's reply text, or a fixed apology string when the request
        fails (the error is also shown via ``st.error``).
    """
    gemini = genai.GenerativeModel('gemini-2.0-flash')

    history = st.session_state.conversation_history
    recent_turns = "\n".join(history[-6:])
    prompt = "\n".join((recent_turns, f"User: {user_text}", "AI:"))

    try:
        reply = gemini.generate_content(prompt).text
        # History is only updated once a reply has actually been produced.
        history.append(f"User: {user_text}")
        history.append(f"AI: {reply}")
        return reply
    except Exception as err:
        st.error(f"β Error from Gemini AI: {err}")
        return "I'm sorry, there was an error generating a response."
|
|
| |
def _chat_bubble_html(msg):
    """Return the HTML for one history entry rendered as a chat bubble."""
    import html  # local import keeps this block self-contained

    if msg.startswith("User:"):
        css_class, prefix = "user-message", "User: "
    else:
        css_class, prefix = "ai-message", "AI: "

    # Drop only the leading speaker tag; the same text appearing later in
    # the message is part of the content and must be kept.
    body = msg[len(prefix):] if msg.startswith(prefix) else msg

    # The text comes from the user or the model and is injected into raw
    # HTML, so it is escaped. Line breaks become <br> so the bubble stays a
    # single HTML block.
    safe_body = html.escape(body).replace("\n", "<br>")
    return f"<div class='{css_class}'>{safe_body}</div>"


def display_chat():
    """Render the conversation history as a scrollable chat panel.

    Reads ``st.session_state.conversation_history``; entries starting with
    ``User:`` are drawn as right-aligned user bubbles, everything else as
    left-aligned AI bubbles.
    """
    st.markdown("""
    <style>
    .chat-container {
        height: 450px;
        overflow-y: auto;
        border-radius: 10px;
        border: 1px solid #ccc;
        padding: 15px;
        background-color: #F9F9F9;
    }
    .user-message {
        background-color: #DCF8C6;
        padding: 12px;
        border-radius: 20px;
        margin-bottom: 10px;
        text-align: right;
        color: #333;
        max-width: 70%;
        margin-left: auto;
    }
    .ai-message {
        background-color: #E5E5EA;
        padding: 12px;
        border-radius: 20px;
        margin-bottom: 10px;
        text-align: left;
        color: #333;
        max-width: 70%;
        margin-right: auto;
    }
    </style>
    """, unsafe_allow_html=True)

    # Each st.markdown call is rendered as its own element, so an opening
    # <div> sent in one call cannot wrap bubbles sent in later calls. The
    # whole panel is therefore emitted as one HTML string.
    bubbles = "".join(
        _chat_bubble_html(msg)
        for msg in st.session_state.conversation_history
    )
    st.markdown(
        f"<div class='chat-container'>{bubbles}</div>",
        unsafe_allow_html=True,
    )
|
|
| |
def main():
    """Render the assistant page and process at most one user turn.

    Flow for a single script run: draw the title and language selectors,
    collect input (voice or text), send it to the model, convert the reply
    to speech, offer playback and download, and finally draw the chat
    history so that it includes the turn just handled.
    """
    st.markdown("<h1 style='text-align: center; font-size: 60px; color: #1F51FF;'>π Multilingual Voice-Based AI Assistant</h1>", unsafe_allow_html=True)

    col1, col2 = st.columns(2)
    with col1:
        input_lang_name = st.selectbox("π Select Input Language:", list(LANGUAGE_MAP.keys()))
    with col2:
        output_lang_name = st.selectbox("π Select Output Language:", list(LANGUAGE_MAP.keys()))

    input_lang = LANGUAGE_MAP[input_lang_name]
    output_lang = LANGUAGE_MAP[output_lang_name]

    # Reserve the chat panel's position here but fill it at the end:
    # llm_model_object() appends to the history, so drawing the panel first
    # would always show it one turn behind.
    chat_area = st.container()

    input_method = st.radio("π€ Choose Input Method:", ["Voice", "Text"])

    user_text = ""
    if input_method == "Voice":
        if st.button("ποΈ Start Speaking"):
            # voice_input() returns None on failure, which skips the turn.
            user_text = voice_input(language=input_lang)
    else:
        typed_text = st.text_input("βοΈ Type your message here")
        # The text box keeps its value across reruns, so the message is only
        # submitted when Send is clicked; otherwise every rerun (for example
        # changing a selector) would resend it to the model.
        if st.button("Send"):
            if typed_text.strip():
                user_text = typed_text
            else:
                st.warning("β  Please enter a valid message.")

    if user_text:
        with st.spinner("π§  Generating AI Response..."):
            response_text = llm_model_object(user_text)

        if response_text:
            with st.spinner("π Converting AI Response to Speech..."):
                text_to_speech(response_text, language=output_lang)

            st.markdown(f"**π€ AI Response:** {response_text}")

            # Only the file read can raise FileNotFoundError, so the widgets
            # are rendered outside the try body.
            try:
                with open("speech.mp3", "rb") as audio_file:
                    audio_bytes = audio_file.read()
            except FileNotFoundError:
                st.error("β Error: Audio file not found.")
            else:
                st.audio(audio_bytes, format="audio/mp3")
                st.download_button("π₯ Download AI Response", data=audio_bytes, file_name="response.mp3", mime="audio/mp3")
        else:
            st.error("β No response generated by the AI.")

    with chat_area:
        display_chat()
| |
|
|
| |
| |
# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|