# Voice chat assistant: Gradio UI + SpeechRecognition (STT) + gTTS (TTS) + Ollama via LangChain.
import os
import uuid

import gradio as gr
import speech_recognition as sr
from gtts import gTTS
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
# Initialize the model, prompt template, and history-aware runnable.
chat = Ollama(model="llama3:latest")

prompt = ChatPromptTemplate.from_messages([
    ("system", """
You are a helpful AI assistant. Your task is to engage in conversation with users,
answer their questions, and assist them with various tasks.
Communicate politely and maintain focus on the user's needs.
Keep responses concise, typically two to three sentences.
"""),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{input}"),
])

runnable = prompt | chat

# BUG FIX: the original factory (`lambda session_id: ChatMessageHistory()`)
# returned a brand-new, empty history on every invocation, so the model never
# saw any earlier turns despite the MessagesPlaceholder. Keep one history
# object per session_id so conversational context actually accumulates.
_session_histories = {}

def _get_session_history(session_id):
    """Return the persistent ChatMessageHistory for *session_id*, creating it once."""
    if session_id not in _session_histories:
        _session_histories[session_id] = ChatMessageHistory()
    return _session_histories[session_id]

with_message_history = RunnableWithMessageHistory(
    runnable,
    _get_session_history,
    input_messages_key="input",
    history_messages_key="history",
)
def text_to_speech(text, file_name):
    """Synthesize *text* to an English MP3 named *file_name* in the CWD; return its path."""
    speech = gTTS(text=text, lang='en', slow=False)
    target_path = os.path.join(os.getcwd(), file_name)
    speech.save(target_path)
    return target_path
def speech_to_text(audio):
    """Transcribe an audio file via Google's free recognizer.

    Returns the recognized text, or a human-readable error string on failure
    (the caller treats errors as ordinary chat input, so nothing is raised).
    """
    if audio is None:
        return "No audio input received."
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio) as source:
            captured = recognizer.record(source)
        transcript = recognizer.recognize_google(captured)
        print(transcript)
        return transcript
    except sr.UnknownValueError:
        return "Speech recognition could not understand the audio"
    except sr.RequestError:
        return "Could not request results from the speech recognition service"
    except Exception as e:
        return f"Error processing audio: {str(e)}"
def chat_function(input_type, text_input=None, audio_input=None, history=None):
    """Handle one chat turn, from text or from transcribed audio.

    Args:
        input_type: "text" or "audio" — selects which input to use.
        text_input: message typed by the user (when input_type == "text").
        audio_input: filepath of recorded audio (when input_type == "audio").
        history: list of (user, assistant) tuples; None starts a new one.

    Returns:
        (history, history, audio_path) — history twice because it feeds two
        chatbot outputs, plus the path to the TTS MP3 of the reply (None when
        no turn was taken).
    """
    if history is None:
        history = []

    if input_type == "text":
        user_input = text_input
    elif input_type == "audio":
        if audio_input is not None:
            user_input = speech_to_text(audio_input)
        else:
            user_input = "No audio input received."
    else:
        # Unknown input type: leave the conversation untouched.
        return history, history, None

    # BUG FIX: the original forwarded empty input (e.g. clicking "Send Text"
    # with a blank textbox) to the LLM and recorded it in history. Skip the
    # turn instead.
    if not user_input or not user_input.strip():
        return history, history, None

    print(f"User input: {user_input}")  # Debug information

    # Get the LLM response (the Ollama LLM wrapper returns a plain string).
    response = with_message_history.invoke(
        {"input": user_input},
        config={"configurable": {"session_id": "chat_history"}},
    )

    # Unique filename per reply so earlier responses aren't overwritten.
    audio_file = f"response_{uuid.uuid4()}.mp3"
    audio_path = text_to_speech(response, audio_file)

    # Update history in the (user, assistant) tuple format Gradio expects.
    history.append((user_input, response))
    return history, history, audio_path
# Gradio interface
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    with gr.Row():
        text_input = gr.Textbox(placeholder="Type your message here...")
        audio_input = gr.Audio(sources=['microphone'], type="filepath")
    with gr.Row():
        text_button = gr.Button("Send Text")
        audio_button = gr.Button("Send Audio")
    audio_output = gr.Audio()

    def on_audio_change(audio):
        """Live-transcribe a finished recording into the textbox for review."""
        if audio is not None:
            return speech_to_text(audio)
        return ""

    audio_input.change(on_audio_change, inputs=[audio_input], outputs=[text_input])

    # BUG FIX: the original passed freshly constructed gr.Textbox(value="text")
    # / gr.Textbox(value="audio") components as click inputs; components built
    # inside a Blocks context get rendered, so two stray textboxes appeared in
    # the UI. gr.State carries the constant without rendering anything.
    text_mode = gr.State("text")
    audio_mode = gr.State("audio")

    text_button.click(
        chat_function,
        inputs=[text_mode, text_input, audio_input, chatbot],
        outputs=[chatbot, chatbot, audio_output],
    )
    audio_button.click(
        chat_function,
        inputs=[audio_mode, text_input, audio_input, chatbot],
        outputs=[chatbot, chatbot, audio_output],
    )

demo.launch(server_name='0.0.0.0', share=True, max_threads=10)