rakeshrohan commited on
Commit
07cf042
·
verified ·
1 Parent(s): a7fce0d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -0
app.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ import gradio as gr
4
+ import uuid
5
+ import os
6
+ import speech_recognition as sr
7
+ from gtts import gTTS
8
+ from langchain_community.llms import Ollama
9
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
10
+ from langchain_community.chat_message_histories import ChatMessageHistory
11
+ from langchain_core.runnables.history import RunnableWithMessageHistory
12
+
13
# Initialize the model and the conversational prompt template.
chat = Ollama(model="llama3:latest")

prompt = ChatPromptTemplate.from_messages([
    ("system", """
You are a helpful AI assistant. Your task is to engage in conversation with users,
answer their questions, and assist them with various tasks.
Communicate politely and maintain focus on the user's needs.
Keep responses concise, typically two to three sentences.
"""),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{input}"),
])

runnable = prompt | chat

# One persistent history object per session_id, created on first use and
# reused on every later turn.
#
# BUG FIX: the original factory was `lambda session_id: ChatMessageHistory()`,
# which built a *new, empty* history on every invocation — the model never
# saw any prior turns, defeating RunnableWithMessageHistory entirely.
_session_histories = {}


def _get_session_history(session_id):
    """Return the stored ChatMessageHistory for *session_id*, creating it once."""
    if session_id not in _session_histories:
        _session_histories[session_id] = ChatMessageHistory()
    return _session_histories[session_id]


with_message_history = RunnableWithMessageHistory(
    runnable,
    _get_session_history,
    input_messages_key="input",
    history_messages_key="history",
)
35
+
36
def text_to_speech(text, file_name):
    """Synthesize *text* to speech with gTTS and save it as an MP3.

    The file is written as *file_name* in the current working directory;
    the absolute path of the saved file is returned.
    """
    speech = gTTS(text=text, lang='en', slow=False)
    target_path = os.path.join(os.getcwd(), file_name)
    speech.save(target_path)
    return target_path
41
+
42
def speech_to_text(audio):
    """Transcribe an audio file to text using Google's speech recognizer.

    Returns the transcript on success, or a human-readable error string
    when the audio is missing, unintelligible, or the recognition service
    cannot be reached.
    """
    if audio is None:
        return "No audio input received."

    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio) as source:
            captured = recognizer.record(source)
            try:
                transcript = recognizer.recognize_google(captured)
            except sr.UnknownValueError:
                return "Speech recognition could not understand the audio"
            except sr.RequestError:
                return "Could not request results from the speech recognition service"
            print(transcript)
            return transcript
    except Exception as e:
        # Best-effort boundary: surface any other failure as a message
        # rather than crashing the UI callback.
        return f"Error processing audio: {str(e)}"
60
+
61
def chat_function(input_type, text_input=None, audio_input=None, history=None):
    """Run one chat turn and return (history, history, reply_audio_path).

    input_type selects the source of the user's message: "text" reads
    *text_input*, "audio" transcribes *audio_input*; anything else is a
    no-op that returns the history unchanged with no audio.
    """
    if history is None:
        history = []

    if input_type == "text":
        user_input = text_input
    elif input_type == "audio":
        if audio_input is None:
            user_input = "No audio input received."
        else:
            user_input = speech_to_text(audio_input)
    else:
        return history, history, None

    print(f"User input: {user_input}")  # Debug information

    # Ask the LLM, threading the shared session history through.
    response = with_message_history.invoke(
        {"input": user_input},
        config={"configurable": {"session_id": "chat_history"}},
    )

    # Voice the reply to a uniquely named MP3 so replies never collide.
    reply_audio = text_to_speech(response, f"response_{uuid.uuid4()}.mp3")

    # Append this turn in the (user, assistant) tuple format the Chatbot expects.
    history.append((user_input, response))
    return history, history, reply_audio
91
+
92
# Gradio interface
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    with gr.Row():
        text_input = gr.Textbox(placeholder="Type your message here...")
        audio_input = gr.Audio(sources=['microphone'], type="filepath")
    with gr.Row():
        text_button = gr.Button("Send Text")
        audio_button = gr.Button("Send Audio")
    audio_output = gr.Audio()

    # Constant "mode" selectors for chat_function.
    # BUG FIX: the original passed gr.Textbox(value="text") / gr.Textbox(
    # value="audio") directly in `inputs=[...]`; components constructed
    # inside a Blocks context are rendered, so two stray editable textboxes
    # appeared in the UI. gr.State carries the constant invisibly.
    text_mode = gr.State("text")
    audio_mode = gr.State("audio")

    def on_audio_change(audio):
        """Mirror the recognized speech into the text box for user review."""
        if audio is not None:
            return speech_to_text(audio)
        return ""

    audio_input.change(on_audio_change, inputs=[audio_input], outputs=[text_input])
    text_button.click(
        chat_function,
        inputs=[text_mode, text_input, audio_input, chatbot],
        outputs=[chatbot, chatbot, audio_output],
    )
    audio_button.click(
        chat_function,
        inputs=[audio_mode, text_input, audio_input, chatbot],
        outputs=[chatbot, chatbot, audio_output],
    )

# BUG FIX: `upload_limit` is not a `launch()` parameter and raises TypeError;
# the documented upload size cap is `max_file_size` (here 10 MB).
# NOTE(review): server_name is a hard-coded LAN IP — confirm for deployment.
demo.launch(server_name='192.168.3.151', share=True, max_file_size="10mb", max_threads=10)