YoussefA7med commited on
Commit
ccc23c4
·
verified ·
1 Parent(s): e1aa210

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -19
app.py CHANGED
@@ -6,6 +6,9 @@ import gradio as gr
6
  from dotenv import load_dotenv
7
  import os
8
  import tempfile
 
 
 
9
 
10
  # Load environment variables
11
  load_dotenv()
@@ -22,6 +25,9 @@ TTS_PASSWORD = os.getenv("TTS_PASSWORD")
22
  TTS_VOICE = os.getenv("TTS_VOICE", "coral")
23
  TTS_SEED = int(os.getenv("TTS_SEED", "12345"))
24
 
 
 
 
25
  # التحقق من وجود المتغيرات المطلوبة
26
  required_env_vars = {
27
  "DEEPSEEK_API_KEY": API_KEY,
@@ -159,16 +165,44 @@ def format_response(response_dict):
159
  html += "</div>"
160
  return html
161
 
162
- def chat(message, history):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  """Handle chat interactions"""
164
- if not message:
165
- # Generate welcome message for empty input
166
- welcome = tutor.get_welcome_message()
167
- audio_path = tutor.text_to_speech(welcome)[0]
168
- return welcome, audio_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
  # Get bot response
171
- response = tutor.get_bot_response(message)
172
 
173
  # Generate audio for the main response
174
  audio_path = tutor.text_to_speech(response["response"])[0]
@@ -176,24 +210,35 @@ def chat(message, history):
176
  # Format the complete response
177
  formatted_response = format_response(response)
178
 
179
- return formatted_response, audio_path
 
 
 
 
 
 
 
 
 
 
180
 
181
  # Create Gradio interface
182
  with gr.Blocks(css="footer {display: none}") as demo:
183
  gr.Markdown("# 🤖 Sam - Your English Tutor")
184
- gr.Markdown("Welcome to your personalized English learning session! Type your message below to start chatting.")
185
 
186
  chatbot = gr.Chatbot(
187
  show_label=False,
188
  height=400,
189
- bubble_full_width=False,
190
  )
191
 
192
  with gr.Row():
193
- txt = gr.Textbox(
194
- show_label=False,
195
- placeholder="Type your message here...",
196
- container=False,
 
197
  )
198
  audio_output = gr.Audio(
199
  label="Sam's Voice",
@@ -201,11 +246,25 @@ with gr.Blocks(css="footer {display: none}") as demo:
201
  type="filepath"
202
  )
203
 
204
- txt.submit(chat, [txt, chatbot], [chatbot, audio_output])
205
- txt.submit(lambda: "", [], [txt]) # Clear textbox after submit
 
 
 
 
 
 
 
 
 
 
 
 
206
 
207
  # Launch the interface
208
  if __name__ == "__main__":
209
- # Generate welcome message at startup
210
- welcome = tutor.get_welcome_message()
211
- demo.launch()
 
 
 
6
  from dotenv import load_dotenv
7
  import os
8
  import tempfile
9
+ import speech_recognition as sr
10
+ import io
11
+ import soundfile as sf
12
 
13
  # Load environment variables
14
  load_dotenv()
 
25
  TTS_VOICE = os.getenv("TTS_VOICE", "coral")
26
  TTS_SEED = int(os.getenv("TTS_SEED", "12345"))
27
 
28
+ # إعداد Speech Recognition
29
+ recognizer = sr.Recognizer()
30
+
31
  # التحقق من وجود المتغيرات المطلوبة
32
  required_env_vars = {
33
  "DEEPSEEK_API_KEY": API_KEY,
 
165
  html += "</div>"
166
  return html
167
 
168
+ def speech_to_text(audio_path):
169
+ """Convert speech to text using speech_recognition"""
170
+ try:
171
+ # Load audio file
172
+ with sr.AudioFile(audio_path) as source:
173
+ # Record the audio file
174
+ audio = recognizer.record(source)
175
+ # Use Google Speech Recognition
176
+ text = recognizer.recognize_google(audio)
177
+ return text
178
+ except Exception as e:
179
+ print(f"Error in speech recognition: {str(e)}")
180
+ return None
181
+
182
+ def chat(audio, history):
183
  """Handle chat interactions"""
184
+ if audio is None:
185
+ # Return empty response if no audio
186
+ return history, None
187
+
188
+ # Convert audio to WAV format for speech recognition
189
+ audio_data = audio[1] # Get the numpy array
190
+ sample_rate = audio[0] # Get the sample rate
191
+
192
+ # Save as temporary WAV file
193
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
194
+ sf.write(temp_wav.name, audio_data, sample_rate)
195
+ # Convert speech to text
196
+ audio_text = speech_to_text(temp_wav.name)
197
+
198
+ # Clean up temporary file
199
+ os.unlink(temp_wav.name)
200
+
201
+ if not audio_text:
202
+ return history, None
203
 
204
  # Get bot response
205
+ response = tutor.get_bot_response(audio_text)
206
 
207
  # Generate audio for the main response
208
  audio_path = tutor.text_to_speech(response["response"])[0]
 
210
  # Format the complete response
211
  formatted_response = format_response(response)
212
 
213
+ # Update history in the correct format for gr.Chatbot
214
+ history = history or []
215
+ history.append((audio_text, formatted_response))
216
+
217
+ return history, audio_path
218
+
219
+ def show_welcome():
220
+ """Show welcome message on startup"""
221
+ welcome = tutor.get_welcome_message()
222
+ audio_path = tutor.text_to_speech(welcome)[0]
223
+ return [(None, welcome)], audio_path
224
 
225
  # Create Gradio interface
226
  with gr.Blocks(css="footer {display: none}") as demo:
227
  gr.Markdown("# 🤖 Sam - Your English Tutor")
228
+ gr.Markdown("Welcome to your personalized English learning session! Click the microphone and start speaking!")
229
 
230
  chatbot = gr.Chatbot(
231
  show_label=False,
232
  height=400,
233
+ type="messages"
234
  )
235
 
236
  with gr.Row():
237
+ audio_input = gr.Audio(
238
+ source="microphone",
239
+ type="numpy",
240
+ label="Speak here",
241
+ show_label=True
242
  )
243
  audio_output = gr.Audio(
244
  label="Sam's Voice",
 
246
  type="filepath"
247
  )
248
 
249
+ # Handle audio input
250
+ audio_input.stop_recording(
251
+ fn=chat,
252
+ inputs=[audio_input, chatbot],
253
+ outputs=[chatbot, audio_output],
254
+ queue=False
255
+ )
256
+
257
+ # Show welcome message on page load
258
+ demo.load_event(
259
+ fn=show_welcome,
260
+ inputs=None,
261
+ outputs=[chatbot, audio_output]
262
+ )
263
 
264
  # Launch the interface
265
  if __name__ == "__main__":
266
+ demo.launch(
267
+ server_name="0.0.0.0",
268
+ server_port=7860,
269
+ share=False
270
+ )