YoussefA7med commited on
Commit
d4e37c6
·
verified ·
1 Parent(s): f8538a0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -63
app.py CHANGED
@@ -8,25 +8,18 @@ import os
8
  import speech_recognition as sr
9
  from pydub import AudioSegment
10
 
11
- # تحميل المتغيرات البيئية
12
  load_dotenv()
13
 
14
- # إعدادات API
15
  API_KEY = os.getenv("DEEPSEEK_API_KEY")
16
  HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
17
  TTS_PASSWORD = os.getenv("TTS_PASSWORD")
18
 
19
- # التأكد من وجود المتغيرات المطلوبة
20
  if not all([API_KEY, HF_TOKEN, TTS_PASSWORD]):
21
  raise ValueError("Missing required environment variables!")
22
 
23
- # إعداد TTS
24
  TTS_CLIENT = Client("KindSynapse/Youssef-Ahmed-Private-Text-To-Speech-Unlimited", hf_token=HF_TOKEN)
25
-
26
- # إعداد محرك تحويل الكلام لنص
27
  recognizer = sr.Recognizer()
28
 
29
- # البرومبت الرئيسي
30
  MAIN_SYSTEM_PROMPT = {
31
  "role": "system",
32
  "content": """You are Sam, an intelligent and adaptive English tutor. Your responses must be in JSON format with these keys:
@@ -36,7 +29,6 @@ MAIN_SYSTEM_PROMPT = {
36
  'level_assessment': Current assessment of user's English level (beginner/intermediate/advanced),
37
  'encouragement': A motivating comment,
38
  'context_memory': Important details about the user to remember (interests, job, etc.)
39
-
40
  Guidelines:
41
  1. Maintain natural conversation while gathering user information
42
  2. Adapt language complexity to user's level
@@ -52,11 +44,9 @@ MAIN_SYSTEM_PROMPT = {
52
  - If user mentions hobbies → Engage in that topic
53
  - For beginners → Use simple words and short sentences
54
  - For intermediate/advanced → Introduce more complex vocabulary
55
-
56
  Always maintain conversation history and adapt based on it."""
57
  }
58
 
59
- # برومبت الترحيب
60
  WELCOME_PROMPT = {
61
  "role": "system",
62
  "content": """Create a warm welcome message that:
@@ -80,10 +70,8 @@ class EnglishTutor:
80
  "profession": None,
81
  "goals": None
82
  }
83
- self.is_first_interaction = True
84
 
85
  def get_welcome_message(self):
86
- """توليد رسالة ترحيب"""
87
  try:
88
  response = requests.post(
89
  "https://api.deepseek.com/v1/chat/completions",
@@ -102,9 +90,7 @@ class EnglishTutor:
102
  return "Hi! I'm Sam, your English tutor. What's your name?"
103
 
104
  def get_bot_response(self, user_message):
105
- """الحصول على رد من البوت"""
106
  try:
107
- # إضافة رسالة المستخدم للمحادثة
108
  self.chat_history.append({"role": "user", "content": user_message})
109
 
110
  response = requests.post(
@@ -120,16 +106,13 @@ class EnglishTutor:
120
 
121
  bot_response = json.loads(response.json()["choices"][0]["message"]["content"])
122
 
123
- # تحديث معلومات المستخدم
124
  if "level_assessment" in bot_response:
125
  self.user_info["level"] = bot_response["level_assessment"]
126
  if "context_memory" in bot_response:
127
  self._update_user_info(bot_response["context_memory"])
128
 
129
- # تنسيق الرد
130
  formatted_response = self._format_response(bot_response)
131
 
132
- # إضافة رد البوت للمحادثة
133
  self.chat_history.append({"role": "assistant", "content": json.dumps(bot_response)})
134
 
135
  return formatted_response
@@ -138,14 +121,12 @@ class EnglishTutor:
138
  return "I apologize, but I couldn't process that properly. Could you try again?"
139
 
140
  def _update_user_info(self, context_memory):
141
- """تحديث معلومات المستخدم من الذاكرة السياقية"""
142
  if isinstance(context_memory, dict):
143
  for key in self.user_info:
144
  if key in context_memory:
145
  self.user_info[key] = context_memory[key]
146
 
147
  def _format_response(self, response_dict):
148
- """تنسيق الرد بشكل جميل"""
149
  formatted = response_dict["response"]
150
 
151
  if response_dict.get("corrections"):
@@ -160,9 +141,7 @@ class EnglishTutor:
160
  return formatted
161
 
162
  def convert_audio_to_text(audio_path):
163
- """تحويل الصوت إلى نص"""
164
  try:
165
- # تحويل الصوت إلى WAV إذا لم يكن كذلك
166
  if not audio_path.endswith('.wav'):
167
  audio = AudioSegment.from_file(audio_path)
168
  wav_path = audio_path + '.wav'
@@ -178,7 +157,6 @@ def convert_audio_to_text(audio_path):
178
  return None
179
 
180
  def text_to_speech(text):
181
- """تحويل النص إلى صوت"""
182
  try:
183
  result = TTS_CLIENT.predict(
184
  password=TTS_PASSWORD,
@@ -194,72 +172,96 @@ def text_to_speech(text):
194
  print(f"Error in text to speech: {str(e)}")
195
  return None
196
 
197
- # إنشاء كائن المعلم
198
  tutor = EnglishTutor()
199
 
200
- def chat_function(audio, history):
201
- """الدالة الرئيسية للمحادثة"""
202
  try:
203
- # إذا كانت أول محادثة، نعرض رسالة الترحيب
204
- if not history:
205
- welcome = tutor.get_welcome_message()
206
- welcome_audio = text_to_speech(welcome)
207
- return [(None, welcome)], welcome_audio
 
208
 
209
- # إذا لم يكن هناك صوت، نرجع بدون تغيير
 
210
  if audio is None:
211
- return history, None
212
 
213
- # تحويل الصوت إلى نص
214
  user_message = convert_audio_to_text(audio)
215
  if not user_message:
216
- return history, None
217
 
218
- # الحصول على رد البوت
219
  bot_response = tutor.get_bot_response(user_message)
220
-
221
- # تحويل رد البوت إلى صوت
222
  audio_response = text_to_speech(bot_response)
223
 
224
- # تحديث المحادثة
225
  history = history or []
226
  history.append((user_message, bot_response))
227
 
228
- return history, audio_response
 
 
229
  except Exception as e:
230
- print(f"Error in chat function: {str(e)}")
231
- return history, None
 
 
 
 
 
232
 
233
- # إنشاء واجهة المستخدم
234
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
235
  gr.Markdown("# 🎓 English Learning Assistant")
236
  gr.Markdown("Speak naturally and I'll help you improve your English!")
237
 
238
- chatbot = gr.Chatbot(
239
- height=400,
240
- bubble_full_width=False,
241
- show_label=False
242
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
 
244
  with gr.Row():
245
- audio_input = gr.Audio(
246
- label="Your Voice",
247
- type="filepath",
248
- show_label=True
249
- )
250
- audio_output = gr.Audio(
251
- label="Tutor's Voice",
252
- type="filepath",
253
- show_label=True
254
- )
255
-
256
- # ربط الأحداث
257
  audio_input.change(
258
- chat_function,
259
- inputs=[audio_input, chatbot],
260
- outputs=[chatbot, audio_output]
 
 
 
 
 
 
 
 
 
 
261
  )
262
 
263
- # تشغيل التطبيق
264
  if __name__ == "__main__":
265
  demo.launch()
 
8
  import speech_recognition as sr
9
  from pydub import AudioSegment
10
 
 
11
  load_dotenv()
12
 
 
13
  API_KEY = os.getenv("DEEPSEEK_API_KEY")
14
  HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
15
  TTS_PASSWORD = os.getenv("TTS_PASSWORD")
16
 
 
17
  if not all([API_KEY, HF_TOKEN, TTS_PASSWORD]):
18
  raise ValueError("Missing required environment variables!")
19
 
 
20
  TTS_CLIENT = Client("KindSynapse/Youssef-Ahmed-Private-Text-To-Speech-Unlimited", hf_token=HF_TOKEN)
 
 
21
  recognizer = sr.Recognizer()
22
 
 
23
  MAIN_SYSTEM_PROMPT = {
24
  "role": "system",
25
  "content": """You are Sam, an intelligent and adaptive English tutor. Your responses must be in JSON format with these keys:
 
29
  'level_assessment': Current assessment of user's English level (beginner/intermediate/advanced),
30
  'encouragement': A motivating comment,
31
  'context_memory': Important details about the user to remember (interests, job, etc.)
 
32
  Guidelines:
33
  1. Maintain natural conversation while gathering user information
34
  2. Adapt language complexity to user's level
 
44
  - If user mentions hobbies → Engage in that topic
45
  - For beginners → Use simple words and short sentences
46
  - For intermediate/advanced → Introduce more complex vocabulary
 
47
  Always maintain conversation history and adapt based on it."""
48
  }
49
 
 
50
  WELCOME_PROMPT = {
51
  "role": "system",
52
  "content": """Create a warm welcome message that:
 
70
  "profession": None,
71
  "goals": None
72
  }
 
73
 
74
  def get_welcome_message(self):
 
75
  try:
76
  response = requests.post(
77
  "https://api.deepseek.com/v1/chat/completions",
 
90
  return "Hi! I'm Sam, your English tutor. What's your name?"
91
 
92
  def get_bot_response(self, user_message):
 
93
  try:
 
94
  self.chat_history.append({"role": "user", "content": user_message})
95
 
96
  response = requests.post(
 
106
 
107
  bot_response = json.loads(response.json()["choices"][0]["message"]["content"])
108
 
 
109
  if "level_assessment" in bot_response:
110
  self.user_info["level"] = bot_response["level_assessment"]
111
  if "context_memory" in bot_response:
112
  self._update_user_info(bot_response["context_memory"])
113
 
 
114
  formatted_response = self._format_response(bot_response)
115
 
 
116
  self.chat_history.append({"role": "assistant", "content": json.dumps(bot_response)})
117
 
118
  return formatted_response
 
121
  return "I apologize, but I couldn't process that properly. Could you try again?"
122
 
123
  def _update_user_info(self, context_memory):
 
124
  if isinstance(context_memory, dict):
125
  for key in self.user_info:
126
  if key in context_memory:
127
  self.user_info[key] = context_memory[key]
128
 
129
  def _format_response(self, response_dict):
 
130
  formatted = response_dict["response"]
131
 
132
  if response_dict.get("corrections"):
 
141
  return formatted
142
 
143
  def convert_audio_to_text(audio_path):
 
144
  try:
 
145
  if not audio_path.endswith('.wav'):
146
  audio = AudioSegment.from_file(audio_path)
147
  wav_path = audio_path + '.wav'
 
157
  return None
158
 
159
  def text_to_speech(text):
 
160
  try:
161
  result = TTS_CLIENT.predict(
162
  password=TTS_PASSWORD,
 
172
  print(f"Error in text to speech: {str(e)}")
173
  return None
174
 
 
175
  tutor = EnglishTutor()
176
 
177
def initialize_chat():
    """Build the opening UI state: greeting in the chat, spoken greeting, empty transcript.

    Returns a (chat history, audio path or None, transcript string) triple
    matching the outputs wired to ``demo.load``. Falls back to a canned
    greeting with no audio if the welcome generation fails.
    """
    try:
        welcome = tutor.get_welcome_message()
        return [(None, welcome)], text_to_speech(welcome), ""
    except Exception as e:
        print(f"Error initializing chat: {str(e)}")
        return [(None, "Hi! I'm Sam, your English tutor. What's your name?")], None, ""
185
 
186
def process_audio(audio, history, transcript):
    """Run one voice turn: transcribe the recording, ask the tutor, speak the reply.

    Returns (updated chat history, reply-audio path or None, updated transcript).
    On missing/failed input or any error, the incoming state is returned
    unchanged so the UI keeps what it had.
    """
    try:
        # Nothing recorded, or transcription produced nothing usable.
        heard = convert_audio_to_text(audio) if audio is not None else None
        if not heard:
            return history, None, transcript

        reply = tutor.get_bot_response(heard)
        reply_audio = text_to_speech(reply)

        history = history or []
        history.append((heard, reply))
        transcript = transcript + f"\n\n🎤 You said: {heard}\n🤖 Sam replied: {reply}"

        return history, reply_audio, transcript
    except Exception as e:
        print(f"Error in process_audio: {str(e)}")
        return history, None, transcript
207
+
208
def clear_chat():
    """Drop the current session and start over with a fresh welcome state."""
    global tutor
    # Replacing the module-level tutor discards chat history and user profile.
    tutor = EnglishTutor()
    return initialize_chat()
212
 
 
213
# --- Gradio UI: chat on the left, live transcript on the right ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎓 English Learning Assistant")
    gr.Markdown("Speak naturally and I'll help you improve your English!")

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=400, bubble_full_width=False, show_label=False)

            with gr.Row():
                audio_input = gr.Audio(label="Your Voice", type="filepath", show_label=True)
                audio_output = gr.Audio(label="Tutor's Voice", type="filepath", show_label=True)

        with gr.Column(scale=1):
            gr.Markdown("### 📝 Conversation Transcript")
            transcript_display = gr.Textbox(
                lines=15,
                max_lines=15,
                show_label=False,
                interactive=False,
                placeholder="Your conversation transcript will appear here...",
            )

    with gr.Row():
        clear_btn = gr.Button("🔄 Clear Chat", variant="secondary")

    # A new recording drives one full conversation turn.
    audio_input.change(
        process_audio,
        inputs=[audio_input, chatbot, transcript_display],
        outputs=[chatbot, audio_output, transcript_display],
    )

    # Reset the session and replay the welcome state.
    clear_btn.click(clear_chat, outputs=[chatbot, audio_output, transcript_display])

    # Greet the user as soon as the page loads.
    demo.load(initialize_chat, outputs=[chatbot, audio_output, transcript_display])

if __name__ == "__main__":
    demo.launch()