YoussefA7med committed on
Commit
8f1aebf
·
verified ·
1 Parent(s): dc42314

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -216
app.py CHANGED
@@ -1,271 +1,142 @@
 
1
  import requests
2
  import json
3
  import random
4
  from gradio_client import Client
5
- import gradio as gr
6
  from dotenv import load_dotenv
7
  import os
8
- import uuid
9
- from pydub import AudioSegment
10
  import speech_recognition as sr
 
11
 
12
# Load environment variables
load_dotenv()

# API settings
API_URL = "https://api.deepseek.com/v1/chat/completions"
API_KEY = os.getenv("DEEPSEEK_API_KEY")
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
TTS_PASSWORD = os.getenv("TTS_PASSWORD")

# Check the required variables BEFORE building any client, so a missing
# secret is reported with a clear message instead of surfacing as an error
# from inside the TTS client constructor.
required_env_vars = {
    "DEEPSEEK_API_KEY": API_KEY,
    "HUGGINGFACE_TOKEN": HF_TOKEN,
    "TTS_PASSWORD": TTS_PASSWORD,
}

for var_name, var_value in required_env_vars.items():
    if not var_value:
        raise ValueError(f"Missing required environment variable: {var_name}")

# TTS setup (model, voice and seed can be overridden through the environment)
TTS_MODEL = os.getenv("TTS_MODEL", "KindSynapse/Youssef-Ahmed-Private-Text-To-Speech-Unlimited")
TTS_CLIENT = Client(TTS_MODEL, hf_token=HF_TOKEN)
TTS_VOICE = os.getenv("TTS_VOICE", "coral")
TTS_SEED = int(os.getenv("TTS_SEED", "12345"))

# Speech recognition setup
recognizer = sr.Recognizer()
40
 
41
def convert_to_wav(input_path):
    """Convert an audio file to WAV format.

    Args:
        input_path: Path of the audio file to convert.

    Returns:
        Path of the newly written WAV file inside the ``uploads`` directory,
        or ``None`` if the conversion failed (the error is printed).
    """
    target_dir = "uploads"
    # A random UUID keeps converted files from colliding with each other.
    wav_path = os.path.join(target_dir, f"converted_{uuid.uuid4()}.wav")
    os.makedirs(target_dir, exist_ok=True)
    try:
        AudioSegment.from_file(input_path).export(wav_path, format="wav")
    except Exception as e:
        print(f"Error converting audio: {e}")
        return None
    return wav_path
52
 
53
def transcribe_audio(audio_path):
    """Convert speech in an audio file to text.

    Args:
        audio_path: Path of the audio file to read with ``sr.AudioFile``.

    Returns:
        The text returned by ``recognizer.recognize_google`` (requested with
        language ``en-US``), or ``None`` if any step failed (the error is
        printed).
    """
    try:
        with sr.AudioFile(audio_path) as audio_file:
            recorded = recognizer.record(audio_file)
            return recognizer.recognize_google(recorded, language='en-US')
    except Exception as e:
        print(f"Error in speech recognition: {e}")
        return None
63
 
64
# Main system prompt for the chatbot: asks the model to answer as a JSON
# object with the keys response / corrections / vocabulary / level /
# encouragement.
MAIN_SYSTEM_PROMPT = {
    "role": "system",
    "content": (
        "You are Sam, a friendly and encouraging English conversation tutor. "
        "Your responses must be in JSON with these keys: "
        "'response': Your main response to the user, "
        "'corrections': Grammar or pronunciation corrections if needed, "
        "'vocabulary': Suggested alternative words or phrases, "
        "'level': Assessment of user's English level (beginner/intermediate/advanced), "
        "'encouragement': A motivating comment. "
        "\n\nGuidelines:"
        "\n1. Adapt your language to their level"
        "\n2. Keep conversations natural and engaging"
        "\n3. Focus on their interests and context"
        "\n4. Be patient and supportive"
        "\n5. Provide gentle corrections"
        "\n6. Suggest vocabulary improvements naturally"
        "\n7. Keep responses clear and structured"
    ),
}

# Dedicated (short) prompt for the welcome message: asks for a JSON object
# with a single 'greeting' key.
WELCOME_SYSTEM_PROMPT = {
    "role": "system",
    "content": (
        "You are Sam, a friendly English tutor. Create a short, warm welcome message (2-3 sentences max) that: "
        "1) Introduces yourself briefly "
        "2) Asks for the user's name and what they'd like to practice. "
        # Trailing space added: adjacent literals are concatenated verbatim,
        # and without it the text read "...'greeting'.Example:".
        "Make it casual and friendly. Return ONLY the greeting in JSON format with a single key 'greeting'. "
        # The example uses double quotes so that it is itself valid JSON.
        "Example: {\"greeting\": \"Hi! I'm Sam, your English buddy. What's your name and what would you like to practice today? 😊\"}"
    ),
}
97
-
98
class EnglishTutor:
    """Conversation state and remote-service helpers for the tutor bot.

    Holds the running chat history that is sent to the chat-completions
    endpoint at ``API_URL`` and a small record of what is known about the
    learner.
    """

    # Seconds to wait for the chat API. Without a timeout a stalled
    # connection would block the calling UI callback indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        self.user_info = {
            "name": None,
            "level": None,
            "interests": None,
            "goals": None,
        }
        # Every conversation starts with the main system prompt.
        self.chat_history = [MAIN_SYSTEM_PROMPT]

    def _request_completion(self, messages, temperature):
        """Send ``messages`` to the chat API and return the reply text.

        Raises:
            requests.RequestException: On network failure, timeout, or a
                non-2xx HTTP status.
            KeyError / IndexError: If the response body lacks the expected
                ``choices[0].message.content`` structure.
        """
        response = requests.post(
            API_URL,
            headers={"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"},
            json={
                "model": "deepseek-chat",
                "messages": messages,
                "temperature": temperature,
                "response_format": {"type": "json_object"},
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        # Surface HTTP errors directly instead of failing later with a
        # confusing KeyError on an error payload.
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]

    def get_welcome_message(self):
        """Generate a fresh welcome message and return its 'greeting' text."""
        content = self._request_completion([WELCOME_SYSTEM_PROMPT], random.uniform(0.9, 1))
        return json.loads(content)["greeting"]

    def get_bot_response(self, user_message):
        """Send the user's message and return the bot's reply as parsed JSON.

        The user message and the raw assistant reply are both recorded in
        ``chat_history``. If the request or JSON parsing fails, the user
        message is removed again so the history never ends with an
        unanswered user turn, and the exception is re-raised.
        """
        self.chat_history.append({"role": "user", "content": user_message})
        try:
            bot_message = self._request_completion(self.chat_history, random.uniform(0.9, 1.0))
            bot_json = json.loads(bot_message)
        except Exception:
            self.chat_history.pop()
            raise

        # Update the stored user info when the reply carries a level.
        if "level" in bot_json:
            self.user_info["level"] = bot_json["level"]

        self.chat_history.append({"role": "assistant", "content": bot_message})
        return bot_json

    def text_to_speech(self, text):
        """Synthesize ``text`` through the TTS client and return its result.

        Surrounding whitespace and one pair of enclosing double quotes are
        stripped from ``text`` before it is sent.
        """
        text = text.strip()
        if text.startswith('"') and text.endswith('"'):
            text = text[1:-1]

        tts_emotion = "Warm, encouraging, and clear with a friendly and supportive tone."

        return TTS_CLIENT.predict(
            password=TTS_PASSWORD,
            prompt=text,
            voice=TTS_VOICE,
            emotion=tts_emotion,
            use_random_seed=True,
            specific_seed=TTS_SEED,
            api_name="/text_to_speech_app",
        )


# Create a single instance of EnglishTutor
tutor = EnglishTutor()
 
 
 
 
172
 
173
def format_response(response_dict):
    """Format the bot's response dictionary as an HTML string.

    Args:
        response_dict: Mapping that may contain the keys ``response``,
            ``corrections``, ``vocabulary`` and ``encouragement``.

    Returns:
        A ``<div>`` holding the main response paragraph followed by one
        paragraph per optional section that is present and non-empty.
        Missing keys are treated like empty values rather than raising
        ``KeyError``, because the model is not guaranteed to emit every key.
    """
    parts = [
        "<div style='font-size: 16px;'>",
        f"<p>{response_dict.get('response', '')}</p>",
    ]

    optional_sections = (
        ("corrections", "✍️ Corrections"),
        ("vocabulary", "📚 Vocabulary"),
        ("encouragement", "🌟 Encouragement"),
    )
    for key, label in optional_sections:
        value = response_dict.get(key)
        if value:
            parts.append(f"<p><b>{label}:</b> {value}</p>")

    parts.append("</div>")
    return "".join(parts)
189
 
190
def chat(audio, history):
    """Handle one voice turn of the conversation.

    Args:
        audio: Path of the recorded audio file, or ``None`` when there is no
            recording.
        history: Current chatbot history, or ``None``.

    Returns:
        A ``(history, audio_path)`` tuple. ``audio_path`` is ``None`` and
        ``history`` is returned untouched when there is no audio, conversion
        fails, or nothing could be transcribed.
    """
    if audio is None:
        # Nothing recorded: leave the conversation as it is.
        return history, None

    # Convert audio to WAV and transcribe
    wav_path = convert_to_wav(audio)
    if wav_path is None:
        return history, None

    try:
        audio_text = transcribe_audio(wav_path)
    finally:
        # Always clean up the temporary file, even if transcription raises.
        os.remove(wav_path)

    if not audio_text:
        return history, None

    # Get bot response
    response = tutor.get_bot_response(audio_text)

    # Generate audio for the main response
    audio_path = tutor.text_to_speech(response["response"])[0]

    # Format the complete response
    formatted_response = format_response(response)

    # The Chatbot component is created with type="messages", which expects
    # role/content dictionaries rather than (user, bot) tuples.
    history = history or []
    history.append({"role": "user", "content": audio_text})
    history.append({"role": "assistant", "content": formatted_response})

    return history, audio_path
222
 
223
def show_welcome():
    """Build the welcome message shown on startup.

    Returns:
        A ``(history, audio_path)`` tuple: a one-message chatbot history
        holding the greeting, and the first element of the TTS result for
        that greeting.
    """
    welcome = tutor.get_welcome_message()
    audio_path = tutor.text_to_speech(welcome)[0]
    # The Chatbot component is created with type="messages", so the greeting
    # must be a role/content dictionary rather than a (None, text) tuple.
    return [{"role": "assistant", "content": welcome}], audio_path
 
 
 
228
 
229
# Create Gradio interface
with gr.Blocks(css="footer {display: none}") as demo:
    gr.Markdown("# 🤖 Sam - Your English Tutor")
    gr.Markdown("Welcome to your personalized English learning session! Click the microphone and start speaking!")

    chatbot = gr.Chatbot(
        show_label=False,
        height=400,
        type="messages"
    )

    with gr.Row():
        audio_input = gr.Audio(
            label="Speak here",
            show_label=True,
            type="filepath",
            format="wav"
        )
        audio_output = gr.Audio(
            label="Sam's Voice",
            show_label=True,
            type="filepath"
        )

    # Handle audio input
    audio_input.change(
        fn=chat,
        inputs=[audio_input, chatbot],
        outputs=[chatbot, audio_output],
        queue=False
    )

    # Show welcome message on page load. The Blocks page-load listener is
    # `load`; `load_event` is not an event-registration method.
    demo.load(
        fn=show_welcome,
        inputs=None,
        outputs=[chatbot, audio_output]
    )

# Launch the interface
if __name__ == "__main__":
    demo.launch()
 
1
+ import gradio as gr
2
  import requests
3
  import json
4
  import random
5
  from gradio_client import Client
 
6
  from dotenv import load_dotenv
7
  import os
 
 
8
  import speech_recognition as sr
9
+ from pydub import AudioSegment
10
 
11
# Load environment variables
load_dotenv()

# API settings
API_KEY = os.getenv("DEEPSEEK_API_KEY")
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
TTS_PASSWORD = os.getenv("TTS_PASSWORD")

# Make sure the required variables are present, and name the ones that are
# not so a misconfigured deployment can be fixed without guesswork.
_missing_env_vars = [
    name
    for name, value in (
        ("DEEPSEEK_API_KEY", API_KEY),
        ("HUGGINGFACE_TOKEN", HF_TOKEN),
        ("TTS_PASSWORD", TTS_PASSWORD),
    )
    if not value
]
if _missing_env_vars:
    raise ValueError(
        "Missing required environment variables: " + ", ".join(_missing_env_vars)
    )

# TTS setup
TTS_CLIENT = Client("KindSynapse/Youssef-Ahmed-Private-Text-To-Speech-Unlimited", hf_token=HF_TOKEN)

# Speech-to-text engine setup
recognizer = sr.Recognizer()
 
 
 
 
 
 
 
 
 
28
 
29
def convert_audio_to_text(audio_path):
    """Convert speech in an audio file to text.

    Non-WAV input is first converted to a temporary WAV file, which is
    deleted again before returning.

    Args:
        audio_path: Path of the recorded audio file.

    Returns:
        The recognized text, or ``None`` if conversion or recognition failed
        (the error is printed).
    """
    import tempfile

    temp_wav_path = None
    try:
        # Convert to WAV if it is not one already. The check is
        # case-insensitive so that e.g. "clip.WAV" is not re-encoded.
        if not audio_path.lower().endswith('.wav'):
            sound = AudioSegment.from_file(audio_path)
            # Use a private temp file instead of writing next to the input,
            # so the converted copy can be removed reliably afterwards.
            fd, temp_wav_path = tempfile.mkstemp(suffix='.wav')
            os.close(fd)
            sound.export(temp_wav_path, format='wav')
            audio_path = temp_wav_path

        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)
        return recognizer.recognize_google(audio, language='en-US')
    except Exception as e:
        print(f"Error in speech recognition: {str(e)}")
        return None
    finally:
        # Do not leave converted copies behind, on success or on failure.
        if temp_wav_path is not None and os.path.exists(temp_wav_path):
            os.remove(temp_wav_path)
46
 
47
def get_bot_response(message):
    """Get the tutor bot's reply to a user message.

    Args:
        message: The user's text.

    Returns:
        The assistant's reply text, or a fixed apology string if the request
        failed for any reason (the error is printed).
    """
    try:
        response = requests.post(
            "https://api.deepseek.com/v1/chat/completions",
            headers={"Authorization": f"Bearer {API_KEY}"},
            json={
                "model": "deepseek-chat",
                "messages": [
                    {
                        "role": "system",
                        "content": "You are Sam, a friendly English tutor. Keep responses short and encouraging."
                    },
                    {
                        "role": "user",
                        "content": message
                    }
                ],
                "temperature": 0.7
            },
            # Without a timeout a stalled connection would block the UI
            # callback indefinitely.
            timeout=30,
        )
        # Report HTTP errors as such rather than as a KeyError raised while
        # indexing into an error payload.
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        print(f"Error getting bot response: {str(e)}")
        return "Sorry, I couldn't process that. Could you try again?"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
def text_to_speech(text):
    """Convert text to speech through the TTS client.

    Args:
        text: The text to synthesize.

    Returns:
        The first element of the client's result when it is a list or tuple,
        otherwise the result itself; ``None`` if the call failed (the error
        is printed).
    """
    request_args = {
        "password": TTS_PASSWORD,
        "prompt": text,
        "voice": "coral",
        "emotion": "Warm and friendly",
        "use_random_seed": True,
        "specific_seed": 12345,
        "api_name": "/text_to_speech_app",
    }
    try:
        tts_output = TTS_CLIENT.predict(**request_args)
        if isinstance(tts_output, (list, tuple)):
            return tts_output[0]
        return tts_output
    except Exception as error:
        print(f"Error in text to speech: {str(error)}")
        return None
89
 
90
def chat_function(audio, history):
    """Main conversation callback: voice in, text and voice out.

    Args:
        audio: Path of the recorded audio file, or ``None``.
        history: Current chatbot history as (user, bot) pairs, or ``None``.

    Returns:
        A ``(history, audio_response)`` tuple. When there is no audio,
        nothing could be transcribed, or an unexpected error occurs, the
        incoming history is returned with ``None`` as the audio.
    """
    try:
        # No recording means no transcript; both cases leave the chat as is.
        transcript = convert_audio_to_text(audio) if audio is not None else None
        if not transcript:
            return history, None

        # Ask the bot, then voice its reply.
        reply = get_bot_response(transcript)
        spoken_reply = text_to_speech(reply)

        # Record the exchange in the conversation.
        updated_history = history or []
        updated_history.append((transcript, reply))
        return updated_history, spoken_reply
    except Exception as error:
        print(f"Error in chat function: {str(error)}")
        return history, None
116
 
117
# Build the user interface
with gr.Blocks() as demo:
    gr.Markdown("# 🎓 English Tutor Chatbot")

    chatbot = gr.Chatbot(height=400)

    # Microphone input and spoken reply sit side by side.
    with gr.Row():
        audio_input = gr.Audio(label="Your Voice", type="filepath")
        audio_output = gr.Audio(label="Tutor's Voice", type="filepath")

    # Wire up events: every change of the recording runs one chat turn.
    audio_input.change(
        chat_function,
        inputs=[audio_input, chatbot],
        outputs=[chatbot, audio_output],
    )

# Run the application
if __name__ == "__main__":
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)