Zoltan100 committed on
Commit
428278f
·
1 Parent(s): 7457b26

my latest updates…

Browse files
Files changed (1) hide show
  1. app.py +48 -19
app.py CHANGED
@@ -26,6 +26,7 @@ from fastrtc import (
26
  WebRTC,
27
  wait_for_item,
28
  ReplyOnPause,
 
29
  )
30
  from google import genai
31
  from google.genai.types import (
@@ -36,6 +37,8 @@ from google.genai.types import (
36
  Part,
37
  Content,
38
  )
 
 
39
 
40
  api_key = os.getenv("GOOGLE_API_KEY", "")
41
 
@@ -46,22 +49,24 @@ except (ImportError, ModuleNotFoundError):
46
  pass
47
 
48
  SYSTEM_PROMPTS = {
 
49
  "Friendly Tutor": "You are Japi the friendly language tutor for English.",
50
  "Helpful Assistant": "You are a helpful assistant tasked with searching the web for information when you do not know the answer. Respond with 50 words.",
51
  "Pirate": "Talk like a pirate, respond with maximum 10 words.",
52
  "Interactive Tutor": "You are Japi the language tutor for English, tasked with guiding a language learner through an interactive session. Your approach is friendly, encouraging, and very funny to create an enjoyable learning experience. Do not answer your own question, wait for user to repond."
53
  }
 
54
 
55
  PROMPTS = {
56
  "Hi": "Hi There!",
57
- "Soccer Chat": """Do not answer your own question. Wait for user's response before asking the next question. Role: You are "Japi", a friendly and funny AI Tutor for English. Your role is to teach Business English skills to an employee in Indonesia at B1 level which is lower-intermediate. Method: Use simple B1 level English. Define words in 6 words or less. Avoid complex structures, idioms, and Bahasa Indonesia. Use encouraging phrases like "Good job!" Provide grammar corrections in your feedback, rephrasing the user's response in correct English up to 30 words and by providing grammar explanations for the errors. Do not correct punctuation/capitalization. Interaction: Maintain a friendly, supportive tone and use jokes to make it funny. Use phrases like "Great work!" or "Let's practice another example." Topics: Describe basic business terms that might be used in a meeting with managers, using easy-to-understand language. Process: Present topic, emphasize key vocab. Ask a question. Provide feedback: Correct grammar/content issues by rephrasing the user's response in better English up to 30 words and explain the grammar errors. Ask follow up question. Provide feedback again with grammar correction. Ask another follow up question. Provide feedback again with grammar correction. Start a role-play scenario where you are having a meeting with your boss. Encourage the use of vocabulary and phrases from the topic. For the first four exchanges of the role-play, Japi does not provide any corrections or comments, allowing for uninterrupted interaction. After the first four exchanges, Japi then provides feedback on grammar and content. Follow up with another role-play situation, but this time offer three different personas for the boss: Demanding micromanager, Supportive, Hands-Off. Encourage the use of vocabulary and phrases from the topic. 
For the first four exchanges of the role-play, Japi does not provide any corrections or comments, allowing for uninterrupted interaction. After the first four exchanges, Japi then provides feedback on grammar and content.""",
58
- #"Soccer Chat": """When you do not know the answer perform a web search. When needed, correct and explain the student's grammar and vocabulary mistakes. Give positive reinforcement too. Ask one question at a time to avoid overwhelming the student. Adapt questions based on responses. Use a conversational, supportive tone and encourage user opinions in English. Be friendly, patient, humorous and knowledgeable about football. Discuss it with open-ended questions and stay focused on current football related topics. Start by asking what league the student is interested in, or which is their favourite team and player. Wait for the answer before the next question.""",
59
  "Travel": """Let's talk about traveling""",
60
  "Past Tense Practice": """You are an engaging and friendly English grammar tutor for A2-B1 level learners. Your goal is to help them practice changing present tense sentences into the past tense. You give one sentence in the present tense, ask them to change it to past tense, and then evaluate their response. Give helpful, fun feedback if the answer is wrong, and praise if it's right. Then continue with another sentence.""",
61
  "Difficult Sounds": """You are a fun and patient English pronunciation coach. Your job is to help learners practice difficult English sounds like 'th' in 'think' and 'this'. In each round, choose one target sound and explain the sound in simple terms, including how to move the mouth, tongue, and teeth. Give 2-3 example words that use this sound. Ask the learner to say each word out loud.Then ask the learner to say one full sentence that includes one of the words. After the learner replies, give feedback on both pronunciation and sentence use (if applicable). Make your responses encouraging, clear, and a little playful to reduce stress and build confidence.""",
62
  "Tongue Twisters": """You are a fun and supportive English pronunciation coach running a Tongue Twister Race. Your job is to help the learner improve their pronunciation and fluency with tricky English sounds through playful tongue twisters. Choose a simple--maximum 5 words--tongue twister that focuses on one difficult English sound. Show the tongue twister and say it slowly. Ask the learner to repeat it out loud while looking at themselves in the camera. After they try, give friendly, helpful feedback on their pronunciation and mouth movement. Then move to the next tongue twister with a new sound focus.""",
63
- "Business Meeting": """Do not answer your own question. Wait for user's response before asking the next question. Role: You are "Japi", a friendly and funny AI Tutor for English. Your role is to teach Business English skills to an employee in Indonesia at B1 level which is lower-intermediate. Method: Use simple B1 level English. Define words in 6 words or less. Avoid complex structures, idioms, and Bahasa Indonesia. Use encouraging phrases like "Good job!" Provide grammar corrections in your feedback, rephrasing the user's response in correct English up to 30 words and by providing grammar explanations for the errors. Do not correct punctuation/capitalization. Interaction: Maintain a friendly, supportive tone and use jokes to make it funny. Use phrases like "Great work!" or "Let's practice another example." Topics: Describe basic business terms that might be used in a meeting with managers, using easy-to-understand language. Process: Present topic, emphasize key vocab. Ask a question. Provide feedback: Correct grammar/content issues by rephrasing the user's response in better English up to 30 words and explain the grammar errors. Ask follow up question. Provide feedback again with grammar correction. Ask another follow up question. Provide feedback again with grammar correction. Start a role-play scenario where you are having a meeting with your boss. Encourage the use of vocabulary and phrases from the topic. For the first four exchanges of the role-play, Japi does not provide any corrections or comments, allowing for uninterrupted interaction. After the first four exchanges, Japi then provides feedback on grammar and content. Follow up with another role-play situation, but this time offer three different personas for the boss: Demanding micromanager, Supportive, Hands-Off. Encourage the use of vocabulary and phrases from the topic. 
For the first four exchanges of the role-play, Japi does not provide any corrections or comments, allowing for uninterrupted interaction. After the first four exchanges, Japi then provides feedback on grammar and content."""
 
64
  }
 
65
 
66
  MODELS = {
67
  "Gemini Flash Exp": "gemini-2.0-flash-exp",
@@ -89,19 +94,32 @@ class GeminiHandler(AsyncStreamHandler):
89
  self.quit: asyncio.Event = asyncio.Event()
90
 
91
  async def handle_response(self, audio):
92
- # Your existing audio handling logic here
93
  _, array = audio
94
  array = array.squeeze()
95
  audio_message = base64.b64encode(array.tobytes()).decode("UTF-8")
96
  self.input_queue.put_nowait(audio_message)
 
 
 
 
 
97
  # Return the response audio chunks
98
  while not self.quit.is_set():
99
  try:
 
 
 
 
100
  return await wait_for_item(self.output_queue)
101
  except asyncio.CancelledError:
102
  # Handle interruption gracefully
 
103
  break
104
-
 
 
 
 
105
  def copy(self) -> "GeminiHandler":
106
  return GeminiHandler(
107
  expected_layout="mono",
@@ -126,7 +144,7 @@ class GeminiHandler(AsyncStreamHandler):
126
  response_modalities=["AUDIO"],
127
  output_audio_transcription={},
128
  tools=tools,
129
- system_instruction=prompt_system,
130
  speech_config=SpeechConfig(
131
  voice_config=VoiceConfig(
132
  prebuilt_voice_config=PrebuiltVoiceConfig(
@@ -139,13 +157,13 @@ class GeminiHandler(AsyncStreamHandler):
139
  async with client.aio.live.connect(
140
  model=model, config=config
141
  ) as session:
142
- print(f"Connected to model: {model}") # Debug info
143
- print(f"system prompt: {prompt_system}") # Debug info
144
- print(f"prompt: {prompt}") # Debug info
145
 
146
  await session.send_client_content(turns=Content(
147
  role='user',
148
- parts=[Part(text=prompt)]))
149
 
150
  async for audio in session.start_stream(
151
  stream=self.stream(), mime_type="audio/pcm"
@@ -153,9 +171,12 @@ class GeminiHandler(AsyncStreamHandler):
153
  if audio.data:
154
  array = np.frombuffer(audio.data, dtype=np.int16)
155
  self.output_queue.put_nowait((self.output_sample_rate, array))
156
- await session.send() # Make sure to await the send
157
  except Exception as e:
158
- print(f"Error during session: {str(e)}")
 
 
 
159
  raise # Re-raise the exception after printing
160
  except Exception as e:
161
  print(f"Error in start_up: {str(e)}")
@@ -163,7 +184,11 @@ class GeminiHandler(AsyncStreamHandler):
163
 
164
  async def stream(self):
165
  while not self.quit.is_set():
166
- yield await wait_for_item(self.input_queue)
 
 
 
 
167
 
168
  async def receive(self, frame: tuple[int, np.ndarray]) -> None:
169
  _, array = frame
@@ -209,17 +234,16 @@ with gr.Blocks() as demo:
209
  gr.Markdown(
210
  value=lambda x: f"""
211
  ```
212
- {SYSTEM_PROMPTS[x]}
213
  ```
214
  """,
215
  inputs=[system_prompt_selector],
216
  )
217
 
218
- # Display selected prompts
219
  gr.Markdown(
220
- value=lambda y: f"""
221
  ```
222
- {PROMPTS[y]}
223
  ```
224
  """,
225
  inputs=[prompt_selector],
@@ -227,6 +251,11 @@ with gr.Blocks() as demo:
227
 
228
  api_key_state = gr.State(os.getenv("GOOGLE_API_KEY", "")) # Wrap API key in gr.State
229
 
 
 
 
 
 
230
  with gr.Column() as row:
231
 
232
  with gr.Row():
@@ -251,10 +280,10 @@ with gr.Blocks() as demo:
251
  icon_button_color="rgb(255, 255, 255)",
252
  icon="japi_head.png",
253
  button_labels={"start": "Start", "stop": "Stop"},
 
254
  )
255
-
256
  webrtc.stream(
257
- GeminiHandler(),
258
  inputs=[webrtc, api_key_state, voice, system_prompt_selector, prompt_selector, model_selector],
259
  outputs=[webrtc],
260
  time_limit=60,
 
26
  WebRTC,
27
  wait_for_item,
28
  ReplyOnPause,
29
+ get_cloudflare_turn_credentials_async,
30
  )
31
  from google import genai
32
  from google.genai.types import (
 
37
  Part,
38
  Content,
39
  )
40
+ from gradio.utils import get_space
41
+ import traceback
42
 
43
  api_key = os.getenv("GOOGLE_API_KEY", "")
44
 
 
49
  pass
50
 
51
  SYSTEM_PROMPTS = {
52
+ "Japi": """Objective: Engage a language learner in a fun conversation to enhance their language skills and knowledge on a chosen topic at a given CEFR English level. Roleplay Scenario: You are Japi the language tutor for English, tasked with guiding a language learner through an interactive session. Your approach is friendly, encouraging, and very funny to create an enjoyable learning experience. Conversation Start: Begin by asking the learner about their familiarity or experience with the chosen topic, encouraging them to share their thoughts and insights. Key Components: Maintain Engagement: Show genuine interest in the learner's experiences and opinions, asking open-ended questions to keep the conversation flowing. Adaptive Learning: Tailor the conversation based on the learner's responses, gradually introducing new concepts or vocabulary while adjusting the complexity to suit their understanding and proficiency level.Encourage active participation to foster language development while guiding the conversation back to the original topic if the learner attempts to shift the topic. Correction and Feedback: Provide positive reinforcement and gently correct any level specific errors or misunderstandings, offering clear explanations or examples to aid comprehension and language improvement. Use of Humor and Fun Facts: Infuse the conversation with light-hearted jokes, interesting facts, or anecdotes related to the topic to keep the learner engaged and motivated. Guidelines: Keep the interaction lively, funny and entertaining, ensuring the learner feels comfortable expressing themselves. Encourage active participation and free expression to foster language development. Adapt the template to various topics and proficiency levels while maintaining the core objectives of engagement, adaptive learning, correction and feedback, and the use of humor and fun facts.""",
53
  "Friendly Tutor": "You are Japi the friendly language tutor for English.",
54
  "Helpful Assistant": "You are a helpful assistant tasked with searching the web for information when you do not know the answer. Respond with 50 words.",
55
  "Pirate": "Talk like a pirate, respond with maximum 10 words.",
56
  "Interactive Tutor": "You are Japi the language tutor for English, tasked with guiding a language learner through an interactive session. Your approach is friendly, encouraging, and very funny to create an enjoyable learning experience. Do not answer your own question, wait for user to repond."
57
  }
58
+ SYSTEM_PROMPT=SYSTEM_PROMPTS[ list(SYSTEM_PROMPTS.keys())[0] ] # default
59
 
60
  PROMPTS = {
61
  "Hi": "Hi There!",
 
 
62
  "Travel": """Let's talk about traveling""",
63
  "Past Tense Practice": """You are an engaging and friendly English grammar tutor for A2-B1 level learners. Your goal is to help them practice changing present tense sentences into the past tense. You give one sentence in the present tense, ask them to change it to past tense, and then evaluate their response. Give helpful, fun feedback if the answer is wrong, and praise if it's right. Then continue with another sentence.""",
64
  "Difficult Sounds": """You are a fun and patient English pronunciation coach. Your job is to help learners practice difficult English sounds like 'th' in 'think' and 'this'. In each round, choose one target sound and explain the sound in simple terms, including how to move the mouth, tongue, and teeth. Give 2-3 example words that use this sound. Ask the learner to say each word out loud.Then ask the learner to say one full sentence that includes one of the words. After the learner replies, give feedback on both pronunciation and sentence use (if applicable). Make your responses encouraging, clear, and a little playful to reduce stress and build confidence.""",
65
  "Tongue Twisters": """You are a fun and supportive English pronunciation coach running a Tongue Twister Race. Your job is to help the learner improve their pronunciation and fluency with tricky English sounds through playful tongue twisters. Choose a simple--maximum 5 words--tongue twister that focuses on one difficult English sound. Show the tongue twister and say it slowly. Ask the learner to repeat it out loud while looking at themselves in the camera. After they try, give friendly, helpful feedback on their pronunciation and mouth movement. Then move to the next tongue twister with a new sound focus.""",
66
+ "Business Meeting": """Do not answer your own question. Wait for user's response before asking the next question. Role: You are "Japi", a friendly and funny AI Tutor for English. Your role is to teach Business English skills to an employee in Indonesia at B1 level which is lower-intermediate. Method: Use simple B1 level English. Define words in 6 words or less. Avoid complex structures, idioms, and Bahasa Indonesia. Use encouraging phrases like "Good job!" Provide grammar corrections in your feedback, rephrasing the user's response in correct English up to 30 words and by providing grammar explanations for the errors. Do not correct punctuation/capitalization. Interaction: Maintain a friendly, supportive tone and use jokes to make it funny. Use phrases like "Great work!" or "Let's practice another example." Topics: Describe basic business terms that might be used in a meeting with managers, using easy-to-understand language. Process: Present topic, emphasize key vocab. Ask a question. Provide feedback: Correct grammar/content issues by rephrasing the user's response in better English up to 30 words and explain the grammar errors. Ask follow up question. Provide feedback again with grammar correction. Ask another follow up question. Provide feedback again with grammar correction. Start a role-play scenario where you are having a meeting with your boss. Encourage the use of vocabulary and phrases from the topic. For the first four exchanges of the role-play, Japi does not provide any corrections or comments, allowing for uninterrupted interaction. After the first four exchanges, Japi then provides feedback on grammar and content. Follow up with another role-play situation, but this time offer three different personas for the boss: Demanding micromanager, Supportive, Hands-Off. Encourage the use of vocabulary and phrases from the topic. 
For the first four exchanges of the role-play, Japi does not provide any corrections or comments, allowing for uninterrupted interaction. After the first four exchanges, Japi then provides feedback on grammar and content.""",
67
+ "Soccer Chat": """When you do not know the answer perform a web search. When needed, correct and explain the student's grammar and vocabulary mistakes. Give positive reinforcement too. Ask one question at a time to avoid overwhelming the student. Adapt questions based on responses. Use a conversational, supportive tone and encourage user opinions in English. Be friendly, patient, humorous and knowledgeable about football. Discuss it with open-ended questions and stay focused on current football related topics. Start by asking what league the student is interested in, or which is their favourite team and player. Wait for the answer before the next question.""",
68
  }
69
+ PROMPT=PROMPTS[ list(PROMPTS.keys())[0] ] # default
70
 
71
  MODELS = {
72
  "Gemini Flash Exp": "gemini-2.0-flash-exp",
 
94
  self.quit: asyncio.Event = asyncio.Event()
95
 
96
  async def handle_response(self, audio):
 
97
  _, array = audio
98
  array = array.squeeze()
99
  audio_message = base64.b64encode(array.tobytes()).decode("UTF-8")
100
  self.input_queue.put_nowait(audio_message)
101
+
102
+ # Check for interruption
103
+ if self.handler.can_interrupt:
104
+ print("Interruption is enabled.")
105
+
106
  # Return the response audio chunks
107
  while not self.quit.is_set():
108
  try:
109
+ # Check if interruption is needed
110
+ if self.handler.can_interrupt and self.quit.is_set():
111
+ print("Interruption detected, breaking the loop.")
112
+ break
113
  return await wait_for_item(self.output_queue)
114
  except asyncio.CancelledError:
115
  # Handle interruption gracefully
116
+ print("Task was cancelled, handling interruption.")
117
  break
118
+
119
+ def shutdown(self) -> None:
120
+ print("Shutting down, setting quit event.")
121
+ self.quit.set()
122
+
123
  def copy(self) -> "GeminiHandler":
124
  return GeminiHandler(
125
  expected_layout="mono",
 
144
  response_modalities=["AUDIO"],
145
  output_audio_transcription={},
146
  tools=tools,
147
+ system_instruction=SYSTEM_PROMPTS[prompt_system],
148
  speech_config=SpeechConfig(
149
  voice_config=VoiceConfig(
150
  prebuilt_voice_config=PrebuiltVoiceConfig(
 
157
  async with client.aio.live.connect(
158
  model=model, config=config
159
  ) as session:
160
+ print(f"\nConnected to model: {model}") # Debug info
161
+ print(f"system prompt: {prompt_system} - {SYSTEM_PROMPTS[prompt_system]}") # Debug info
162
+ print(f"prompt: {prompt} - {PROMPTS[prompt]}") # Debug info
163
 
164
  await session.send_client_content(turns=Content(
165
  role='user',
166
+ parts=[Part(text=PROMPTS[prompt])]))
167
 
168
  async for audio in session.start_stream(
169
  stream=self.stream(), mime_type="audio/pcm"
 
171
  if audio.data:
172
  array = np.frombuffer(audio.data, dtype=np.int16)
173
  self.output_queue.put_nowait((self.output_sample_rate, array))
174
+ #await session.send() # Make sure to await the send
175
  except Exception as e:
176
+ #print(f"Error during session: {str(e)}")
177
+ #print(f"Error during session: {repr(e)}")
178
+ print("Error during session:")
179
+ traceback.print_exc()
180
  raise # Re-raise the exception after printing
181
  except Exception as e:
182
  print(f"Error in start_up: {str(e)}")
 
184
 
185
  async def stream(self):
186
  while not self.quit.is_set():
187
+ #yield await wait_for_item(self.input_queue)
188
+ item = await wait_for_item(self.input_queue)
189
+ # Only yield if item is valid (e.g., not None or empty)
190
+ if item:
191
+ yield item
192
 
193
  async def receive(self, frame: tuple[int, np.ndarray]) -> None:
194
  _, array = frame
 
234
  gr.Markdown(
235
  value=lambda x: f"""
236
  ```
237
+ {SYSTEM_PROMPTS[x] if x in SYSTEM_PROMPTS else "Please select the system prompt."}
238
  ```
239
  """,
240
  inputs=[system_prompt_selector],
241
  )
242
 
 
243
  gr.Markdown(
244
+ value=lambda x: f"""
245
  ```
246
+ {PROMPTS[x] if x in PROMPTS else "Please select the prompt."}
247
  ```
248
  """,
249
  inputs=[prompt_selector],
 
251
 
252
  api_key_state = gr.State(os.getenv("GOOGLE_API_KEY", "")) # Wrap API key in gr.State
253
 
254
+ # make sure you don't commit your token to git!
255
+ TOKEN = os.getenv('HF_TOKEN')
256
+ async def get_credentials():
257
+ return await get_cloudflare_turn_credentials_async(hf_token=TOKEN)
258
+
259
  with gr.Column() as row:
260
 
261
  with gr.Row():
 
280
  icon_button_color="rgb(255, 255, 255)",
281
  icon="japi_head.png",
282
  button_labels={"start": "Start", "stop": "Stop"},
283
+ rtc_configuration=get_cloudflare_turn_credentials_async if get_space() else None,
284
  )
 
285
  webrtc.stream(
286
+ GeminiHandler(ReplyOnPause(AsyncStreamHandler, can_interrupt=True)),
287
  inputs=[webrtc, api_key_state, voice, system_prompt_selector, prompt_selector, model_selector],
288
  outputs=[webrtc],
289
  time_limit=60,