ParulPandey committed on
Commit
328a9a8
·
verified ·
1 Parent(s): fb67b66

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +382 -0
app.py ADDED
@@ -0,0 +1,382 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import difflib
4
+ from gradio_client import Client, file as gradio_file
5
+ import time
6
+ import google.generativeai as genai # Import Gemini library
7
+
8
+ # --- Configuration & Clients ---
9
+
10
+ # Function to initialize Gemini client (handles local env var and HF Secrets)
11
def configure_gemini_api():
    """Configure the Gemini SDK from the ``GOOGLE_API_KEY`` environment variable.

    Hugging Face Spaces expose repository secrets to the running app as
    ordinary environment variables, so a single environment lookup covers
    both a Space deployment and a local run. (Gradio has no ``gr.Secrets``
    object; the previous lookup only worked because the resulting
    ``AttributeError`` fell through to this same environment read.)

    Returns:
        bool: True when a non-blank key was found and ``genai.configure``
        accepted it; False otherwise. A diagnostic is printed on failure.
    """
    # Treat a blank or whitespace-only value the same as a missing key.
    api_key = (os.environ.get("GOOGLE_API_KEY") or "").strip()
    if not api_key:
        print("WARN: GOOGLE_API_KEY not found in Gradio Secrets or environment. Story generation with Gemini will be disabled.")
        return False

    try:
        genai.configure(api_key=api_key)
    except Exception as e:
        # Startup must not crash the app; story generation is simply disabled.
        print(f"Error configuring Gemini API: {e}")
        return False
    return True
31
+
32
# Configure Gemini once at import time; the flag gates story generation later.
GEMINI_API_CONFIGURED = configure_gemini_api()


def _connect_space(space_id, label, feature):
    """Open a gradio_client connection to a hosted Space.

    Returns the ``Client`` on success. On any failure a diagnostic is
    printed and ``None`` is returned so the app can still start with the
    corresponding feature disabled.
    """
    try:
        return Client(space_id)
    except Exception as error:
        print(f"Fatal: Could not initialize {label} client ({space_id}): {error}. {feature} will not work.")
        return None


# Text-to-speech via the Bark Space (suno/bark).
bark_tts_client = _connect_space("suno/bark", "Bark TTS", "TTS")

# Speech-to-text via the Whisper Space (abidlabs/whisper-large-v2).
whisper_stt_client = _connect_space("abidlabs/whisper-large-v2", "Whisper STT", "STT")
48
+
49
+ # --- Helper Functions ---
50
+
51
def generate_story_with_gemini(name, grade, topic):
    """Ask Gemini for a short, grade-appropriate story about ``topic``.

    Args:
        name: Student's name, woven into the prompt.
        grade: Student's grade, used to pitch the vocabulary.
        topic: What the story must be about.

    Returns:
        str: The stripped story text on success. On any failure (API not
        configured, safety block, empty reply, exception) a child-friendly
        message string is returned instead of raising.
    """
    if not GEMINI_API_CONFIGURED:
        return "Google Gemini API key not configured. Story generation is disabled. πŸ”‘"

    def blocked_notice(reply):
        # Message for a prompt rejected by the safety filter, else None.
        if reply.prompt_feedback and reply.prompt_feedback.block_reason:
            return f"Oh dear! My story idea for '{topic}' was a bit too wild and got blocked by the safety filter (Reason: {reply.prompt_feedback.block_reason}). Let's try a different topic! 😊"
        return None

    try:
        # 'gemini-1.5-flash-latest' is chosen for speed on a short general task.
        model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest")

        prompt = (
            "You are a super friendly and imaginative storyteller for kids. "
            f"Please write an exciting and fun short story (around 100-120 words) for a student named {name} who is in Grade {grade}. "
            f"The story must be about '{topic}'. "
            f"Use simple words and sentences that a Grade {grade} student can easily read aloud and understand. "
            "Make the story engaging and positive. Jump right into the story without any introduction like 'Here is a story for you'."
        )

        # Explicit thresholds on top of Gemini's built-in safety settings.
        safety_settings = [
            {"category": harm_category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
            for harm_category in (
                "HARM_CATEGORY_HARASSMENT",
                "HARM_CATEGORY_HATE_SPEECH",
                "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                "HARM_CATEGORY_DANGEROUS_CONTENT",
            )
        ]

        generation_config = genai.types.GenerationConfig(
            candidate_count=1,
            max_output_tokens=300,  # generous for a ~120-word story
            temperature=0.75,  # some creativity
        )

        response = model.generate_content(
            prompt,
            generation_config=generation_config,
            safety_settings=safety_settings,
        )

        has_story = response.candidates and response.candidates[0].content.parts
        if not has_story:
            # No usable candidate: either blocked or an unexpected reply.
            notice = blocked_notice(response)
            if notice is not None:
                return notice
            print(f"Gemini API response issue: {response}")
            return f"Hmm, Gemini's story magic seems to be on a little break for '{topic}'. Maybe try another topic? πŸ€”"

        story = response.text
        notice = blocked_notice(response)
        if notice is not None:
            return notice

        story = story.strip()
        if not story:
            # Empty text even though nothing was blocked.
            return f"Hmm, Gemini gave me a blank page for '{topic}'. Let's try a different topic or try again! ✨"
        return story

    except Exception as e:
        print(f"Error generating story with Gemini: {e}")
        detail = str(e)
        # Surface the common "bad API key" failure with a clearer message.
        if "API_KEY_INVALID" in detail or "API key not valid" in detail:
            return "Oops! The Google Gemini API key seems to be having a problem. Please tell the grown-ups to check it! πŸ”‘"
        return f"Oh no! 😟 I had a little trouble dreaming up a story with Gemini. Error: {e}"
111
+
112
+
113
def text_to_speech_bark(text_to_speak):
    """Synthesize ``text_to_speak`` with the Bark Space and return the audio path.

    Args:
        text_to_speak: Text to send to the Space's ``/generate_audio`` endpoint.

    Returns:
        str: The audio file path reported by the Space (the first element
        when the Space returns a tuple). On failure a child-friendly error
        message string is returned instead; this function does not raise.
    """
    if not bark_tts_client:
        return "The Bark TTS sound machine isn't working right now. πŸ› οΈ Please tell the grown-ups!"

    voice_preset = "v2/en_speaker_7"
    try:
        job = bark_tts_client.submit(
            text_to_speak,
            voice_preset,
            api_name="/generate_audio"
        )
        # Bark can be slow; wait up to three minutes for the job.
        audio_result = job.result(timeout=180)

        if isinstance(audio_result, tuple) and len(audio_result) > 0:
            return audio_result[0]
        if isinstance(audio_result, str):
            return audio_result

        print(f"Unexpected Bark TTS result format: {audio_result}")
        return "Hmm, the sound came out a bit funny from Bark. πŸ€”"
    except Exception as e:
        print(f"Error with Bark TTS (suno/bark): {e}")
        detail = str(e)
        detail_lower = detail.lower()
        # Needles compared against the lower-cased text must themselves be
        # lower-case; the previous "Queue full" could never match.
        if "queue full" in detail_lower or "too much pending traffic" in detail_lower or " ΰ€Έΰ€°ΰ₯ΰ€΅ΰ€° ΰ€΅ΰ₯ΰ€―ΰ€Έΰ₯ΰ€€ ΰ€Ήΰ₯ˆ" in detail:
            return "The Bark sound machine is super busy with other kids! 인기폭발! ΠΎΡ‡Π΅Ρ€Π΅Π΄ΡŒ! Please try again in a little bit. πŸ•’"
        if "generator" in detail_lower and "choices" in detail_lower:
            return f"Oops! Bark had a hiccup with the voice. Chosen: '{voice_preset}'. Maybe try later? Details: {e}"
        return f"Oh dear, Bark couldn't make the sound. πŸ”‡ Error: {e}"
140
+
141
def speech_to_text_whisper_space(audio_filepath):
    """Transcribe a recording with the Whisper Space.

    Args:
        audio_filepath: Path of the recorded audio file to upload.

    Returns:
        str: The transcription (the ``'text'`` entry when the Space returns
        a dict, or the string itself). On failure a child-friendly error
        message string is returned instead; this function does not raise.
    """
    if not whisper_stt_client:
        return "The Whisper listening ears aren't working right now. πŸ› οΈ Please tell the grown-ups!"
    if not audio_filepath:
        return "Oops! I didn't get any recording to listen to. 🎀"

    try:
        job = whisper_stt_client.submit(
            gradio_file(audio_filepath),
            "transcribe",
            "English",
            api_name="/predict"
        )
        result_dict = job.result(timeout=120)

        if isinstance(result_dict, dict) and 'text' in result_dict:
            return result_dict['text']
        if isinstance(result_dict, str):
            return result_dict

        print(f"Unexpected Whisper STT result format: {result_dict}")
        return "Hmm, I couldn't quite understand the words from Whisper. πŸ€”"
    except Exception as e:
        print(f"Error transcribing audio with Whisper Space: {e}")
        detail_lower = str(e).lower()
        # Needles compared against the lower-cased text must themselves be
        # lower-case; the previous "Queue full" could never match.
        if "queue full" in detail_lower or "too much pending traffic" in detail_lower:
            return "The Whisper listening ears are super busy! 인기폭발! ΠΎΡ‡Π΅Ρ€Π΅Π΄ΡŒ! Please try again in a bit. πŸ•’"
        return f"Oh no! Whisper had trouble hearing that. πŸ™‰ Error: {e}"
167
+
168
def clean_text_for_comparison(text):
    """Normalize ``text`` into a list of lower-case words for diffing.

    ASCII punctuation is removed, except the apostrophe, which is kept so
    contractions survive. Typographic punctuation is folded onto the same
    rules so generated text with curly quotes or long dashes compares
    equal to a plain-ASCII transcript:

    * curly apostrophes / single quotes become ``'``
    * curly double quotes and the ellipsis character are removed
    * en and em dashes become a space (they usually join two words)

    Args:
        text: The passage or transcript. Non-string input yields ``[]``.

    Returns:
        list[str]: The whitespace-separated, cleaned words.
    """
    if not isinstance(text, str):
        return []

    ascii_punctuation = "!\"#$%&()*+,-./:;<=>?@[\\]^_`{|}~"
    translation = {mark: None for mark in ascii_punctuation}
    translation.update({
        "\u2018": "'",   # left single quotation mark
        "\u2019": "'",   # right single quotation mark / apostrophe
        "\u201c": None,  # left double quotation mark
        "\u201d": None,  # right double quotation mark
        "\u2026": None,  # horizontal ellipsis
        "\u2013": " ",   # en dash
        "\u2014": " ",   # em dash
    })
    return text.lower().translate(str.maketrans(translation)).split()
174
+
175
def compare_texts_for_feedback(original_text, student_text):
    """Diff the student's transcript against the story, word by word.

    Both texts are normalized with ``clean_text_for_comparison`` and aligned
    with ``difflib.SequenceMatcher``.

    Args:
        original_text: The story the student was asked to read.
        student_text: The transcription of the student's recording.

    Returns:
        tuple[str, str]: ``(feedback, highlighted_passage)``. ``feedback``
        is a Markdown summary of substitutions, omissions and additions (or
        a congratulation when there are none). ``highlighted_passage`` is
        the story with differences marked up in Markdown, and is empty when
        the transcript contains no words.
    """
    expected_words = clean_text_for_comparison(original_text)
    spoken_words = clean_text_for_comparison(student_text)

    if not spoken_words:
        return "It sounds like you didn't record anything, or maybe it was super quiet! 🀫 Try recording again nice and clear!", ""

    notes = []
    marked_up = []
    aligner = difflib.SequenceMatcher(None, expected_words, spoken_words, autojunk=False)

    for op, e_start, e_end, s_start, s_end in aligner.get_opcodes():
        expected_run = expected_words[e_start:e_end]
        spoken_run = spoken_words[s_start:s_end]
        expected_phrase = " ".join(expected_run)
        spoken_phrase = " ".join(spoken_run)

        if op == 'equal':
            marked_up.append(expected_phrase)
        elif op == 'replace':
            if len(expected_run) == len(spoken_run):
                # Same length: report each word pair on its own line.
                for expected, said in zip(expected_run, spoken_run):
                    notes.append(f'- You said: "*{said}*" instead of: "**{expected}**"')
                    marked_up.append(f"~~{expected}~~ **{said}**")
            else:
                # Different lengths: report the two phrases as a whole.
                notes.append(f'- Instead of: "**{expected_phrase}**", you said: "*{spoken_phrase}*"')
                marked_up.append(f"~~{expected_phrase}~~ **{spoken_phrase}**")
        elif op == 'delete':
            notes.append(f'- You missed: "**{expected_phrase}**"')
            marked_up.append(f"~~{expected_phrase}~~ (*skipped*)")
        elif op == 'insert':
            notes.append(f"- You added: \"*{spoken_phrase}*\" (which wasn't in the story)")
            marked_up.append(f"(*added:* **{spoken_phrase}**)")

    highlighted = " ".join(marked_up)

    if notes:
        summary = "Great try! Here are a few words to practice to make it even better:\n" + "\n".join(notes)
        return summary, highlighted
    return "πŸŽ‰πŸ₯³ WOOHOO! Amazing reading! You got all the words spot on! πŸ₯³πŸŽ‰", highlighted
216
+
217
+ # --- Gradio UI Functions ---
218
def generate_story_and_audio_for_ui(name, grade, topic, progress=gr.Progress(track_tqdm=True)):
    """Gradio handler: generate a story, synthesize its audio, update the UI.

    Args:
        name: Student name from the textbox.
        grade: Grade selected in the dropdown.
        topic: Story topic from the textbox.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        tuple: ``(passage_text, audio_path_or_None, visibility_update,
        state_text)`` matching the four outputs wired to the generate
        button. The recording area is only made visible when both the story
        and its audio were produced; the state holds the story only when a
        real story exists.
    """
    # Whitespace-only input is as unusable as empty input.
    if not all(str(value or "").strip() for value in (name, grade, topic)):
        return "Oops! Please tell me your name, grade, and a fun topic first! 😊", None, gr.update(visible=False), ""

    progress(0.1, desc="πŸ“– Asking Gemini to dream up a cool story for you...")
    story_text = generate_story_with_gemini(name, grade, topic) or ""

    # generate_story_with_gemini reports failure through fixed sentences.
    # Match them as prefixes: a substring test would flag any real story that
    # merely contains "Oh no!" or "Oops!", and the old keyword list missed
    # the "Oh dear!" safety-block message entirely.
    story_error_prefixes = (
        "Google Gemini API key not configured",
        "Oh dear! My story idea for",
        "Hmm, Gemini gave me a blank page",
        "Hmm, Gemini's story magic",
        "Oops! The Google Gemini API key",
    )
    generic_failure = (
        story_text.startswith("Oh no!")
        and "I had a little trouble dreaming up a story with Gemini" in story_text
    )
    if not story_text.strip() or story_text.startswith(story_error_prefixes) or generic_failure:
        # Show the message, keep the recording area hidden, and do not store
        # the error message as the passage to assess.
        return story_text, None, gr.update(visible=False), ""

    progress(0.5, desc="🎧 Warming up the Bark sound machine... (this can take a moment, like magic!)")
    tts_audio_path = text_to_speech_bark(story_text)

    error_conditions_tts = [
        "couldn't make the sound", "sound came out a bit funny", "sound machine isn't working",
        "sound machine is super busy", "Bark had a hiccup"
    ]
    if any(err in (tts_audio_path or "") for err in error_conditions_tts):
        # The value is an error sentence, not a file path: handing it to the
        # Audio output would make Gradio try to load a non-existent file.
        # Surface it as a warning and leave the player empty instead.
        gr.Warning(tts_audio_path)
        return story_text, None, gr.update(visible=False), story_text

    progress(1.0, desc="βœ… Story and sound are ready! Let's go!")
    return (
        story_text,
        tts_audio_path,
        gr.update(visible=True),  # reveal the recording/assessment row
        story_text,  # remembered in gr.State for the assessment step
    )
249
+
250
def assess_student_reading_ui(original_passage_state, student_audio_path, progress=gr.Progress(track_tqdm=True)):
    """Gradio handler: transcribe the student's recording and give feedback.

    Args:
        original_passage_state: The story text stored in ``gr.State``.
        student_audio_path: File path of the microphone recording.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        tuple[str, str]: ``(feedback_markdown, highlighted_passage_markdown)``.
        When something goes wrong the first element carries the message and
        the second is empty.
    """
    if not student_audio_path:
        return "🎀 Whoops! Did you forget to record your awesome reading? Try again!", ""
    if not original_passage_state:
        return "Hmm, I lost the story! 😟 Please generate a new story first.", ""

    progress(0.2, desc="πŸ‘‚ Whisper is listening carefully to your recording...")
    transcribed_text = speech_to_text_whisper_space(student_audio_path)

    # speech_to_text_whisper_space reports failure through fixed sentences.
    # Match them as prefixes: the old substring list contained "couldn't
    # understand the words", which never matched the real "couldn't quite
    # understand ..." message, and substring tests can misfire on a genuine
    # transcript that happens to contain one of the phrases.
    stt_error_prefixes = (
        "The Whisper listening ears aren't working right now.",
        "Oops! I didn't get any recording to listen to.",
        "Hmm, I couldn't quite understand the words from Whisper.",
        "The Whisper listening ears are super busy!",
        "Oh no! Whisper had trouble hearing that.",
    )
    if isinstance(transcribed_text, str) and transcribed_text.startswith(stt_error_prefixes):
        return transcribed_text, ""

    progress(0.7, desc="🧠 Thinking about the words...")
    feedback, highlighted_passage = compare_texts_for_feedback(original_passage_state, transcribed_text)
    progress(1.0, desc="⭐ Feedback is ready!")
    return feedback, highlighted_passage
270
+
271
+ # --- Gradio Interface ---
272
+ css = """
273
+ body { font-family: 'Comic Sans MS', 'Chalkboard SE', 'Comic Neue', cursive; background-color: #F0F8FF; } /* AliceBlue background */
274
+ .gr-button {
275
+ background-color: #FF69B4 !important; /* HotPink */
276
+ color: white !important;
277
+ border-radius: 20px !important;
278
+ font-weight: bold !important;
279
+ border: 2px solid #FF1493 !important; /* DeepPink border */
280
+ box-shadow: 0px 3px 5px rgba(0,0,0,0.2) !important;
281
+ }
282
+ .gr-button:hover { background-color: #FF1493 !important; } /* DeepPink on hover */
283
+ .gr-panel {
284
+ border-radius: 15px !important;
285
+ box-shadow: 5px 5px 15px rgba(0,0,0,0.1) !important;
286
+ background-color: #FFFACD !important; /* LemonChiffon panel background */
287
+ border: 2px dashed #FFD700 !important; /* Gold dashed border */
288
+ }
289
+ label, .gr-checkbox-label { color: #4B0082 !important; font-weight: bold !important; } /* Indigo */
290
+ .gr-textbox, .gr-dropdown { border-radius: 10px !important; border: 1px solid #DDA0DD !important; } /* Plum border for inputs */
291
+ #student_audio_input audio { background-color: #E6E6FA; border-radius: 10px; } /* Lavender for audio player */
292
+ #feedback_output, #highlighted_passage_output {
293
+ background-color: #FFFFE0; /* LightYellow */
294
+ padding: 15px;
295
+ border-radius: 10px;
296
+ border: 1px solid #FAFAD2; /* LightGoldenrodYellow */
297
+ }
298
+ """
299
+
300
# Page layout: header, story inputs and outputs, a recording/assessment row
# that stays hidden until a story exists, event wiring, and a footer.
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue=gr.themes.colors.pink,
        secondary_hue=gr.themes.colors.purple,
    ),
    css=css,
) as app:
    # --- Header ---
    gr.Markdown(
        """
        <div style="text-align: center; padding: 20px 0;">
        <h1 style="color: #FF6347; font-size: 3em; text-shadow: 2px 2px #D3D3D3;">πŸŒˆπŸ¦„βœ¨ AI Reading Buddy βœ¨πŸ¦„πŸŒˆ</h1>
        <p style="font-size: 1.3em; color: #483D8B;">Let's read a super fun story from Gemini and practice our words!</p>
        </div>
        """
    )

    # Holds the generated story between the two button clicks.
    original_passage_state = gr.State("")

    # --- Story request (left) and story output (right) ---
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### <span style='color:#DB7093;'>✏️ Tell Me About You!</span>")
            student_name_input = gr.Textbox(
                label="πŸ‘‘ Your Awesome Name:",
                placeholder="E.g., Princess Lily",
            )
            student_grade_input = gr.Dropdown(
                label="πŸ§‘β€πŸŽ“ Your Grade:",
                choices=[str(i) for i in range(1, 11)],
                value="3",
            )
            topic_input = gr.Textbox(
                label="πŸš€ Story Topic Idea:",
                placeholder="E.g., brave little astronaut",
            )
            generate_button = gr.Button(value="🎈 Get My Gemini Story!")

        with gr.Column(scale=2):
            gr.Markdown("### <span style='color:#DB7093;'>πŸ“– Your Special Story (from Gemini AI):</span>")
            passage_output = gr.Textbox(label="Read this aloud:", lines=10, interactive=False)
            gr.Markdown("### <span style='color:#DB7093;'>πŸ”Š Listen to the Story:</span>")
            audio_output = gr.Audio(label="Hear how it sounds (with Bark TTS Bark️)", type="filepath")

    gr.Markdown("<hr style='border:1px dashed #FFB6C1;'>")  # LightPink dashed separator

    # --- Recording and feedback (revealed once a story is ready) ---
    with gr.Row(visible=False) as recording_assessment_area:
        with gr.Column(scale=1):
            gr.Markdown("### <span style='color:#32CD32;'>🀩 Your Turn to Shine! 🀩</span>")
            student_audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="🎀 Record yourself reading the story! Press the mic, then stop.",
                elem_id="student_audio_input",
            )
            assess_button = gr.Button(value="🧐 Check My Reading!", elem_id="assess_button")

        with gr.Column(scale=2):
            gr.Markdown("### <span style='color:#32CD32;'>πŸ’‘ Word Detective Feedback:</span>")
            feedback_output = gr.Markdown(
                value="Your amazing feedback will pop up here! ✨",
                elem_id="feedback_output",
            )
            highlighted_passage_output = gr.Markdown(
                value="See your reading journey here! πŸ—ΊοΈ",
                elem_id="highlighted_passage_output",
            )

    # --- Event wiring ---
    generate_button.click(
        fn=generate_story_and_audio_for_ui,
        inputs=[student_name_input, student_grade_input, topic_input],
        outputs=[
            passage_output,
            audio_output,
            recording_assessment_area,  # the handler toggles this row's visibility
            original_passage_state,
        ],
    )

    assess_button.click(
        fn=assess_student_reading_ui,
        inputs=[original_passage_state, student_audio_input],
        outputs=[feedback_output, highlighted_passage_output],
    )

    # --- Footer ---
    gr.Markdown(
        """
        ---
        <div style="text-align: center; font-size: 0.9em; color: #555;">
        Built with ❀️ for the Agentic Demo Track Hackathon! Tag: <code>agent-demo-track</code>
        <br>Stories by Google Gemini, voices by Suno Bark @ HF, and listening by Whisper @ HF.
        </div>
        """
    )
370
+
371
+ # --- Launching the App ---
372
if __name__ == "__main__":
    # Collect every startup problem first so they print together, in a fixed
    # order, before the server starts.
    startup_warnings = []
    if not GEMINI_API_CONFIGURED:
        startup_warnings.append("🚨 GOOGLE_API_KEY not configured for local testing or failed to initialize!")
        startup_warnings.append("Please set it: export GOOGLE_API_KEY='your_key_here'")
    if not bark_tts_client:
        startup_warnings.append("🚨 Bark TTS client (suno/bark) could not be initialized. TTS will not work.")
    if not whisper_stt_client:
        startup_warnings.append("🚨 Whisper STT client (abidlabs/whisper-large-v2) could not be initialized. STT will not work.")

    for warning in startup_warnings:
        print(warning)

    app.launch(debug=True)