ReadRight

Sleeping

App Files Files Community

ParulPandey committed on Jun 6, 2025

Commit

328a9a8

verified ·

1 Parent(s): fb67b66

Create app.py

Browse files

Files changed (1) hide show

app.py +382 -0

app.py ADDED Viewed

	@@ -0,0 +1,382 @@

+import gradio as gr
+import os
+import difflib
+from gradio_client import Client, file as gradio_file
+import time
+import google.generativeai as genai # Import Gemini library
+# --- Configuration & Clients ---
+# Function to initialize Gemini client (handles local env var and HF Secrets)
+def configure_gemini_api():
+    """Configure the google.generativeai client with the GOOGLE_API_KEY secret.
+
+    The key is read through ``gr.Secrets`` when that lookup is available and
+    yields a value; in every other case (attribute missing, no secrets file,
+    or an empty result) the ``GOOGLE_API_KEY`` environment variable is used.
+
+    Returns:
+        True when a key was found and ``genai.configure`` accepted it,
+        False otherwise. A diagnostic line is printed on every False path.
+    """
+    api_key = None
+    try:
+        api_key = gr.Secrets.get("GOOGLE_API_KEY")
+    except (AttributeError, FileNotFoundError):
+        # gr.Secrets is not available in this environment or has no secrets file.
+        api_key = None
+    if not api_key:
+        # Fall back whenever the secrets lookup came up empty, not only when it raised.
+        api_key = os.environ.get("GOOGLE_API_KEY")
+    if not api_key:
+        print("WARN: GOOGLE_API_KEY not found in Gradio Secrets or environment. Story generation with Gemini will be disabled.")
+        return False
+    try:
+        genai.configure(api_key=api_key)
+    except Exception as e:
+        # Startup must not crash on a bad key; report it and run with Gemini disabled.
+        print(f"Error configuring Gemini API: {e}")
+        return False
+    return True
+# Run the Gemini configuration once at import time and remember the outcome.
+GEMINI_API_CONFIGURED = configure_gemini_api()
+# Remote Bark Space (suno/bark) used for text-to-speech.
+# The name stays None when the connection attempt fails so callers can test for it.
+bark_tts_client = None
+try:
+    bark_tts_client = Client("suno/bark")
+except Exception as err:
+    print(f"Fatal: Could not initialize Bark TTS client (suno/bark): {err}. TTS will not work.")
+# Remote Whisper Space (abidlabs/whisper-large-v2) used for speech-to-text.
+# Same convention: None means the client could not be created.
+whisper_stt_client = None
+try:
+    whisper_stt_client = Client("abidlabs/whisper-large-v2")
+except Exception as err:
+    print(f"Fatal: Could not initialize Whisper STT client (abidlabs/whisper-large-v2): {err}. STT will not work.")
+# --- Helper Functions ---
+def generate_story_with_gemini(name, grade, topic):
+    """Ask Gemini for a short read-aloud story tailored to one student.
+
+    Args:
+        name: Student name, interpolated into the prompt.
+        grade: Grade level, interpolated into the prompt.
+        topic: Subject the story must be about.
+
+    Returns:
+        The stripped story text on success. On every failure path (API not
+        configured, blocked prompt, missing or empty candidate, or an
+        exception from the client) a kid-friendly message string is returned
+        instead; this function does not raise.
+    """
+    if not GEMINI_API_CONFIGURED:
+        return "Google Gemini API key not configured. Story generation is disabled. 🔑"
+    try:
+        model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest")
+        story_prompt = (
+            f"You are a super friendly and imaginative storyteller for kids. "
+            f"Please write an exciting and fun short story (around 100-120 words) for a student named {name} who is in Grade {grade}. "
+            f"The story must be about '{topic}'. "
+            f"Use simple words and sentences that a Grade {grade} student can easily read aloud and understand. "
+            f"Make the story engaging and positive. Jump right into the story without any introduction like 'Here is a story for you'."
+        )
+        # Explicit thresholds for each harm category passed along with the request.
+        harm_categories = (
+            "HARM_CATEGORY_HARASSMENT",
+            "HARM_CATEGORY_HATE_SPEECH",
+            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+            "HARM_CATEGORY_DANGEROUS_CONTENT",
+        )
+        safety_settings = [
+            {"category": category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
+            for category in harm_categories
+        ]
+        generation_config = genai.types.GenerationConfig(
+            candidate_count=1,
+            max_output_tokens=300,  # headroom for a ~120-word story
+            temperature=0.75,
+        )
+        response = model.generate_content(
+            story_prompt,
+            generation_config=generation_config,
+            safety_settings=safety_settings,
+        )
+        has_story_parts = response.candidates and response.candidates[0].content.parts
+        # Read .text only when a candidate with parts exists, and before looking at the block reason.
+        story = response.text if has_story_parts else None
+        feedback = response.prompt_feedback
+        if feedback and feedback.block_reason:
+            return f"Oh dear! My story idea for '{topic}' was a bit too wild and got blocked by the safety filter (Reason: {feedback.block_reason}). Let's try a different topic! 😊"
+        if not has_story_parts:
+            print(f"Gemini API response issue: {response}")
+            return f"Hmm, Gemini's story magic seems to be on a little break for '{topic}'. Maybe try another topic? 🤔"
+        story = story.strip()
+        if not story:
+            return f"Hmm, Gemini gave me a blank page for '{topic}'. Let's try a different topic or try again! ✨"
+        return story
+    except Exception as e:
+        print(f"Error generating story with Gemini: {e}")
+        error_text = str(e)
+        # Give a dedicated hint when the failure looks like a bad API key.
+        if "API_KEY_INVALID" in error_text or "API key not valid" in error_text:
+            return "Oops! The Google Gemini API key seems to be having a problem. Please tell the grown-ups to check it! 🔑"
+        return f"Oh no! 😟 I had a little trouble dreaming up a story with Gemini. Error: {e}"
+def text_to_speech_bark(text_to_speak):
+    """Synthesize speech for *text_to_speak* through the Bark Space client.
+
+    Args:
+        text_to_speak: Text submitted to the ``/generate_audio`` endpoint.
+
+    Returns:
+        The audio file path taken from the job result (first element when the
+        result is a tuple, or the result itself when it is a string). On any
+        failure a kid-friendly error message string is returned instead; this
+        function does not raise.
+    """
+    if not bark_tts_client:
+        return "The Bark TTS sound machine isn't working right now. 🛠️ Please tell the grown-ups!"
+    voice_preset = "v2/en_speaker_7"
+    try:
+        job = bark_tts_client.submit(
+            text_to_speak,
+            voice_preset,
+            api_name="/generate_audio"
+        )
+        audio_result = job.result(timeout=180)
+    except Exception as e:
+        print(f"Error with Bark TTS (suno/bark): {e}")
+        error_text = str(e)
+        error_lower = error_text.lower()
+        # Needles compared against the lowercased text must be lowercase themselves,
+        # otherwise the busy-queue branch can never be taken.
+        if "queue full" in error_lower or "too much pending traffic" in error_lower or " सर्वर व्यस्त है" in error_text:
+            return "The Bark sound machine is super busy with other kids!  인기폭발!  очередь! Please try again in a little bit. 🕒"
+        if "generator" in error_lower and "choices" in error_lower:
+            return f"Oops! Bark had a hiccup with the voice. Chosen: '{voice_preset}'. Maybe try later? Details: {e}"
+        return f"Oh dear, Bark couldn't make the sound. 🔇 Error: {e}"
+    if isinstance(audio_result, tuple) and len(audio_result) > 0:
+        return audio_result[0]
+    if isinstance(audio_result, str):
+        return audio_result
+    print(f"Unexpected Bark TTS result format: {audio_result}")
+    return "Hmm, the sound came out a bit funny from Bark. 🤔"
+def speech_to_text_whisper_space(audio_filepath):
+    """Transcribe a recording through the Whisper Space client.
+
+    Args:
+        audio_filepath: Path of the recording to submit to ``/predict``.
+
+    Returns:
+        The transcript (the ``'text'`` entry when the result is a dict, or the
+        result itself when it is a string). When the client is missing, no
+        path was given, the result has an unexpected shape, or the call fails,
+        a kid-friendly error message string is returned instead; this function
+        does not raise.
+    """
+    if not whisper_stt_client:
+        return "The Whisper listening ears aren't working right now. 🛠️ Please tell the grown-ups!"
+    if not audio_filepath:
+        return "Oops! I didn't get any recording to listen to. 🎤"
+    try:
+        job = whisper_stt_client.submit(
+            gradio_file(audio_filepath),
+            "transcribe",
+            "English",
+            api_name="/predict"
+        )
+        result_dict = job.result(timeout=120)
+    except Exception as e:
+        print(f"Error transcribing audio with Whisper Space: {e}")
+        error_lower = str(e).lower()
+        # Needles must be lowercase to match the lowercased error text.
+        if "queue full" in error_lower or "too much pending traffic" in error_lower:
+            return "The Whisper listening ears are super busy! 인기폭발! очередь! Please try again in a bit. 🕒"
+        return f"Oh no! Whisper had trouble hearing that. 🙉 Error: {e}"
+    if isinstance(result_dict, dict) and 'text' in result_dict:
+        return result_dict['text']
+    if isinstance(result_dict, str):
+        return result_dict
+    print(f"Unexpected Whisper STT result format: {result_dict}")
+    return "Hmm, I couldn't quite understand the words from Whisper. 🤔"
+def clean_text_for_comparison(text):
+    """Normalize *text* into a list of lowercase words for word-level diffing.
+
+    ASCII punctuation except the apostrophe is deleted (so contractions such
+    as "don't" survive as one word). Typographic characters are normalized as
+    well: curly apostrophes become straight ones, em/en dashes become spaces,
+    and curly double quotes and the ellipsis character are deleted.
+
+    Args:
+        text: The passage or transcript to normalize.
+
+    Returns:
+        The list of whitespace-separated words, or ``[]`` when *text* is not
+        a string.
+    """
+    if not isinstance(text, str):
+        return []
+    punctuation_to_remove = "!\"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\u201c\u201d\u2026"
+    # One translate pass: map curly apostrophes to "'" and dashes to a space,
+    # delete everything listed in punctuation_to_remove.
+    table = str.maketrans("\u2019\u2018\u2014\u2013", "''  ", punctuation_to_remove)
+    return text.lower().translate(table).split()
+def compare_texts_for_feedback(original_text, student_text):
+    """Diff the student's transcript against the story, word by word.
+
+    Both texts are normalized with ``clean_text_for_comparison`` and aligned
+    with ``difflib.SequenceMatcher``.
+
+    Args:
+        original_text: The passage the student was asked to read.
+        student_text: The transcript of what the student said.
+
+    Returns:
+        A ``(feedback, highlighted_passage)`` pair of Markdown strings. When
+        the transcript has no words the feedback asks for a new recording and
+        the highlighted passage is empty.
+    """
+    expected_words = clean_text_for_comparison(original_text)
+    spoken_words = clean_text_for_comparison(student_text)
+    if not spoken_words:
+        return "It sounds like you didn't record anything, or maybe it was super quiet! 🤫 Try recording again nice and clear!", ""
+    notes = []
+    marked_parts = []
+    opcodes = difflib.SequenceMatcher(None, expected_words, spoken_words, autojunk=False).get_opcodes()
+    for tag, i1, i2, j1, j2 in opcodes:
+        expected_chunk = expected_words[i1:i2]
+        spoken_chunk = spoken_words[j1:j2]
+        expected_phrase = " ".join(expected_chunk)
+        spoken_phrase = " ".join(spoken_chunk)
+        if tag == 'equal':
+            marked_parts.append(expected_phrase)
+        elif tag == 'delete':
+            notes.append(f"- You missed: \"**{expected_phrase}**\"")
+            marked_parts.append(f"~~{expected_phrase}~~ (*skipped*)")
+        elif tag == 'insert':
+            notes.append(f"- You added: \"*{spoken_phrase}*\" (which wasn't in the story)")
+            marked_parts.append(f"(*added:* **{spoken_phrase}**)")
+        elif tag == 'replace':
+            if len(expected_chunk) != len(spoken_chunk):
+                # Unequal lengths cannot be paired up, so report the phrases as a whole.
+                notes.append(f"- Instead of: \"**{expected_phrase}**\", you said: \"*{spoken_phrase}*\"")
+                marked_parts.append(f"~~{expected_phrase}~~ **{spoken_phrase}**")
+            else:
+                # Same length: report each substituted word individually.
+                for o_word, s_word in zip(expected_chunk, spoken_chunk):
+                    notes.append(f"- You said: \"*{s_word}*\" instead of: \"**{o_word}**\"")
+                    marked_parts.append(f"~~{o_word}~~ **{s_word}**")
+    final_highlighted_text = " ".join(marked_parts)
+    if notes:
+        feedback_summary = "Great try! Here are a few words to practice to make it even better:\n" + "\n".join(notes)
+        return feedback_summary, final_highlighted_text
+    return "🎉🥳 WOOHOO! Amazing reading! You got all the words spot on! 🥳🎉", final_highlighted_text
+# --- Gradio UI Functions ---
+def generate_story_and_audio_for_ui(name, grade, topic, progress=gr.Progress(track_tqdm=True)):
+    """Generate a story and its narration for the "Get My Gemini Story" button.
+
+    Args:
+        name: Student name from the UI.
+        grade: Grade selected in the UI.
+        topic: Story topic from the UI.
+        progress: Gradio progress tracker used to report the two slow steps.
+
+    Returns:
+        A 4-tuple matching the click handler's outputs:
+        ``(passage text, audio file path or None, visibility update for the
+        recording area, story text to keep in state)``. The recording area is
+        only made visible when both the story and the audio succeeded.
+    """
+    if not name or not grade or not topic:
+        return "Oops! Please tell me your name, grade, and a fun topic first! 😊", None, gr.update(visible=False), ""
+    progress(0.1, desc="📖 Asking Gemini to dream up a cool story for you...")
+    story_text = generate_story_with_gemini(name, grade, topic)
+    # generate_story_with_gemini reports failures as message strings. Match them by
+    # their opening words rather than by substring, so a real story that merely
+    # contains "Oh no!" or "Oops!" is not rejected, and include the "Oh dear!"
+    # safety-block message so it is not narrated as if it were a story.
+    gemini_error_prefixes = (
+        "Google Gemini API key not configured",
+        "Oh dear! My story idea",
+        "Hmm, Gemini",
+        "Oops! The Google Gemini API key",
+        "Oh no! 😟",
+    )
+    if not story_text.strip() or story_text.startswith(gemini_error_prefixes):
+        # Show the message, keep the recording area hidden, and store no passage
+        # so a later assessment cannot be compared against an error message.
+        return story_text, None, gr.update(visible=False), ""
+    progress(0.5, desc="🎧 Warming up the Bark sound machine... (this can take a moment, like magic!)")
+    tts_result = text_to_speech_bark(story_text)
+    error_conditions_tts = [
+        "couldn't make the sound", "sound came out a bit funny", "sound machine isn't working",
+        "sound machine is super busy", "Bark had a hiccup"
+    ]
+    if any(err in (tts_result or "") for err in error_conditions_tts):
+        # The audio output expects a file path; an error sentence is not one.
+        # Surface the message as a warning and leave the player empty.
+        gr.Warning(tts_result)
+        return story_text, None, gr.update(visible=False), story_text
+    progress(1.0, desc="✅ Story and sound are ready! Let's go!")
+    return (
+        story_text,
+        tts_result,
+        gr.update(visible=True),  # reveal the recording/assessment row
+        story_text,               # remembered for the assessment step
+    )
+def assess_student_reading_ui(original_passage_state, student_audio_path, progress=gr.Progress(track_tqdm=True)):
+    """Transcribe the student's recording and compare it with the story.
+
+    Args:
+        original_passage_state: The story text kept in state by the generate step.
+        student_audio_path: File path of the student's recording.
+        progress: Gradio progress tracker.
+
+    Returns:
+        A ``(feedback, highlighted_passage)`` pair of strings. When the
+        recording or the story is missing, or transcription failed, the first
+        element carries the message and the second is empty.
+    """
+    if not student_audio_path:
+        return "🎤 Whoops! Did you forget to record your awesome reading? Try again!", ""
+    if not original_passage_state:
+        return "Hmm, I lost the story! 😟 Please generate a new story first.", ""
+    progress(0.2, desc="👂 Whisper is listening carefully to your recording...")
+    transcribed_text = speech_to_text_whisper_space(student_audio_path)
+    # Each entry must be a literal substring of a message returned by
+    # speech_to_text_whisper_space; the unexpected-format message reads
+    # "couldn't quite understand the words".
+    error_conditions_stt = [
+        "couldn't quite understand the words", "had trouble hearing that", "listening ears aren't working",
+        "listening ears are super busy", "didn't get any recording"
+    ]
+    if any(err in (transcribed_text or "") for err in error_conditions_stt):
+        return transcribed_text, ""
+    progress(0.7, desc="🧠 Thinking about the words...")
+    feedback, highlighted_passage = compare_texts_for_feedback(original_passage_state, transcribed_text)
+    progress(1.0, desc="⭐ Feedback is ready!")
+    return feedback, highlighted_passage
+# --- Gradio Interface ---
+# Custom stylesheet handed to gr.Blocks(css=...) below: page font and background,
+# button and panel styling, input borders, and the boxes identified by the
+# elem_id values student_audio_input, feedback_output and highlighted_passage_output.
+css = """
+body { font-family: 'Comic Sans MS', 'Chalkboard SE', 'Comic Neue', cursive; background-color: #F0F8FF; } /* AliceBlue background */
+.gr-button {
+    background-color: #FF69B4 !important; /* HotPink */
+    color: white !important;
+    border-radius: 20px !important;
+    font-weight: bold !important;
+    border: 2px solid #FF1493 !important; /* DeepPink border */
+    box-shadow: 0px 3px 5px rgba(0,0,0,0.2) !important;
+}
+.gr-button:hover { background-color: #FF1493 !important; } /* DeepPink on hover */
+.gr-panel {
+    border-radius: 15px !important;
+    box-shadow: 5px 5px 15px rgba(0,0,0,0.1) !important;
+    background-color: #FFFACD !important; /* LemonChiffon panel background */
+    border: 2px dashed #FFD700 !important; /* Gold dashed border */
+}
+label, .gr-checkbox-label { color: #4B0082 !important; font-weight: bold !important; } /* Indigo */
+.gr-textbox, .gr-dropdown { border-radius: 10px !important; border: 1px solid #DDA0DD !important; } /* Plum border for inputs */
+#student_audio_input audio { background-color: #E6E6FA; border-radius: 10px; } /* Lavender for audio player */
+#feedback_output, #highlighted_passage_output {
+    background-color: #FFFFE0; /* LightYellow */
+    padding: 15px;
+    border-radius: 10px;
+    border: 1px solid #FAFAD2; /* LightGoldenrodYellow */
+}
+"""
+# Build the UI: header, the input/story row, the initially hidden recording row,
+# the two button handlers, and the footer.
+with gr.Blocks(theme=gr.themes.Soft(primary_hue=gr.themes.colors.pink, secondary_hue=gr.themes.colors.purple), css=css) as app:
+    gr.Markdown(
+        """
+        <div style="text-align: center; padding: 20px 0;">
+            <h1 style="color: #FF6347; font-size: 3em; text-shadow: 2px 2px #D3D3D3;">🌈🦄✨ AI Reading Buddy ✨🦄🌈</h1>
+            <p style="font-size: 1.3em; color: #483D8B;">Let's read a super fun story from Gemini and practice our words!</p>
+        </div>
+        """
+    )
+    # Carries the generated story from the generate handler to the assess handler.
+    original_passage_state = gr.State("")
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("### <span style='color:#DB7093;'>✏️ Tell Me About You!</span>")
+            student_name_input = gr.Textbox(label="👑 Your Awesome Name:", placeholder="E.g., Princess Lily")
+            student_grade_input = gr.Dropdown(
+                label="🧑‍🎓 Your Grade:",
+                choices=[f"{i}" for i in range(1, 11)],
+                value="3"
+            )
+            topic_input = gr.Textbox(label="🚀 Story Topic Idea:", placeholder="E.g., brave little astronaut")
+            generate_button = gr.Button(value="🎈 Get My Gemini Story!")
+        with gr.Column(scale=2):
+            gr.Markdown("### <span style='color:#DB7093;'>📖 Your Special Story (from Gemini AI):</span>")
+            passage_output = gr.Textbox(label="Read this aloud:", lines=10, interactive=False)
+            gr.Markdown("### <span style='color:#DB7093;'>🔊 Listen to the Story:</span>")
+            audio_output = gr.Audio(label="Hear how it sounds (with Bark TTS Bark️)", type="filepath")
+    gr.Markdown("<hr style='border:1px dashed #FFB6C1;'>") # LightPink dashed separator
+    # Created hidden; the third output of the generate handler toggles its visibility.
+    with gr.Row(visible=False) as recording_assessment_area:
+        with gr.Column(scale=1):
+            gr.Markdown("### <span style='color:#32CD32;'>🤩 Your Turn to Shine! 🤩</span>")
+            student_audio_input = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Record yourself reading the story! Press the mic, then stop.", elem_id="student_audio_input")
+            assess_button = gr.Button(value="🧐 Check My Reading!", elem_id="assess_button")
+        with gr.Column(scale=2):
+            gr.Markdown("### <span style='color:#32CD32;'>💡 Word Detective Feedback:</span>")
+            feedback_output = gr.Markdown(value="Your amazing feedback will pop up here! ✨", elem_id="feedback_output")
+            highlighted_passage_output = gr.Markdown(value="See your reading journey here! 🗺️", elem_id="highlighted_passage_output")
+    # The four outputs line up positionally with the 4-tuple returned by
+    # generate_story_and_audio_for_ui.
+    generate_button.click(
+        fn=generate_story_and_audio_for_ui,
+        inputs=[student_name_input, student_grade_input, topic_input],
+        outputs=[
+            passage_output,
+            audio_output,
+            recording_assessment_area, # Directly control visibility of the row
+            original_passage_state
+        ]
+    )
+    # The two outputs receive the (feedback, highlighted passage) pair returned by
+    # assess_student_reading_ui.
+    assess_button.click(
+        fn=assess_student_reading_ui,
+        inputs=[original_passage_state, student_audio_input],
+        outputs=[feedback_output, highlighted_passage_output]
+    )
+    gr.Markdown(
+        """
+        ---
+        <div style="text-align: center; font-size: 0.9em; color: #555;">
+        Built with ❤️ for the Agentic Demo Track Hackathon! Tag: <code>agent-demo-track</code>
+        <br>Stories by Google Gemini, voices by Suno Bark @ HF, and listening by Whisper @ HF.
+        </div>
+        """
+    )
+# --- Launching the App ---
+if __name__ == "__main__":
+    # Gather a notice for every service that failed to initialize, print them
+    # in a fixed order, then start the app.
+    startup_notices = []
+    if not GEMINI_API_CONFIGURED:
+        startup_notices.append("🚨 GOOGLE_API_KEY not configured for local testing or failed to initialize!")
+        startup_notices.append("Please set it: export GOOGLE_API_KEY='your_key_here'")
+    if not bark_tts_client:
+        startup_notices.append("🚨 Bark TTS client (suno/bark) could not be initialized. TTS will not work.")
+    if not whisper_stt_client:
+        startup_notices.append("🚨 Whisper STT client (abidlabs/whisper-large-v2) could not be initialized. STT will not work.")
+    for notice in startup_notices:
+        print(notice)
+    app.launch(debug=True)