# Spaces: Running
import asyncio
import io
import os

import google.generativeai as genai
import gradio as gr
from google.adk.agents import Agent
from google.adk.models.google_llm import Gemini
from google.adk.runners import InMemoryRunner
from google.adk.tools import google_search
from google.cloud import texttospeech
from google.genai import types
from pydub import AudioSegment
# TinyTutor App

# --- Configure API Keys ---
# Both secrets are read from the environment and validated at import time so
# a misconfigured deployment fails immediately with a clear message.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise RuntimeError("❌ Missing GOOGLE_API_KEY environment variable.")
genai.configure(api_key=GOOGLE_API_KEY)

SERVICE_ACCOUNT_JSON = os.getenv("GCP_VI_SERVICE_ACCOUNT_JSON")
if not SERVICE_ACCOUNT_JSON:
    raise RuntimeError("❌ Missing GCP_VI_SERVICE_ACCOUNT_JSON environment variable.")

# The service-account JSON is materialised on disk and advertised through
# GOOGLE_APPLICATION_CREDENTIALS before the Text-to-Speech client is built.
# The file holds a private key, so it is created with owner-only permissions
# (0600) instead of the umask default, and written as UTF-8 regardless of the
# platform's locale encoding.
with os.fdopen(
    os.open(
        "tinytutor-tss-agent.json",
        os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        0o600,
    ),
    "w",
    encoding="utf-8",
) as f:
    f.write(SERVICE_ACCOUNT_JSON)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "tinytutor-tss-agent.json"
tts_client = texttospeech.TextToSpeechClient()
# --- Retry Options ---
# HTTP retry policy: 5 attempts, applied to responses with status
# 429, 500, 503 or 504.
# NOTE(review): initial_delay=1 with exp_base=7 presumably means exponential
# backoff that starts at one second and grows 7x per retry — confirm against
# the google-genai HttpRetryOptions documentation.
retry_config = types.HttpRetryOptions(
    initial_delay=1,
    exp_base=7,
    attempts=5,
    http_status_codes=[429, 500, 503, 504],
)
# --- Pedagogy Agent ---
# ADK agent that produces the "explain like I'm 5" text. It runs on
# gemini-2.5-flash-lite with the retry policy above and has the google_search
# tool available.
pedagogy_agent = Agent(
    name="PedagogyAgent",
    description="Explains topics in simple ELI5 style.",
    instruction="Explain the topic like I'm 5. Use google_search if needed.",
    model=Gemini(model="gemini-2.5-flash-lite", retry_options=retry_config),
    tools=[google_search],
)

# One in-memory runner, created at import time and shared by every request.
runner = InMemoryRunner(agent=pedagogy_agent)
async def run_pedagogy_async(topic: str) -> str:
    """Ask the pedagogy agent for an ELI5 explanation of *topic*.

    Args:
        topic: The subject the user wants explained.

    Returns:
        The text of the agent's answer.

    Raises:
        RuntimeError: If none of the returned events carries any text.
    """
    # NOTE(review): run_debug is called without an explicit session id, so
    # every request may share one conversation — confirm against the ADK docs.
    events = await runner.run_debug(topic)

    # Do not assume the first event/part holds the answer: events may lack
    # content or text. Walk backwards and use the last event that actually
    # carries text, concatenating all of its text parts.
    for event in reversed(events or []):
        content = getattr(event, "content", None)
        parts = getattr(content, "parts", None) or []
        text = "".join(
            part.text for part in parts if getattr(part, "text", None)
        )
        if text.strip():
            return text

    raise RuntimeError("Pedagogy agent returned no text response.")
# --- ScriptWriter Agent ---
# System instruction for the script-writing model. It is sent verbatim, so
# any edit to the text below changes the model's behaviour.
SCRIPTWRITER_SYSTEM_PROMPT = """
You are a Teacher.
Your role is to take a simplified explanation created by the Pedagogy Agent and turn it into a clear, friendly teaching script suitable for a young child around the age of 5.
The script you produce will be used by a Text-to-Speech (TTS) system, so write in a way that sounds natural when spoken aloud.
Follow these steps:
1. Read the simplified explanation provided by the Pedagogy Agent.
2. Transform it into a spoken-style teaching script that:
- Uses short, clear sentences.
- Uses warm, encouraging language.
- Keeps a playful, curious tone suitable for a young child.
- Avoids complex words unless they were already explained.
- Includes gentle teacher-like transitions (“Let’s imagine…”, “Did you know…?”, “Now let’s think about…”).
- **Do NOT use sound effects or onomatopoeia (e.g., “boing,” “zoom,” “pow”).**
- **Do NOT repeat words for dramatic effect (e.g., “straight, straight, straight”).**
- Keep playfulness through ideas and imagery, not noises.
3. Add exactly 2 learning questions inside the story to spark curiosity.
- The questions must feel natural within the flow of the explanation.
- They should be simple, open-ended questions a young child can think about.
- Do NOT place both questions back-to-back.
4. Make sure the script is vivid and engaging:
- Use simple imagery.
- Ask simple rhetorical questions.
- Use examples familiar to young children.
5. Avoid:
- Any reference to agents, prompts, or system instructions.
- Visual descriptions that don't make sense in audio (“look at this picture”).
- Overly long paragraphs—keep pacing steady for TTS.
6. Output only the final teaching script, nothing else. No labels, no titles, no markdown.
"""
def run_scriptwriter(explanation: str) -> str:
    """Turn an ELI5 explanation into a spoken-style teaching script.

    Calls gemini-2.5-flash with SCRIPTWRITER_SYSTEM_PROMPT as the system
    instruction.

    Args:
        explanation: The simplified explanation to rewrite.

    Returns:
        The generated script, or the literal "⚠️ ScriptWriter failed." when
        the response contains no usable text.
    """
    model = genai.GenerativeModel(
        model_name="gemini-2.5-flash",
        system_instruction=SCRIPTWRITER_SYSTEM_PROMPT,
    )
    response = model.generate_content(
        f"Write a children's story based on this:\n{explanation}",
        generation_config=genai.GenerationConfig(
            temperature=0.9,
            max_output_tokens=4096,
        ),
    )

    # Preferred path: the quick accessor. It raises when the response cannot
    # be reduced to plain text, in which case we fall through.
    try:
        return response.text
    except (ValueError, AttributeError, IndexError):
        pass

    # Fallback: collect every text part of the first candidate rather than
    # only the first one, so a multi-part answer is not truncated.
    try:
        parts = response.candidates[0].content.parts
        pieces = [part.text for part in parts if getattr(part, "text", None)]
    except (AttributeError, IndexError, TypeError):
        pieces = []

    return "".join(pieces) if pieces else "⚠️ ScriptWriter failed."
# --- Audio Generator ---
def chunk_text(text, max_chars=4500):
    """Split *text* into pieces of at most *max_chars* characters.

    Each piece ends at the last ". " sentence boundary that fits inside the
    limit; if there is none, the piece is cut at exactly *max_chars*
    characters. Surrounding whitespace is stripped from the input and from
    the remainder after every cut.

    Args:
        text: The text to split.
        max_chars: Maximum length of each returned piece (must be >= 1).

    Returns:
        A list of pieces in order. Text that already fits (including the
        empty string) is returned as a single-element list.

    Raises:
        ValueError: If *max_chars* is smaller than 1.
    """
    if max_chars < 1:
        # A non-positive limit can never make progress in the loop below.
        raise ValueError("max_chars must be at least 1")

    text = text.strip()
    if len(text) <= max_chars:
        return [text]

    chunks = []
    while len(text) > max_chars:
        cut = text.rfind(". ", 0, max_chars)
        # Sentence boundary found: keep the period (cut + 1). Otherwise hard
        # cut at exactly max_chars — not max_chars + 1, which would exceed
        # the limit and could leave an empty trailing chunk.
        end = cut + 1 if cut != -1 else max_chars
        chunks.append(text[:end])
        text = text[end:].strip()

    # The remainder is never empty here: the text was stripped, so it ends in
    # a non-whitespace character that lies beyond every cut point.
    chunks.append(text)
    return chunks
def tts_segment(text):
    """Synthesize *text* with Cloud Text-to-Speech and return the audio.

    Uses the en-US voice "en-US-Journey-F", MP3 encoding, a speaking rate of
    0.94 and neutral pitch/volume gain.

    Args:
        text: Plain text to speak.

    Returns:
        The ``audio_content`` of the synthesis response (MP3-encoded audio).
    """
    request = dict(
        input=texttospeech.SynthesisInput(text=text),
        voice=texttospeech.VoiceSelectionParams(
            language_code="en-US",
            name="en-US-Journey-F",
        ),
        audio_config=texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3,
            speaking_rate=0.94,
            pitch=0.0,
            volume_gain_db=0.0,
        ),
    )
    return tts_client.synthesize_speech(**request).audio_content
def audio_writer(script_text: str, out="story.mp3"):
    """Render *script_text* to a single MP3 file and return its path.

    The script is split with chunk_text, each chunk is synthesized with
    tts_segment, and the pieces are joined with 150 ms of silence after each
    one (plus 200 ms of lead-in silence).

    Args:
        script_text: The full script to speak.
        out: Destination path of the combined MP3.

    Returns:
        *out*, unchanged.
    """
    audio = AudioSegment.silent(200)
    for chunk in chunk_text(script_text):
        # Nothing to speak; sending blank input to the TTS service would only
        # produce an error.
        if not chunk.strip():
            continue
        # Decode straight from memory instead of writing seg_N.mp3 files: the
        # fixed names were never cleaned up and would collide between runs.
        audio += AudioSegment.from_file(
            io.BytesIO(tts_segment(chunk)), format="mp3"
        )
        audio += AudioSegment.silent(150)

    # export() hands back the open output file; close it explicitly.
    audio.export(out, format="mp3").close()
    return out
# --- Full Pipeline ---
async def full_pipeline(topic: str):
    """Run topic -> ELI5 explanation -> teaching script -> MP3.

    Args:
        topic: The subject entered by the user.

    Returns:
        A tuple ``(eli5, script, audio_path)`` matching the three Gradio
        outputs.
    """
    eli5 = await run_pedagogy_async(topic)

    # run_scriptwriter and audio_writer are synchronous (network calls, audio
    # encoding, disk writes). Running them in a worker thread keeps the event
    # loop free instead of stalling it for the whole generation.
    script = await asyncio.to_thread(run_scriptwriter, eli5)

    # NOTE(review): the output path is fixed, so overlapping runs would
    # overwrite each other's file — confirm the app never runs this handler
    # concurrently, or switch to a per-request path.
    audio_path = await asyncio.to_thread(audio_writer, script, "story.mp3")
    return eli5, script, audio_path
# --- Gradio App ---
# One text input (the topic) wired to full_pipeline, whose three return
# values fill the explanation box, the script box and the audio player.
app = gr.Interface(
    title="🎧 TinyTutor — Full Pipeline",
    fn=full_pipeline,
    inputs=gr.Textbox(label="Your Topic"),
    outputs=[
        gr.Textbox(label="ELI5 Explanation", lines=8),
        gr.Textbox(label="Generated Story Script", lines=20),
        gr.Audio(label="Generated Audio"),
    ],
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    app.launch()