import asyncio
import logging
import os

import google.generativeai as genai
from google.adk.agents import Agent
from google.adk.models.google_llm import Gemini
from google.adk.runners import InMemoryRunner
from google.adk.tools import google_search
from google.genai import types
from google.cloud import texttospeech
from pydub import AudioSegment
import gradio as gr

# TinyTutor App

logger = logging.getLogger(__name__)

# --- Configure API Keys ---
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise RuntimeError("❌ Missing GOOGLE_API_KEY environment variable.")
genai.configure(api_key=GOOGLE_API_KEY)

SERVICE_ACCOUNT_JSON = os.getenv("GCP_VI_SERVICE_ACCOUNT_JSON")
if not SERVICE_ACCOUNT_JSON:
    raise RuntimeError("❌ Missing GCP_VI_SERVICE_ACCOUNT_JSON environment variable.")

# The Text-to-Speech client below is constructed without explicit credentials,
# so the service-account key is materialised as a file and advertised through
# GOOGLE_APPLICATION_CREDENTIALS. The file holds a private key: create it
# owner-only (0o600) rather than with the process default mode, and re-apply
# the mode in case a looser file was left behind by an earlier run.
# NOTE(review): the key still lives in the working directory; consider
# TextToSpeechClient.from_service_account_info(...) if nothing else depends on
# GOOGLE_APPLICATION_CREDENTIALS being set.
_CREDENTIALS_PATH = "tinytutor-tss-agent.json"
_credentials_fd = os.open(
    _CREDENTIALS_PATH, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600
)
with os.fdopen(_credentials_fd, "w", encoding="utf-8") as f:
    f.write(SERVICE_ACCOUNT_JSON)
os.chmod(_CREDENTIALS_PATH, 0o600)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = _CREDENTIALS_PATH

tts_client = texttospeech.TextToSpeechClient()

# --- Retry Options ---
# Retries apply only to the listed HTTP status codes (rate limiting and
# transient server errors).
retry_config = types.HttpRetryOptions(
    attempts=5,
    exp_base=7,
    initial_delay=1,
    http_status_codes=[429, 500, 503, 504],
)

# --- Pedagogy Agent ---
pedagogy_agent = Agent(
    name="PedagogyAgent",
    model=Gemini(model="gemini-2.5-flash-lite", retry_options=retry_config),
    description="Explains topics in simple ELI5 style.",
    instruction="Explain the topic like I'm 5. Use google_search if needed.",
    tools=[google_search],
)

runner = InMemoryRunner(agent=pedagogy_agent)


async def run_pedagogy_async(topic: str) -> str:
    """Ask the Pedagogy agent to explain *topic* and return its text answer.

    Args:
        topic: The subject the user wants explained.

    Returns:
        The concatenated text parts of the last event that carries any text.

    Raises:
        RuntimeError: If no event in the agent's response contains text.
    """
    # NOTE(review): run_debug is assumed to return a sequence of events whose
    # `.content.parts[*].text` hold the model output — confirm against the
    # installed ADK version.
    events = await runner.run_debug(topic)

    # Walk backwards so that, when several events are produced, the final
    # answer wins over earlier intermediate events. Joining the parts keeps
    # answers that are split across more than one part intact.
    for event in reversed(events or []):
        content = getattr(event, "content", None)
        parts = getattr(content, "parts", None) or []
        text = "".join(
            part.text for part in parts if getattr(part, "text", None)
        )
        if text.strip():
            return text

    raise RuntimeError("PedagogyAgent returned no text response.")


# --- ScriptWriter Agent ---
# The prompt is assembled from adjacent string literals so the source stays
# readable; the resulting value (including its leading/trailing space and the
# single newline after "1. ") is intentionally left exactly as it was.
SCRIPTWRITER_SYSTEM_PROMPT = (
    " You are a Teacher. "
    "Your role is to take a simplified explanation created by the Pedagogy Agent "
    "and turn it into a clear, friendly teaching script suitable for a young "
    "child around the age of 5. "
    "The script you produce will be used by a Text-to-Speech (TTS) system, so "
    "write in a way that sounds natural when spoken aloud. "
    "Follow these steps: "
    "1. \n"
    "Read the simplified explanation provided by the Pedagogy Agent. "
    "2. Transform it into a spoken-style teaching script that: "
    "- Uses short, clear sentences. "
    "- Uses warm, encouraging language. "
    "- Keeps a playful, curious tone suitable for a young child. "
    "- Avoids complex words unless they were already explained. "
    "- Includes gentle teacher-like transitions "
    "(“Let’s imagine…”, “Did you know…?”, “Now let’s think about…”). "
    "- **Do NOT use sound effects or onomatopoeia "
    "(e.g., “boing,” “zoom,” “pow”).** "
    "- **Do NOT repeat words for dramatic effect "
    "(e.g., “straight, straight, straight”).** "
    "- Keep playfulness through ideas and imagery, not noises. "
    "3. Add exactly 2 learning questions inside the story to spark curiosity. "
    "- The questions must feel natural within the flow of the explanation. "
    "- They should be simple, open-ended questions a young child can think about. "
    "- Do NOT place both questions back-to-back. "
    "4. Make sure the script is vivid and engaging: "
    "- Use simple imagery. "
    "- Ask simple rhetorical questions. "
    "- Use examples familiar to young children. "
    "5. Avoid: "
    "- Any reference to agents, prompts, or system instructions. "
    "- Visual descriptions that don't make sense in audio "
    "(“look at this picture”). "
    "- Overly long paragraphs—keep pacing steady for TTS. "
    "6. Output only the final teaching script, nothing else. "
    "No labels, no titles, no markdown. "
)


def run_scriptwriter(explanation: str) -> str:
    """Turn an ELI5 explanation into a spoken-style teaching script.

    Args:
        explanation: The simplified explanation produced by the Pedagogy agent.

    Returns:
        The generated script, or the literal placeholder
        "⚠️ ScriptWriter failed." when no text can be extracted from the
        model response.
    """
    model = genai.GenerativeModel(
        model_name="gemini-2.5-flash",
        system_instruction=SCRIPTWRITER_SYSTEM_PROMPT,
    )
    response = model.generate_content(
        f"Write a children's story based on this:\n{explanation}",
        generation_config=genai.GenerationConfig(
            temperature=0.9,
            max_output_tokens=4096,
        ),
    )

    # Best-effort extraction: try the `.text` accessor first, then the first
    # part of the first candidate. Failures are logged instead of being
    # swallowed silently so a broken response can be diagnosed.
    try:
        return response.text
    except Exception:
        logger.warning(
            "ScriptWriter: response.text unavailable, trying first candidate",
            exc_info=True,
        )

    try:
        return response.candidates[0].content.parts[0].text
    except Exception:
        logger.exception("ScriptWriter: no usable text in model response")
        return "⚠️ ScriptWriter failed."
# --- Audio Generator ---
def chunk_text(text, max_chars=4500):
    """Split *text* into pieces of at most *max_chars* characters.

    Each cut is made after the last ". " sentence boundary that fits. When a
    window contains no sentence boundary the cut falls on the last space that
    fits, and only as a last resort in the middle of a word.

    Args:
        text: The text to split; surrounding whitespace is stripped first.
        max_chars: Upper bound on the length of every returned chunk.

    Returns:
        A list of chunks in reading order. Text that already fits is returned
        as a single-element list (``[""]`` for blank input).

    Raises:
        ValueError: If *max_chars* is smaller than 1.
    """
    # NOTE(review): the limit is counted in characters; if the TTS service
    # limits request size in bytes, text heavy in multi-byte characters may
    # need a lower max_chars — confirm against the API quota.
    if max_chars < 1:
        raise ValueError("max_chars must be at least 1")

    text = text.strip()
    if len(text) <= max_chars:
        return [text]

    chunks = []
    while len(text) > max_chars:
        sentence_end = text.rfind(". ", 0, max_chars)
        if sentence_end != -1:
            end = sentence_end + 1  # keep the period with its sentence
        else:
            # A space sitting exactly at index max_chars still yields a chunk
            # of max_chars characters, hence the +1 on the search window.
            space = text.rfind(" ", 0, max_chars + 1)
            end = space if space != -1 else max_chars
        chunks.append(text[:end].rstrip())
        text = text[end:].strip()
    chunks.append(text)
    return chunks


def tts_segment(text):
    """Synthesize *text* with the module-level TTS client.

    Args:
        text: Plain text for a single synthesis request.

    Returns:
        The ``audio_content`` of the synthesis response (MP3 encoding is
        requested).
    """
    # NOTE(review): confirm that the selected voice accepts a custom
    # speaking_rate; some voice families reject prosody parameters.
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        name="en-US-Journey-F",
    )
    audio_cfg = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
        speaking_rate=0.94,
        pitch=0.0,
        volume_gain_db=0.0,
    )
    response = tts_client.synthesize_speech(
        input=texttospeech.SynthesisInput(text=text),
        voice=voice,
        audio_config=audio_cfg,
    )
    return response.audio_content


def audio_writer(script_text: str, out="story.mp3"):
    """Render *script_text* to a single MP3 file.

    The script is split with ``chunk_text``, each chunk is synthesized with
    ``tts_segment``, and the pieces are joined with short silences between
    them.

    Args:
        script_text: The script to narrate.
        out: Destination path of the combined MP3.

    Returns:
        The *out* path that was written.

    Raises:
        ValueError: If *script_text* contains nothing to narrate.
    """
    import io

    chunks = [chunk for chunk in chunk_text(script_text) if chunk]
    if not chunks:
        raise ValueError("script_text is empty; nothing to synthesize")

    audio = AudioSegment.silent(200)
    for chunk in chunks:
        # Decode straight from memory: no seg_N.mp3 files are left behind in
        # the working directory, and concurrent calls cannot overwrite each
        # other's intermediate segments.
        audio += AudioSegment.from_mp3(io.BytesIO(tts_segment(chunk)))
        audio += AudioSegment.silent(150)

    # export() hands back the open output file object; close it explicitly so
    # the handle is released deterministically.
    audio.export(out, format="mp3").close()
    return out


# --- Full Pipeline ---
async def full_pipeline(topic: str):
    """Run topic -> ELI5 explanation -> teaching script -> narrated audio.

    Args:
        topic: The subject entered by the user.

    Returns:
        A tuple ``(eli5, script, audio_path)``.

    Raises:
        gr.Error: If *topic* is empty or only whitespace.
    """
    topic = (topic or "").strip()
    if not topic:
        raise gr.Error("Please enter a topic.")

    eli5 = await run_pedagogy_async(topic)

    # The script writer and the audio writer are synchronous (network calls
    # and audio encoding); run them in a worker thread so the event loop is
    # not blocked while they work.
    script = await asyncio.to_thread(run_scriptwriter, eli5)
    # NOTE(review): the output path is fixed, so two requests running at the
    # same time write to the same file.
    audio_path = await asyncio.to_thread(audio_writer, script, "story.mp3")
    return eli5, script, audio_path


# --- Gradio App ---
app = gr.Interface(
    fn=full_pipeline,
    inputs=gr.Textbox(label="Your Topic"),
    outputs=[
        gr.Textbox(label="ELI5 Explanation", lines=8),
        gr.Textbox(label="Generated Story Script", lines=20),
        gr.Audio(label="Generated Audio"),
    ],
    title="🎧 TinyTutor — Full Pipeline",
)

if __name__ == "__main__":
    app.launch()