# Source: Hugging Face Space "cwattsnogueira/tinytutor" (status: Running; file size: 6,084 bytes).

import os
import asyncio
import google.generativeai as genai

from google.adk.agents import Agent
from google.adk.models.google_llm import Gemini
from google.adk.runners import InMemoryRunner
from google.adk.tools import google_search
from google.genai import types

from google.cloud import texttospeech
from pydub import AudioSegment
import gradio as gr

# TinyTutor App
# --- Configure API Keys ---
# Gemini API key, read from the environment and handed to the
# `google.generativeai` client. Fail fast at import time when it is absent.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise RuntimeError("❌ Missing GOOGLE_API_KEY environment variable.")
genai.configure(api_key=GOOGLE_API_KEY)

# Service-account key (raw JSON text) used for the Text-to-Speech client.
SERVICE_ACCOUNT_JSON = os.getenv("GCP_VI_SERVICE_ACCOUNT_JSON")
if not SERVICE_ACCOUNT_JSON:
    raise RuntimeError("❌ Missing GCP_VI_SERVICE_ACCOUNT_JSON environment variable.")

_CREDENTIALS_PATH = "tinytutor-tss-agent.json"

# The file contains a private key, so create it readable/writable by the
# owner only instead of relying on the process umask, and pin the encoding
# rather than inheriting the locale default.
_credentials_fd = os.open(
    _CREDENTIALS_PATH,
    os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
    0o600,
)
with os.fdopen(_credentials_fd, "w", encoding="utf-8") as key_file:
    key_file.write(SERVICE_ACCOUNT_JSON)
# The mode passed to os.open() only applies when the file is created; tighten
# a file left over from an earlier run as well.
os.chmod(_CREDENTIALS_PATH, 0o600)

# Point GOOGLE_APPLICATION_CREDENTIALS at the key file before the client is
# constructed.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = _CREDENTIALS_PATH
tts_client = texttospeech.TextToSpeechClient()

# --- Retry Options ---
# Retry policy passed to the Gemini model wrapper used by the Pedagogy agent.
# NOTE(review): exact back-off semantics of `exp_base` / `initial_delay` are
# defined by google.genai's HttpRetryOptions — confirm against its docs.
retry_config = types.HttpRetryOptions(
    # Retry only on rate limiting (429) and server-side failures (500/503/504).
    http_status_codes=[429, 500, 503, 504],
    initial_delay=1,
    exp_base=7,
    attempts=5,
)

# --- Pedagogy Agent ---
# Agent that produces the "explain like I'm 5" text. It runs on
# gemini-2.5-flash-lite with the shared retry policy and is given the
# google_search tool.
pedagogy_agent = Agent(
    name="PedagogyAgent",
    description="Explains topics in simple ELI5 style.",
    instruction="Explain the topic like I'm 5. Use google_search if needed.",
    tools=[google_search],
    model=Gemini(retry_options=retry_config, model="gemini-2.5-flash-lite"),
)

# Module-wide runner that executes the Pedagogy agent.
runner = InMemoryRunner(agent=pedagogy_agent)

async def run_pedagogy_async(topic: str) -> str:
    """Ask the Pedagogy agent to explain *topic* and return the explanation.

    The result of ``runner.run_debug`` is treated as a sequence of events,
    each of which may expose ``content.parts[*].text``. Instead of blindly
    indexing the first part of the first event, the events are scanned in
    order and the first one that actually carries text is used, with all of
    its text parts joined so a multi-part reply is not truncated.

    Args:
        topic: The subject the user wants explained.

    Returns:
        The explanation text produced by the agent.

    Raises:
        RuntimeError: If no event in the response contains any text.
    """
    events = await runner.run_debug(topic)

    for event in events or []:
        # Events without content (or with empty parts) are skipped rather
        # than raising AttributeError/IndexError.
        content = getattr(event, "content", None)
        parts = getattr(content, "parts", None) or []
        text = "".join(
            part.text for part in parts if getattr(part, "text", None)
        )
        if text.strip():
            return text

    raise RuntimeError("PedagogyAgent returned no text for the given topic.")

# --- ScriptWriter Agent ---
# System instruction given to the ScriptWriter model in run_scriptwriter().
# It tells the model to turn the Pedagogy agent's explanation into a spoken
# teaching script for a roughly five-year-old listener, with exactly two
# embedded questions and no labels, titles, or markdown, because the output
# is fed to Text-to-Speech.
# NOTE: this text is sent to the model verbatim; editing it changes behavior.
SCRIPTWRITER_SYSTEM_PROMPT = """
You are a Teacher.

Your role is to take a simplified explanation created by the Pedagogy Agent and turn it into a clear, friendly teaching script suitable for a young child around the age of 5. 
The script you produce will be used by a Text-to-Speech (TTS) system, so write in a way that sounds natural when spoken aloud.

Follow these steps:

1. Read the simplified explanation provided by the Pedagogy Agent.
2. Transform it into a spoken-style teaching script that:
   - Uses short, clear sentences.
   - Uses warm, encouraging language.
   - Keeps a playful, curious tone suitable for a young child.
   - Avoids complex words unless they were already explained.
   - Includes gentle teacher-like transitions (“Let’s imagine…”, “Did you know…?”, “Now let’s think about…”).
   - **Do NOT use sound effects or onomatopoeia (e.g., “boing,” “zoom,” “pow”).**
   - **Do NOT repeat words for dramatic effect (e.g., “straight, straight, straight”).**
   - Keep playfulness through ideas and imagery, not noises.
3. Add exactly 2 learning questions inside the story to spark curiosity.
   - The questions must feel natural within the flow of the explanation.
   - They should be simple, open-ended questions a young child can think about.
   - Do NOT place both questions back-to-back.
4. Make sure the script is vivid and engaging:
   - Use simple imagery.
   - Ask simple rhetorical questions.
   - Use examples familiar to young children.
5. Avoid:
   - Any reference to agents, prompts, or system instructions.
   - Visual descriptions that don't make sense in audio (“look at this picture”).
   - Overly long paragraphs—keep pacing steady for TTS.
6. Output only the final teaching script, nothing else. No labels, no titles, no markdown.
"""

def run_scriptwriter(explanation: str) -> str:
    """Turn an ELI5 explanation into a spoken teaching script.

    Sends *explanation* to gemini-2.5-flash under SCRIPTWRITER_SYSTEM_PROMPT
    and returns the generated text. Text extraction is best-effort: the
    response's ``text`` accessor is tried first, then the first part of the
    first candidate; if both fail a fixed warning string is returned instead
    of raising.

    Args:
        explanation: The simplified explanation to rewrite.

    Returns:
        The generated script, or "⚠️ ScriptWriter failed." when no text
        could be read from the response.
    """
    writer = genai.GenerativeModel(
        system_instruction=SCRIPTWRITER_SYSTEM_PROMPT,
        model_name="gemini-2.5-flash",
    )
    prompt = f"Write a children's story based on this:\n{explanation}"
    settings = genai.GenerationConfig(max_output_tokens=4096, temperature=0.9)
    reply = writer.generate_content(prompt, generation_config=settings)

    # Accessors in order of preference; the first one that does not raise wins.
    accessors = (
        lambda: reply.text,
        lambda: reply.candidates[0].content.parts[0].text,
    )
    for read_text in accessors:
        try:
            return read_text()
        except Exception:
            continue
    return "⚠️ ScriptWriter failed."

# --- Audio Generator ---
# Two-character markers that end a sentence: punctuation followed by a space
# or a newline.
_SENTENCE_BREAKS = (". ", "! ", "? ", ".\n", "!\n", "?\n")


def _split_point(text, max_chars):
    """Return the index at which to cut *text* so the head is <= max_chars."""
    # Look one character past the limit so punctuation sitting on the last
    # allowed position still counts when whitespace follows it.
    window = text[:max_chars + 1]

    sentence_end = max(window.rfind(mark) for mark in _SENTENCE_BREAKS)
    if sentence_end != -1:
        return sentence_end + 1  # keep the punctuation with the head

    # No sentence boundary: fall back to the last word gap so words stay whole.
    gap = max(window.rfind(" "), window.rfind("\n"))
    if gap > 0:
        return gap

    # One unbroken run of characters: hard cut exactly at the limit.
    return max_chars


def chunk_text(text, max_chars=4500):
    """Split *text* into pieces of at most *max_chars* characters.

    Cuts are made, in order of preference, after the last sentence end
    (".", "!" or "?" followed by a space or newline) that fits, at the last
    space/newline that fits, or — for a single unbroken run — exactly at
    *max_chars*. Whitespace around each cut is dropped, and no chunk is ever
    longer than *max_chars* or empty when the input had to be split.

    Args:
        text: The text to split; surrounding whitespace is stripped first.
        max_chars: Maximum length of each chunk. Must be at least 1.

    Returns:
        A list of chunks in original order. Text that already fits is
        returned as a single-element list (``[""]`` for blank input).

    Raises:
        ValueError: If *max_chars* is less than 1.
    """
    if max_chars < 1:
        raise ValueError("max_chars must be at least 1")

    remaining = text.strip()
    if len(remaining) <= max_chars:
        return [remaining]

    chunks = []
    while len(remaining) > max_chars:
        cut = _split_point(remaining, max_chars)
        chunks.append(remaining[:cut].rstrip())
        remaining = remaining[cut:].lstrip()
    # The tail always ends with the (non-whitespace) last character of the
    # stripped input, so it is never empty here.
    chunks.append(remaining)
    return chunks

def tts_segment(text):
    """Synthesize *text* with the module's TTS client and return MP3 bytes.

    Uses the fixed en-US "en-US-Journey-F" voice at a speaking rate of 0.94
    with neutral pitch and volume gain.

    Args:
        text: Plain text to speak.

    Returns:
        The ``audio_content`` of the synthesis response.
    """
    request = {
        "input": texttospeech.SynthesisInput(text=text),
        "voice": texttospeech.VoiceSelectionParams(
            name="en-US-Journey-F",
            language_code="en-US",
        ),
        "audio_config": texttospeech.AudioConfig(
            volume_gain_db=0.0,
            pitch=0.0,
            speaking_rate=0.94,
            audio_encoding=texttospeech.AudioEncoding.MP3,
        ),
    }
    return tts_client.synthesize_speech(**request).audio_content

def audio_writer(script_text: str, out="story.mp3"):
    """Render *script_text* to a single MP3 file and return its path.

    The script is split with chunk_text(), each chunk is synthesized with
    tts_segment(), and the segments are joined with 200 ms of leading silence
    and 150 ms of silence after every segment.

    Segments are decoded straight from memory, so no fixed-name
    ``seg_N.mp3`` scratch files are left in the working directory or
    overwritten by another call.

    Args:
        script_text: The text to speak.
        out: Destination path of the combined MP3.

    Returns:
        *out*, unchanged.

    Raises:
        ValueError: If *script_text* is empty or only whitespace.
    """
    import io

    if not script_text or not script_text.strip():
        raise ValueError("script_text is empty; nothing to synthesize.")

    audio = AudioSegment.silent(200)
    pause = AudioSegment.silent(150)
    for chunk in chunk_text(script_text):
        if not chunk:
            # Never send an empty request to the TTS service.
            continue
        audio += AudioSegment.from_mp3(io.BytesIO(tts_segment(chunk)))
        audio += pause

    # export() hands back the open output file; close it so the data is
    # flushed and the handle is not leaked before the path is returned.
    audio.export(out, format="mp3").close()
    return out

# --- Full Pipeline ---
async def full_pipeline(topic: str):
    """Run topic -> ELI5 explanation -> teaching script -> narrated MP3.

    run_scriptwriter() and audio_writer() are synchronous and spend their
    time on network calls and audio encoding, so they are run in worker
    threads to keep the event loop free while they work. Each call writes
    its audio to a fresh temporary file instead of a shared "story.mp3", so
    one request cannot overwrite the audio of another.

    Args:
        topic: The subject entered by the user.

    Returns:
        A tuple ``(eli5, script, audio_path)``.

    Raises:
        ValueError: If *topic* is empty or only whitespace.
    """
    import tempfile

    if not topic or not topic.strip():
        raise ValueError("Please provide a topic to explain.")

    eli5 = await run_pedagogy_async(topic)
    script = await asyncio.to_thread(run_scriptwriter, eli5)

    # Reserve a unique output path; the descriptor itself is not needed
    # because audio_writer() opens the path on its own. The file is left in
    # the temp directory for the caller to serve.
    fd, target = tempfile.mkstemp(prefix="tinytutor_", suffix=".mp3")
    os.close(fd)
    audio_path = await asyncio.to_thread(audio_writer, script, target)
    return eli5, script, audio_path

# --- Gradio App ---
# Single-form UI: one topic textbox in; explanation, script and audio out,
# in the same order as the tuple returned by full_pipeline().
app = gr.Interface(
    title="🎧 TinyTutor — Full Pipeline",
    fn=full_pipeline,
    inputs=gr.Textbox(label="Your Topic"),
    outputs=[
        gr.Textbox(lines=8, label="ELI5 Explanation"),
        gr.Textbox(lines=20, label="Generated Story Script"),
        gr.Audio(label="Generated Audio"),
    ],
)

# Start the web server only when executed as a script.
if __name__ == "__main__":
    app.launch()