File size: 6,084 Bytes
9fed4f0
 
 
 
 
 
 
 
 
 
 
 
 
 
4a44806
9fed4f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
edbb725
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9fed4f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3df5c00
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import os
import asyncio
import google.generativeai as genai

from google.adk.agents import Agent
from google.adk.models.google_llm import Gemini
from google.adk.runners import InMemoryRunner
from google.adk.tools import google_search
from google.genai import types

from google.cloud import texttospeech
from pydub import AudioSegment
import gradio as gr

# TinyTutor App
# --- Configure API Keys ---
def _require_env(name: str) -> str:
    """Return environment variable *name*; raise if it is unset or empty."""
    value = os.getenv(name)
    if value:
        return value
    raise RuntimeError(f"❌ Missing {name} environment variable.")


# API key handed to the google.generativeai client.
GOOGLE_API_KEY = _require_env("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

# Service-account credentials arrive as a JSON string in the environment.
SERVICE_ACCOUNT_JSON = _require_env("GCP_VI_SERVICE_ACCOUNT_JSON")

# The JSON is written to a file in the working directory and
# GOOGLE_APPLICATION_CREDENTIALS is pointed at it before the
# Text-to-Speech client is constructed.
_CREDENTIALS_PATH = "tinytutor-tss-agent.json"
with open(_CREDENTIALS_PATH, "w") as credentials_file:
    credentials_file.write(SERVICE_ACCOUNT_JSON)

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = _CREDENTIALS_PATH
tts_client = texttospeech.TextToSpeechClient()

# --- Retry Options ---
# Retry policy passed to the Gemini model below: up to 5 attempts, retrying
# only on the listed HTTP status codes.
retry_config = types.HttpRetryOptions(
    http_status_codes=[429, 500, 503, 504],
    initial_delay=1,
    exp_base=7,
    attempts=5,
)

# --- Pedagogy Agent ---
# Model used by the agent; it carries the retry policy defined above.
_pedagogy_model = Gemini(model="gemini-2.5-flash-lite", retry_options=retry_config)

pedagogy_agent = Agent(
    name="PedagogyAgent",
    description="Explains topics in simple ELI5 style.",
    instruction="Explain the topic like I'm 5. Use google_search if needed.",
    model=_pedagogy_model,
    tools=[google_search],
)

# Single module-level runner shared by every call to run_pedagogy_async.
runner = InMemoryRunner(agent=pedagogy_agent)

async def run_pedagogy_async(topic: str) -> str:
    """Ask the pedagogy agent to explain *topic* and return the explanation text.

    The agent is run through the module-level ``runner``. The events it
    returns are scanned in order and the first non-empty text part is
    returned, which is the same value as before whenever the first event
    starts with a text part.

    Args:
        topic: The subject the user wants explained.

    Returns:
        The explanation text produced by the agent.

    Raises:
        RuntimeError: If none of the returned events carries any text.
    """
    # NOTE(review): run_debug is called without an explicit session, so all
    # calls presumably share one conversation history -- confirm against the
    # ADK documentation.
    events = await runner.run_debug(topic)

    for event in events or ():
        content = getattr(event, "content", None)
        # An event may have no content or no parts (e.g. a non-text event);
        # skip it instead of failing with an opaque attribute/index error.
        for part in getattr(content, "parts", None) or ():
            text = getattr(part, "text", None)
            if text:
                return text

    raise RuntimeError("PedagogyAgent returned no text response.")

# --- ScriptWriter Agent ---
# System instruction given to the ScriptWriter model (see run_scriptwriter).
# It is sent to the model verbatim, so any edit to this text changes the
# model's behavior.
SCRIPTWRITER_SYSTEM_PROMPT = """
You are a Teacher.

Your role is to take a simplified explanation created by the Pedagogy Agent and turn it into a clear, friendly teaching script suitable for a young child around the age of 5. 
The script you produce will be used by a Text-to-Speech (TTS) system, so write in a way that sounds natural when spoken aloud.

Follow these steps:

1. Read the simplified explanation provided by the Pedagogy Agent.
2. Transform it into a spoken-style teaching script that:
   - Uses short, clear sentences.
   - Uses warm, encouraging language.
   - Keeps a playful, curious tone suitable for a young child.
   - Avoids complex words unless they were already explained.
   - Includes gentle teacher-like transitions (“Let’s imagine…”, “Did you know…?”, “Now let’s think about…”).
   - **Do NOT use sound effects or onomatopoeia (e.g., “boing,” “zoom,” “pow”).**
   - **Do NOT repeat words for dramatic effect (e.g., “straight, straight, straight”).**
   - Keep playfulness through ideas and imagery, not noises.
3. Add exactly 2 learning questions inside the story to spark curiosity.
   - The questions must feel natural within the flow of the explanation.
   - They should be simple, open-ended questions a young child can think about.
   - Do NOT place both questions back-to-back.
4. Make sure the script is vivid and engaging:
   - Use simple imagery.
   - Ask simple rhetorical questions.
   - Use examples familiar to young children.
5. Avoid:
   - Any reference to agents, prompts, or system instructions.
   - Visual descriptions that don't make sense in audio (“look at this picture”).
   - Overly long paragraphs—keep pacing steady for TTS.
6. Output only the final teaching script, nothing else. No labels, no titles, no markdown.
"""

def run_scriptwriter(explanation: str) -> str:
    """Turn an ELI5 explanation into a spoken-style teaching script.

    Sends *explanation* to a Gemini model configured with
    ``SCRIPTWRITER_SYSTEM_PROMPT`` and returns the generated text. If the
    text cannot be read from the response, a fixed failure message is
    returned instead of raising.
    """
    scriptwriter = genai.GenerativeModel(
        model_name="gemini-2.5-flash",
        system_instruction=SCRIPTWRITER_SYSTEM_PROMPT,
    )
    settings = genai.GenerationConfig(temperature=0.9, max_output_tokens=4096)
    prompt = f"Write a children's story based on this:\n{explanation}"
    result = scriptwriter.generate_content(prompt, generation_config=settings)

    # Ways of reading the text, tried in order: the convenience accessor
    # first, then the first part of the first candidate.
    extractors = (
        lambda r: r.text,
        lambda r: r.candidates[0].content.parts[0].text,
    )
    for extract in extractors:
        try:
            return extract(result)
        except Exception:
            continue
    return "⚠️ ScriptWriter failed."

# --- Audio Generator ---
def _find_cut(text, max_chars):
    """Return an index in ``1..max_chars`` at which to split *text*.

    Expects ``len(text) > max_chars`` and ``text`` to start with a
    non-whitespace character.
    """
    # Best: end the chunk on a sentence terminator followed by whitespace.
    for i in range(max_chars - 1, -1, -1):
        if text[i] in ".!?" and text[i + 1].isspace():
            return i + 1
    # Next best: break between two words rather than inside one.
    for i in range(max_chars, 0, -1):
        if text[i].isspace():
            return i
    # No natural boundary at all: hard cut at the limit.
    return max_chars


def chunk_text(text, max_chars=4500):
    """Split *text* into chunks of at most *max_chars* characters.

    Chunks end on a sentence boundary (".", "!" or "?" followed by
    whitespace) when one exists inside the limit, otherwise on a word
    boundary, and only as a last resort in the middle of a word.
    Surrounding whitespace is stripped from every chunk and empty chunks
    are never produced.

    Args:
        text: The text to split.
        max_chars: Maximum length of each chunk, in characters.

    Returns:
        A list of non-empty chunks in original order; an empty list when
        *text* is empty or whitespace only.

    Raises:
        ValueError: If *max_chars* is less than 1.
    """
    # NOTE(review): the limit is counted in characters; if the TTS backend
    # limits input by bytes, multi-byte text may need a smaller value.
    if max_chars < 1:
        raise ValueError("max_chars must be at least 1")

    remaining = text.strip()
    chunks = []
    while len(remaining) > max_chars:
        cut = _find_cut(remaining, max_chars)
        chunks.append(remaining[:cut].rstrip())
        # Drop the separator so the next chunk starts on real content.
        remaining = remaining[cut:].lstrip()
    if remaining:
        chunks.append(remaining)
    return chunks

def tts_segment(text):
    """Synthesize *text* with the module-level TTS client and return the audio bytes.

    The request asks for MP3 output using the "en-US-Journey-F" voice at a
    speaking rate of 0.94.
    """
    request = {
        "input": texttospeech.SynthesisInput(text=text),
        "voice": texttospeech.VoiceSelectionParams(
            language_code="en-US",
            name="en-US-Journey-F",
        ),
        "audio_config": texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3,
            speaking_rate=0.94,
            pitch=0.0,
            volume_gain_db=0.0,
        ),
    }
    return tts_client.synthesize_speech(**request).audio_content

def audio_writer(script_text: str, out="story.mp3"):
    """Synthesize *script_text* to speech and write it to *out* as one MP3.

    The script is split with ``chunk_text``, each chunk is synthesized with
    ``tts_segment``, and the segments are concatenated with a short silence
    before the first segment and after every segment.

    Args:
        script_text: The full script to read aloud.
        out: Path of the MP3 file to write.

    Returns:
        The *out* path, unchanged.
    """
    # Local import keeps this block self-contained.
    import io

    audio = AudioSegment.silent(200)
    for chunk in chunk_text(script_text):
        if not chunk.strip():
            # Nothing to say; don't send an empty request to the TTS service.
            continue
        # Decode the MP3 bytes straight from memory. This avoids leaving
        # seg_N.mp3 files behind in the working directory and avoids
        # fixed temp-file names shared between calls.
        audio += AudioSegment.from_mp3(io.BytesIO(tts_segment(chunk)))
        audio += AudioSegment.silent(150)

    # export() hands back the open output file; close it so the handle is
    # released immediately rather than whenever it gets garbage-collected.
    audio.export(out, format="mp3").close()
    return out

# --- Full Pipeline ---
async def full_pipeline(topic: str):
    """Run topic -> explanation -> teaching script -> audio.

    Returns a tuple ``(explanation, script, audio_path)`` in the order the
    Gradio interface lists its outputs.
    """
    explanation = await run_pedagogy_async(topic)
    # The two steps below are plain synchronous calls made from this
    # coroutine, so each runs to completion before the coroutine continues.
    teaching_script = run_scriptwriter(explanation)
    return explanation, teaching_script, audio_writer(teaching_script, "story.mp3")

# --- Gradio App ---
# Input component: the topic typed by the user.
_topic_box = gr.Textbox(label="Your Topic")

# Output components, in the same order as the tuple returned by full_pipeline.
_result_components = [
    gr.Textbox(label="ELI5 Explanation", lines=8),
    gr.Textbox(label="Generated Story Script", lines=20),
    gr.Audio(label="Generated Audio"),
]

app = gr.Interface(
    title="🎧 TinyTutor — Full Pipeline",
    fn=full_pipeline,
    inputs=_topic_box,
    outputs=_result_components,
)

if __name__ == "__main__":
    app.launch()