Spaces:
Running
Running
File size: 6,084 Bytes
9fed4f0 4a44806 9fed4f0 edbb725 9fed4f0 3df5c00 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 |
import asyncio
import io
import json
import logging
import os
import tempfile
import uuid

import google.generativeai as genai
import gradio as gr
from google.adk.agents import Agent
from google.adk.models.google_llm import Gemini
from google.adk.runners import InMemoryRunner
from google.adk.tools import google_search
from google.cloud import texttospeech
from google.genai import types
from pydub import AudioSegment
# TinyTutor App
# --- Configure API Keys ---
# Both secrets come from the environment; the app refuses to start without them.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise RuntimeError("❌ Missing GOOGLE_API_KEY environment variable.")
genai.configure(api_key=GOOGLE_API_KEY)

SERVICE_ACCOUNT_JSON = os.getenv("GCP_VI_SERVICE_ACCOUNT_JSON")
if not SERVICE_ACCOUNT_JSON:
    raise RuntimeError("❌ Missing GCP_VI_SERVICE_ACCOUNT_JSON environment variable.")

# Fail fast with a clear message instead of a credentials-loading error later.
try:
    json.loads(SERVICE_ACCOUNT_JSON)
except json.JSONDecodeError as err:
    raise RuntimeError(
        "❌ GCP_VI_SERVICE_ACCOUNT_JSON is not valid JSON."
    ) from err

# The key must exist as a file because GOOGLE_APPLICATION_CREDENTIALS holds a
# path. mkstemp() creates the file readable/writable by the owner only, and
# keeps the secret out of the application's working directory.
_credentials_fd, _credentials_path = tempfile.mkstemp(
    prefix="tinytutor-tss-agent-", suffix=".json"
)
with os.fdopen(_credentials_fd, "w", encoding="utf-8") as f:
    f.write(SERVICE_ACCOUNT_JSON)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = _credentials_path

# Created after the env var is set so the client can find the key file.
tts_client = texttospeech.TextToSpeechClient()
# --- Retry Options ---
# HTTP retry policy handed to the Gemini model used by the Pedagogy agent.
# Only the status codes listed here are configured as retryable.
# NOTE(review): presumably exponential backoff (1 s initial delay, base 7,
# at most 5 attempts) — confirm against the google-genai HttpRetryOptions docs.
_RETRYABLE_STATUS_CODES = [429, 500, 503, 504]
retry_config = types.HttpRetryOptions(
    http_status_codes=_RETRYABLE_STATUS_CODES,
    initial_delay=1,
    exp_base=7,
    attempts=5,
)
# --- Pedagogy Agent ---
# First pipeline stage: an ADK agent that explains a topic in ELI5 style and
# is given the google_search tool.
_pedagogy_model = Gemini(
    model="gemini-2.5-flash-lite",
    retry_options=retry_config,
)
pedagogy_agent = Agent(
    name="PedagogyAgent",
    model=_pedagogy_model,
    description="Explains topics in simple ELI5 style.",
    instruction="Explain the topic like I'm 5. Use google_search if needed.",
    tools=[google_search],
)

# Single in-memory runner through which the agent is invoked.
runner = InMemoryRunner(agent=pedagogy_agent)
async def run_pedagogy_async(topic: str) -> str:
    """Ask the Pedagogy agent to explain *topic* and return its answer text.

    Args:
        topic: The subject the user wants explained.

    Returns:
        The text of the last event that carries any text, with all of that
        event's text parts joined together.

    Raises:
        RuntimeError: If the run produced no text at all.
    """
    # run_debug() falls back to one fixed session id when none is given, which
    # would make every request share a single, ever-growing conversation.
    # A fresh id per call keeps topics independent of each other.
    # NOTE(review): sessions created here stay in the in-memory session store
    # for the life of the process — confirm that is acceptable for this app.
    session_id = f"tinytutor-{uuid.uuid4().hex}"
    events = await runner.run_debug(topic, session_id=session_id)

    # The final answer is the last text-bearing event; earlier events may have
    # no content or only non-text parts.
    for event in reversed(events or []):
        content = getattr(event, "content", None)
        parts = getattr(content, "parts", None) or []
        text = "".join(
            part.text for part in parts if getattr(part, "text", None)
        )
        if text.strip():
            return text

    raise RuntimeError("PedagogyAgent returned no text.")
# --- ScriptWriter Agent ---
# System prompt for the script-writing model. run_scriptwriter() passes it as
# `system_instruction`. It is runtime text sent to the model verbatim, so any
# edit to the wording changes the generated scripts.
SCRIPTWRITER_SYSTEM_PROMPT = """
You are a Teacher.
Your role is to take a simplified explanation created by the Pedagogy Agent and turn it into a clear, friendly teaching script suitable for a young child around the age of 5.
The script you produce will be used by a Text-to-Speech (TTS) system, so write in a way that sounds natural when spoken aloud.
Follow these steps:
1. Read the simplified explanation provided by the Pedagogy Agent.
2. Transform it into a spoken-style teaching script that:
- Uses short, clear sentences.
- Uses warm, encouraging language.
- Keeps a playful, curious tone suitable for a young child.
- Avoids complex words unless they were already explained.
- Includes gentle teacher-like transitions (“Let’s imagine…”, “Did you know…?”, “Now let’s think about…”).
- **Do NOT use sound effects or onomatopoeia (e.g., “boing,” “zoom,” “pow”).**
- **Do NOT repeat words for dramatic effect (e.g., “straight, straight, straight”).**
- Keep playfulness through ideas and imagery, not noises.
3. Add exactly 2 learning questions inside the story to spark curiosity.
- The questions must feel natural within the flow of the explanation.
- They should be simple, open-ended questions a young child can think about.
- Do NOT place both questions back-to-back.
4. Make sure the script is vivid and engaging:
- Use simple imagery.
- Ask simple rhetorical questions.
- Use examples familiar to young children.
5. Avoid:
- Any reference to agents, prompts, or system instructions.
- Visual descriptions that don't make sense in audio (“look at this picture”).
- Overly long paragraphs—keep pacing steady for TTS.
6. Output only the final teaching script, nothing else. No labels, no titles, no markdown.
"""
def run_scriptwriter(explanation: str) -> str:
    """Turn an ELI5 explanation into a spoken-style teaching script.

    Args:
        explanation: The simplified explanation to base the script on.

    Returns:
        The generated script, or the literal fallback
        "⚠️ ScriptWriter failed." when the response holds no usable text.
        Errors raised by the generation request itself are not caught.
    """
    log = logging.getLogger(__name__)

    model = genai.GenerativeModel(
        model_name="gemini-2.5-flash",
        system_instruction=SCRIPTWRITER_SYSTEM_PROMPT,
    )
    response = model.generate_content(
        f"Write a children's story based on this:\n{explanation}",
        generation_config=genai.GenerationConfig(
            temperature=0.9,
            max_output_tokens=4096,
        ),
    )

    # Fast path: the quick accessor. It raises ValueError when the response
    # cannot be reduced to simple text, so fall through to the parts then.
    try:
        return response.text
    except ValueError as err:
        log.warning("response.text unavailable, reading parts instead: %s", err)

    # Fallback: join every text part so a multi-part answer is not truncated
    # to its first piece.
    try:
        parts = response.candidates[0].content.parts
        text = "".join(
            part.text for part in parts if getattr(part, "text", None)
        )
    except (IndexError, AttributeError) as err:
        log.warning("ScriptWriter response has no readable candidate: %s", err)
        text = ""

    if text:
        return text

    # Best-effort contract kept from the original: report failure as text
    # rather than raising.
    log.error("ScriptWriter produced no text.")
    return "⚠️ ScriptWriter failed."
# --- Audio Generator ---
def chunk_text(text, max_chars=4500):
    """Split *text* into pieces of at most *max_chars* characters.

    Each piece is cut, in order of preference, after the last sentence end
    (".", "!" or "?" followed by a space or newline) that fits in the limit,
    at the last space/newline that fits, or as a hard cut at exactly
    *max_chars* when no whitespace is available.

    Args:
        text: The text to split; surrounding whitespace is ignored.
        max_chars: Maximum length of each returned piece, in characters.

    Returns:
        A list of non-empty, whitespace-trimmed pieces in original order.
        Blank input yields an empty list.

    Raises:
        ValueError: If *max_chars* is smaller than 1.
    """
    # NOTE(review): the limit is measured in characters; if the consumer of
    # these chunks enforces a byte limit, non-ASCII text may need a lower
    # max_chars — confirm against the TTS request limits.
    if max_chars < 1:
        raise ValueError("max_chars must be at least 1")

    sentence_breaks = (". ", "! ", "? ", ".\n", "!\n", "?\n")
    remaining = text.strip()
    chunks = []

    while len(remaining) > max_chars:
        window = remaining[:max_chars]

        sentence_end = max(window.rfind(mark) for mark in sentence_breaks)
        if sentence_end != -1:
            # Keep the punctuation mark with the chunk it terminates.
            cut = sentence_end + 1
        else:
            # No sentence boundary fits: break between words if possible.
            cut = max(window.rfind(" "), window.rfind("\n"))
            if cut <= 0:
                # One unbroken run of characters: hard cut at the limit
                # (never beyond it).
                cut = max_chars

        chunks.append(remaining[:cut].rstrip())
        remaining = remaining[cut:].lstrip()

    # Skip an empty tail so callers never receive a blank chunk.
    if remaining:
        chunks.append(remaining)
    return chunks
def tts_segment(text):
    """Synthesize *text* with Cloud Text-to-Speech and return MP3 bytes.

    Uses the module-level ``tts_client`` with the "en-US-Journey-F" voice and
    returns the ``audio_content`` of the synthesis response.
    """
    request = {
        "input": texttospeech.SynthesisInput(text=text),
        "voice": texttospeech.VoiceSelectionParams(
            language_code="en-US",
            name="en-US-Journey-F",
        ),
        "audio_config": texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3,
            speaking_rate=0.94,
            pitch=0.0,
            volume_gain_db=0.0,
        ),
    }
    result = tts_client.synthesize_speech(**request)
    return result.audio_content
def audio_writer(script_text: str, out="story.mp3"):
    """Synthesize *script_text* to speech and export it as one MP3 file.

    The script is split with ``chunk_text``; each chunk is synthesized with
    ``tts_segment`` and the clips are concatenated with short silences.

    Args:
        script_text: The script to read aloud.
        out: Path of the MP3 file to write.

    Returns:
        The *out* path that was written.

    Raises:
        ValueError: If *script_text* contains nothing to synthesize.
    """
    chunks = [chunk for chunk in chunk_text(script_text) if chunk.strip()]
    if not chunks:
        # Fail clearly instead of sending an empty request to the TTS service.
        raise ValueError("script_text is empty; nothing to synthesize.")

    # pydub durations are in milliseconds: 200 ms lead-in, 150 ms after each clip.
    audio = AudioSegment.silent(200)
    for chunk in chunks:
        mp3_bytes = tts_segment(chunk)
        # Decode from memory so no seg_N.mp3 files are left behind on disk.
        audio += AudioSegment.from_file(io.BytesIO(mp3_bytes), format="mp3")
        audio += AudioSegment.silent(150)

    audio.export(out, format="mp3")
    return out
# --- Full Pipeline ---
async def full_pipeline(topic: str):
    """Run topic -> ELI5 explanation -> teaching script -> narrated MP3.

    Args:
        topic: The subject entered by the user.

    Returns:
        A tuple ``(eli5, script, audio_path)``: the explanation text, the
        generated script, and the path of the exported audio file.

    Raises:
        gr.Error: If *topic* is empty or only whitespace.
    """
    topic = (topic or "").strip()
    if not topic:
        raise gr.Error("Please enter a topic.")

    eli5 = await run_pedagogy_async(topic)

    # run_scriptwriter and audio_writer are blocking calls; run them in a
    # worker thread so this coroutine does not stall the event loop.
    script = await asyncio.to_thread(run_scriptwriter, eli5)
    audio_path = await asyncio.to_thread(audio_writer, script, "story.mp3")
    return eli5, script, audio_path
# --- Gradio App ---
# One text input (the topic) and three outputs, in the order full_pipeline
# returns them: explanation, script, audio.
_topic_input = gr.Textbox(label="Your Topic")
_pipeline_outputs = [
    gr.Textbox(label="ELI5 Explanation", lines=8),
    gr.Textbox(label="Generated Story Script", lines=20),
    gr.Audio(label="Generated Audio"),
]
app = gr.Interface(
    fn=full_pipeline,
    inputs=_topic_input,
    outputs=_pipeline_outputs,
    title="🎧 TinyTutor — Full Pipeline",
)
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    app.launch()