Spaces:

cwattsnogueira
/

tinytutor

Running

App Files Files Community

tinytutor / app.py

cwattsnogueira

Enhanced AudioScriptWriterAgent prompt. (#2)

edbb725 verified about 1 month ago

raw

history blame contribute delete

6.08 kB

	import asyncio
	import io
	import os
	import tempfile
	import uuid

	import google.generativeai as genai
	import gradio as gr
	from google.adk.agents import Agent
	from google.adk.models.google_llm import Gemini
	from google.adk.runners import InMemoryRunner
	from google.adk.tools import google_search
	from google.cloud import texttospeech
	from google.genai import types
	from pydub import AudioSegment

	# TinyTutor App
	# --- Configure API Keys ---
	# Both secrets are read from the environment; startup fails fast if either
	# one is missing.
	GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
	if not GOOGLE_API_KEY:
	    raise RuntimeError("❌ Missing GOOGLE_API_KEY environment variable.")
	genai.configure(api_key=GOOGLE_API_KEY)

	SERVICE_ACCOUNT_JSON = os.getenv("GCP_VI_SERVICE_ACCOUNT_JSON")
	if not SERVICE_ACCOUNT_JSON:
	    raise RuntimeError("❌ Missing GCP_VI_SERVICE_ACCOUNT_JSON environment variable.")

	# The Text-to-Speech client finds its credentials through the
	# GOOGLE_APPLICATION_CREDENTIALS file path, so the JSON has to exist on disk.
	# Write it to a private temp file (mkstemp creates it readable/writable by the
	# owner only) instead of a predictable file in the app's working directory.
	_credentials_fd, _credentials_path = tempfile.mkstemp(
	    prefix="tinytutor-tts-", suffix=".json"
	)
	with os.fdopen(_credentials_fd, "w") as f:
	    f.write(SERVICE_ACCOUNT_JSON)

	os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = _credentials_path
	tts_client = texttospeech.TextToSpeechClient()

	# --- Retry Options ---
	# Shared HTTP retry policy handed to the Gemini model wrapper below.
	# Only the listed status codes (rate limiting and server-side errors) are
	# retried, for at most five attempts.
	retry_config = types.HttpRetryOptions(
	    http_status_codes=[429, 500, 503, 504],
	    attempts=5,
	    initial_delay=1,
	    exp_base=7,
	)

	# --- Pedagogy Agent ---
	# First pipeline stage: an ADK agent that explains a topic in ELI5 style and
	# may call the google_search tool. It runs through an in-memory runner.
	pedagogy_model = Gemini(model="gemini-2.5-flash-lite", retry_options=retry_config)
	pedagogy_agent = Agent(
	    name="PedagogyAgent",
	    model=pedagogy_model,
	    description="Explains topics in simple ELI5 style.",
	    instruction="Explain the topic like I'm 5. Use google_search if needed.",
	    tools=[google_search],
	)
	runner = InMemoryRunner(agent=pedagogy_agent)

	async def run_pedagogy_async(topic: str) -> str:
	    """Ask the Pedagogy Agent for an ELI5 explanation of ``topic``.

	    Args:
	        topic: The subject the user wants explained.

	    Returns:
	        The text of the first event in the agent's response that carries any
	        text, with that event's text parts joined together.

	    Raises:
	        RuntimeError: If none of the returned events contains text.
	    """
	    # NOTE(review): per the ADK docs, run_debug falls back to one fixed session
	    # id when none is given, which would make every request share (and keep
	    # growing) a single conversation history. A fresh id per call keeps topics
	    # independent - confirm against the installed ADK version.
	    events = await runner.run_debug(topic, session_id=uuid.uuid4().hex)

	    # Do not assume the first event / first part is text: an event may have no
	    # content, or parts that carry no text.
	    for event in events or []:
	        content = getattr(event, "content", None)
	        parts = getattr(content, "parts", None) or []
	        text = "".join(p.text for p in parts if getattr(p, "text", None))
	        if text:
	            return text
	    raise RuntimeError("PedagogyAgent returned no text for the given topic.")

	# --- ScriptWriter Agent ---
	# System instruction passed to the Gemini model in run_scriptwriter().
	# It tells the model to turn the ELI5 explanation into a spoken-style script
	# for a young child, with exactly two embedded questions and no labels or
	# markdown, because the output is sent to Text-to-Speech.
	# The text below is sent to the model as-is, so any edit changes the output.
	SCRIPTWRITER_SYSTEM_PROMPT = """
	You are a Teacher.

	Your role is to take a simplified explanation created by the Pedagogy Agent and turn it into a clear, friendly teaching script suitable for a young child around the age of 5.
	The script you produce will be used by a Text-to-Speech (TTS) system, so write in a way that sounds natural when spoken aloud.

	Follow these steps:

	1. Read the simplified explanation provided by the Pedagogy Agent.
	2. Transform it into a spoken-style teaching script that:
	- Uses short, clear sentences.
	- Uses warm, encouraging language.
	- Keeps a playful, curious tone suitable for a young child.
	- Avoids complex words unless they were already explained.
	- Includes gentle teacher-like transitions (“Let’s imagine…”, “Did you know…?”, “Now let’s think about…”).
	- Do NOT use sound effects or onomatopoeia (e.g., “boing,” “zoom,” “pow”).
	- Do NOT repeat words for dramatic effect (e.g., “straight, straight, straight”).
	- Keep playfulness through ideas and imagery, not noises.
	3. Add exactly 2 learning questions inside the story to spark curiosity.
	- The questions must feel natural within the flow of the explanation.
	- They should be simple, open-ended questions a young child can think about.
	- Do NOT place both questions back-to-back.
	4. Make sure the script is vivid and engaging:
	- Use simple imagery.
	- Ask simple rhetorical questions.
	- Use examples familiar to young children.
	5. Avoid:
	- Any reference to agents, prompts, or system instructions.
	- Visual descriptions that don't make sense in audio (“look at this picture”).
	- Overly long paragraphs—keep pacing steady for TTS.
	6. Output only the final teaching script, nothing else. No labels, no titles, no markdown.
	"""

	def run_scriptwriter(explanation: str) -> str:
	    """Turn an ELI5 explanation into a spoken teaching script.

	    Sends the explanation to Gemini with SCRIPTWRITER_SYSTEM_PROMPT as the
	    system instruction.

	    Args:
	        explanation: The simplified explanation from the Pedagogy Agent.

	    Returns:
	        The generated script, or the literal "⚠️ ScriptWriter failed." when no
	        text can be read from the response.
	    """
	    writer = genai.GenerativeModel(
	        model_name="gemini-2.5-flash",
	        system_instruction=SCRIPTWRITER_SYSTEM_PROMPT,
	    )
	    prompt = f"Write a children's story based on this:\n{explanation}"
	    settings = genai.GenerationConfig(temperature=0.9, max_output_tokens=4096)
	    result = writer.generate_content(prompt, generation_config=settings)

	    # Try the convenience accessor first, then the first candidate's first
	    # part; any failure moves on to the next strategy.
	    extractors = (
	        lambda r: r.text,
	        lambda r: r.candidates[0].content.parts[0].text,
	    )
	    for extract in extractors:
	        try:
	            return extract(result)
	        except Exception:
	            continue
	    return "⚠️ ScriptWriter failed."

	# --- Audio Generator ---
	def chunk_text(text, max_chars=4500):
	    """Split ``text`` into pieces of at most ``max_chars`` characters.

	    Each cut is made at the best boundary available inside the limit: the
	    last sentence end (". "), otherwise the last space, otherwise a hard cut
	    at exactly ``max_chars`` characters.

	    Args:
	        text: The text to split. Surrounding whitespace is stripped first.
	        max_chars: Maximum length of each returned chunk; must be >= 1.

	    Returns:
	        A list of chunks in their original order. Text that already fits is
	        returned as a single-element list (``[""]`` for blank input).

	    Raises:
	        ValueError: If ``max_chars`` is smaller than 1.
	    """
	    if max_chars < 1:
	        raise ValueError("max_chars must be at least 1")

	    text = text.strip()
	    if len(text) <= max_chars:
	        return [text]

	    chunks = []
	    while len(text) > max_chars:
	        cut = text.rfind(". ", 0, max_chars)
	        if cut == -1:
	            # No sentence end in range: break between words if possible.
	            cut = text.rfind(" ", 0, max_chars)
	        if cut == -1:
	            # One unbroken run: cut so the chunk is exactly max_chars long
	            # (the slice below is inclusive of index ``cut``).
	            cut = max_chars - 1
	        chunks.append(text[:cut + 1].rstrip())
	        text = text[cut + 1:].strip()
	    if text:
	        chunks.append(text)
	    return chunks

	def tts_segment(text):
	    """Synthesize ``text`` with Google Cloud Text-to-Speech.

	    Uses the module-level ``tts_client`` with the "en-US-Journey-F" voice and
	    requests MP3 output at a speaking rate of 0.94.

	    Args:
	        text: Plain text to speak.

	    Returns:
	        The ``audio_content`` of the synthesis response.
	    """
	    request = {
	        "input": texttospeech.SynthesisInput(text=text),
	        "voice": texttospeech.VoiceSelectionParams(
	            language_code="en-US",
	            name="en-US-Journey-F",
	        ),
	        "audio_config": texttospeech.AudioConfig(
	            audio_encoding=texttospeech.AudioEncoding.MP3,
	            speaking_rate=0.94,
	            pitch=0.0,
	            volume_gain_db=0.0,
	        ),
	    }
	    return tts_client.synthesize_speech(**request).audio_content

	def audio_writer(script_text: str, out="story.mp3"):
	    """Render ``script_text`` to a single MP3 file.

	    The script is split with chunk_text(), each chunk is synthesized with
	    tts_segment(), and the clips are joined with a 200 ms lead-in and 150 ms
	    of silence after each clip.

	    Args:
	        script_text: The full script to speak.
	        out: Path of the MP3 file to write.

	    Returns:
	        ``out``, the path of the exported file.
	    """
	    audio = AudioSegment.silent(200)
	    for chunk in chunk_text(script_text):
	        # Decode the MP3 bytes from memory so no seg_N.mp3 scratch files are
	        # left in the working directory or overwritten by an overlapping run.
	        clip = AudioSegment.from_file(io.BytesIO(tts_segment(chunk)), format="mp3")
	        audio += clip
	        audio += AudioSegment.silent(150)
	    # export() hands back the open output file; close it so the handle is not
	    # leaked on every request.
	    audio.export(out, format="mp3").close()
	    return out

	# --- Full Pipeline ---
	async def full_pipeline(topic: str):
	    """Run topic -> ELI5 explanation -> teaching script -> MP3.

	    Args:
	        topic: The subject entered by the user.

	    Returns:
	        A tuple ``(eli5, script, audio_path)`` matching the three Gradio
	        outputs: explanation text, script text, and the path to the MP3.
	    """
	    eli5 = await run_pedagogy_async(topic)
	    # run_scriptwriter and audio_writer are synchronous and make blocking
	    # calls; run them in worker threads so this coroutine does not stall the
	    # event loop while they wait.
	    script = await asyncio.to_thread(run_scriptwriter, eli5)
	    audio_path = await asyncio.to_thread(audio_writer, script, "story.mp3")
	    return eli5, script, audio_path

	# --- Gradio App ---
	# One topic textbox in; the three outputs line up with the tuple returned by
	# full_pipeline (explanation, script, audio file path).
	topic_input = gr.Textbox(label="Your Topic")
	pipeline_outputs = [
	    gr.Textbox(label="ELI5 Explanation", lines=8),
	    gr.Textbox(label="Generated Story Script", lines=20),
	    gr.Audio(label="Generated Audio"),
	]
	app = gr.Interface(
	    fn=full_pipeline,
	    inputs=topic_input,
	    outputs=pipeline_outputs,
	    title="🎧 TinyTutor — Full Pipeline",
	)

	# Start the web UI only when executed as a script.
	if __name__ == "__main__":
	    app.launch()