Spaces:

build-small-hackathon
/

podify

Running on Zero

jayaspjacob

Add OpenAudio emotion/tone cues to generated scripts

9756234 23 days ago

7.61 kB

	"""LangGraph research graph: plan -> research -> outline -> write.

	Produces a speaker-tagged podcast script from a topic, grounded in live DuckDuckGo
	search results. Kept deliberately linear and lightweight so it runs fast on a CPU Space.
	"""

	from __future__ import annotations

	import json
	import re
	from typing import List, Tuple, TypedDict

	from langgraph.graph import StateGraph, START, END

	from .llm import complete
	from .search import web_search, SearchResult


	class ResearchState(TypedDict, total=False):
	    """State dictionary handed from node to node in the research graph.

	    Declared with ``total=False`` so every key is optional: the graph is
	    invoked with the request keys only, and each node returns just the keys
	    it produces.
	    """

	    # Request keys, supplied by generate_script().
	    topic: str  # podcast subject; the only key the nodes read without a default
	    style: str  # tone hint inserted into the outline and script prompts
	    duration_min: int  # target length in minutes, inserted into the prompts
	    num_speakers: int  # used by write_node only when speaker_names is empty
	    speaker_names: List[str]  # names used as line prefixes in the script

	    # Keys produced by the nodes, in pipeline order.
	    queries: List[str]  # plan_node: search queries (at most six)
	    findings: str  # research_node: search results grouped per query
	    sources: List[str]  # research_node: result URLs, de-duplicated in order
	    outline: str  # outline_node: outline text returned by the LLM
	    script: str  # write_node: final script, whitespace-stripped


	# --------------------------------------------------------------------------- nodes


	def plan_node(state: ResearchState) -> dict:
	    """Ask the LLM for web-search queries that cover the podcast topic.

	    Reads ``state["topic"]`` and returns ``{"queries": [...]}`` with at most
	    six entries. If nothing can be parsed from the reply, the topic itself
	    becomes the single query.
	    """
	    subject = state["topic"]
	    planner_prompt = (
	        "You are a research planner. Given a podcast topic, produce 3-6 focused web "
	        "search queries that together cover the key angles. Respond ONLY with a JSON "
	        'array of strings, e.g. ["query one", "query two"].'
	    )
	    reply = complete(
	        system=planner_prompt,
	        user=f"Topic: {subject}",
	        temperature=0.4,
	        max_tokens=400,
	    )
	    parsed = _parse_json_list(reply)
	    if not parsed:
	        # Fall back to searching for the raw topic.
	        parsed = [subject]
	    return {"queries": parsed[:6]}


	def research_node(state: ResearchState) -> dict:
	    """Run a web search for every planned query and collect the results.

	    Returns a dict with two keys:

	    * ``findings`` -- one ``### Query: ...`` section per query that produced
	      results, separated by blank lines, or a placeholder sentence when no
	      query produced any.
	    * ``sources`` -- the result URLs, de-duplicated with first-seen order kept.
	    """
	    sections: List[str] = []
	    urls: List[str] = []
	    for query in state.get("queries", []):
	        hits: List[SearchResult] = web_search(query, max_results=4)
	        if hits:
	            rendered = "\n".join(hit.as_markdown() for hit in hits)
	            sections.append(f"### Query: {query}\n" + rendered)
	            urls.extend(hit.url for hit in hits)

	    if sections:
	        findings = "\n\n".join(sections)
	    else:
	        findings = "(No web results were available.)"

	    # dict keys are unique and keep insertion order, giving an ordered de-dup.
	    return {"findings": findings, "sources": list(dict.fromkeys(urls))}


	def outline_node(state: ResearchState) -> dict:
	    """Have the LLM turn the research findings into a podcast outline.

	    Reads ``topic`` (required) plus ``style``, ``duration_min`` and
	    ``findings`` (defaulting to ``'conversational'``, ``5`` and an empty
	    string) and returns ``{"outline": <LLM reply>}``.
	    """
	    producer_prompt = (
	        "You are a podcast producer. Using the research findings, write a tight "
	        "outline (intro, 3-5 segments, outro) for the podcast. Use markdown bullets."
	    )
	    request = (
	        f"Topic: {state['topic']}\n"
	        f"Style: {state.get('style', 'conversational')}\n"
	        f"Target length: ~{state.get('duration_min', 5)} minutes\n\n"
	        f"Research findings:\n{state.get('findings', '')}"
	    )
	    reply = complete(
	        system=producer_prompt,
	        user=request,
	        temperature=0.6,
	        max_tokens=800,
	    )
	    return {"outline": reply}


	def write_node(state: ResearchState) -> dict:
	    """Have the LLM write the speaker-tagged script from outline and findings.

	    Speaker names come from ``state["speaker_names"]``; when that is missing
	    or empty, defaults are derived from ``num_speakers`` (2 if absent).
	    Returns ``{"script": <LLM reply, stripped>}``.
	    """
	    cast = state.get("speaker_names")
	    if not cast:
	        cast = _default_speakers(state.get("num_speakers", 2))
	    roster = ", ".join(cast)
	    # One example line per speaker, shown to the model as the required format.
	    turn_template = "\n".join(f"{name}: <what they say>" for name in cast)

	    writer_prompt = (
	        "You are a professional podcast scriptwriter. Write a natural, engaging, "
	        "factually-grounded podcast script based on the outline and findings.\n"
	        f"Speakers: {roster}.\n"
	        "Format STRICTLY as one line per turn, prefixed with the speaker name and a "
	        f"colon, like:\n{turn_template}\n"
	        "Make the delivery feel human by adding OpenAudio emotion/tone cues IN "
	        "PARENTHESES, inline, right before the words they color (or at the very start "
	        "of a turn). Use ONLY these cues: (excited) (curious) (surprised) (amused) "
	        "(interested) (confident) (empathetic) (joyful) (serious) (sarcastic) "
	        "(thoughtful) (laughing) (chuckling) (sighing) (whispering) (soft tone) "
	        "(in a hurry tone). Use them sparingly — about one every few lines, only where "
	        "it genuinely fits the moment. Do NOT invent other cues and do NOT use square "
	        "brackets. There is no pause or emphasis marker: convey pauses and emphasis with "
	        "natural punctuation (commas, em-dashes —, ellipses …).\n"
	        "Apart from these inline parenthetical cues, output only spoken dialogue — no "
	        "markdown, headings, or stand-alone stage directions. Keep each line to a few "
	        "sentences. Open with a hook and close with a sign-off."
	    )
	    request = (
	        f"Topic: {state['topic']}\n"
	        f"Style: {state.get('style', 'conversational')}\n"
	        f"Target length: ~{state.get('duration_min', 5)} minutes\n\n"
	        f"Outline:\n{state.get('outline', '')}\n\n"
	        f"Findings:\n{state.get('findings', '')}"
	    )
	    draft = complete(
	        system=writer_prompt,
	        user=request,
	        temperature=0.8,
	        max_tokens=3000,
	    )
	    return {"script": draft.strip()}


	# --------------------------------------------------------------------------- helpers


	def _parse_json_list(text: str) -> List[str]:
	match = re.search(r"\[.*\]", text, re.DOTALL)
	if not match:
	return [line.strip("-* ").strip() for line in text.splitlines() if line.strip()]
	try:
	data = json.loads(match.group(0))
	return [str(x).strip() for x in data if str(x).strip()]
	except json.JSONDecodeError:
	return []


	def _default_speakers(n: int) -> List[str]:
	names = ["Host", "Guest", "Co-host", "Expert"]
	if n <= 1:
	return ["Narrator"]
	return names[:n]


	def parse_script(script: str) -> List[Tuple[str, str]]:
	    """Turn a 'Speaker: text' transcript into [(speaker, text), ...].

	    Each non-blank line is stripped and matched against ``Name: text``, where
	    the name is 1-30 characters of letters, digits, underscores, spaces, dots,
	    apostrophes or hyphens, and whitespace around the colon is optional.

	    A line that does not match is treated as a continuation and appended
	    (space-separated) to the previous turn. Non-matching lines that appear
	    before the first speaker line are dropped.
	    """
	    turns: List[Tuple[str, str]] = []
	    # Whitespace around the name must be optional (\s*): each line is stripped
	    # before matching, so a mandatory leading \s could never match.
	    pattern = re.compile(r"^\s*([\w .'-]{1,30}?)\s*:\s*(.+)$")
	    for raw in script.splitlines():
	        line = raw.strip()
	        if not line:
	            continue
	        m = pattern.match(line)
	        if m:
	            turns.append((m.group(1).strip(), m.group(2).strip()))
	        elif turns:  # continuation of previous speaker's line
	            speaker, text = turns[-1]
	            turns[-1] = (speaker, f"{text} {line}")
	    return turns


	# --------------------------------------------------------------------------- graph


	def build_graph():
	    """Assemble and compile the linear plan -> research -> outline -> write graph."""
	    pipeline = [
	        ("plan", plan_node),
	        ("research", research_node),
	        ("outline", outline_node),
	        ("write", write_node),
	    ]
	    graph = StateGraph(ResearchState)
	    for label, handler in pipeline:
	        graph.add_node(label, handler)

	    # Chain START, each node in order, then END with one edge per adjacent pair.
	    route = [START] + [label for label, _ in pipeline] + [END]
	    for src, dst in zip(route, route[1:]):
	        graph.add_edge(src, dst)
	    return graph.compile()


	# Compiled graph, built on the first generate_script() call and reused after.
	_GRAPH = None


	def generate_script(
	    topic: str,
	    *,
	    style: str = "conversational",
	    duration_min: int = 5,
	    num_speakers: int = 2,
	    speaker_names: List[str] | None = None,
	) -> dict:
	    """Run the full research graph and return the final state.

	    Args:
	        topic: Subject of the podcast.
	        style: Tone hint passed through to the prompts.
	        duration_min: Target length in minutes, passed through to the prompts.
	        num_speakers: Number of voices; only used to pick default names when
	            ``speaker_names`` is missing or empty.
	        speaker_names: Explicit speaker names, or ``None`` for the defaults.

	    Returns:
	        Whatever the compiled graph's ``invoke`` returns for this request;
	        the smoke test in this file reads ``"script"`` and ``"sources"``
	        from it.
	    """
	    global _GRAPH
	    if _GRAPH is None:
	        # Compile lazily so importing this module does not build the graph.
	        _GRAPH = build_graph()
	    speakers = speaker_names or _default_speakers(num_speakers)
	    request = {
	        "topic": topic,
	        "style": style,
	        "duration_min": duration_min,
	        "num_speakers": num_speakers,
	        "speaker_names": speakers,
	    }
	    return _GRAPH.invoke(request)


	if __name__ == "__main__":  # manual smoke test (needs HF_TOKEN)
	    import sys

	    # Topic comes from the first CLI argument, else a stock example.
	    cli_args = sys.argv[1:]
	    demo_topic = cli_args[0] if cli_args else "The history and future of electric cars"
	    final_state = generate_script(demo_topic, duration_min=3)

	    print("\n=== SCRIPT ===\n")
	    print(final_state["script"])

	    print("\n=== SOURCES ===\n")
	    print("\n".join(final_state.get("sources", [])))

	    print("\n=== PARSED LINES ===\n")
	    for speaker, spoken in parse_script(final_state["script"]):
	        # Show only the first 80 characters of each turn.
	        print(f"[{speaker}] {spoken[:80]}")