Sanket17 commited on
Commit
b1ebd68
Β·
0 Parent(s):

initial commit

Browse files
Files changed (13) hide show
  1. .gitattributes +35 -0
  2. .gitignore +3 -0
  3. Dockerfile +42 -0
  4. README.md +59 -0
  5. app.py +99 -0
  6. avatar/app.txt +0 -0
  7. generate_chalkboard.py +340 -0
  8. logo/app.txt +0 -0
  9. narrate_and_render.py +523 -0
  10. requirements.txt +7 -0
  11. run_pipeline.py +564 -0
  12. template.html +173 -0
  13. voices/app.txt +0 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+
2
+ *.png
3
+ *.wav
Dockerfile ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ WORKDIR /app
4
+
5
+ RUN apt-get update && apt-get install -y \
6
+ ffmpeg \
7
+ curl \
8
+ wget \
9
+ gnupg \
10
+ libnss3 \
11
+ libnspr4 \
12
+ libatk1.0-0 \
13
+ libatk-bridge2.0-0 \
14
+ libcups2 \
15
+ libdrm2 \
16
+ libdbus-1-3 \
17
+ libxkbcommon0 \
18
+ libxcomposite1 \
19
+ libxdamage1 \
20
+ libxrandr2 \
21
+ libgbm1 \
22
+ libxss1 \
23
+ libasound2 \
24
+ libatspi2.0-0 \
25
+ libwayland-client0 \
26
+ fonts-liberation \
27
+ libappindicator3-1 \
28
+ && rm -rf /var/lib/apt/lists/*
29
+
30
+ COPY requirements.txt .
31
+ RUN pip install --no-cache-dir -r requirements.txt
32
+ RUN playwright install chromium
33
+
34
+ COPY . .
35
+
36
+ ENV PYTHONUNBUFFERED=1
37
+ ENV GRADIO_SERVER_NAME=0.0.0.0
38
+ ENV GRADIO_SERVER_PORT=7860
39
+
40
+ EXPOSE 7860
41
+
42
+ CMD ["python", "app.py"]
README.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ByteBrain Hugging Face App
2
+
3
+ This app generates a chalkboard-style MP4 from a topic using a simple Gradio UI.
4
+
5
+ ## UI
6
+
7
+ - Logo: `bytebrain/logo/logo.png`
8
+ - Input: topic text
9
+ - Output: generated video + logs
10
+
11
+ ## Output Location
12
+
13
+ All generated files are written under temporary storage:
14
+
15
+ - Linux/Hugging Face: `/tmp/bytebrain-output`
16
+ - Local Windows/macOS: system temp directory + `bytebrain-output`
17
+
18
+ You can override with:
19
+
20
+ - `PIPELINE_OUTPUT_DIR=/your/path`
21
+
22
+ ## Required Environment Variables
23
+
24
+ - `GEMINI_API_KEY`
25
+ - `OPENAI_API_KEY`
26
+ - `HF_TOKEN` (recommended)
27
+
28
+ Optional:
29
+
30
+ - `HF_SPACE` (defaults to `banao-tech/vibe-voice-custom-voices`)
31
+ - `OPENAI_TRANSCRIBE_MODEL` (defaults to `gpt-4o-mini-transcribe`)
32
+ - `NARRATION_MODEL` (defaults to `gemini-2.5-pro`)
33
+
34
+ ## Run Locally
35
+
36
+ ```bash
37
+ pip install -r requirements.txt
38
+ playwright install chromium
39
+ python app.py
40
+ ```
41
+
42
+ Open `http://localhost:7860`.
43
+
44
+ ## Docker
45
+
46
+ ```bash
47
+ docker build -t bytebrain-app .
48
+ docker run -p 7860:7860 \
49
+ -e GEMINI_API_KEY=your_key \
50
+ -e OPENAI_API_KEY=your_key \
51
+ -e HF_TOKEN=your_hf_token \
52
+ bytebrain-app
53
+ ```
54
+
55
+ ## Hugging Face Space
56
+
57
+ - Use Docker Space.
58
+ - Set the same environment secrets in Space settings.
59
+ - Entry command is already configured by `Dockerfile` (`python app.py`).
app.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ app.py
3
+ ------
4
+ Gradio UI wrapper around run_pipeline.py.
5
+ Fixes Windows cp1252 UnicodeEncodeError by forcing UTF-8 in the subprocess.
6
+ """
7
+
8
+ import os
9
+ import sys
10
+ import subprocess
11
+ import tempfile
12
+ from pathlib import Path
13
+ from datetime import datetime
14
+ import gradio as gr
15
+
16
+
17
+ HERE = Path(__file__).parent.resolve()
18
+ LOGO_PATH = HERE / "logo" / "logo.png"
19
+ RUN_PIPELINE = HERE / "run_pipeline.py"
20
+
21
+
22
+ def _slug(text: str) -> str:
23
+ return "".join(ch.lower() if ch.isalnum() else "_" for ch in text).strip("_")
24
+
25
+
26
def generate_video(topic: str):
    """Run run_pipeline.py for *topic* and return (video path, combined logs).

    Raises gr.Error when the topic is empty, when the pipeline exits
    non-zero, or when the expected output file is missing afterwards.
    """
    cleaned_topic = (topic or "").strip()
    if not cleaned_topic:
        raise gr.Error("Please enter a topic.")

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_dir = Path(tempfile.gettempdir()) / "bytebrain-output"
    out_dir.mkdir(parents=True, exist_ok=True)
    video_path = out_dir / f"{_slug(cleaned_topic)}_{stamp}_video.mp4"

    child_env = dict(os.environ)
    child_env["PIPELINE_OUTPUT_DIR"] = str(out_dir)
    # Force UTF-8 so Windows cp1252 never chokes on box-drawing / Devanagari chars
    child_env["PYTHONIOENCODING"] = "utf-8"
    child_env["PYTHONUTF8"] = "1"

    proc = subprocess.run(
        [
            sys.executable,
            "-X", "utf8",  # Python 3.7+ UTF-8 mode
            str(RUN_PIPELINE),
            cleaned_topic,
            "--output", str(video_path),
        ],
        capture_output=True,
        encoding="utf-8",   # decode stdout/stderr as UTF-8
        errors="replace",   # replace undecodable bytes instead of crashing
        cwd=str(HERE),
        env=child_env,
    )

    logs = (proc.stdout or "")
    if proc.stderr:
        logs += "\n" + proc.stderr

    if proc.returncode != 0:
        # Surface only the tail of the log in the UI error popup.
        tail = "\n".join(logs.strip().splitlines()[-50:])
        raise gr.Error(f"Generation failed.\n{tail}")

    if not video_path.exists():
        raise gr.Error("Pipeline finished but output video file is missing.")

    return str(video_path), logs
68
+
69
+
70
+ # ── Gradio UI ─────────────────────────────────────────────────────────────────
71
+
72
with gr.Blocks(title="ByteBrain Video Generator") as demo:
    # Logo is optional: only shown when logo/logo.png ships with the app.
    if LOGO_PATH.exists():
        gr.Image(value=str(LOGO_PATH), show_label=False, width=140, height=140)
    gr.Markdown("## ByteBrain -- Topic to Video")
    gr.Markdown(
        "Enter any ML/CS topic and get a chalkboard explainer video "
        "with Hindi Trump-Modi narration."
    )
    topic_input = gr.Textbox(label="Topic", placeholder="e.g. Softmax Function")
    generate_btn = gr.Button("Generate Video", variant="primary")
    video_output = gr.Video(label="Generated Video")
    logs_output = gr.Textbox(label="Pipeline Logs", lines=20, max_lines=40)

    # Wire the button to the pipeline wrapper: one topic in, video + logs out.
    generate_btn.click(
        fn=generate_video,
        inputs=topic_input,
        outputs=[video_output, logs_output],
    )
90
+
91
+
92
if __name__ == "__main__":
    # queue() serializes generation requests; each run shells out to the full
    # pipeline, so concurrent runs would contend for CPU and ffmpeg.
    demo.queue().launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        # NOTE(review): share=True requests a public Gradio tunnel; on
        # Hugging Face Spaces this is ignored (with a warning) -- confirm it
        # is actually wanted for local/Docker runs.
        share=True,
        prevent_thread_lock=False,
    )
avatar/app.txt ADDED
File without changes
generate_chalkboard.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ generate_chalkboard.py
3
+ ──────────────────────────────────────────────────────────────────────────────
4
+ Two-pass Gemini pipeline:
5
+ Pass 1 β€” structured JSON (title, bullets, formula, key terms ...)
6
+ Pass 2 β€” animated SVG diagram (chalk-style, topic-specific)
7
+
8
+ Both are injected into the Jinja2 HTML template to produce a complete board.
9
+
10
+ Usage
11
+ -----
12
+ python generate_chalkboard.py "Backpropagation"
13
+ python generate_chalkboard.py "Attention Mechanism" --save-json
14
+ python generate_chalkboard.py --from-json output/backprop_....json
15
+
16
+ Requirements
17
+ ------------
18
+ pip install google-genai jinja2 python-dotenv
19
+
20
+ .env
21
+ ----
22
+ GEMINI_API_KEY=your-key-here
23
+ """
24
+
25
+ import os
26
+ import re
27
+ import json
28
+ import argparse
29
+ from pathlib import Path
30
+ from datetime import datetime
31
+
32
+ try:
33
+ from dotenv import load_dotenv
34
+ load_dotenv()
35
+ except ImportError:
36
+ pass
37
+
38
+ from google import genai
39
+ from google.genai import types
40
+ from jinja2 import Environment, FileSystemLoader
41
+
42
+
43
+ # ── Config ────────────────────────────────────────────────────────────────────
44
+
45
+ MODEL = "gemini-2.5-pro"
46
+ TEMPLATE_FILE = "template.html"
47
+ TEMPLATE_DIR = Path(__file__).parent
48
+ OUTPUT_DIR = Path(__file__).parent / "output"
49
+
50
+
51
+ # ── Pass 1 prompt β€” structured content JSON ───────────────────────────────────
52
+
53
+ CONTENT_PROMPT = """
54
+ You are an expert ML/CS educator creating chalk-board explainers.
55
+ Output ONLY a valid JSON object -- no markdown fences, no prose, no extra keys.
56
+
57
+ Schema (follow exactly):
58
+ {
59
+ "title": "<emoji + topic name, <=40 chars>",
60
+ "subtitle": "<domain Β· tagline, <=60 chars>",
61
+ "idea_items": [
62
+ {"bullet": "->", "html": "<note with <span class='colour'> tags>"},
63
+ {"bullet": "->", "html": "..."},
64
+ {"bullet": "->", "html": "..."}
65
+ ],
66
+ "minima_label": "<section label <=30 chars>",
67
+ "minima_items": [
68
+ {"bullet": "x", "html": "<span class=\"pink ul\">bad thing</span> -- why bad"},
69
+ {"bullet": "v", "html": "<span class=\"yellow ul\">good thing</span> -- why good"}
70
+ ],
71
+ "formula": "<core equation, plain text + <span> ok>",
72
+ "key_terms": [
73
+ {"bullet": "<sym>", "html": "<span class=\"colour ul\">term</span> -- def"},
74
+ {"bullet": "<sym>", "html": "..."},
75
+ {"bullet": "<sym>", "html": "..."}
76
+ ],
77
+ "footnote": "* <tip <=80 chars>",
78
+ "extra_label": "<bottom-right label <=40 chars>",
79
+ "extra_sub": "<bottom-right hint <=60 chars>",
80
+ "diagram_hint": "<one sentence: what the diagram should visually show>"
81
+ }
82
+
83
+ Allowed colour classes: yellow pink blue orange
84
+ Underline class: ul (combine with colour, e.g. class="blue ul")
85
+ Only <span> tags inside html values. Output ONLY the JSON.
86
+ """.strip()
87
+
88
+
89
+ # ── Pass 2 prompt β€” animated SVG diagram ─────────────────────────────────────
90
+
91
+ DIAGRAM_PROMPT = """
92
+ You are an SVG animation expert creating chalk-style educational diagrams.
93
+
94
+ CONTEXT
95
+ -------
96
+ Topic : {topic}
97
+ Hint : {hint}
98
+ Colours :
99
+ --chalk-white #f5f0e8
100
+ --chalk-yellow #f7e06a
101
+ --chalk-pink #f4a0b0
102
+ --chalk-blue #a0c4f4
103
+ --chalk-orange #f4b87a
104
+ board bg #2d5a27 (dark green chalkboard)
105
+
106
+ OUTPUT RULES
107
+ ------------
108
+ - Output ONLY a raw <svg> element -- no wrapper, no markdown, no explanation.
109
+ - viewBox="0 0 354 300" width="354" height="300"
110
+ - style="filter:url(#chalk-filter)" (already defined in the page)
111
+ - All strokes/fills use the colour values above.
112
+ - Every line/path that "draws on" must use this pattern:
113
+ stroke-dasharray="<len>" stroke-dashoffset="<len>"
114
+ style="animation: drawOn <dur>s ease forwards <delay>s"
115
+ - Every element that pops in must use:
116
+ opacity="0" style="animation: popIn 0.3s ease forwards <delay>s"
117
+ - Use font-family="Patrick Hand, cursive" or "Caveat, cursive" for labels.
118
+ - Keep the diagram clear and readable -- axes, curves, nodes, arrows, labels.
119
+ - The diagram must be TOPIC-SPECIFIC and visually explain the concept.
120
+ - Total animation duration should be 5-8 seconds.
121
+ - Add a <style> block INSIDE the <svg> with ONLY these two keyframes
122
+ (do NOT redefine any other keyframe -- they already exist globally):
123
+ @keyframes drawOn {{ to {{ stroke-dashoffset: 0; }} }}
124
+ @keyframes popIn {{ from {{ opacity:0; transform:scale(.5); }} to {{ opacity:1; transform:scale(1); }} }}
125
+
126
+ Output the <svg>...</svg> block only. Nothing else.
127
+ """.strip()
128
+
129
+
130
+ # ── Gemini client ─────────────────────────────────────────────────────────────
131
+
132
def _get_client() -> genai.Client:
    """Build a Gemini client from GEMINI_API_KEY, exiting when it is unset."""
    key = os.environ.get("GEMINI_API_KEY")
    if key:
        return genai.Client(api_key=key)
    print("[error] GEMINI_API_KEY not set. Add it to your .env file.")
    raise SystemExit(1)
138
+
139
+
140
+ def _strip_fences(text: str) -> str:
141
+ text = re.sub(r"^```[a-z]*\n?", "", text.strip())
142
+ text = re.sub(r"\n?```$", "", text)
143
+ return text.strip()
144
+
145
+
146
+ # ── Pass 1 β€” content JSON ─────────────────────────────────────────────────────
147
+
148
def generate_content(topic: str) -> dict:
    """Pass 1: ask Gemini for the structured board-content JSON for *topic*.

    Returns the parsed dict matching the CONTENT_PROMPT schema; exits the
    process (SystemExit) when the model reply is not valid JSON.
    """
    print(f"[pass1] Generating content JSON for: {topic!r} ...")
    client = _get_client()
    response = client.models.generate_content(
        model=MODEL,
        contents=f"{CONTENT_PROMPT}\n\nTopic: {topic}",
        config=types.GenerateContentConfig(
            temperature=0.7,        # some creativity, but keep schema-stable
            max_output_tokens=4096,
        ),
    )
    # The prompt forbids markdown fences, but strip them anyway -- models
    # frequently wrap JSON in ``` blocks regardless.
    raw = _strip_fences(response.text)
    try:
        data = json.loads(raw)
    except json.JSONDecodeError as exc:
        print("[error] Pass 1 did not return valid JSON:")
        print(raw[:800])  # preview the bad reply to aid debugging
        raise SystemExit(1) from exc
    print("[pass1] Content JSON OK")
    return data
168
+
169
+
170
+ # ── Pass 2 β€” animated SVG ─────────────────────────────────────────────────────
171
+
172
def generate_diagram(topic: str, hint: str) -> str | None:
    """Pass 2: ask Gemini for a chalk-style animated SVG diagram.

    *hint* is the one-sentence diagram description produced by Pass 1.
    Returns raw ``<svg>...</svg>`` markup, or None when no usable SVG could
    be extracted (the caller then falls back to a placeholder).
    """
    print("[pass2] Generating SVG diagram ...")
    client = _get_client()
    prompt = DIAGRAM_PROMPT.format(topic=topic, hint=hint)
    response = client.models.generate_content(
        model=MODEL,
        contents=prompt,
        config=types.GenerateContentConfig(
            temperature=0.9,         # diagrams benefit from more variety
            max_output_tokens=8192,  # SVG markup can be long
        ),
    )
    raw = _strip_fences(response.text)

    # The model sometimes wraps the SVG in prose; salvage the first
    # <svg>...</svg> span before giving up.
    if not raw.strip().startswith("<svg"):
        match = re.search(r"(<svg[\s\S]+?</svg>)", raw, re.IGNORECASE)
        if match:
            raw = match.group(1)
        else:
            print("[warn] Pass 2 did not return a valid <svg>. Diagram will use placeholder.")
            return None

    print("[pass2] SVG diagram OK")
    return raw
196
+
197
+
198
+ # ── Jinja rendering ───────────────────────────────────────────────────────────
199
+
200
def render_template(ctx: dict, template_file: str = TEMPLATE_FILE) -> str:
    """Render *template_file* from TEMPLATE_DIR with the given Jinja context."""
    template_path = TEMPLATE_DIR / template_file
    if not template_path.exists():
        raise FileNotFoundError(
            f"Template not found: {template_path}\n"
            f"Ensure '{template_file}' is in the same folder as this script."
        )
    # autoescape stays off: the context already carries trusted HTML/SVG.
    jinja_env = Environment(
        loader=FileSystemLoader(str(TEMPLATE_DIR)),
        autoescape=False,
    )
    template = jinja_env.get_template(template_file)
    return template.render(**ctx)
212
+
213
+
214
+ # ── Output helpers ────────────────────────────────────────────────────────────
215
+
216
def save_html(html: str, path: Path) -> None:
    """Write *html* to *path* as UTF-8, creating parent directories."""
    parent = path.parent
    parent.mkdir(parents=True, exist_ok=True)
    path.write_text(html, encoding="utf-8")
    print(f"[out] HTML -> {path}")
220
+
221
+
222
def save_json(data: dict, path: Path) -> None:
    """Write *data* as pretty-printed UTF-8 JSON (non-ASCII kept literal)."""
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(data, indent=2, ensure_ascii=False)
    path.write_text(serialized, encoding="utf-8")
    print(f"[out] JSON -> {path}")
226
+
227
+
228
+ # ── Build Jinja context ───────────────────────────────────────────────────────
229
+
230
+ def build_context(data: dict, diagram_svg: str | None) -> dict:
231
+
232
+ def render_list(items: list) -> str:
233
+ lis = []
234
+ for item in items:
235
+ bullet = item.get("bullet", "->")
236
+ html = item.get("html", "")
237
+ lis.append(
238
+ f'<li data-b="{bullet}" '
239
+ f'style="font-family:\'Indie Flower\',cursive;font-size:15px;'
240
+ f'line-height:1.4;padding-left:22px;position:relative;'
241
+ f'margin-bottom:3px;color:var(--chalk-white);">'
242
+ f'{html}</li>'
243
+ )
244
+ return "\n".join(lis)
245
+
246
+ return {
247
+ # Header
248
+ "title": data.get("title", "Topic"),
249
+ "subtitle": data.get("subtitle", ""),
250
+
251
+ # Top-right side notes
252
+ "idea_items_html": render_list(data.get("idea_items", [])),
253
+ "minima_label": data.get("minima_label", "Key Contrast"),
254
+ "minima_items_html": render_list(data.get("minima_items", [])),
255
+
256
+ # Bottom-left
257
+ "formula": data.get("formula", ""),
258
+ "key_terms_html": render_list(data.get("key_terms", [])),
259
+ "footnote": data.get("footnote", ""),
260
+
261
+ # Bottom-right placeholder
262
+ "extra_label": data.get("extra_label", "Your content here"),
263
+ "extra_sub": data.get("extra_sub", "Pass extra_content to fill"),
264
+ "extra_content": None,
265
+
266
+ # Diagram slot -- filled by Pass 2 SVG (or None -> placeholder)
267
+ "diagram_label": data.get("diagram_label", "Diagram"),
268
+ "diagram_sub": data.get("diagram_sub", ""),
269
+ "diagram_content": diagram_svg,
270
+ }
271
+
272
+
273
+ # ── CLI ───────────────────────────────────────────────────────────────────────
274
+
275
def parse_args():
    """CLI: either a positional topic (runs Pass 1) or --from-json (skips it)."""
    parser = argparse.ArgumentParser(
        description="Generate a full chalk-board HTML explainer via Gemini (2-pass)."
    )

    # Exactly one content source must be given.
    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument("topic", nargs="?",
                        help="Topic to explain e.g. 'Backpropagation'")
    source.add_argument("--from-json", metavar="FILE",
                        help="Skip Pass 1 -- load content from JSON file (still runs Pass 2)")

    parser.add_argument("--output", "-o", metavar="FILE",
                        help="Output HTML path (default: output/<slug>_<ts>.html)")
    parser.add_argument("--save-json", action="store_true",
                        help="Save raw content JSON alongside the HTML")
    parser.add_argument("--no-diagram", action="store_true",
                        help="Skip Pass 2 -- leave diagram as placeholder")
    parser.add_argument("--template", default=TEMPLATE_FILE,
                        help=f"Jinja2 template file (default: {TEMPLATE_FILE})")
    return parser.parse_args()
294
+
295
+
296
def slug(text: str) -> str:
    """Lowercase *text* and collapse non-alphanumeric runs to underscores."""
    lowered = text.lower()
    collapsed = re.sub(r"[^a-z0-9]+", "_", lowered)
    return collapsed.strip("_")
298
+
299
+
300
+ # ── Main ──────────────────────────────────────────────────────────────────────
301
+
302
def main():
    """CLI entry point: content JSON (Pass 1) -> SVG (Pass 2) -> HTML file."""
    args = parse_args()

    # -- Pass 1: content JSON (freshly generated, or loaded from disk)
    if args.from_json:
        json_path = Path(args.from_json)
        print(f"[load] Reading JSON from {json_path} ...")
        raw_data = json.loads(json_path.read_text(encoding="utf-8"))
        # Fall back to the file stem when the JSON carries no title.
        topic = raw_data.get("title", json_path.stem)
        topic_slug = slug(topic)
    else:
        topic = args.topic
        raw_data = generate_content(topic)
        topic_slug = slug(topic)

    # Only persist JSON when it was just generated; --from-json already has it.
    if args.save_json and not args.from_json:
        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
        save_json(raw_data, OUTPUT_DIR / f"{topic_slug}_{ts}.json")

    # -- Pass 2: animated SVG diagram (None -> template placeholder)
    diagram_svg = None
    if not args.no_diagram:
        hint = raw_data.get("diagram_hint", f"A visual diagram explaining {topic}")
        diagram_svg = generate_diagram(topic, hint)

    # -- Build context + render
    ctx = build_context(raw_data, diagram_svg)
    html = render_template(ctx, template_file=args.template)

    # -- Save output
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_path = Path(args.output) if args.output else OUTPUT_DIR / f"{topic_slug}_{ts}.html"
    save_html(html, out_path)

    print(f"\nDone! Open in browser -> {out_path.resolve()}")
337
+
338
+
339
+ if __name__ == "__main__":
340
+ main()
logo/app.txt ADDED
File without changes
narrate_and_render.py ADDED
@@ -0,0 +1,523 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ narrate_and_render.py
3
+ ──────────────────────────────────────────────────────────────────────────────
4
+ Full pipeline:
5
+ 1. generate_narration() -- Gemini writes a funny Trump vs Modi
6
+ Hindi dialogue for the given topic
7
+ 2. generate_audio() -- sends each dialogue line to the HuggingFace
8
+ Gradio TTS (banao-tech/vibe-voice-custom-voices)
9
+ using speaker voice files you provide
10
+ 3. render_html_frames() -- Playwright opens the generated HTML board,
11
+ takes a screenshot per dialogue beat
12
+ 4. build_video() -- FFmpeg stitches frames + per-line audio into
13
+ the final MP4
14
+
15
+ Usage
16
+ -----
17
+ python narrate_and_render.py \\
18
+ --html output/softmax_20260329.html \\
19
+ --topic "Softmax Function" \\
20
+ --voice-trump voices/trump.wav \\
21
+ --voice-modi voices/modi.wav \\
22
+ --output final/softmax_video.mp4
23
+
24
+ Requirements
25
+ ------------
26
+ pip install google-genai gradio_client playwright python-dotenv
27
+ playwright install chromium
28
+
29
+ .env
30
+ ----
31
+ GEMINI_API_KEY=your-key-here
32
+ """
33
+
34
+ import os
35
+ import re
36
+ import json
37
+ import shutil
38
+ import argparse
39
+ import subprocess
40
+ import uuid
41
+ from pathlib import Path
42
+ from datetime import datetime
43
+
44
+ try:
45
+ from dotenv import load_dotenv
46
+ load_dotenv()
47
+ except ImportError:
48
+ pass
49
+
50
+ # ── lazy imports ──────────────────────────────────────────────────────────────
51
+
52
+ def _require(module: str, pip_name: str = None):
53
+ import importlib
54
+ try:
55
+ return importlib.import_module(module)
56
+ except ImportError:
57
+ pkg = pip_name or module
58
+ raise SystemExit(
59
+ f"[error] Missing package: '{pkg}'\n"
60
+ f" Run: pip install {pkg}"
61
+ )
62
+
63
+
64
+ # ── Config ────────────────────────────────────────────────────────────────────
65
+
66
+ HF_SPACE = "banao-tech/vibe-voice-custom-voices"
67
+ NARRATION_MODEL = os.environ.get("NARRATION_MODEL", "gemini-2.5-pro")
68
+ BOARD_WIDTH = 414
69
+ BOARD_HEIGHT = 736
70
+
71
+
72
+ # ══════════════════════════════════════════════════════════════════════════════
73
+ # STEP 1 -- Generate Hindi Trump vs Modi narration via Gemini
74
+ # ══════════════════════════════════════════════════════════════════════════════
75
+
76
+ NARRATION_PROMPT = """
77
+ You are a satire comedy writer. Create a sarcastically funny Hindi dialogue between Trump [1] and Modi [2]
78
+ explaining the given ML/CS topic. Rules:
79
+ - Exactly 8-10 lines total, alternating [1] and [2]
80
+ - Output lines primarily in Hindi (Devanagari script), allowing English only for technical terms
81
+ - Trump is overconfident, often confused, and slightly dim-witted in a playful way; keep it witty and non-hateful
82
+ - Modi explains patiently with desi analogies
83
+ - Use most indian way of comedy, not english, also not include fake news term here
84
+ - Tone should be sarcastic and punchy, not plain funny
85
+ - Each line MAX 25 words
86
+ - End with both understanding the concept
87
+ - Output ONLY the dialogue lines, one per line, exactly in this format:
88
+ [1]: <line>
89
+ [2]: <line>
90
+ No extra text, no intro, no outro.
91
+
92
+ IMPORTANT: BOTH MUST SPEAK IN HINDI LANGUAGE. ONLY TECHNICAL TERMS OR JARGON IS ALLOWED.
93
+
94
+ Example style:
95
+ [1]: Modi bhai, yeh Gradient Descent kya hai? Kuch samajh nahi aaya!
96
+ [2]: Are Trump bhai! Socho -- ek pahad hai, gend ko neeche pahunchana hai.
97
+ [1]: Neeche kyun Modi? Main toh TOP pe rehta hoon -- America First!
98
+ [2]: Yahan ulta hai! Neeche matlab Loss kam hai -- yahi ML ka khel hai!
99
+ """.strip()
100
+
101
+
102
def generate_narration(topic: str) -> list[dict]:
    """
    Returns list of: [{"speaker": 1, "line": "..."}, ...]
    speaker 1 = Trump, speaker 2 = Modi

    Tries NARRATION_MODEL first, then falls back to gemini-2.0-flash;
    exits the process (SystemExit) when no model yields parseable dialogue.
    """
    from google import genai as google_genai
    from google.genai import types as google_types

    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise SystemExit("[error] GEMINI_API_KEY not set in .env")

    client = google_genai.Client(api_key=api_key)
    print(f"[narr] Generating Hindi dialogue for: {topic!r} ...")
    prompt = f"{NARRATION_PROMPT}\n\nTopic: {topic}"

    raw = ""
    last_error = None
    # Fallback chain: preferred model first, then a cheaper/faster model.
    for model_name in [NARRATION_MODEL, "gemini-2.0-flash"]:
        try:
            response = client.models.generate_content(
                model=model_name,
                contents=prompt,
                config=google_types.GenerateContentConfig(
                    temperature=0.9,       # comedy benefits from variety
                    max_output_tokens=800,
                ),
            )
            raw = (response.text or "").strip()
            if raw:
                break
        except Exception as e:
            last_error = e
            print(f"[warn] Narration model '{model_name}' failed: {e}")

    if not raw:
        raise SystemExit(f"[error] Narration generation failed: {last_error}")

    # Parse strictly-formatted "[1]: line" / "[2]: line" rows; anything else
    # (blank lines, stray prose) is silently ignored.
    lines = []
    for raw_line in raw.splitlines():
        raw_line = raw_line.strip()
        m = re.match(r"\[([12])\]:\s*(.+)", raw_line)
        if m:
            lines.append({"speaker": int(m.group(1)), "line": m.group(2).strip()})

    if not lines:
        raise SystemExit("[error] Gemini returned no parseable dialogue lines.")

    print(f"[narr] {len(lines)} lines generated")
    return lines
152
+
153
+
154
+ # ══════════════════════════════════════════════════════════════════════════════
155
+ # STEP 2 -- TTS via HuggingFace Gradio
156
+ # ══════════════════════════════════════════════════════════════════════════════
157
+
158
def generate_audio(
    dialogue: list[dict],
    voice_trump: str,
    voice_modi: str,
    audio_dir: Path,
    hf_space: str,
    hf_token: str | None,
) -> list[Path]:
    """
    Sends each dialogue line to the TTS Gradio space.
    Returns list of audio file paths in dialogue order.

    A failed TTS call is replaced by a 2-second silent WAV so the later
    video assembly never stalls on a missing file.
    """
    gradio_client = _require("gradio_client")
    Client = gradio_client.Client
    handle_file = gradio_client.handle_file

    audio_dir.mkdir(parents=True, exist_ok=True)

    print(f"[tts] Connecting to HuggingFace space: {hf_space} ...")
    client_kwargs = {}
    if hf_token:
        client_kwargs["hf_token"] = hf_token
    try:
        client = Client(hf_space, **client_kwargs)
    except TypeError:
        # Older gradio_client versions do not accept hf_token.
        client = Client(hf_space)

    trump_voice = handle_file(voice_trump)
    modi_voice = handle_file(voice_modi)

    audio_paths = []
    for i, entry in enumerate(dialogue):
        speaker = entry["speaker"]
        text = entry["line"]
        out_file = audio_dir / f"line_{i+1:02d}_spk{speaker}.wav"

        # Put the current speaker's reference voice in the speaker-1 slot;
        # the space exposes four slots, so the two voices are alternated.
        spk1 = trump_voice if speaker == 1 else modi_voice
        spk2 = modi_voice if speaker == 1 else trump_voice

        print(f"[tts] Line {i+1}/{len(dialogue)} (Speaker {speaker}): {text[:50]}...")

        try:
            result = client.predict(
                text=text,
                speaker1_audio_path=spk1,
                speaker2_audio_path=spk2,
                speaker3_audio_path=spk1,
                speaker4_audio_path=spk2,
                seed=42,                 # fixed seed for reproducible voices
                diffusion_steps=20,
                cfg_scale=1.3,
                use_sampling=False,
                temperature=0.95,
                top_p=0.95,
                max_words_per_chunk=250,
                api_name="/generate_speech_gradio",
            )
            # The space may return either a dict payload or a bare file path.
            if isinstance(result, dict):
                src = result.get("value") or result.get("path") or result.get("name")
            else:
                src = result

            shutil.copy2(src, out_file)
            print(f"[tts] Saved -> {out_file}")
        except Exception as exc:
            # Best-effort: keep the pipeline alive with silence for this line.
            print(f"[warn] TTS failed for line {i+1}: {exc}")
            _silent_wav(out_file, duration=2)

        audio_paths.append(out_file)

    print(f"[tts] All {len(audio_paths)} audio files ready")
    return audio_paths
230
+
231
+
232
def _silent_wav(path: Path, duration: int = 2) -> None:
    """Write *duration* seconds of stereo 44.1 kHz silence to *path* via ffmpeg.

    Used as a fallback when TTS fails for a dialogue line. The original
    passed the anullsrc filter as a pointless f-string and silently ignored
    ffmpeg failures; a failure is now reported (but still non-fatal, since
    this is itself a best-effort fallback).
    """
    proc = subprocess.run([
        "ffmpeg", "-y", "-f", "lavfi",
        "-i", "anullsrc=r=44100:cl=stereo",
        "-ar", "44100",
        "-t", str(duration),
        str(path),
    ], capture_output=True)
    if proc.returncode != 0:
        print(f"[warn] ffmpeg could not write silent wav {path} "
              f"(exit {proc.returncode})")
240
+
241
+
242
+ # ══════════════════════════════════════════════════════════════════════════════
243
+ # STEP 3 -- Playwright: render HTML board -> screenshots per dialogue beat
244
+ # ══════════════════════════════════════════════════════════════════════════════
245
+
246
def render_html_frames(
    html_path: Path,
    dialogue: list[dict],
    audio_paths: list[Path],
    frames_dir: Path,
) -> list[Path]:
    """Screenshot the HTML board once per dialogue line (plus one outro).

    Injects a subtitle overlay into the page, updates it for every line,
    and captures one PNG per beat at BOARD_WIDTH x BOARD_HEIGHT.
    Returns the frame paths in order (len(dialogue) + 1 frames).
    """
    pw_module = _require("playwright.sync_api", "playwright")
    sync_playwright = pw_module.sync_playwright

    frames_dir.mkdir(parents=True, exist_ok=True)
    frame_paths = []

    # Per-line display durations come from the audio files; 3.0 s fallback
    # when ffprobe cannot report a duration.
    durations = []
    for ap in audio_paths:
        dur = _get_audio_duration(ap)
        durations.append(dur if dur else 3.0)

    html_url = html_path.resolve().as_uri()
    print(f"[frames] Launching Playwright -> {html_url}")

    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page(
            viewport={"width": BOARD_WIDTH, "height": BOARD_HEIGHT}
        )
        page.goto(html_url, wait_until="networkidle")
        # Let the board's chalk draw-on animations finish before capturing.
        page.wait_for_timeout(8500)

        # Subtitle overlay styling (hidden until the .visible class is added).
        page.add_style_tag(content="""
            #subtitle-overlay {
                position: fixed;
                bottom: 54px;
                left: 50%;
                transform: translateX(-50%);
                width: 88%;
                background: rgba(0,0,0,0.72);
                border-radius: 8px;
                padding: 8px 14px;
                font-family: 'Caveat', cursive;
                font-size: 15px;
                color: #f5f0e8;
                text-align: center;
                line-height: 1.5;
                z-index: 9999;
                display: none;
                border: 1px solid rgba(245,240,232,0.2);
            }
            #subtitle-overlay.visible { display: block; }
            #subtitle-speaker {
                font-size: 11px;
                letter-spacing: 1.5px;
                text-transform: uppercase;
                margin-bottom: 3px;
                opacity: 0.6;
            }
        """)

        # Create the overlay DOM node once; its text is swapped per line.
        page.evaluate("""
            const div = document.createElement('div');
            div.id = 'subtitle-overlay';
            div.innerHTML = '<div id="subtitle-speaker"></div><div id="subtitle-text"></div>';
            document.body.appendChild(div);
        """)

        for i, (entry, audio_path, duration) in enumerate(
            zip(dialogue, audio_paths, durations)
        ):
            speaker_name = "Trump" if entry["speaker"] == 1 else "Modi"
            line_text = entry["line"]

            # json.dumps safely escapes quotes/Devanagari for JS injection.
            page.evaluate(f"""
                document.getElementById('subtitle-speaker').textContent = {json.dumps(speaker_name)};
                document.getElementById('subtitle-text').textContent = {json.dumps(line_text)};
                document.getElementById('subtitle-overlay').classList.add('visible');
            """)

            frame_path = frames_dir / f"frame_{i+1:03d}.png"
            page.screenshot(path=str(frame_path), full_page=False)
            frame_paths.append(frame_path)
            print(f"[frames] Frame {i+1}/{len(dialogue)} -> {frame_path.name} ({duration:.1f}s)")

        # Final outro frame with the subtitle hidden again.
        page.evaluate("document.getElementById('subtitle-overlay').classList.remove('visible')")
        outro_path = frames_dir / f"frame_{len(dialogue)+1:03d}.png"
        page.screenshot(path=str(outro_path), full_page=False)
        frame_paths.append(outro_path)

        browser.close()

    print(f"[frames] {len(frame_paths)} frames rendered")
    return frame_paths
336
+
337
+
338
+ def _get_audio_duration(path: Path) -> float | None:
339
+ try:
340
+ result = subprocess.run(
341
+ ["ffprobe", "-v", "error", "-show_entries", "format=duration",
342
+ "-of", "default=noprint_wrappers=1:nokey=1", str(path)],
343
+ capture_output=True, text=True
344
+ )
345
+ return float(result.stdout.strip())
346
+ except Exception:
347
+ return None
348
+
349
+
350
+ # ══════════════════════════════════════════════════════════════════════════════
351
+ # STEP 4 -- FFmpeg: stitch frames + audio -> MP4
352
+ # ══════════════════════════════════════════════════════════════════════════════
353
+
354
def build_video(
    frame_paths: list[Path],
    audio_paths: list[Path],
    durations: list[float],
    output_path: Path,
    fps: int = 24,
):
    """Stitch still frames + per-line audio into one MP4.

    Each (frame, audio, duration) triple is encoded as a still-image segment.
    If there is one more frame than audio clips, the extra frame becomes a
    2-second silent outro.  The segments are then concatenated and re-encoded
    into *output_path*.

    Parameters
    ----------
    frame_paths : board screenshots, one per dialogue line (plus optional outro)
    audio_paths : TTS clips aligned with the dialogue frames
    durations   : pre-measured clip durations in seconds
    output_path : final MP4 destination (parent directories are created)
    fps         : output frame rate (now actually forwarded to FFmpeg via -r;
                  previously this parameter was accepted but ignored)

    Raises SystemExit with FFmpeg stderr when a segment encode fails.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    tmp = output_path.parent / f"_tmp_segments_{uuid.uuid4().hex[:8]}"
    tmp.mkdir(exist_ok=True)

    # Letterbox every frame to the board size so odd screenshot dimensions
    # cannot break x264 (which requires even width/height).
    scale_filter = (
        f"scale={BOARD_WIDTH}:{BOARD_HEIGHT}:force_original_aspect_ratio=decrease,"
        f"pad={BOARD_WIDTH}:{BOARD_HEIGHT}:(ow-iw)/2:(oh-ih)/2:color=black"
    )

    def _encode_still(frame: Path, audio_args: list[str], dur, seg: Path, err_msg: str):
        # One looping still image + one audio source -> a short H.264/AAC segment.
        cmd = [
            "ffmpeg", "-y",
            "-loop", "1", "-i", str(frame),
            *audio_args,
            "-c:v", "libx264", "-preset", "fast",
            "-tune", "stillimage",
            "-r", str(fps),  # honour the requested output frame rate
            "-c:a", "aac", "-b:a", "192k",
            "-pix_fmt", "yuv420p",
            "-vf", scale_filter,
            "-t", str(dur),
            "-shortest",
            str(seg),
        ]
        try:
            subprocess.run(cmd, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as exc:
            raise SystemExit(f"{err_msg}\nstderr:\n{exc.stderr}")

    segment_paths = []
    try:
        for i, (frame, audio, dur) in enumerate(zip(frame_paths, audio_paths, durations)):
            seg = tmp / f"seg_{i:03d}.mp4"
            _encode_still(
                frame, ["-i", str(audio)], dur, seg,
                f"[error] FFmpeg segment encode failed at segment {i+1}.",
            )
            segment_paths.append(seg)
            print(f"[video] Segment {i+1}/{len(audio_paths)} encoded ({dur:.1f}s)")

        # Extra trailing frame (subtitles hidden) -> short silent outro.
        if len(frame_paths) > len(audio_paths):
            outro_seg = tmp / "seg_outro.mp4"
            _encode_still(
                frame_paths[-1],
                ["-f", "lavfi", "-i", "anullsrc=r=44100:cl=stereo"],
                2, outro_seg,
                "[error] FFmpeg outro encode failed.",
            )
            segment_paths.append(outro_seg)

        # concat demuxer list file: one "file '<path>'" line per segment.
        concat_list = tmp / "concat.txt"
        with open(concat_list, "w") as f:
            for sp in segment_paths:
                f.write(f"file '{sp.resolve()}'\n")

        print(f"[video] Concatenating {len(segment_paths)} segments -> {output_path}")
        cmd = [
            "ffmpeg", "-y",
            "-f", "concat", "-safe", "0",
            "-i", str(concat_list),
            "-c:v", "libx264", "-preset", "medium", "-crf", "20",
            "-r", str(fps),
            "-c:a", "aac", "-b:a", "192k", "-ar", "48000",
            "-movflags", "+faststart",
            str(output_path),
        ]
        subprocess.run(cmd, check=True)
        print(f"[video] Final video -> {output_path.resolve()}")
    finally:
        # Always remove the scratch directory, even when an encode fails
        # (previously failed runs leaked the _tmp_segments_* directory).
        shutil.rmtree(tmp, ignore_errors=True)
434
+
435
+
436
+ # ── CLI ───────────────────────────────────────────────────────────────────────
437
+
438
def parse_args():
    """Build and evaluate the CLI for the narration/render script."""
    ap = argparse.ArgumentParser(
        description="Generate Hindi Trump-Modi narration, TTS audio, and render video from chalkboard HTML."
    )
    # Required inputs: the rendered board plus the two voice samples.
    ap.add_argument("--html", required=True, help="Path to the generated chalkboard HTML file")
    ap.add_argument("--topic", required=True, help="Topic name")
    ap.add_argument("--voice-trump", required=True, help="WAV file for Trump's voice")
    ap.add_argument("--voice-modi", required=True, help="WAV file for Modi's voice")
    # Optional output / debugging knobs.
    ap.add_argument("--output", default=None, help="Output MP4 path")
    ap.add_argument("--save-script", action="store_true", help="Save dialogue script as JSON")
    ap.add_argument("--fps", type=int, default=24, help="Output video FPS")
    # TTS backend configuration (env vars override the built-in defaults).
    ap.add_argument("--hf-space", default=os.environ.get("HF_SPACE", HF_SPACE))
    ap.add_argument("--hf-token", default=os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN"))
    ap.add_argument("--run-root", default="output")
    ap.add_argument("--keep-workdir", action="store_true")
    return ap.parse_args()
454
+
455
+
456
def slug(text: str) -> str:
    """Lower-case *text* and collapse every non-alphanumeric run into a single '_'."""
    collapsed = re.sub(r"[^a-z0-9]+", "_", text.lower())
    # Drop underscores introduced at either end by leading/trailing punctuation.
    return collapsed.strip("_")
458
+
459
+
460
+ def _resolve_html_path(raw_html_path: Path) -> Path:
461
+ if not raw_html_path.exists():
462
+ raise SystemExit(f"[error] HTML file not found: {raw_html_path.resolve()}")
463
+ return raw_html_path
464
+
465
+
466
def main():
    """CLI entry point: Hindi narration -> TTS -> board screenshots -> MP4.

    Expects an already-generated chalkboard HTML (see --html); produces the
    final video under --run-root (or at --output) and cleans up intermediate
    audio/frame files unless --keep-workdir is given.
    """
    args = parse_args()
    # Timestamp + topic slug make every run's artefacts uniquely named.
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    topic_slug = slug(args.topic)

    run_root = Path(args.run_root)
    run_root.mkdir(parents=True, exist_ok=True)
    html_path = _resolve_html_path(Path(args.html))

    # Fail fast on missing voice samples before any expensive API calls.
    for label, vpath in [("--voice-trump", args.voice_trump), ("--voice-modi", args.voice_modi)]:
        if not Path(vpath).exists():
            raise SystemExit(f"[error] Voice file not found ({label}): {vpath}")

    # Scratch directory for per-line audio clips and frame screenshots.
    work_dir = run_root / f"_work_{topic_slug}_{ts}"
    work_dir.mkdir(parents=True, exist_ok=True)

    out_dir = run_root
    output_path = Path(args.output) if args.output else out_dir / f"{topic_slug}_{ts}_video.mp4"

    # Step 1: LLM writes the Trump/Modi dialogue for the topic.
    dialogue = generate_narration(args.topic)

    if args.save_script:
        script_path = out_dir / f"{topic_slug}_{ts}_script.json"
        script_path.write_text(json.dumps(dialogue, ensure_ascii=False, indent=2), encoding="utf-8")
        print(f"[narr] Script saved -> {script_path}")

    print("\n-- Dialogue Script ------------------------------------------")
    for entry in dialogue:
        # speaker==1 is Trump; "Modi " is padded so the labels line up.
        name = "Trump" if entry["speaker"] == 1 else "Modi "
        print(f" [{name}]: {entry['line']}")
    print("-------------------------------------------------------------\n")

    # Step 2: one TTS clip per dialogue line.
    audio_dir = work_dir / "audio"
    audio_paths = generate_audio(
        dialogue,
        voice_trump=args.voice_trump,
        voice_modi=args.voice_modi,
        audio_dir=audio_dir,
        hf_space=args.hf_space,
        hf_token=args.hf_token,
    )

    # Step 3: one subtitled screenshot per line (plus an outro frame).
    frames_dir = work_dir / "frames"
    frame_paths = render_html_frames(html_path, dialogue, audio_paths, frames_dir)

    # Step 4: measure clip durations (3s fallback) and assemble the MP4.
    durations = [_get_audio_duration(ap) or 3.0 for ap in audio_paths]
    build_video(frame_paths, audio_paths, durations, output_path, fps=args.fps)

    if not args.keep_workdir:
        shutil.rmtree(work_dir, ignore_errors=True)
    else:
        print(f"[debug] Work files kept at: {work_dir.resolve()}")

    print(f"\nDone! Video saved -> {output_path.resolve()}")
520
+
521
+
522
+ if __name__ == "__main__":
523
+ main()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ python-dotenv
3
+ google-generativeai
4
+ jinja2
5
+ gradio_client
6
+ playwright
7
+ openai
run_pipeline.py ADDED
@@ -0,0 +1,564 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ run_pipeline.py
3
+ ──────────────────────────────────────────────────────────────────────────────
4
+ ONE command to rule them all.
5
+
6
+ Give it a topic -> get a fully rendered MP4 chalkboard video with
7
+ Hindi Trump-Modi narration.
8
+
9
+ Usage
10
+ -----
11
+ python run_pipeline.py "Gradient Descent"
12
+ python run_pipeline.py "Softmax Function" --voice-trump voices/trump.wav --voice-modi voices/modi.wav
13
+ python run_pipeline.py "Attention Mechanism" --no-diagram --keep-workdir
14
+
15
+ What it does internally
16
+ -----------------------
17
+ Step 1 generate_chalkboard.py Pass 1 -- Gemini generates title + diagram hint (JSON)
18
+ Step 2 generate_chalkboard.py Pass 2 -- Gemini generates animated SVG diagram
19
+ Step 3 Jinja2 renders HTML board -- SVG + title injected into template
20
+ Step 4 narrate_and_render.py Step 1 -- Gemini writes Hindi Trump-Modi dialogue
21
+ Step 5 narrate_and_render.py Step 2 -- HuggingFace TTS generates audio per line
22
+ Step 6 narrate_and_render.py Step 3 -- Playwright screenshots board per line
23
+ Step 7 narrate_and_render.py Step 4 -- FFmpeg stitches frames + audio -> MP4
24
+
25
+ File layout expected
26
+ --------------------
27
+ run_pipeline.py <- THIS FILE
28
+ template.html <- Jinja2 HTML template
29
+ generate_chalkboard.py <- chalkboard generator (imported)
30
+ narrate_and_render.py <- narration + video renderer (imported)
31
+ voices/
32
+ trump.wav <- Trump voice sample (default path)
33
+ modi.wav <- Modi voice sample (default path)
34
+ avatar/
35
+ trump.png <- Trump avatar image (default path)
36
+ modi.png <- Modi avatar image (default path)
37
+ output/ <- all outputs land here (auto-created)
38
+
39
+ Requirements
40
+ ------------
41
+ pip install google-genai jinja2 python-dotenv gradio_client playwright
42
+ playwright install chromium
43
+ # ffmpeg must be on PATH
44
+
45
+ .env
46
+ ----
47
+ GEMINI_API_KEY=your-gemini-key
48
+ HF_TOKEN=your-huggingface-token (optional but helps with rate limits)
49
+ """
50
+
51
+ import os
52
+ import sys
53
+ import json
54
+ import base64
55
+ import argparse
56
+ import threading
57
+ import socketserver
58
+ import http.server
59
+ import subprocess
60
+ import shutil
61
+ import tempfile
62
+ from pathlib import Path
63
+ from datetime import datetime
64
+
65
+ # Ensure UTF-8 output on Windows to avoid cp1252 UnicodeEncodeError
66
+ if sys.stdout.encoding and sys.stdout.encoding.lower() != "utf-8":
67
+ try:
68
+ sys.stdout.reconfigure(encoding="utf-8", errors="replace")
69
+ except AttributeError:
70
+ pass
71
+ if sys.stderr.encoding and sys.stderr.encoding.lower() != "utf-8":
72
+ try:
73
+ sys.stderr.reconfigure(encoding="utf-8", errors="replace")
74
+ except AttributeError:
75
+ pass
76
+
77
+ try:
78
+ from dotenv import load_dotenv
79
+ load_dotenv()
80
+ except ImportError:
81
+ pass
82
+
83
+ # Make sure siblings are importable regardless of cwd
84
+ HERE = Path(__file__).parent.resolve()
85
+ sys.path.insert(0, str(HERE))
86
+
87
+ from generate_chalkboard import (
88
+ generate_content,
89
+ generate_diagram,
90
+ build_context,
91
+ render_template,
92
+ save_html,
93
+ save_json,
94
+ slug,
95
+ TEMPLATE_FILE,
96
+ )
97
+ from narrate_and_render import (
98
+ generate_narration,
99
+ generate_audio,
100
+ render_html_frames,
101
+ build_video,
102
+ _get_audio_duration,
103
+ _silent_wav,
104
+ HF_SPACE,
105
+ BOARD_WIDTH,
106
+ BOARD_HEIGHT,
107
+ )
108
+
109
+
110
+ # ── Config ────────────────────────────────────────────────────────────────────
111
+
112
+ OUTPUT_DIR = Path(os.environ.get("PIPELINE_OUTPUT_DIR", str(Path(tempfile.gettempdir()) / "bytebrain-output")))
113
+ DEFAULT_TRUMP = HERE / "voices" / "trump.wav"
114
+ DEFAULT_MODI = HERE / "voices" / "modi.wav"
115
+ DEFAULT_TRUMP_AVATAR = HERE / "avatar" / "trump.png"
116
+ DEFAULT_MODI_AVATAR = HERE / "avatar" / "modi.png"
117
+ HTTP_PORT = 8765
118
+
119
+
120
+ # ── Local HTTP server (fixes Google Fonts over file://) ──────────────────────
121
+
122
+ class _QuietHandler(http.server.SimpleHTTPRequestHandler):
123
+ def log_message(self, *args):
124
+ pass
125
+
126
+
127
def _start_http_server(directory: Path) -> socketserver.TCPServer:
    """Serve *directory* on HTTP_PORT from a daemon thread and return the server.

    Serving over http:// (instead of file://) lets the rendered board load
    Google Fonts.  The caller is responsible for calling ``shutdown()``.

    Instead of ``os.chdir(directory)`` (which silently changed how every
    later relative path in the process resolved), the directory is bound to
    the handler via SimpleHTTPRequestHandler's ``directory`` parameter.
    """
    from functools import partial

    handler = partial(_QuietHandler, directory=str(directory))

    class _ReusableServer(socketserver.TCPServer):
        # Avoid "address already in use" when the pipeline is re-run quickly.
        allow_reuse_address = True

    httpd = _ReusableServer(("", HTTP_PORT), handler)
    thread = threading.Thread(target=httpd.serve_forever, daemon=True)
    thread.start()
    print(f"[http] Local font server started on http://localhost:{HTTP_PORT}")
    return httpd
134
+
135
+
136
+ # ── CLI ───────────────────────────────────────────────────────────────────────
137
+
138
def parse_args():
    """CLI for the end-to-end pipeline: a topic plus optional asset overrides."""
    ap = argparse.ArgumentParser(
        description="Full chalkboard video pipeline -- just give a topic."
    )
    ap.add_argument("topic", help="ML/CS topic to explain, e.g. 'Gradient Descent'")
    # Voice samples for the two speakers (cloned by the TTS space).
    ap.add_argument("--voice-trump", default=str(DEFAULT_TRUMP),
                    help=f"WAV voice sample for Trump (default: {DEFAULT_TRUMP})")
    ap.add_argument("--voice-modi", default=str(DEFAULT_MODI),
                    help=f"WAV voice sample for Modi (default: {DEFAULT_MODI})")
    ap.add_argument("--output", "-o", default=None, help="Final MP4 output path")
    # Generation toggles.
    ap.add_argument("--no-diagram", action="store_true",
                    help="Skip SVG diagram generation (faster, uses placeholder)")
    ap.add_argument("--save-json", action="store_true",
                    help="Save the intermediate content JSON")
    ap.add_argument("--save-script", action="store_true",
                    help="Save the Hindi dialogue script as JSON")
    ap.add_argument("--keep-workdir", action="store_true",
                    help="Keep intermediate frames/audio files for debugging")
    # Backend credentials/configuration (env vars provide the defaults).
    ap.add_argument("--hf-space", default=os.environ.get("HF_SPACE", HF_SPACE),
                    help="HuggingFace Space ID for TTS")
    ap.add_argument("--hf-token",
                    default=os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
                    help="HuggingFace API token (optional)")
    # Avatar overlays shown while each speaker talks.
    ap.add_argument("--avatar-trump", default=str(DEFAULT_TRUMP_AVATAR),
                    help=f"Trump avatar image path (default: {DEFAULT_TRUMP_AVATAR})")
    ap.add_argument("--avatar-modi", default=str(DEFAULT_MODI_AVATAR),
                    help=f"Modi avatar image path (default: {DEFAULT_MODI_AVATAR})")
    ap.add_argument("--openai-transcribe-model",
                    default=os.environ.get("OPENAI_TRANSCRIBE_MODEL", "gpt-4o-mini-transcribe"),
                    help="OpenAI transcription model for timing extraction")
    return ap.parse_args()
197
+
198
+
199
+ # ── Helpers ───────────────────────────────────────────────────────────────────
200
+
201
+ def _image_to_data_uri(image_path: Path) -> str:
202
+ ext = image_path.suffix.lower()
203
+ mime = "image/png" if ext == ".png" else "image/jpeg"
204
+ data = base64.b64encode(image_path.read_bytes()).decode("ascii")
205
+ return f"data:{mime};base64,{data}"
206
+
207
+
208
def _silent_wav_stereo(path: Path, duration: int = 2):
    """Write *duration* seconds of 44.1 kHz stereo silence to *path* via ffmpeg."""
    silence_cmd = [
        "ffmpeg", "-y", "-f", "lavfi",
        "-i", "anullsrc=r=44100:cl=stereo",
        "-ar", "44100",
        "-t", str(duration),
        str(path),
    ]
    # Best effort: output is swallowed and failures are ignored by design.
    subprocess.run(silence_cmd, capture_output=True)
216
+
217
+
218
def _concat_audio_tracks(audio_paths: list[Path], output_audio_path: Path) -> Path:
    """Concatenate the per-line TTS clips into one continuous audio file.

    First attempts a lossless stream copy via ffmpeg's concat demuxer; if
    the clips disagree on codec/sample-rate that fails, so it falls back to
    re-encoding everything at 24 kHz mono.  Returns *output_audio_path*.
    """
    output_audio_path.parent.mkdir(parents=True, exist_ok=True)
    concat_list = output_audio_path.parent / "audio_concat.txt"
    with open(concat_list, "w", encoding="utf-8") as f:
        for ap in audio_paths:
            # The concat demuxer requires single quotes inside a quoted path
            # to be escaped as '\'' — unescaped quotes broke such paths.
            escaped = str(ap.resolve()).replace("'", "'\\''")
            f.write(f"file '{escaped}'\n")
    cmd = [
        "ffmpeg", "-y",
        "-f", "concat", "-safe", "0",
        "-i", str(concat_list),
        "-c", "copy",
        str(output_audio_path),
    ]
    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError:
        # Heterogeneous clips: re-encode instead of stream-copying.
        cmd = [
            "ffmpeg", "-y",
            "-f", "concat", "-safe", "0",
            "-i", str(concat_list),
            "-ar", "24000", "-ac", "1",
            str(output_audio_path),
        ]
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    return output_audio_path
243
+
244
+
245
def _mux_recorded_video_with_audio(recorded_video_path: Path, audio_path: Path, output_path: Path):
    """Re-encode the Playwright screen recording and mux the merged audio onto it."""
    output_path.parent.mkdir(parents=True, exist_ok=True)
    mux_cmd = [
        "ffmpeg", "-y",
        "-i", str(recorded_video_path),
        "-i", str(audio_path),
        "-c:v", "libx264", "-preset", "medium", "-crf", "20",
        "-pix_fmt", "yuv420p",
        "-c:a", "aac", "-b:a", "192k",
        # Clip to the shorter of the two streams so they end together.
        "-shortest",
        str(output_path),
    ]
    subprocess.run(mux_cmd, check=True)
258
+
259
+
260
def _build_speaker_timeline_with_openai(
    dialogue: list[dict],
    audio_paths: list[Path],
    durations: list[float | None],
    transcribe_model: str,
) -> list[dict]:
    """Build a cumulative speaker timeline, refining durations via transcription.

    For each dialogue line, tries to transcribe its audio clip with OpenAI
    (word-level timestamps) and uses last-word-end minus first-word-start as
    the speaking duration; falls back to the ffprobe duration, and to 3.0 s
    when that is also unavailable.  Returns one dict per line with
    index/speaker/start_sec/end_sec/duration_sec/text/transcript.

    Raises SystemExit when the openai package or OPENAI_API_KEY is missing.
    """
    # Import lazily so the package is only required for this feature.
    try:
        openai_module = __import__("openai")
    except Exception:
        raise SystemExit("[error] openai package is required. Run: pip install openai")
    OpenAI = openai_module.OpenAI
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise SystemExit("[error] OPENAI_API_KEY is required for transcription-based avatar timing.")
    client = OpenAI(api_key=api_key)

    timeline = []
    start_at = 0.0  # running cumulative offset into the merged audio track
    for idx, entry in enumerate(dialogue):
        # Fallback duration: measured clip length, else 3 seconds.
        dur = durations[idx] if idx < len(durations) and durations[idx] else 3.0
        audio_path = audio_paths[idx] if idx < len(audio_paths) else None
        transcript = ""
        if audio_path and audio_path.exists():
            try:
                with audio_path.open("rb") as f:
                    result = client.audio.transcriptions.create(
                        model=transcribe_model,
                        file=f,
                        response_format="verbose_json",
                        timestamp_granularities=["word"],
                    )
                transcript = getattr(result, "text", "") or ""
                words = getattr(result, "words", None) or []
                if words:
                    # Word entries may come back as dicts or objects depending
                    # on the SDK version, so both access styles are handled.
                    first_start = words[0].get("start", 0.0) if isinstance(words[0], dict) else getattr(words[0], "start", 0.0)
                    last_end = words[-1].get("end", 0.0) if isinstance(words[-1], dict) else getattr(words[-1], "end", 0.0)
                    # Clamp to 0.1 s so a degenerate transcript can't produce
                    # a zero/negative segment.
                    dur = max(float(last_end) - float(first_start), 0.1)
            except Exception as e:
                # Best effort: a failed transcription keeps the fallback dur.
                print(f"[warn] OpenAI transcription failed for line {idx+1}: {e}")
        end_at = start_at + float(dur)
        timeline.append({
            "index": idx + 1,
            "speaker": "trump" if entry.get("speaker") == 1 else "modi",
            "start_sec": round(start_at, 3),
            "end_sec": round(end_at, 3),
            "duration_sec": round(float(dur), 3),
            "text": entry.get("line", ""),
            "transcript": transcript,
        })
        start_at = end_at
    return timeline
311
+
312
+
313
+ # ── Playwright recorder (accepts http:// URL directly) ────────────────────────
314
+
315
def _record_animation_via_http(
    html_url: str,
    timeline: list[dict],
    work_dir: Path,
    avatar_trump_path: Path,
    avatar_modi_path: Path,
) -> Path:
    """Record the animated board to video, toggling speaker avatars in real time.

    Opens *html_url* in headless Chromium with Playwright video recording
    enabled, injects the two avatar images as fixed overlays, then walks the
    speaker *timeline* in real time (wait_for_timeout per segment) so the
    recording's wall-clock length matches the audio track it will be muxed
    with.  Returns the path of the raw recording (Playwright chooses the
    filename inside work_dir/recordings).
    """
    from narrate_and_render import _require

    pw_module = _require("playwright.sync_api", "playwright")
    sync_playwright = pw_module.sync_playwright

    recording_dir = work_dir / "recordings"
    recording_dir.mkdir(parents=True, exist_ok=True)

    # Inline the avatars as data: URIs so the page needs no extra file serving.
    avatar_trump_data = _image_to_data_uri(avatar_trump_path)
    avatar_modi_data = _image_to_data_uri(avatar_modi_path)

    with sync_playwright() as p:
        browser = p.chromium.launch()
        # record_video_dir switches on Playwright's built-in screen recording.
        context = browser.new_context(
            viewport={"width": BOARD_WIDTH, "height": BOARD_HEIGHT},
            record_video_dir=str(recording_dir),
            record_video_size={"width": BOARD_WIDTH, "height": BOARD_HEIGHT},
        )
        page = context.new_page()
        page.goto(html_url, wait_until="networkidle")
        # Let the board's intro animation play out before the dialogue starts.
        page.wait_for_timeout(8500)

        # Overlay styling: one avatar pinned to each bottom corner.
        page.add_style_tag(content="""
            .speaker-avatar {
                position: fixed;
                bottom: 1px;
                height: 100%;
                max-width: 68%;
                object-fit: contain;
                object-position: bottom center;
                z-index: 9998;
                filter: drop-shadow(0 6px 16px rgba(0,0,0,.45));
            }
            #avatar-trump { left: 20px; }
            #avatar-modi { right: 20px; }
            .avatar-active { display: block; opacity: 1; }
            .avatar-hidden { display: none; opacity: 0; }
        """)

        # Create both <img> overlays once, hidden; the loop below toggles them.
        # NOTE(review): multi-statement evaluate() strings rely on Playwright
        # treating this as a script body — confirm on the pinned version.
        page.evaluate(f"""
            const trump = document.createElement('img');
            trump.id = 'avatar-trump';
            trump.className = 'speaker-avatar avatar-hidden';
            trump.src = {json.dumps(avatar_trump_data)};
            document.body.appendChild(trump);

            const modi = document.createElement('img');
            modi.id = 'avatar-modi';
            modi.className = 'speaker-avatar avatar-hidden';
            modi.src = {json.dumps(avatar_modi_data)};
            document.body.appendChild(modi);
        """)

        # Real-time playback of the timeline: show the active speaker, then
        # block for that segment's duration so the recording stays in sync.
        for item in timeline:
            speaker = item.get("speaker", "trump")
            duration_ms = int(max(0.1, float(item.get("duration_sec", 0.1))) * 1000)

            page.evaluate(f"""
                const trump = document.getElementById('avatar-trump');
                const modi = document.getElementById('avatar-modi');
                const active = {json.dumps(speaker)};
                if (active === 'trump') {{
                    trump.classList.add('avatar-active');
                    trump.classList.remove('avatar-hidden');
                    modi.classList.add('avatar-hidden');
                    modi.classList.remove('avatar-active');
                }} else {{
                    modi.classList.add('avatar-active');
                    modi.classList.remove('avatar-hidden');
                    trump.classList.add('avatar-hidden');
                    trump.classList.remove('avatar-active');
                }}
            """)
            page.wait_for_timeout(duration_ms)

        # Outro: hide avatars
        page.evaluate("""
            ['avatar-trump', 'avatar-modi'].forEach(id => {
                const el = document.getElementById(id);
                if (el) {
                    el.classList.add('avatar-hidden');
                    el.classList.remove('avatar-active');
                }
            });
        """)
        page.wait_for_timeout(600)

        # The video file is finalized only after page/context close.
        video_path_str = page.video.path()
        page.close()
        context.close()
        browser.close()

    raw_video_path = Path(video_path_str)
    if not raw_video_path.exists():
        raise SystemExit("[error] Playwright recording failed: no video file produced.")
    return raw_video_path
418
+
419
+
420
+ # ── Main pipeline ─────────────────────────────────────────────────────────────
421
+
422
def main():
    """End-to-end pipeline entry point.

    topic -> chalkboard HTML (Gemini) -> Hindi dialogue + TTS clips ->
    speaker timeline (OpenAI transcription) -> Playwright screen recording
    with avatars -> FFmpeg mux -> final MP4 under OUTPUT_DIR.
    """
    args = parse_args()
    topic = args.topic
    # Timestamp + slug uniquely name every artefact of this run.
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    topic_slug = slug(topic)

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    work_dir = OUTPUT_DIR / f"_work_{topic_slug}_{ts}"
    work_dir.mkdir(parents=True, exist_ok=True)

    output_path = Path(args.output) if args.output \
        else OUTPUT_DIR / f"{topic_slug}_{ts}_video.mp4"

    print(f"\n{'='*60}")
    print(f" TOPIC : {topic}")
    print(f" OUTPUT: {output_path}")
    print(f"{'='*60}\n")

    # Validate voice files early
    for label, vpath in [("--voice-trump", args.voice_trump), ("--voice-modi", args.voice_modi)]:
        if not Path(vpath).exists():
            raise SystemExit(
                f"[error] Voice file not found ({label}): {Path(vpath).resolve()}\n"
                f" Place WAV samples in voices/trump.wav and voices/modi.wav\n"
                f" or pass --voice-trump / --voice-modi explicitly."
            )

    # Same early validation for the avatar overlay images.
    for label, ipath in [("--avatar-trump", args.avatar_trump), ("--avatar-modi", args.avatar_modi)]:
        if not Path(ipath).exists():
            raise SystemExit(
                f"[error] Avatar image not found ({label}): {Path(ipath).resolve()}\n"
                f" Place images in avatar/trump.png and avatar/modi.png\n"
                f" or pass --avatar-trump / --avatar-modi explicitly."
            )

    # =========================================================================
    # PHASE 1 -- Generate the chalkboard HTML
    # =========================================================================
    print("-- PHASE 1: Chalkboard HTML ----------------------------------\n")

    raw_data = generate_content(topic)

    if args.save_json:
        save_json(raw_data, OUTPUT_DIR / f"{topic_slug}_{ts}.json")

    # Optional second LLM pass: an animated SVG diagram for the board.
    diagram_svg = None
    if not args.no_diagram:
        hint = raw_data.get("diagram_hint", f"A visual diagram explaining {topic}")
        diagram_svg = generate_diagram(topic, hint)

    ctx = build_context(raw_data, diagram_svg)
    html = render_template(ctx, template_file=TEMPLATE_FILE)

    html_path = OUTPUT_DIR / f"{topic_slug}_{ts}.html"
    save_html(html, html_path)

    print(f"\n[phase1] HTML ready -> {html_path.name}\n")

    # =========================================================================
    # PHASE 2 -- Narration + audio
    # =========================================================================
    print("-- PHASE 2: Narration & TTS Audio ----------------------------\n")

    dialogue = generate_narration(topic)

    if args.save_script:
        script_path = OUTPUT_DIR / f"{topic_slug}_{ts}_script.json"
        script_path.write_text(
            json.dumps(dialogue, ensure_ascii=False, indent=2), encoding="utf-8"
        )
        print(f"[narr] Script saved -> {script_path.name}")

    print("\n-- Dialogue Script ------------------------------------------")
    for entry in dialogue:
        # speaker==1 is Trump; "Modi " is padded so labels line up.
        name = "Trump" if entry["speaker"] == 1 else "Modi "
        print(f" [{name}]: {entry['line']}")
    print("-------------------------------------------------------------\n")

    audio_dir = work_dir / "audio"
    audio_paths = generate_audio(
        dialogue,
        voice_trump=args.voice_trump,
        voice_modi=args.voice_modi,
        audio_dir=audio_dir,
        hf_space=args.hf_space,
        hf_token=args.hf_token,
    )

    # Per-line speaker timeline drives the avatar toggling during recording.
    durations = [_get_audio_duration(ap) for ap in audio_paths]
    timeline = _build_speaker_timeline_with_openai(
        dialogue,
        audio_paths,
        durations,
        transcribe_model=args.openai_transcribe_model,
    )
    (work_dir / "speaker_timeline.json").write_text(
        json.dumps(timeline, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )

    print(f"\n[phase2] {len(audio_paths)} audio clips ready\n")

    # =========================================================================
    # PHASE 3 -- Record animation via Playwright
    # =========================================================================
    print("-- PHASE 3: Recording Animation ------------------------------\n")

    # Serve OUTPUT_DIR over http so the board can load Google Fonts.
    httpd = _start_http_server(OUTPUT_DIR)
    html_url = f"http://localhost:{HTTP_PORT}/{html_path.name}"

    recorded_video_path = _record_animation_via_http(
        html_url,
        timeline,
        work_dir,
        avatar_trump_path=Path(args.avatar_trump),
        avatar_modi_path=Path(args.avatar_modi),
    )

    # NOTE(review): shutdown() is not in a try/finally — if recording raises,
    # the daemon server thread keeps serving until process exit.
    httpd.shutdown()
    print(f"\n[phase3] Animation recorded -> {recorded_video_path.name}\n")

    # =========================================================================
    # PHASE 4 -- Build final MP4
    # =========================================================================
    print("-- PHASE 4: Building Video -----------------------------------\n")

    # Concatenate all TTS clips, then mux onto the screen recording.
    merged_audio_path = work_dir / "merged_audio.wav"
    _concat_audio_tracks(audio_paths, merged_audio_path)
    _mux_recorded_video_with_audio(recorded_video_path, merged_audio_path, output_path)

    if not args.keep_workdir:
        shutil.rmtree(work_dir, ignore_errors=True)
    else:
        print(f"[debug] Work files kept at: {work_dir.resolve()}")

    print(f"\n{'='*60}")
    print(f" DONE!")
    print(f" Video -> {output_path.resolve()}")
    print(f"{'='*60}\n")
561
+
562
+
563
+ if __name__ == "__main__":
564
+ main()
template.html ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8"/>
5
+ <link href="https://fonts.googleapis.com/css2?family=Caveat:wght@400;600;700&family=Indie+Flower&family=Patrick+Hand&display=swap" rel="stylesheet"/>
6
+ <style>
7
+ *,*::before,*::after{box-sizing:border-box;margin:0;padding:0}
8
+ :root{
9
+ --chalk-white:#f5f0e8;--chalk-yellow:#f7e06a;
10
+ --chalk-pink:#f4a0b0;--chalk-blue:#a0c4f4;--chalk-orange:#f4b87a;
11
+ --frame-wood:#5c3a1e;
12
+ }
13
+
14
+ /* 9:16 vertical ratio container */
15
+ html,body{
16
+ width:100%;
17
+ min-height:100vh;
18
+ display:flex;
19
+ justify-content:center;
20
+ align-items:center;
21
+ background:#111;
22
+ font-family:'Caveat',cursive;
23
+ padding:20px 0 40px;
24
+ }
25
+
26
+ .frame{
27
+ position:relative;
28
+ /* 9:16 ratio — width × (16/9) = height */
29
+ width:390px;
30
+ height:693px; /* 390 × 16/9 ≈ 693 */
31
+ background:var(--frame-wood);
32
+ border-radius:6px;
33
+ padding:18px 18px 0;
34
+ box-shadow:inset 0 0 14px rgba(0,0,0,.7),0 10px 50px rgba(0,0,0,.9),0 0 0 3px #3a2010,0 0 0 7px #7a4a20;
35
+ overflow:hidden;
36
+ display:flex;
37
+ flex-direction:column;
38
+ }
39
+ .frame::before{
40
+ content:'';
41
+ position:absolute;
42
+ inset:0;
43
+ background:repeating-linear-gradient(90deg,transparent 0,transparent 20px,rgba(0,0,0,.07) 20px,rgba(0,0,0,.07) 21px);
44
+ pointer-events:none;
45
+ z-index:1;
46
+ }
47
+
48
+ .chalk-tray{
49
+ position:relative;
50
+ height:24px;
51
+ background:linear-gradient(180deg,#3a2010 0%,#5c3a1e 40%,#7a4a20 100%);
52
+ border-top:2px solid #8b5c2a;
53
+ display:flex;
54
+ align-items:center;
55
+ padding:0 22px;
56
+ gap:12px;
57
+ flex-shrink:0;
58
+ margin-top:auto;
59
+ z-index:2;
60
+ }
61
+ .chalk-stick{height:11px;border-radius:4px;opacity:.9}
62
+
63
+ .board{
64
+ position:relative;
65
+ flex:1;
66
+ background:radial-gradient(ellipse at 25% 15%,#3a7030 0%,transparent 55%),
67
+ radial-gradient(ellipse at 80% 85%,rgba(70,110,50,.35) 0%,transparent 50%),
68
+ #2d5a27;
69
+ border-radius:2px;
70
+ overflow:hidden;
71
+ padding:16px 18px 14px;
72
+ display:flex;
73
+ flex-direction:column;
74
+ }
75
+
76
+ /* chalk filter */
77
+ .chalk{filter:url(#chalk-filter)}
78
+
79
+ /* text elements */
80
+ .ch-title{font-family:'Caveat',cursive;font-size:26px;font-weight:700;color:var(--chalk-yellow);text-align:center;letter-spacing:1.5px;text-shadow:0 0 16px rgba(247,224,106,.4);opacity:0;animation:fadeChalk .5s ease both .1s}
81
+ .ch-sub{font-family:'Patrick Hand',cursive;font-size:12px;color:var(--chalk-orange);text-align:center;letter-spacing:1px;margin-top:2px;opacity:0;animation:fadeChalk .5s ease both .2s}
82
+ .ch-divider{height:2px;background:var(--chalk-white);border-radius:2px;margin:10px 4px 12px;opacity:0;transform-origin:left;animation:drawLine .7s ease both .35s}
83
+ .ch-label{font-family:'Patrick Hand',cursive;font-size:11.5px;font-weight:600;letter-spacing:2px;text-transform:uppercase;margin-bottom:6px;opacity:0;animation:fadeChalk .5s ease both}
84
+ .ch-list{list-style:none;padding:0;margin:0}
85
+ .ch-list li{font-family:'Indie Flower',cursive;font-size:15px;line-height:1.4;padding-left:22px;position:relative;margin-bottom:3px;color:var(--chalk-white);opacity:0;animation:fadeChalk .5s ease both}
86
+ .ch-list li::before{content:attr(data-b);position:absolute;left:2px;color:var(--chalk-yellow)}
87
+ .ch-formula{font-family:'Caveat',cursive;font-size:18px;font-weight:600;color:var(--chalk-yellow);text-align:center;border:2px dashed rgba(245,240,232,.35);border-radius:6px;padding:7px 10px;margin:8px 0 6px;opacity:0;animation:fadeChalk .5s ease both}
88
+ .ch-footnote{font-family:'Patrick Hand',cursive;font-size:11.5px;color:rgba(245,240,232,.55);font-style:italic;text-align:center;padding-top:4px;opacity:0;animation:fadeChalk .5s ease both}
89
+
90
+ .yellow{color:var(--chalk-yellow)}.pink{color:var(--chalk-pink)}.blue{color:var(--chalk-blue)}.orange{color:var(--chalk-orange)}
91
+ .ul{border-bottom:2px solid currentColor;padding-bottom:1px}
92
+
93
+ /* SVG draw animation */
94
+ .draw{stroke-dasharray:2000;stroke-dashoffset:2000;animation:drawPath var(--dur,2s) ease forwards var(--delay,0s)}
95
+ .draw-fast{stroke-dasharray:600;stroke-dashoffset:600;animation:drawPath var(--dur,.8s) ease forwards var(--delay,0s)}
96
+ .pop{opacity:0;animation:popIn .3s ease both var(--delay,0s)}
97
+
98
+ @keyframes drawPath{to{stroke-dashoffset:0}}
99
+ @keyframes fadeChalk{from{opacity:0;transform:translateY(4px)}to{opacity:1;transform:translateY(0)}}
100
+ @keyframes drawLine{from{transform:scaleX(0);opacity:0}to{transform:scaleX(1);opacity:.5}}
101
+ @keyframes popIn{from{opacity:0;transform:scale(.5)}to{opacity:1;transform:scale(1)}}
102
+
103
+
104
+ .d1{animation-delay:.15s!important}.d2{animation-delay:.3s!important}.d3{animation-delay:.5s!important}
105
+ .d4{animation-delay:.65s!important}.d5{animation-delay:.8s!important}.d6{animation-delay:.95s!important}
106
+ .d7{animation-delay:1.1s!important}.d8{animation-delay:1.25s!important}.d9{animation-delay:1.4s!important}
107
+ .d10{animation-delay:1.55s!important}.d11{animation-delay:1.7s!important}.d12{animation-delay:1.85s!important}
108
+ .d13{animation-delay:2.0s!important}.d14{animation-delay:2.2s!important}.d15{animation-delay:2.4s!important}
109
+ .d16{animation-delay:2.6s!important}.d17{animation-delay:2.8s!important}.d18{animation-delay:3.0s!important}
110
+ .d19{animation-delay:3.2s!important}.d20{animation-delay:3.4s!important}
111
+ </style>
112
+ </head>
113
+ <body>
114
+
115
+ <svg width="0" height="0" style="position:absolute">
116
+ <defs>
117
+ <filter id="chalk-filter" x="-5%" y="-5%" width="110%" height="110%">
118
+ <feTurbulence type="fractalNoise" baseFrequency="0.65" numOctaves="3" result="noise"/>
119
+ <feDisplacementMap in="SourceGraphic" in2="noise" scale="1.5" xChannelSelector="R" yChannelSelector="G"/>
120
+ </filter>
121
+ </defs>
122
+ </svg>
123
+
124
+ <div class="frame">
125
+ <div class="board">
126
+
127
+ {# ── TITLE BLOCK ── #}
128
+ <div class="ch-title chalk d1">{{ title | default("📉 Gradient Descent") }}</div>
129
+ <div class="ch-sub chalk d2">{{ subtitle | default("Machine Learning · How models learn") }}</div>
130
+ <div class="ch-divider chalk"></div>
131
+
132
+ {# ══════════════════════════════════════════════════════
133
+ DIAGRAM SLOT — full board width
134
+ Pass diagram_content (raw SVG/HTML) to fill.
135
+ Dimensions: full width, height: 300px
136
+ ══════════════════════════════════════════════════════ #}
137
+ <div style="width:100%;height:300px;flex-shrink:0;margin-bottom:10px;opacity:0;animation:fadeChalk .4s ease both .4s">
138
+
139
+ {%- if diagram_content is defined and diagram_content -%}
140
+ <div style="width:100%;height:100%;display:flex;align-items:center;justify-content:center;">
141
+ {{ diagram_content | safe }}
142
+ </div>
143
+ {%- else -%}
144
+ <div style="width:100%;height:100%;border:2px dashed rgba(245,240,232,.25);border-radius:6px;display:flex;flex-direction:column;align-items:center;justify-content:center;gap:10px;background:rgba(245,240,232,.03);filter:url(#chalk-filter);">
145
+ <svg width="72" height="60" viewBox="0 0 72 60" fill="none" xmlns="http://www.w3.org/2000/svg">
146
+ <line x1="8" y1="6" x2="8" y2="52" stroke="rgba(245,240,232,.3)" stroke-width="1.8" stroke-linecap="round"/>
147
+ <line x1="8" y1="52" x2="66" y2="52" stroke="rgba(245,240,232,.3)" stroke-width="1.8" stroke-linecap="round"/>
148
+ <path d="M10 44 C18 34,24 20,36 16 C46 13,54 15,60 24" stroke="rgba(160,196,244,.4)" stroke-width="2" fill="none" stroke-linecap="round"/>
149
+ <circle cx="36" cy="16" r="3.5" fill="rgba(247,224,106,.4)"/>
150
+ </svg>
151
+ <span style="font-family:'Patrick Hand',cursive;font-size:10px;color:rgba(245,240,232,.3);letter-spacing:2px;text-transform:uppercase;">diagram slot</span>
152
+ <span style="font-family:'Indie Flower',cursive;font-size:11px;color:rgba(245,240,232,.18);text-align:center;padding:0 20px;">pass <span style="color:rgba(160,196,244,.4)">diagram_content</span> to fill this area</span>
153
+ </div>
154
+ {%- endif -%}
155
+
156
+ </div>{# end diagram slot #}
157
+
158
+
159
+ </div>{# end board #}
160
+
161
+ {# Chalk tray #}
162
+ <div class="chalk-tray">
163
+ <div class="chalk-stick" style="width:54px;background:#f5f0e8"></div>
164
+ <div class="chalk-stick" style="width:40px;background:#f7e06a"></div>
165
+ <div class="chalk-stick" style="width:28px;background:#f4a0b0"></div>
166
+ <div class="chalk-stick" style="width:20px;background:#a0c4f4"></div>
167
+ <div class="chalk-stick" style="width:14px;background:#f4b87a"></div>
168
+ </div>
169
+
170
+ </div>{# end frame #}
171
+
172
+ </body>
173
+ </html>
voices/app.txt ADDED
File without changes