Sanket17 commited on
Commit
b1ebd68
Β·
0 Parent(s):

initial commit

Browse files
Files changed (13) hide show
  1. .gitattributes +35 -0
  2. .gitignore +3 -0
  3. Dockerfile +42 -0
  4. README.md +59 -0
  5. app.py +99 -0
  6. avatar/app.txt +0 -0
  7. generate_chalkboard.py +340 -0
  8. logo/app.txt +0 -0
  9. narrate_and_render.py +523 -0
  10. requirements.txt +7 -0
  11. run_pipeline.py +564 -0
  12. template.html +173 -0
  13. voices/app.txt +0 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+
2
+ *.png
3
+ *.wav
Dockerfile ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ WORKDIR /app
4
+
5
+ RUN apt-get update && apt-get install -y \
6
+ ffmpeg \
7
+ curl \
8
+ wget \
9
+ gnupg \
10
+ libnss3 \
11
+ libnspr4 \
12
+ libatk1.0-0 \
13
+ libatk-bridge2.0-0 \
14
+ libcups2 \
15
+ libdrm2 \
16
+ libdbus-1-3 \
17
+ libxkbcommon0 \
18
+ libxcomposite1 \
19
+ libxdamage1 \
20
+ libxrandr2 \
21
+ libgbm1 \
22
+ libxss1 \
23
+ libasound2 \
24
+ libatspi2.0-0 \
25
+ libwayland-client0 \
26
+ fonts-liberation \
27
+ libappindicator3-1 \
28
+ && rm -rf /var/lib/apt/lists/*
29
+
30
+ COPY requirements.txt .
31
+ RUN pip install --no-cache-dir -r requirements.txt
32
+ RUN playwright install chromium
33
+
34
+ COPY . .
35
+
36
+ ENV PYTHONUNBUFFERED=1
37
+ ENV GRADIO_SERVER_NAME=0.0.0.0
38
+ ENV GRADIO_SERVER_PORT=7860
39
+
40
+ EXPOSE 7860
41
+
42
+ CMD ["python", "app.py"]
README.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ByteBrain Hugging Face App
2
+
3
+ This app generates a chalkboard-style MP4 from a topic using a simple Gradio UI.
4
+
5
+ ## UI
6
+
7
+ - Logo: `bytebrain/logo/logo.png`
8
+ - Input: topic text
9
+ - Output: generated video + logs
10
+
11
+ ## Output Location
12
+
13
+ All generated files are written under temporary storage:
14
+
15
+ - Linux/Hugging Face: `/tmp/bytebrain-output`
16
+ - Local Windows/macOS: system temp directory + `bytebrain-output`
17
+
18
+ You can override with:
19
+
20
+ - `PIPELINE_OUTPUT_DIR=/your/path`
21
+
22
+ ## Required Environment Variables
23
+
24
+ - `GEMINI_API_KEY`
25
+ - `OPENAI_API_KEY`
26
+ - `HF_TOKEN` (recommended)
27
+
28
+ Optional:
29
+
30
+ - `HF_SPACE` (defaults to `banao-tech/vibe-voice-custom-voices`)
31
+ - `OPENAI_TRANSCRIBE_MODEL` (defaults to `gpt-4o-mini-transcribe`)
32
+ - `NARRATION_MODEL` (defaults to `gemini-2.5-pro`)
33
+
34
+ ## Run Locally
35
+
36
+ ```bash
37
+ pip install -r requirements.txt
38
+ playwright install chromium
39
+ python app.py
40
+ ```
41
+
42
+ Open `http://localhost:7860`.
43
+
44
+ ## Docker
45
+
46
+ ```bash
47
+ docker build -t bytebrain-app .
48
+ docker run -p 7860:7860 \
49
+ -e GEMINI_API_KEY=your_key \
50
+ -e OPENAI_API_KEY=your_key \
51
+ -e HF_TOKEN=your_hf_token \
52
+ bytebrain-app
53
+ ```
54
+
55
+ ## Hugging Face Space
56
+
57
+ - Use Docker Space.
58
+ - Set the same environment secrets in Space settings.
59
+ - Entry command is already configured by `Dockerfile` (`python app.py`).
app.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ app.py
3
+ ------
4
+ Gradio UI wrapper around run_pipeline.py.
5
+ Fixes Windows cp1252 UnicodeEncodeError by forcing UTF-8 in the subprocess.
6
+ """
7
+
8
+ import os
9
+ import sys
10
+ import subprocess
11
+ import tempfile
12
+ from pathlib import Path
13
+ from datetime import datetime
14
+ import gradio as gr
15
+
16
+
17
+ HERE = Path(__file__).parent.resolve()
18
+ LOGO_PATH = HERE / "logo" / "logo.png"
19
+ RUN_PIPELINE = HERE / "run_pipeline.py"
20
+
21
+
22
+ def _slug(text: str) -> str:
23
+ return "".join(ch.lower() if ch.isalnum() else "_" for ch in text).strip("_")
24
+
25
+
26
def generate_video(topic: str):
    """Run run_pipeline.py for *topic* and return (video path, combined logs).

    Raises gr.Error when the topic is empty, when the pipeline exits
    non-zero, or when the expected output file is missing afterwards.
    """
    cleaned_topic = (topic or "").strip()
    if not cleaned_topic:
        raise gr.Error("Please enter a topic.")

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_dir = Path(tempfile.gettempdir()) / "bytebrain-output"
    out_dir.mkdir(parents=True, exist_ok=True)
    video_path = out_dir / f"{_slug(cleaned_topic)}_{stamp}_video.mp4"

    child_env = dict(os.environ)
    child_env["PIPELINE_OUTPUT_DIR"] = str(out_dir)
    # Force UTF-8 so Windows cp1252 never chokes on box-drawing / Devanagari chars
    child_env["PYTHONIOENCODING"] = "utf-8"
    child_env["PYTHONUTF8"] = "1"

    proc = subprocess.run(
        [
            sys.executable,
            "-X", "utf8",  # Python 3.7+ UTF-8 mode
            str(RUN_PIPELINE),
            cleaned_topic,
            "--output", str(video_path),
        ],
        capture_output=True,
        encoding="utf-8",   # decode stdout/stderr as UTF-8
        errors="replace",   # replace undecodable bytes instead of crashing
        cwd=str(HERE),
        env=child_env,
    )

    logs = (proc.stdout or "")
    if proc.stderr:
        logs += "\n" + proc.stderr

    if proc.returncode != 0:
        # Surface only the tail of the log in the UI error popup.
        tail = "\n".join(logs.strip().splitlines()[-50:])
        raise gr.Error(f"Generation failed.\n{tail}")

    if not video_path.exists():
        raise gr.Error("Pipeline finished but output video file is missing.")

    return str(video_path), logs
68
+
69
+
70
+ # ── Gradio UI ─────────────────────────────────────────────────────────────────
71
+
72
with gr.Blocks(title="ByteBrain Video Generator") as demo:
    # Logo is optional: only shown when logo/logo.png ships with the app.
    if LOGO_PATH.exists():
        gr.Image(value=str(LOGO_PATH), show_label=False, width=140, height=140)
    gr.Markdown("## ByteBrain -- Topic to Video")
    gr.Markdown(
        "Enter any ML/CS topic and get a chalkboard explainer video "
        "with Hindi Trump-Modi narration."
    )
    topic_input = gr.Textbox(label="Topic", placeholder="e.g. Softmax Function")
    generate_btn = gr.Button("Generate Video", variant="primary")
    video_output = gr.Video(label="Generated Video")
    logs_output = gr.Textbox(label="Pipeline Logs", lines=20, max_lines=40)

    # Wire the button to the pipeline wrapper: one topic in, video + logs out.
    generate_btn.click(
        fn=generate_video,
        inputs=topic_input,
        outputs=[video_output, logs_output],
    )
90
+
91
+
92
if __name__ == "__main__":
    # queue() serializes generation requests; each run shells out to the full
    # pipeline, so concurrent runs would contend for CPU and ffmpeg.
    demo.queue().launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        # NOTE(review): share=True requests a public Gradio tunnel; on
        # Hugging Face Spaces this is ignored (with a warning) -- confirm it
        # is actually wanted for local/Docker runs.
        share=True,
        prevent_thread_lock=False,
    )
avatar/app.txt ADDED
File without changes
generate_chalkboard.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ generate_chalkboard.py
3
+ ──────────────────────────────────────────────────────────────────────────────
4
+ Two-pass Gemini pipeline:
5
+ Pass 1 β€” structured JSON (title, bullets, formula, key terms ...)
6
+ Pass 2 β€” animated SVG diagram (chalk-style, topic-specific)
7
+
8
+ Both are injected into the Jinja2 HTML template to produce a complete board.
9
+
10
+ Usage
11
+ -----
12
+ python generate_chalkboard.py "Backpropagation"
13
+ python generate_chalkboard.py "Attention Mechanism" --save-json
14
+ python generate_chalkboard.py --from-json output/backprop_....json
15
+
16
+ Requirements
17
+ ------------
18
+ pip install google-genai jinja2 python-dotenv
19
+
20
+ .env
21
+ ----
22
+ GEMINI_API_KEY=your-key-here
23
+ """
24
+
25
+ import os
26
+ import re
27
+ import json
28
+ import argparse
29
+ from pathlib import Path
30
+ from datetime import datetime
31
+
32
+ try:
33
+ from dotenv import load_dotenv
34
+ load_dotenv()
35
+ except ImportError:
36
+ pass
37
+
38
+ from google import genai
39
+ from google.genai import types
40
+ from jinja2 import Environment, FileSystemLoader
41
+
42
+
43
+ # ── Config ────────────────────────────────────────────────────────────────────
44
+
45
+ MODEL = "gemini-2.5-pro"
46
+ TEMPLATE_FILE = "template.html"
47
+ TEMPLATE_DIR = Path(__file__).parent
48
+ OUTPUT_DIR = Path(__file__).parent / "output"
49
+
50
+
51
+ # ── Pass 1 prompt β€” structured content JSON ───────────────────────────────────
52
+
53
+ CONTENT_PROMPT = """
54
+ You are an expert ML/CS educator creating chalk-board explainers.
55
+ Output ONLY a valid JSON object -- no markdown fences, no prose, no extra keys.
56
+
57
+ Schema (follow exactly):
58
+ {
59
+ "title": "<emoji + topic name, <=40 chars>",
60
+ "subtitle": "<domain Β· tagline, <=60 chars>",
61
+ "idea_items": [
62
+ {"bullet": "->", "html": "<note with <span class='colour'> tags>"},
63
+ {"bullet": "->", "html": "..."},
64
+ {"bullet": "->", "html": "..."}
65
+ ],
66
+ "minima_label": "<section label <=30 chars>",
67
+ "minima_items": [
68
+ {"bullet": "x", "html": "<span class=\"pink ul\">bad thing</span> -- why bad"},
69
+ {"bullet": "v", "html": "<span class=\"yellow ul\">good thing</span> -- why good"}
70
+ ],
71
+ "formula": "<core equation, plain text + <span> ok>",
72
+ "key_terms": [
73
+ {"bullet": "<sym>", "html": "<span class=\"colour ul\">term</span> -- def"},
74
+ {"bullet": "<sym>", "html": "..."},
75
+ {"bullet": "<sym>", "html": "..."}
76
+ ],
77
+ "footnote": "* <tip <=80 chars>",
78
+ "extra_label": "<bottom-right label <=40 chars>",
79
+ "extra_sub": "<bottom-right hint <=60 chars>",
80
+ "diagram_hint": "<one sentence: what the diagram should visually show>"
81
+ }
82
+
83
+ Allowed colour classes: yellow pink blue orange
84
+ Underline class: ul (combine with colour, e.g. class="blue ul")
85
+ Only <span> tags inside html values. Output ONLY the JSON.
86
+ """.strip()
87
+
88
+
89
+ # ── Pass 2 prompt β€” animated SVG diagram ─────────────────────────────────────
90
+
91
+ DIAGRAM_PROMPT = """
92
+ You are an SVG animation expert creating chalk-style educational diagrams.
93
+
94
+ CONTEXT
95
+ -------
96
+ Topic : {topic}
97
+ Hint : {hint}
98
+ Colours :
99
+ --chalk-white #f5f0e8
100
+ --chalk-yellow #f7e06a
101
+ --chalk-pink #f4a0b0
102
+ --chalk-blue #a0c4f4
103
+ --chalk-orange #f4b87a
104
+ board bg #2d5a27 (dark green chalkboard)
105
+
106
+ OUTPUT RULES
107
+ ------------
108
+ - Output ONLY a raw <svg> element -- no wrapper, no markdown, no explanation.
109
+ - viewBox="0 0 354 300" width="354" height="300"
110
+ - style="filter:url(#chalk-filter)" (already defined in the page)
111
+ - All strokes/fills use the colour values above.
112
+ - Every line/path that "draws on" must use this pattern:
113
+ stroke-dasharray="<len>" stroke-dashoffset="<len>"
114
+ style="animation: drawOn <dur>s ease forwards <delay>s"
115
+ - Every element that pops in must use:
116
+ opacity="0" style="animation: popIn 0.3s ease forwards <delay>s"
117
+ - Use font-family="Patrick Hand, cursive" or "Caveat, cursive" for labels.
118
+ - Keep the diagram clear and readable -- axes, curves, nodes, arrows, labels.
119
+ - The diagram must be TOPIC-SPECIFIC and visually explain the concept.
120
+ - Total animation duration should be 5-8 seconds.
121
+ - Add a <style> block INSIDE the <svg> with ONLY these two keyframes
122
+ (do NOT redefine any other keyframe -- they already exist globally):
123
+ @keyframes drawOn {{ to {{ stroke-dashoffset: 0; }} }}
124
+ @keyframes popIn {{ from {{ opacity:0; transform:scale(.5); }} to {{ opacity:1; transform:scale(1); }} }}
125
+
126
+ Output the <svg>...</svg> block only. Nothing else.
127
+ """.strip()
128
+
129
+
130
+ # ── Gemini client ─────────────────────────────────────────────────────────────
131
+
132
def _get_client() -> genai.Client:
    """Build a Gemini client from GEMINI_API_KEY, exiting when it is unset."""
    key = os.environ.get("GEMINI_API_KEY")
    if key:
        return genai.Client(api_key=key)
    print("[error] GEMINI_API_KEY not set. Add it to your .env file.")
    raise SystemExit(1)
138
+
139
+
140
+ def _strip_fences(text: str) -> str:
141
+ text = re.sub(r"^```[a-z]*\n?", "", text.strip())
142
+ text = re.sub(r"\n?```$", "", text)
143
+ return text.strip()
144
+
145
+
146
+ # ── Pass 1 β€” content JSON ─────────────────────────────────────────────────────
147
+
148
def generate_content(topic: str) -> dict:
    """Pass 1: ask Gemini for the structured board-content JSON for *topic*.

    Returns the parsed dict matching the CONTENT_PROMPT schema; exits the
    process (SystemExit) when the model reply is not valid JSON.
    """
    print(f"[pass1] Generating content JSON for: {topic!r} ...")
    client = _get_client()
    response = client.models.generate_content(
        model=MODEL,
        contents=f"{CONTENT_PROMPT}\n\nTopic: {topic}",
        config=types.GenerateContentConfig(
            temperature=0.7,        # some creativity, but keep schema-stable
            max_output_tokens=4096,
        ),
    )
    # The prompt forbids markdown fences, but strip them anyway -- models
    # frequently wrap JSON in ``` blocks regardless.
    raw = _strip_fences(response.text)
    try:
        data = json.loads(raw)
    except json.JSONDecodeError as exc:
        print("[error] Pass 1 did not return valid JSON:")
        print(raw[:800])  # preview the bad reply to aid debugging
        raise SystemExit(1) from exc
    print("[pass1] Content JSON OK")
    return data
168
+
169
+
170
+ # ── Pass 2 β€” animated SVG ─────────────────────────────────────────────────────
171
+
172
def generate_diagram(topic: str, hint: str) -> str | None:
    """Pass 2: ask Gemini for a chalk-style animated SVG diagram.

    *hint* is the one-sentence diagram description produced by Pass 1.
    Returns raw ``<svg>...</svg>`` markup, or None when no usable SVG could
    be extracted (the caller then falls back to a placeholder).
    """
    print("[pass2] Generating SVG diagram ...")
    client = _get_client()
    prompt = DIAGRAM_PROMPT.format(topic=topic, hint=hint)
    response = client.models.generate_content(
        model=MODEL,
        contents=prompt,
        config=types.GenerateContentConfig(
            temperature=0.9,         # diagrams benefit from more variety
            max_output_tokens=8192,  # SVG markup can be long
        ),
    )
    raw = _strip_fences(response.text)

    # The model sometimes wraps the SVG in prose; salvage the first
    # <svg>...</svg> span before giving up.
    if not raw.strip().startswith("<svg"):
        match = re.search(r"(<svg[\s\S]+?</svg>)", raw, re.IGNORECASE)
        if match:
            raw = match.group(1)
        else:
            print("[warn] Pass 2 did not return a valid <svg>. Diagram will use placeholder.")
            return None

    print("[pass2] SVG diagram OK")
    return raw
196
+
197
+
198
+ # ── Jinja rendering ───────────────────────────────────────────────────────────
199
+
200
def render_template(ctx: dict, template_file: str = TEMPLATE_FILE) -> str:
    """Render *template_file* from TEMPLATE_DIR with the given Jinja context."""
    template_path = TEMPLATE_DIR / template_file
    if not template_path.exists():
        raise FileNotFoundError(
            f"Template not found: {template_path}\n"
            f"Ensure '{template_file}' is in the same folder as this script."
        )
    # autoescape stays off: the context already carries trusted HTML/SVG.
    jinja_env = Environment(
        loader=FileSystemLoader(str(TEMPLATE_DIR)),
        autoescape=False,
    )
    template = jinja_env.get_template(template_file)
    return template.render(**ctx)
212
+
213
+
214
+ # ── Output helpers ────────────────────────────────────────────────────────────
215
+
216
def save_html(html: str, path: Path) -> None:
    """Write *html* to *path* as UTF-8, creating parent directories."""
    parent = path.parent
    parent.mkdir(parents=True, exist_ok=True)
    path.write_text(html, encoding="utf-8")
    print(f"[out] HTML -> {path}")
220
+
221
+
222
def save_json(data: dict, path: Path) -> None:
    """Write *data* as pretty-printed UTF-8 JSON (non-ASCII kept literal)."""
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(data, indent=2, ensure_ascii=False)
    path.write_text(serialized, encoding="utf-8")
    print(f"[out] JSON -> {path}")
226
+
227
+
228
+ # ── Build Jinja context ───────────────────────────────────────────────────────
229
+
230
+ def build_context(data: dict, diagram_svg: str | None) -> dict:
231
+
232
+ def render_list(items: list) -> str:
233
+ lis = []
234
+ for item in items:
235
+ bullet = item.get("bullet", "->")
236
+ html = item.get("html", "")
237
+ lis.append(
238
+ f'<li data-b="{bullet}" '
239
+ f'style="font-family:\'Indie Flower\',cursive;font-size:15px;'
240
+ f'line-height:1.4;padding-left:22px;position:relative;'
241
+ f'margin-bottom:3px;color:var(--chalk-white);">'
242
+ f'{html}</li>'
243
+ )
244
+ return "\n".join(lis)
245
+
246
+ return {
247
+ # Header
248
+ "title": data.get("title", "Topic"),
249
+ "subtitle": data.get("subtitle", ""),
250
+
251
+ # Top-right side notes
252
+ "idea_items_html": render_list(data.get("idea_items", [])),
253
+ "minima_label": data.get("minima_label", "Key Contrast"),
254
+ "minima_items_html": render_list(data.get("minima_items", [])),
255
+
256
+ # Bottom-left
257
+ "formula": data.get("formula", ""),
258
+ "key_terms_html": render_list(data.get("key_terms", [])),
259
+ "footnote": data.get("footnote", ""),
260
+
261
+ # Bottom-right placeholder
262
+ "extra_label": data.get("extra_label", "Your content here"),
263
+ "extra_sub": data.get("extra_sub", "Pass extra_content to fill"),
264
+ "extra_content": None,
265
+
266
+ # Diagram slot -- filled by Pass 2 SVG (or None -> placeholder)
267
+ "diagram_label": data.get("diagram_label", "Diagram"),
268
+ "diagram_sub": data.get("diagram_sub", ""),
269
+ "diagram_content": diagram_svg,
270
+ }
271
+
272
+
273
+ # ── CLI ───────────────────────────────────────────────────────────────────────
274
+
275
def parse_args():
    """CLI: either a positional topic (runs Pass 1) or --from-json (skips it)."""
    parser = argparse.ArgumentParser(
        description="Generate a full chalk-board HTML explainer via Gemini (2-pass)."
    )

    # Exactly one content source must be given.
    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument("topic", nargs="?",
                        help="Topic to explain e.g. 'Backpropagation'")
    source.add_argument("--from-json", metavar="FILE",
                        help="Skip Pass 1 -- load content from JSON file (still runs Pass 2)")

    parser.add_argument("--output", "-o", metavar="FILE",
                        help="Output HTML path (default: output/<slug>_<ts>.html)")
    parser.add_argument("--save-json", action="store_true",
                        help="Save raw content JSON alongside the HTML")
    parser.add_argument("--no-diagram", action="store_true",
                        help="Skip Pass 2 -- leave diagram as placeholder")
    parser.add_argument("--template", default=TEMPLATE_FILE,
                        help=f"Jinja2 template file (default: {TEMPLATE_FILE})")
    return parser.parse_args()
294
+
295
+
296
def slug(text: str) -> str:
    """Lowercase *text* and collapse non-alphanumeric runs to underscores."""
    lowered = text.lower()
    collapsed = re.sub(r"[^a-z0-9]+", "_", lowered)
    return collapsed.strip("_")
298
+
299
+
300
+ # ── Main ──────────────────────────────────────────────────────────────────────
301
+
302
def main():
    """CLI entry point: content JSON (Pass 1) -> SVG (Pass 2) -> HTML file."""
    args = parse_args()

    # -- Pass 1: content JSON (freshly generated, or loaded from disk)
    if args.from_json:
        json_path = Path(args.from_json)
        print(f"[load] Reading JSON from {json_path} ...")
        raw_data = json.loads(json_path.read_text(encoding="utf-8"))
        # Fall back to the file stem when the JSON carries no title.
        topic = raw_data.get("title", json_path.stem)
        topic_slug = slug(topic)
    else:
        topic = args.topic
        raw_data = generate_content(topic)
        topic_slug = slug(topic)

    # Only persist JSON when it was just generated; --from-json already has it.
    if args.save_json and not args.from_json:
        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
        save_json(raw_data, OUTPUT_DIR / f"{topic_slug}_{ts}.json")

    # -- Pass 2: animated SVG diagram (None -> template placeholder)
    diagram_svg = None
    if not args.no_diagram:
        hint = raw_data.get("diagram_hint", f"A visual diagram explaining {topic}")
        diagram_svg = generate_diagram(topic, hint)

    # -- Build context + render
    ctx = build_context(raw_data, diagram_svg)
    html = render_template(ctx, template_file=args.template)

    # -- Save output
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_path = Path(args.output) if args.output else OUTPUT_DIR / f"{topic_slug}_{ts}.html"
    save_html(html, out_path)

    print(f"\nDone! Open in browser -> {out_path.resolve()}")
337
+
338
+
339
+ if __name__ == "__main__":
340
+ main()
logo/app.txt ADDED
File without changes
narrate_and_render.py ADDED
@@ -0,0 +1,523 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ narrate_and_render.py
3
+ ──────────────────────────────────────────────────────────────────────────────
4
+ Full pipeline:
5
+ 1. generate_narration() -- Gemini writes a funny Trump vs Modi
6
+ Hindi dialogue for the given topic
7
+ 2. generate_audio() -- sends each dialogue line to the HuggingFace
8
+ Gradio TTS (banao-tech/vibe-voice-custom-voices)
9
+ using speaker voice files you provide
10
+ 3. render_html_frames() -- Playwright opens the generated HTML board,
11
+ takes a screenshot per dialogue beat
12
+ 4. build_video() -- FFmpeg stitches frames + per-line audio into
13
+ the final MP4
14
+
15
+ Usage
16
+ -----
17
+ python narrate_and_render.py \\
18
+ --html output/softmax_20260329.html \\
19
+ --topic "Softmax Function" \\
20
+ --voice-trump voices/trump.wav \\
21
+ --voice-modi voices/modi.wav \\
22
+ --output final/softmax_video.mp4
23
+
24
+ Requirements
25
+ ------------
26
+ pip install google-genai gradio_client playwright python-dotenv
27
+ playwright install chromium
28
+
29
+ .env
30
+ ----
31
+ GEMINI_API_KEY=your-key-here
32
+ """
33
+
34
+ import os
35
+ import re
36
+ import json
37
+ import shutil
38
+ import argparse
39
+ import subprocess
40
+ import uuid
41
+ from pathlib import Path
42
+ from datetime import datetime
43
+
44
+ try:
45
+ from dotenv import load_dotenv
46
+ load_dotenv()
47
+ except ImportError:
48
+ pass
49
+
50
+ # ── lazy imports ──────────────────────────────────────────────────────────────
51
+
52
+ def _require(module: str, pip_name: str = None):
53
+ import importlib
54
+ try:
55
+ return importlib.import_module(module)
56
+ except ImportError:
57
+ pkg = pip_name or module
58
+ raise SystemExit(
59
+ f"[error] Missing package: '{pkg}'\n"
60
+ f" Run: pip install {pkg}"
61
+ )
62
+
63
+
64
+ # ── Config ────────────────────────────────────────────────────────────────────
65
+
66
+ HF_SPACE = "banao-tech/vibe-voice-custom-voices"
67
+ NARRATION_MODEL = os.environ.get("NARRATION_MODEL", "gemini-2.5-pro")
68
+ BOARD_WIDTH = 414
69
+ BOARD_HEIGHT = 736
70
+
71
+
72
+ # ══════════════════════════════════════════════════════════════════════════════
73
+ # STEP 1 -- Generate Hindi Trump vs Modi narration via Gemini
74
+ # ══════════════════════════════════════════════════════════════════════════════
75
+
76
+ NARRATION_PROMPT = """
77
+ You are a satire comedy writer. Create a sarcastically funny Hindi dialogue between Trump [1] and Modi [2]
78
+ explaining the given ML/CS topic. Rules:
79
+ - Exactly 8-10 lines total, alternating [1] and [2]
80
+ - Output lines primarily in Hindi (Devanagari script), allowing English only for technical terms
81
+ - Trump is overconfident, often confused, and slightly dim-witted in a playful way; keep it witty and non-hateful
82
+ - Modi explains patiently with desi analogies
83
+ - Use most indian way of comedy, not english, also not include fake news term here
84
+ - Tone should be sarcastic and punchy, not plain funny
85
+ - Each line MAX 25 words
86
+ - End with both understanding the concept
87
+ - Output ONLY the dialogue lines, one per line, exactly in this format:
88
+ [1]: <line>
89
+ [2]: <line>
90
+ No extra text, no intro, no outro.
91
+
92
+ IMPORTANT: BOTH MUST SPEAK IN HINDI LANGUAGE. ONLY TECHNICAL TERMS OR JARGON IS ALLOWED.
93
+
94
+ Example style:
95
+ [1]: Modi bhai, yeh Gradient Descent kya hai? Kuch samajh nahi aaya!
96
+ [2]: Are Trump bhai! Socho -- ek pahad hai, gend ko neeche pahunchana hai.
97
+ [1]: Neeche kyun Modi? Main toh TOP pe rehta hoon -- America First!
98
+ [2]: Yahan ulta hai! Neeche matlab Loss kam hai -- yahi ML ka khel hai!
99
+ """.strip()
100
+
101
+
102
def generate_narration(topic: str) -> list[dict]:
    """
    Returns list of: [{"speaker": 1, "line": "..."}, ...]
    speaker 1 = Trump, speaker 2 = Modi

    Tries NARRATION_MODEL first, then falls back to gemini-2.0-flash;
    exits the process (SystemExit) when no model yields parseable dialogue.
    """
    from google import genai as google_genai
    from google.genai import types as google_types

    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise SystemExit("[error] GEMINI_API_KEY not set in .env")

    client = google_genai.Client(api_key=api_key)
    print(f"[narr] Generating Hindi dialogue for: {topic!r} ...")
    prompt = f"{NARRATION_PROMPT}\n\nTopic: {topic}"

    raw = ""
    last_error = None
    # Fallback chain: preferred model first, then a cheaper/faster model.
    for model_name in [NARRATION_MODEL, "gemini-2.0-flash"]:
        try:
            response = client.models.generate_content(
                model=model_name,
                contents=prompt,
                config=google_types.GenerateContentConfig(
                    temperature=0.9,       # comedy benefits from variety
                    max_output_tokens=800,
                ),
            )
            raw = (response.text or "").strip()
            if raw:
                break
        except Exception as e:
            last_error = e
            print(f"[warn] Narration model '{model_name}' failed: {e}")

    if not raw:
        raise SystemExit(f"[error] Narration generation failed: {last_error}")

    # Parse strictly-formatted "[1]: line" / "[2]: line" rows; anything else
    # (blank lines, stray prose) is silently ignored.
    lines = []
    for raw_line in raw.splitlines():
        raw_line = raw_line.strip()
        m = re.match(r"\[([12])\]:\s*(.+)", raw_line)
        if m:
            lines.append({"speaker": int(m.group(1)), "line": m.group(2).strip()})

    if not lines:
        raise SystemExit("[error] Gemini returned no parseable dialogue lines.")

    print(f"[narr] {len(lines)} lines generated")
    return lines
152
+
153
+
154
+ # ══════════════════════════════════════════════════════════════════════════════
155
+ # STEP 2 -- TTS via HuggingFace Gradio
156
+ # ══════════════════════════════════════════════════════════════════════════════
157
+
158
def generate_audio(
    dialogue: list[dict],
    voice_trump: str,
    voice_modi: str,
    audio_dir: Path,
    hf_space: str,
    hf_token: str | None,
) -> list[Path]:
    """
    Sends each dialogue line to the TTS Gradio space.
    Returns list of audio file paths in dialogue order.

    A failed TTS call is replaced by a 2-second silent WAV so the later
    video assembly never stalls on a missing file.
    """
    gradio_client = _require("gradio_client")
    Client = gradio_client.Client
    handle_file = gradio_client.handle_file

    audio_dir.mkdir(parents=True, exist_ok=True)

    print(f"[tts] Connecting to HuggingFace space: {hf_space} ...")
    client_kwargs = {}
    if hf_token:
        client_kwargs["hf_token"] = hf_token
    try:
        client = Client(hf_space, **client_kwargs)
    except TypeError:
        # Older gradio_client versions do not accept hf_token.
        client = Client(hf_space)

    trump_voice = handle_file(voice_trump)
    modi_voice = handle_file(voice_modi)

    audio_paths = []
    for i, entry in enumerate(dialogue):
        speaker = entry["speaker"]
        text = entry["line"]
        out_file = audio_dir / f"line_{i+1:02d}_spk{speaker}.wav"

        # Put the current speaker's reference voice in the speaker-1 slot;
        # the space exposes four slots, so the two voices are alternated.
        spk1 = trump_voice if speaker == 1 else modi_voice
        spk2 = modi_voice if speaker == 1 else trump_voice

        print(f"[tts] Line {i+1}/{len(dialogue)} (Speaker {speaker}): {text[:50]}...")

        try:
            result = client.predict(
                text=text,
                speaker1_audio_path=spk1,
                speaker2_audio_path=spk2,
                speaker3_audio_path=spk1,
                speaker4_audio_path=spk2,
                seed=42,                 # fixed seed for reproducible voices
                diffusion_steps=20,
                cfg_scale=1.3,
                use_sampling=False,
                temperature=0.95,
                top_p=0.95,
                max_words_per_chunk=250,
                api_name="/generate_speech_gradio",
            )
            # The space may return either a dict payload or a bare file path.
            if isinstance(result, dict):
                src = result.get("value") or result.get("path") or result.get("name")
            else:
                src = result

            shutil.copy2(src, out_file)
            print(f"[tts] Saved -> {out_file}")
        except Exception as exc:
            # Best-effort: keep the pipeline alive with silence for this line.
            print(f"[warn] TTS failed for line {i+1}: {exc}")
            _silent_wav(out_file, duration=2)

        audio_paths.append(out_file)

    print(f"[tts] All {len(audio_paths)} audio files ready")
    return audio_paths
230
+
231
+
232
def _silent_wav(path: Path, duration: int = 2) -> None:
    """Write *duration* seconds of stereo 44.1 kHz silence to *path* via ffmpeg.

    Used as a fallback when TTS fails for a dialogue line. The original
    passed the anullsrc filter as a pointless f-string and silently ignored
    ffmpeg failures; a failure is now reported (but still non-fatal, since
    this is itself a best-effort fallback).
    """
    proc = subprocess.run([
        "ffmpeg", "-y", "-f", "lavfi",
        "-i", "anullsrc=r=44100:cl=stereo",
        "-ar", "44100",
        "-t", str(duration),
        str(path),
    ], capture_output=True)
    if proc.returncode != 0:
        print(f"[warn] ffmpeg could not write silent wav {path} "
              f"(exit {proc.returncode})")
240
+
241
+
242
+ # ══════════════════════════════════════════════════════════════════════════════
243
+ # STEP 3 -- Playwright: render HTML board -> screenshots per dialogue beat
244
+ # ══════════════════════════════════════════════════════════════════════════════
245
+
246
def render_html_frames(
    html_path: Path,
    dialogue: list[dict],
    audio_paths: list[Path],
    frames_dir: Path,
) -> list[Path]:
    """Screenshot the HTML board once per dialogue line (plus one outro).

    Injects a subtitle overlay into the page, updates it for every line,
    and captures one PNG per beat at BOARD_WIDTH x BOARD_HEIGHT.
    Returns the frame paths in order (len(dialogue) + 1 frames).
    """
    pw_module = _require("playwright.sync_api", "playwright")
    sync_playwright = pw_module.sync_playwright

    frames_dir.mkdir(parents=True, exist_ok=True)
    frame_paths = []

    # Per-line display durations come from the audio files; 3.0 s fallback
    # when ffprobe cannot report a duration.
    durations = []
    for ap in audio_paths:
        dur = _get_audio_duration(ap)
        durations.append(dur if dur else 3.0)

    html_url = html_path.resolve().as_uri()
    print(f"[frames] Launching Playwright -> {html_url}")

    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page(
            viewport={"width": BOARD_WIDTH, "height": BOARD_HEIGHT}
        )
        page.goto(html_url, wait_until="networkidle")
        # Let the board's chalk draw-on animations finish before capturing.
        page.wait_for_timeout(8500)

        # Subtitle overlay styling (hidden until the .visible class is added).
        page.add_style_tag(content="""
            #subtitle-overlay {
                position: fixed;
                bottom: 54px;
                left: 50%;
                transform: translateX(-50%);
                width: 88%;
                background: rgba(0,0,0,0.72);
                border-radius: 8px;
                padding: 8px 14px;
                font-family: 'Caveat', cursive;
                font-size: 15px;
                color: #f5f0e8;
                text-align: center;
                line-height: 1.5;
                z-index: 9999;
                display: none;
                border: 1px solid rgba(245,240,232,0.2);
            }
            #subtitle-overlay.visible { display: block; }
            #subtitle-speaker {
                font-size: 11px;
                letter-spacing: 1.5px;
                text-transform: uppercase;
                margin-bottom: 3px;
                opacity: 0.6;
            }
        """)

        # Create the overlay DOM node once; its text is swapped per line.
        page.evaluate("""
            const div = document.createElement('div');
            div.id = 'subtitle-overlay';
            div.innerHTML = '<div id="subtitle-speaker"></div><div id="subtitle-text"></div>';
            document.body.appendChild(div);
        """)

        for i, (entry, audio_path, duration) in enumerate(
            zip(dialogue, audio_paths, durations)
        ):
            speaker_name = "Trump" if entry["speaker"] == 1 else "Modi"
            line_text = entry["line"]

            # json.dumps safely escapes quotes/Devanagari for JS injection.
            page.evaluate(f"""
                document.getElementById('subtitle-speaker').textContent = {json.dumps(speaker_name)};
                document.getElementById('subtitle-text').textContent = {json.dumps(line_text)};
                document.getElementById('subtitle-overlay').classList.add('visible');
            """)

            frame_path = frames_dir / f"frame_{i+1:03d}.png"
            page.screenshot(path=str(frame_path), full_page=False)
            frame_paths.append(frame_path)
            print(f"[frames] Frame {i+1}/{len(dialogue)} -> {frame_path.name} ({duration:.1f}s)")

        # Final outro frame with the subtitle hidden again.
        page.evaluate("document.getElementById('subtitle-overlay').classList.remove('visible')")
        outro_path = frames_dir / f"frame_{len(dialogue)+1:03d}.png"
        page.screenshot(path=str(outro_path), full_page=False)
        frame_paths.append(outro_path)

        browser.close()

    print(f"[frames] {len(frame_paths)} frames rendered")
    return frame_paths
336
+
337
+
338
+ def _get_audio_duration(path: Path) -> float | None:
339
+ try:
340
+ result = subprocess.run(
341
+ ["ffprobe", "-v", "error", "-show_entries", "format=duration",
342
+ "-of", "default=noprint_wrappers=1:nokey=1", str(path)],
343
+ capture_output=True, text=True
344
+ )
345
+ return float(result.stdout.strip())
346
+ except Exception:
347
+ return None
348
+
349
+
350
+ # ══════════════════════════════════════════════════════════════════════════════
351
+ # STEP 4 -- FFmpeg: stitch frames + audio -> MP4
352
+ # ══════════════════════════════════════════════════════════════════════════════
353
+
354
def build_video(
    frame_paths: list[Path],
    audio_paths: list[Path],
    durations: list[float],
    output_path: Path,
    fps: int = 24,
):
    """Stitch still frames + per-line audio into one MP4.

    Each (frame, audio, duration) triple is encoded as a still-image segment.
    If there is one more frame than audio clips, the extra frame becomes a
    2-second silent outro.  The segments are then concatenated and re-encoded
    into *output_path*.

    Parameters
    ----------
    frame_paths : board screenshots, one per dialogue line (plus optional outro)
    audio_paths : TTS clips aligned with the dialogue frames
    durations   : pre-measured clip durations in seconds
    output_path : final MP4 destination (parent directories are created)
    fps         : output frame rate (now actually forwarded to FFmpeg via -r;
                  previously this parameter was accepted but ignored)

    Raises SystemExit with FFmpeg stderr when a segment encode fails.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    tmp = output_path.parent / f"_tmp_segments_{uuid.uuid4().hex[:8]}"
    tmp.mkdir(exist_ok=True)

    # Letterbox every frame to the board size so odd screenshot dimensions
    # cannot break x264 (which requires even width/height).
    scale_filter = (
        f"scale={BOARD_WIDTH}:{BOARD_HEIGHT}:force_original_aspect_ratio=decrease,"
        f"pad={BOARD_WIDTH}:{BOARD_HEIGHT}:(ow-iw)/2:(oh-ih)/2:color=black"
    )

    def _encode_still(frame: Path, audio_args: list[str], dur, seg: Path, err_msg: str):
        # One looping still image + one audio source -> a short H.264/AAC segment.
        cmd = [
            "ffmpeg", "-y",
            "-loop", "1", "-i", str(frame),
            *audio_args,
            "-c:v", "libx264", "-preset", "fast",
            "-tune", "stillimage",
            "-r", str(fps),  # honour the requested output frame rate
            "-c:a", "aac", "-b:a", "192k",
            "-pix_fmt", "yuv420p",
            "-vf", scale_filter,
            "-t", str(dur),
            "-shortest",
            str(seg),
        ]
        try:
            subprocess.run(cmd, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as exc:
            raise SystemExit(f"{err_msg}\nstderr:\n{exc.stderr}")

    segment_paths = []
    try:
        for i, (frame, audio, dur) in enumerate(zip(frame_paths, audio_paths, durations)):
            seg = tmp / f"seg_{i:03d}.mp4"
            _encode_still(
                frame, ["-i", str(audio)], dur, seg,
                f"[error] FFmpeg segment encode failed at segment {i+1}.",
            )
            segment_paths.append(seg)
            print(f"[video] Segment {i+1}/{len(audio_paths)} encoded ({dur:.1f}s)")

        # Extra trailing frame (subtitles hidden) -> short silent outro.
        if len(frame_paths) > len(audio_paths):
            outro_seg = tmp / "seg_outro.mp4"
            _encode_still(
                frame_paths[-1],
                ["-f", "lavfi", "-i", "anullsrc=r=44100:cl=stereo"],
                2, outro_seg,
                "[error] FFmpeg outro encode failed.",
            )
            segment_paths.append(outro_seg)

        # concat demuxer list file: one "file '<path>'" line per segment.
        concat_list = tmp / "concat.txt"
        with open(concat_list, "w") as f:
            for sp in segment_paths:
                f.write(f"file '{sp.resolve()}'\n")

        print(f"[video] Concatenating {len(segment_paths)} segments -> {output_path}")
        cmd = [
            "ffmpeg", "-y",
            "-f", "concat", "-safe", "0",
            "-i", str(concat_list),
            "-c:v", "libx264", "-preset", "medium", "-crf", "20",
            "-r", str(fps),
            "-c:a", "aac", "-b:a", "192k", "-ar", "48000",
            "-movflags", "+faststart",
            str(output_path),
        ]
        subprocess.run(cmd, check=True)
        print(f"[video] Final video -> {output_path.resolve()}")
    finally:
        # Always remove the scratch directory, even when an encode fails
        # (previously failed runs leaked the _tmp_segments_* directory).
        shutil.rmtree(tmp, ignore_errors=True)
434
+
435
+
436
+ # ── CLI ───────────────────────────────────────────────────────────────────────
437
+
438
def parse_args():
    """Build and evaluate the CLI for the narration/render script."""
    ap = argparse.ArgumentParser(
        description="Generate Hindi Trump-Modi narration, TTS audio, and render video from chalkboard HTML."
    )
    # Required inputs: the rendered board plus the two voice samples.
    ap.add_argument("--html", required=True, help="Path to the generated chalkboard HTML file")
    ap.add_argument("--topic", required=True, help="Topic name")
    ap.add_argument("--voice-trump", required=True, help="WAV file for Trump's voice")
    ap.add_argument("--voice-modi", required=True, help="WAV file for Modi's voice")
    # Optional output / debugging knobs.
    ap.add_argument("--output", default=None, help="Output MP4 path")
    ap.add_argument("--save-script", action="store_true", help="Save dialogue script as JSON")
    ap.add_argument("--fps", type=int, default=24, help="Output video FPS")
    # TTS backend configuration (env vars override the built-in defaults).
    ap.add_argument("--hf-space", default=os.environ.get("HF_SPACE", HF_SPACE))
    ap.add_argument("--hf-token", default=os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN"))
    ap.add_argument("--run-root", default="output")
    ap.add_argument("--keep-workdir", action="store_true")
    return ap.parse_args()
454
+
455
+
456
def slug(text: str) -> str:
    """Lower-case *text* and collapse every non-alphanumeric run into a single '_'."""
    collapsed = re.sub(r"[^a-z0-9]+", "_", text.lower())
    # Drop underscores introduced at either end by leading/trailing punctuation.
    return collapsed.strip("_")
458
+
459
+
460
+ def _resolve_html_path(raw_html_path: Path) -> Path:
461
+ if not raw_html_path.exists():
462
+ raise SystemExit(f"[error] HTML file not found: {raw_html_path.resolve()}")
463
+ return raw_html_path
464
+
465
+
466
def main():
    """CLI entry point: Hindi narration -> TTS -> board screenshots -> MP4.

    Expects an already-generated chalkboard HTML (see --html); produces the
    final video under --run-root (or at --output) and cleans up intermediate
    audio/frame files unless --keep-workdir is given.
    """
    args = parse_args()
    # Timestamp + topic slug make every run's artefacts uniquely named.
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    topic_slug = slug(args.topic)

    run_root = Path(args.run_root)
    run_root.mkdir(parents=True, exist_ok=True)
    html_path = _resolve_html_path(Path(args.html))

    # Fail fast on missing voice samples before any expensive API calls.
    for label, vpath in [("--voice-trump", args.voice_trump), ("--voice-modi", args.voice_modi)]:
        if not Path(vpath).exists():
            raise SystemExit(f"[error] Voice file not found ({label}): {vpath}")

    # Scratch directory for per-line audio clips and frame screenshots.
    work_dir = run_root / f"_work_{topic_slug}_{ts}"
    work_dir.mkdir(parents=True, exist_ok=True)

    out_dir = run_root
    output_path = Path(args.output) if args.output else out_dir / f"{topic_slug}_{ts}_video.mp4"

    # Step 1: LLM writes the Trump/Modi dialogue for the topic.
    dialogue = generate_narration(args.topic)

    if args.save_script:
        script_path = out_dir / f"{topic_slug}_{ts}_script.json"
        script_path.write_text(json.dumps(dialogue, ensure_ascii=False, indent=2), encoding="utf-8")
        print(f"[narr] Script saved -> {script_path}")

    print("\n-- Dialogue Script ------------------------------------------")
    for entry in dialogue:
        # speaker==1 is Trump; "Modi " is padded so the labels line up.
        name = "Trump" if entry["speaker"] == 1 else "Modi "
        print(f" [{name}]: {entry['line']}")
    print("-------------------------------------------------------------\n")

    # Step 2: one TTS clip per dialogue line.
    audio_dir = work_dir / "audio"
    audio_paths = generate_audio(
        dialogue,
        voice_trump=args.voice_trump,
        voice_modi=args.voice_modi,
        audio_dir=audio_dir,
        hf_space=args.hf_space,
        hf_token=args.hf_token,
    )

    # Step 3: one subtitled screenshot per line (plus an outro frame).
    frames_dir = work_dir / "frames"
    frame_paths = render_html_frames(html_path, dialogue, audio_paths, frames_dir)

    # Step 4: measure clip durations (3s fallback) and assemble the MP4.
    durations = [_get_audio_duration(ap) or 3.0 for ap in audio_paths]
    build_video(frame_paths, audio_paths, durations, output_path, fps=args.fps)

    if not args.keep_workdir:
        shutil.rmtree(work_dir, ignore_errors=True)
    else:
        print(f"[debug] Work files kept at: {work_dir.resolve()}")

    print(f"\nDone! Video saved -> {output_path.resolve()}")
520
+
521
+
522
+ if __name__ == "__main__":
523
+ main()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ python-dotenv
3
+ google-generativeai
4
+ jinja2
5
+ gradio_client
6
+ playwright
7
+ openai
run_pipeline.py ADDED
@@ -0,0 +1,564 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ run_pipeline.py
3
+ ──────────────────────────────────────────────────────────────────────────────
4
+ ONE command to rule them all.
5
+
6
+ Give it a topic -> get a fully rendered MP4 chalkboard video with
7
+ Hindi Trump-Modi narration.
8
+
9
+ Usage
10
+ -----
11
+ python run_pipeline.py "Gradient Descent"
12
+ python run_pipeline.py "Softmax Function" --voice-trump voices/trump.wav --voice-modi voices/modi.wav
13
+ python run_pipeline.py "Attention Mechanism" --no-diagram --keep-workdir
14
+
15
+ What it does internally
16
+ -----------------------
17
+ Step 1 generate_chalkboard.py Pass 1 -- Gemini generates title + diagram hint (JSON)
18
+ Step 2 generate_chalkboard.py Pass 2 -- Gemini generates animated SVG diagram
19
+ Step 3 Jinja2 renders HTML board -- SVG + title injected into template
20
+ Step 4 narrate_and_render.py Step 1 -- Gemini writes Hindi Trump-Modi dialogue
21
+ Step 5 narrate_and_render.py Step 2 -- HuggingFace TTS generates audio per line
22
+ Step 6 narrate_and_render.py Step 3 -- Playwright screenshots board per line
23
+ Step 7 narrate_and_render.py Step 4 -- FFmpeg stitches frames + audio -> MP4
24
+
25
+ File layout expected
26
+ --------------------
27
+ run_pipeline.py <- THIS FILE
28
+ template.html <- Jinja2 HTML template
29
+ generate_chalkboard.py <- chalkboard generator (imported)
30
+ narrate_and_render.py <- narration + video renderer (imported)
31
+ voices/
32
+ trump.wav <- Trump voice sample (default path)
33
+ modi.wav <- Modi voice sample (default path)
34
+ avatar/
35
+ trump.png <- Trump avatar image (default path)
36
+ modi.png <- Modi avatar image (default path)
37
+ output/ <- all outputs land here (auto-created)
38
+
39
+ Requirements
40
+ ------------
41
+ pip install google-genai jinja2 python-dotenv gradio_client playwright
42
+ playwright install chromium
43
+ # ffmpeg must be on PATH
44
+
45
+ .env
46
+ ----
47
+ GEMINI_API_KEY=your-gemini-key
48
+ HF_TOKEN=your-huggingface-token (optional but helps with rate limits)
49
+ """
50
+
51
+ import os
52
+ import sys
53
+ import json
54
+ import base64
55
+ import argparse
56
+ import threading
57
+ import socketserver
58
+ import http.server
59
+ import subprocess
60
+ import shutil
61
+ import tempfile
62
+ from pathlib import Path
63
+ from datetime import datetime
64
+
65
+ # Ensure UTF-8 output on Windows to avoid cp1252 UnicodeEncodeError
66
+ if sys.stdout.encoding and sys.stdout.encoding.lower() != "utf-8":
67
+ try:
68
+ sys.stdout.reconfigure(encoding="utf-8", errors="replace")
69
+ except AttributeError:
70
+ pass
71
+ if sys.stderr.encoding and sys.stderr.encoding.lower() != "utf-8":
72
+ try:
73
+ sys.stderr.reconfigure(encoding="utf-8", errors="replace")
74
+ except AttributeError:
75
+ pass
76
+
77
+ try:
78
+ from dotenv import load_dotenv
79
+ load_dotenv()
80
+ except ImportError:
81
+ pass
82
+
83
+ # Make sure siblings are importable regardless of cwd
84
+ HERE = Path(__file__).parent.resolve()
85
+ sys.path.insert(0, str(HERE))
86
+
87
+ from generate_chalkboard import (
88
+ generate_content,
89
+ generate_diagram,
90
+ build_context,
91
+ render_template,
92
+ save_html,
93
+ save_json,
94
+ slug,
95
+ TEMPLATE_FILE,
96
+ )
97
+ from narrate_and_render import (
98
+ generate_narration,
99
+ generate_audio,
100
+ render_html_frames,
101
+ build_video,
102
+ _get_audio_duration,
103
+ _silent_wav,
104
+ HF_SPACE,
105
+ BOARD_WIDTH,
106
+ BOARD_HEIGHT,
107
+ )
108
+
109
+
110
+ # ── Config ────────────────────────────────────────────────────────────────────
111
+
112
+ OUTPUT_DIR = Path(os.environ.get("PIPELINE_OUTPUT_DIR", str(Path(tempfile.gettempdir()) / "bytebrain-output")))
113
+ DEFAULT_TRUMP = HERE / "voices" / "trump.wav"
114
+ DEFAULT_MODI = HERE / "voices" / "modi.wav"
115
+ DEFAULT_TRUMP_AVATAR = HERE / "avatar" / "trump.png"
116
+ DEFAULT_MODI_AVATAR = HERE / "avatar" / "modi.png"
117
+ HTTP_PORT = 8765
118
+
119
+
120
+ # ── Local HTTP server (fixes Google Fonts over file://) ──────────────────────
121
+
122
+ class _QuietHandler(http.server.SimpleHTTPRequestHandler):
123
+ def log_message(self, *args):
124
+ pass
125
+
126
+
127
def _start_http_server(directory: Path) -> socketserver.TCPServer:
    """Serve *directory* on HTTP_PORT from a daemon thread and return the server.

    Serving over http:// (instead of file://) lets the rendered board load
    Google Fonts.  The caller is responsible for calling ``shutdown()``.

    Instead of ``os.chdir(directory)`` (which silently changed how every
    later relative path in the process resolved), the directory is bound to
    the handler via SimpleHTTPRequestHandler's ``directory`` parameter.
    """
    from functools import partial

    handler = partial(_QuietHandler, directory=str(directory))

    class _ReusableServer(socketserver.TCPServer):
        # Avoid "address already in use" when the pipeline is re-run quickly.
        allow_reuse_address = True

    httpd = _ReusableServer(("", HTTP_PORT), handler)
    thread = threading.Thread(target=httpd.serve_forever, daemon=True)
    thread.start()
    print(f"[http] Local font server started on http://localhost:{HTTP_PORT}")
    return httpd
134
+
135
+
136
+ # ── CLI ───────────────────────────────────────────────────────────────────────
137
+
138
def parse_args():
    """CLI for the end-to-end pipeline: a topic plus optional asset overrides."""
    ap = argparse.ArgumentParser(
        description="Full chalkboard video pipeline -- just give a topic."
    )
    ap.add_argument("topic", help="ML/CS topic to explain, e.g. 'Gradient Descent'")
    # Voice samples for the two speakers (cloned by the TTS space).
    ap.add_argument("--voice-trump", default=str(DEFAULT_TRUMP),
                    help=f"WAV voice sample for Trump (default: {DEFAULT_TRUMP})")
    ap.add_argument("--voice-modi", default=str(DEFAULT_MODI),
                    help=f"WAV voice sample for Modi (default: {DEFAULT_MODI})")
    ap.add_argument("--output", "-o", default=None, help="Final MP4 output path")
    # Generation toggles.
    ap.add_argument("--no-diagram", action="store_true",
                    help="Skip SVG diagram generation (faster, uses placeholder)")
    ap.add_argument("--save-json", action="store_true",
                    help="Save the intermediate content JSON")
    ap.add_argument("--save-script", action="store_true",
                    help="Save the Hindi dialogue script as JSON")
    ap.add_argument("--keep-workdir", action="store_true",
                    help="Keep intermediate frames/audio files for debugging")
    # Backend credentials/configuration (env vars provide the defaults).
    ap.add_argument("--hf-space", default=os.environ.get("HF_SPACE", HF_SPACE),
                    help="HuggingFace Space ID for TTS")
    ap.add_argument("--hf-token",
                    default=os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
                    help="HuggingFace API token (optional)")
    # Avatar overlays shown while each speaker talks.
    ap.add_argument("--avatar-trump", default=str(DEFAULT_TRUMP_AVATAR),
                    help=f"Trump avatar image path (default: {DEFAULT_TRUMP_AVATAR})")
    ap.add_argument("--avatar-modi", default=str(DEFAULT_MODI_AVATAR),
                    help=f"Modi avatar image path (default: {DEFAULT_MODI_AVATAR})")
    ap.add_argument("--openai-transcribe-model",
                    default=os.environ.get("OPENAI_TRANSCRIBE_MODEL", "gpt-4o-mini-transcribe"),
                    help="OpenAI transcription model for timing extraction")
    return ap.parse_args()
197
+
198
+
199
+ # ── Helpers ───────────────────────────────────────────────────────────────────
200
+
201
+ def _image_to_data_uri(image_path: Path) -> str:
202
+ ext = image_path.suffix.lower()
203
+ mime = "image/png" if ext == ".png" else "image/jpeg"
204
+ data = base64.b64encode(image_path.read_bytes()).decode("ascii")
205
+ return f"data:{mime};base64,{data}"
206
+
207
+
208
def _silent_wav_stereo(path: Path, duration: int = 2):
    """Write *duration* seconds of 44.1 kHz stereo silence to *path* via ffmpeg."""
    silence_cmd = [
        "ffmpeg", "-y", "-f", "lavfi",
        "-i", "anullsrc=r=44100:cl=stereo",
        "-ar", "44100",
        "-t", str(duration),
        str(path),
    ]
    # Best effort: output is swallowed and failures are ignored by design.
    subprocess.run(silence_cmd, capture_output=True)
216
+
217
+
218
def _concat_audio_tracks(audio_paths: list[Path], output_audio_path: Path) -> Path:
    """Concatenate the per-line TTS clips into one continuous audio file.

    First attempts a lossless stream copy via ffmpeg's concat demuxer; if
    the clips disagree on codec/sample-rate that fails, so it falls back to
    re-encoding everything at 24 kHz mono.  Returns *output_audio_path*.
    """
    output_audio_path.parent.mkdir(parents=True, exist_ok=True)
    concat_list = output_audio_path.parent / "audio_concat.txt"
    with open(concat_list, "w", encoding="utf-8") as f:
        for ap in audio_paths:
            # The concat demuxer requires single quotes inside a quoted path
            # to be escaped as '\'' — unescaped quotes broke such paths.
            escaped = str(ap.resolve()).replace("'", "'\\''")
            f.write(f"file '{escaped}'\n")
    cmd = [
        "ffmpeg", "-y",
        "-f", "concat", "-safe", "0",
        "-i", str(concat_list),
        "-c", "copy",
        str(output_audio_path),
    ]
    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError:
        # Heterogeneous clips: re-encode instead of stream-copying.
        cmd = [
            "ffmpeg", "-y",
            "-f", "concat", "-safe", "0",
            "-i", str(concat_list),
            "-ar", "24000", "-ac", "1",
            str(output_audio_path),
        ]
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    return output_audio_path
243
+
244
+
245
def _mux_recorded_video_with_audio(recorded_video_path: Path, audio_path: Path, output_path: Path):
    """Re-encode the Playwright screen recording and mux the merged audio onto it."""
    output_path.parent.mkdir(parents=True, exist_ok=True)
    mux_cmd = [
        "ffmpeg", "-y",
        "-i", str(recorded_video_path),
        "-i", str(audio_path),
        "-c:v", "libx264", "-preset", "medium", "-crf", "20",
        "-pix_fmt", "yuv420p",
        "-c:a", "aac", "-b:a", "192k",
        # Clip to the shorter of the two streams so they end together.
        "-shortest",
        str(output_path),
    ]
    subprocess.run(mux_cmd, check=True)
258
+
259
+
260
def _build_speaker_timeline_with_openai(
    dialogue: list[dict],
    audio_paths: list[Path],
    durations: list[float | None],
    transcribe_model: str,
) -> list[dict]:
    """Build a cumulative speaker timeline, refining durations via transcription.

    For each dialogue line, tries to transcribe its audio clip with OpenAI
    (word-level timestamps) and uses last-word-end minus first-word-start as
    the speaking duration; falls back to the ffprobe duration, and to 3.0 s
    when that is also unavailable.  Returns one dict per line with
    index/speaker/start_sec/end_sec/duration_sec/text/transcript.

    Raises SystemExit when the openai package or OPENAI_API_KEY is missing.
    """
    # Import lazily so the package is only required for this feature.
    try:
        openai_module = __import__("openai")
    except Exception:
        raise SystemExit("[error] openai package is required. Run: pip install openai")
    OpenAI = openai_module.OpenAI
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise SystemExit("[error] OPENAI_API_KEY is required for transcription-based avatar timing.")
    client = OpenAI(api_key=api_key)

    timeline = []
    start_at = 0.0  # running cumulative offset into the merged audio track
    for idx, entry in enumerate(dialogue):
        # Fallback duration: measured clip length, else 3 seconds.
        dur = durations[idx] if idx < len(durations) and durations[idx] else 3.0
        audio_path = audio_paths[idx] if idx < len(audio_paths) else None
        transcript = ""
        if audio_path and audio_path.exists():
            try:
                with audio_path.open("rb") as f:
                    result = client.audio.transcriptions.create(
                        model=transcribe_model,
                        file=f,
                        response_format="verbose_json",
                        timestamp_granularities=["word"],
                    )
                transcript = getattr(result, "text", "") or ""
                words = getattr(result, "words", None) or []
                if words:
                    # Word entries may come back as dicts or objects depending
                    # on the SDK version, so both access styles are handled.
                    first_start = words[0].get("start", 0.0) if isinstance(words[0], dict) else getattr(words[0], "start", 0.0)
                    last_end = words[-1].get("end", 0.0) if isinstance(words[-1], dict) else getattr(words[-1], "end", 0.0)
                    # Clamp to 0.1 s so a degenerate transcript can't produce
                    # a zero/negative segment.
                    dur = max(float(last_end) - float(first_start), 0.1)
            except Exception as e:
                # Best effort: a failed transcription keeps the fallback dur.
                print(f"[warn] OpenAI transcription failed for line {idx+1}: {e}")
        end_at = start_at + float(dur)
        timeline.append({
            "index": idx + 1,
            "speaker": "trump" if entry.get("speaker") == 1 else "modi",
            "start_sec": round(start_at, 3),
            "end_sec": round(end_at, 3),
            "duration_sec": round(float(dur), 3),
            "text": entry.get("line", ""),
            "transcript": transcript,
        })
        start_at = end_at
    return timeline
311
+
312
+
313
+ # ── Playwright recorder (accepts http:// URL directly) ────────────────────────
314
+
315
def _record_animation_via_http(
    html_url: str,
    timeline: list[dict],
    work_dir: Path,
    avatar_trump_path: Path,
    avatar_modi_path: Path,
) -> Path:
    """Record the animated board to video, toggling speaker avatars in real time.

    Opens *html_url* in headless Chromium with Playwright video recording
    enabled, injects the two avatar images as fixed overlays, then walks the
    speaker *timeline* in real time (wait_for_timeout per segment) so the
    recording's wall-clock length matches the audio track it will be muxed
    with.  Returns the path of the raw recording (Playwright chooses the
    filename inside work_dir/recordings).
    """
    from narrate_and_render import _require

    pw_module = _require("playwright.sync_api", "playwright")
    sync_playwright = pw_module.sync_playwright

    recording_dir = work_dir / "recordings"
    recording_dir.mkdir(parents=True, exist_ok=True)

    # Inline the avatars as data: URIs so the page needs no extra file serving.
    avatar_trump_data = _image_to_data_uri(avatar_trump_path)
    avatar_modi_data = _image_to_data_uri(avatar_modi_path)

    with sync_playwright() as p:
        browser = p.chromium.launch()
        # record_video_dir switches on Playwright's built-in screen recording.
        context = browser.new_context(
            viewport={"width": BOARD_WIDTH, "height": BOARD_HEIGHT},
            record_video_dir=str(recording_dir),
            record_video_size={"width": BOARD_WIDTH, "height": BOARD_HEIGHT},
        )
        page = context.new_page()
        page.goto(html_url, wait_until="networkidle")
        # Let the board's intro animation play out before the dialogue starts.
        page.wait_for_timeout(8500)

        # Overlay styling: one avatar pinned to each bottom corner.
        page.add_style_tag(content="""
            .speaker-avatar {
                position: fixed;
                bottom: 1px;
                height: 100%;
                max-width: 68%;
                object-fit: contain;
                object-position: bottom center;
                z-index: 9998;
                filter: drop-shadow(0 6px 16px rgba(0,0,0,.45));
            }
            #avatar-trump { left: 20px; }
            #avatar-modi { right: 20px; }
            .avatar-active { display: block; opacity: 1; }
            .avatar-hidden { display: none; opacity: 0; }
        """)

        # Create both <img> overlays once, hidden; the loop below toggles them.
        # NOTE(review): multi-statement evaluate() strings rely on Playwright
        # treating this as a script body — confirm on the pinned version.
        page.evaluate(f"""
            const trump = document.createElement('img');
            trump.id = 'avatar-trump';
            trump.className = 'speaker-avatar avatar-hidden';
            trump.src = {json.dumps(avatar_trump_data)};
            document.body.appendChild(trump);

            const modi = document.createElement('img');
            modi.id = 'avatar-modi';
            modi.className = 'speaker-avatar avatar-hidden';
            modi.src = {json.dumps(avatar_modi_data)};
            document.body.appendChild(modi);
        """)

        # Real-time playback of the timeline: show the active speaker, then
        # block for that segment's duration so the recording stays in sync.
        for item in timeline:
            speaker = item.get("speaker", "trump")
            duration_ms = int(max(0.1, float(item.get("duration_sec", 0.1))) * 1000)

            page.evaluate(f"""
                const trump = document.getElementById('avatar-trump');
                const modi = document.getElementById('avatar-modi');
                const active = {json.dumps(speaker)};
                if (active === 'trump') {{
                    trump.classList.add('avatar-active');
                    trump.classList.remove('avatar-hidden');
                    modi.classList.add('avatar-hidden');
                    modi.classList.remove('avatar-active');
                }} else {{
                    modi.classList.add('avatar-active');
                    modi.classList.remove('avatar-hidden');
                    trump.classList.add('avatar-hidden');
                    trump.classList.remove('avatar-active');
                }}
            """)
            page.wait_for_timeout(duration_ms)

        # Outro: hide avatars
        page.evaluate("""
            ['avatar-trump', 'avatar-modi'].forEach(id => {
                const el = document.getElementById(id);
                if (el) {
                    el.classList.add('avatar-hidden');
                    el.classList.remove('avatar-active');
                }
            });
        """)
        page.wait_for_timeout(600)

        # The video file is finalized only after page/context close.
        video_path_str = page.video.path()
        page.close()
        context.close()
        browser.close()

    raw_video_path = Path(video_path_str)
    if not raw_video_path.exists():
        raise SystemExit("[error] Playwright recording failed: no video file produced.")
    return raw_video_path
418
+
419
+
420
+ # ── Main pipeline ─────────────────────────────────────────────────────────────
421
+
422
def main():
    """End-to-end pipeline entry point.

    topic -> chalkboard HTML (Gemini) -> Hindi dialogue + TTS clips ->
    speaker timeline (OpenAI transcription) -> Playwright screen recording
    with avatars -> FFmpeg mux -> final MP4 under OUTPUT_DIR.
    """
    args = parse_args()
    topic = args.topic
    # Timestamp + slug uniquely name every artefact of this run.
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    topic_slug = slug(topic)

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    work_dir = OUTPUT_DIR / f"_work_{topic_slug}_{ts}"
    work_dir.mkdir(parents=True, exist_ok=True)

    output_path = Path(args.output) if args.output \
        else OUTPUT_DIR / f"{topic_slug}_{ts}_video.mp4"

    print(f"\n{'='*60}")
    print(f" TOPIC : {topic}")
    print(f" OUTPUT: {output_path}")
    print(f"{'='*60}\n")

    # Validate voice files early
    for label, vpath in [("--voice-trump", args.voice_trump), ("--voice-modi", args.voice_modi)]:
        if not Path(vpath).exists():
            raise SystemExit(
                f"[error] Voice file not found ({label}): {Path(vpath).resolve()}\n"
                f" Place WAV samples in voices/trump.wav and voices/modi.wav\n"
                f" or pass --voice-trump / --voice-modi explicitly."
            )

    # Same early validation for the avatar overlay images.
    for label, ipath in [("--avatar-trump", args.avatar_trump), ("--avatar-modi", args.avatar_modi)]:
        if not Path(ipath).exists():
            raise SystemExit(
                f"[error] Avatar image not found ({label}): {Path(ipath).resolve()}\n"
                f" Place images in avatar/trump.png and avatar/modi.png\n"
                f" or pass --avatar-trump / --avatar-modi explicitly."
            )

    # =========================================================================
    # PHASE 1 -- Generate the chalkboard HTML
    # =========================================================================
    print("-- PHASE 1: Chalkboard HTML ----------------------------------\n")

    raw_data = generate_content(topic)

    if args.save_json:
        save_json(raw_data, OUTPUT_DIR / f"{topic_slug}_{ts}.json")

    # Optional second LLM pass: an animated SVG diagram for the board.
    diagram_svg = None
    if not args.no_diagram:
        hint = raw_data.get("diagram_hint", f"A visual diagram explaining {topic}")
        diagram_svg = generate_diagram(topic, hint)

    ctx = build_context(raw_data, diagram_svg)
    html = render_template(ctx, template_file=TEMPLATE_FILE)

    html_path = OUTPUT_DIR / f"{topic_slug}_{ts}.html"
    save_html(html, html_path)

    print(f"\n[phase1] HTML ready -> {html_path.name}\n")

    # =========================================================================
    # PHASE 2 -- Narration + audio
    # =========================================================================
    print("-- PHASE 2: Narration & TTS Audio ----------------------------\n")

    dialogue = generate_narration(topic)

    if args.save_script:
        script_path = OUTPUT_DIR / f"{topic_slug}_{ts}_script.json"
        script_path.write_text(
            json.dumps(dialogue, ensure_ascii=False, indent=2), encoding="utf-8"
        )
        print(f"[narr] Script saved -> {script_path.name}")

    print("\n-- Dialogue Script ------------------------------------------")
    for entry in dialogue:
        # speaker==1 is Trump; "Modi " is padded so labels line up.
        name = "Trump" if entry["speaker"] == 1 else "Modi "
        print(f" [{name}]: {entry['line']}")
    print("-------------------------------------------------------------\n")

    audio_dir = work_dir / "audio"
    audio_paths = generate_audio(
        dialogue,
        voice_trump=args.voice_trump,
        voice_modi=args.voice_modi,
        audio_dir=audio_dir,
        hf_space=args.hf_space,
        hf_token=args.hf_token,
    )

    # Per-line speaker timeline drives the avatar toggling during recording.
    durations = [_get_audio_duration(ap) for ap in audio_paths]
    timeline = _build_speaker_timeline_with_openai(
        dialogue,
        audio_paths,
        durations,
        transcribe_model=args.openai_transcribe_model,
    )
    (work_dir / "speaker_timeline.json").write_text(
        json.dumps(timeline, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )

    print(f"\n[phase2] {len(audio_paths)} audio clips ready\n")

    # =========================================================================
    # PHASE 3 -- Record animation via Playwright
    # =========================================================================
    print("-- PHASE 3: Recording Animation ------------------------------\n")

    # Serve OUTPUT_DIR over http so the board can load Google Fonts.
    httpd = _start_http_server(OUTPUT_DIR)
    html_url = f"http://localhost:{HTTP_PORT}/{html_path.name}"

    recorded_video_path = _record_animation_via_http(
        html_url,
        timeline,
        work_dir,
        avatar_trump_path=Path(args.avatar_trump),
        avatar_modi_path=Path(args.avatar_modi),
    )

    # NOTE(review): shutdown() is not in a try/finally — if recording raises,
    # the daemon server thread keeps serving until process exit.
    httpd.shutdown()
    print(f"\n[phase3] Animation recorded -> {recorded_video_path.name}\n")

    # =========================================================================
    # PHASE 4 -- Build final MP4
    # =========================================================================
    print("-- PHASE 4: Building Video -----------------------------------\n")

    # Concatenate all TTS clips, then mux onto the screen recording.
    merged_audio_path = work_dir / "merged_audio.wav"
    _concat_audio_tracks(audio_paths, merged_audio_path)
    _mux_recorded_video_with_audio(recorded_video_path, merged_audio_path, output_path)

    if not args.keep_workdir:
        shutil.rmtree(work_dir, ignore_errors=True)
    else:
        print(f"[debug] Work files kept at: {work_dir.resolve()}")

    print(f"\n{'='*60}")
    print(f" DONE!")
    print(f" Video -> {output_path.resolve()}")
    print(f"{'='*60}\n")
561
+
562
+
563
+ if __name__ == "__main__":
564
+ main()
template.html ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8"/>
5
+ <link href="https://fonts.googleapis.com/css2?family=Caveat:wght@400;600;700&family=Indie+Flower&family=Patrick+Hand&display=swap" rel="stylesheet"/>
6
+ <style>
7
+ *,*::before,*::after{box-sizing:border-box;margin:0;padding:0}
8
+ :root{
9
+ --chalk-white:#f5f0e8;--chalk-yellow:#f7e06a;
10
+ --chalk-pink:#f4a0b0;--chalk-blue:#a0c4f4;--chalk-orange:#f4b87a;
11
+ --frame-wood:#5c3a1e;
12
+ }
13
+
14
+ /* 9:16 vertical ratio container */
15
+ html,body{
16
+ width:100%;
17
+ min-height:100vh;
18
+ display:flex;
19
+ justify-content:center;
20
+ align-items:center;
21
+ background:#111;
22
+ font-family:'Caveat',cursive;
23
+ padding:20px 0 40px;
24
+ }
25
+
26
+ .frame{
27
+ position:relative;
28
+ /* 9:16 ratio — width × (16/9) = height */
29
+ width:390px;
30
+ height:693px; /* 390 × 16/9 ≈ 693 */
31
+ background:var(--frame-wood);
32
+ border-radius:6px;
33
+ padding:18px 18px 0;
34
+ box-shadow:inset 0 0 14px rgba(0,0,0,.7),0 10px 50px rgba(0,0,0,.9),0 0 0 3px #3a2010,0 0 0 7px #7a4a20;
35
+ overflow:hidden;
36
+ display:flex;
37
+ flex-direction:column;
38
+ }
39
+ .frame::before{
40
+ content:'';
41
+ position:absolute;
42
+ inset:0;
43
+ background:repeating-linear-gradient(90deg,transparent 0,transparent 20px,rgba(0,0,0,.07) 20px,rgba(0,0,0,.07) 21px);
44
+ pointer-events:none;
45
+ z-index:1;
46
+ }
47
+
48
+ .chalk-tray{
49
+ position:relative;
50
+ height:24px;
51
+ background:linear-gradient(180deg,#3a2010 0%,#5c3a1e 40%,#7a4a20 100%);
52
+ border-top:2px solid #8b5c2a;
53
+ display:flex;
54
+ align-items:center;
55
+ padding:0 22px;
56
+ gap:12px;
57
+ flex-shrink:0;
58
+ margin-top:auto;
59
+ z-index:2;
60
+ }
61
+ .chalk-stick{height:11px;border-radius:4px;opacity:.9}
62
+
63
+ .board{
64
+ position:relative;
65
+ flex:1;
66
+ background:radial-gradient(ellipse at 25% 15%,#3a7030 0%,transparent 55%),
67
+ radial-gradient(ellipse at 80% 85%,rgba(70,110,50,.35) 0%,transparent 50%),
68
+ #2d5a27;
69
+ border-radius:2px;
70
+ overflow:hidden;
71
+ padding:16px 18px 14px;
72
+ display:flex;
73
+ flex-direction:column;
74
+ }
75
+
76
+ /* chalk filter */
77
+ .chalk{filter:url(#chalk-filter)}
78
+
79
+ /* text elements */
80
+ .ch-title{font-family:'Caveat',cursive;font-size:26px;font-weight:700;color:var(--chalk-yellow);text-align:center;letter-spacing:1.5px;text-shadow:0 0 16px rgba(247,224,106,.4);opacity:0;animation:fadeChalk .5s ease both .1s}
81
+ .ch-sub{font-family:'Patrick Hand',cursive;font-size:12px;color:var(--chalk-orange);text-align:center;letter-spacing:1px;margin-top:2px;opacity:0;animation:fadeChalk .5s ease both .2s}
82
+ .ch-divider{height:2px;background:var(--chalk-white);border-radius:2px;margin:10px 4px 12px;opacity:0;transform-origin:left;animation:drawLine .7s ease both .35s}
83
+ .ch-label{font-family:'Patrick Hand',cursive;font-size:11.5px;font-weight:600;letter-spacing:2px;text-transform:uppercase;margin-bottom:6px;opacity:0;animation:fadeChalk .5s ease both}
84
+ .ch-list{list-style:none;padding:0;margin:0}
85
+ .ch-list li{font-family:'Indie Flower',cursive;font-size:15px;line-height:1.4;padding-left:22px;position:relative;margin-bottom:3px;color:var(--chalk-white);opacity:0;animation:fadeChalk .5s ease both}
86
+ .ch-list li::before{content:attr(data-b);position:absolute;left:2px;color:var(--chalk-yellow)}
87
+ .ch-formula{font-family:'Caveat',cursive;font-size:18px;font-weight:600;color:var(--chalk-yellow);text-align:center;border:2px dashed rgba(245,240,232,.35);border-radius:6px;padding:7px 10px;margin:8px 0 6px;opacity:0;animation:fadeChalk .5s ease both}
88
+ .ch-footnote{font-family:'Patrick Hand',cursive;font-size:11.5px;color:rgba(245,240,232,.55);font-style:italic;text-align:center;padding-top:4px;opacity:0;animation:fadeChalk .5s ease both}
89
+
90
+ .yellow{color:var(--chalk-yellow)}.pink{color:var(--chalk-pink)}.blue{color:var(--chalk-blue)}.orange{color:var(--chalk-orange)}
91
+ .ul{border-bottom:2px solid currentColor;padding-bottom:1px}
92
+
93
+ /* SVG draw animation */
94
+ .draw{stroke-dasharray:2000;stroke-dashoffset:2000;animation:drawPath var(--dur,2s) ease forwards var(--delay,0s)}
95
+ .draw-fast{stroke-dasharray:600;stroke-dashoffset:600;animation:drawPath var(--dur,.8s) ease forwards var(--delay,0s)}
96
+ .pop{opacity:0;animation:popIn .3s ease both var(--delay,0s)}
97
+
98
+ @keyframes drawPath{to{stroke-dashoffset:0}}
99
+ @keyframes fadeChalk{from{opacity:0;transform:translateY(4px)}to{opacity:1;transform:translateY(0)}}
100
+ @keyframes drawLine{from{transform:scaleX(0);opacity:0}to{transform:scaleX(1);opacity:.5}}
101
+ @keyframes popIn{from{opacity:0;transform:scale(.5)}to{opacity:1;transform:scale(1)}}
102
+
103
+
104
+ .d1{animation-delay:.15s!important}.d2{animation-delay:.3s!important}.d3{animation-delay:.5s!important}
105
+ .d4{animation-delay:.65s!important}.d5{animation-delay:.8s!important}.d6{animation-delay:.95s!important}
106
+ .d7{animation-delay:1.1s!important}.d8{animation-delay:1.25s!important}.d9{animation-delay:1.4s!important}
107
+ .d10{animation-delay:1.55s!important}.d11{animation-delay:1.7s!important}.d12{animation-delay:1.85s!important}
108
+ .d13{animation-delay:2.0s!important}.d14{animation-delay:2.2s!important}.d15{animation-delay:2.4s!important}
109
+ .d16{animation-delay:2.6s!important}.d17{animation-delay:2.8s!important}.d18{animation-delay:3.0s!important}
110
+ .d19{animation-delay:3.2s!important}.d20{animation-delay:3.4s!important}
111
+ </style>
112
+ </head>
113
+ <body>
114
+
115
+ <svg width="0" height="0" style="position:absolute">
116
+ <defs>
117
+ <filter id="chalk-filter" x="-5%" y="-5%" width="110%" height="110%">
118
+ <feTurbulence type="fractalNoise" baseFrequency="0.65" numOctaves="3" result="noise"/>
119
+ <feDisplacementMap in="SourceGraphic" in2="noise" scale="1.5" xChannelSelector="R" yChannelSelector="G"/>
120
+ </filter>
121
+ </defs>
122
+ </svg>
123
+
124
+ <div class="frame">
125
+ <div class="board">
126
+
127
+ {# ── TITLE BLOCK ── #}
128
+ <div class="ch-title chalk d1">{{ title | default("📉 Gradient Descent") }}</div>
129
+ <div class="ch-sub chalk d2">{{ subtitle | default("Machine Learning · How models learn") }}</div>
130
+ <div class="ch-divider chalk"></div>
131
+
132
+ {# ══════════════════════════════════════════════════════
133
+ DIAGRAM SLOT — full board width
134
+ Pass diagram_content (raw SVG/HTML) to fill.
135
+ Dimensions: full width, height: 300px
136
+ ══════════════════════════════════════════════════════ #}
137
+ <div style="width:100%;height:300px;flex-shrink:0;margin-bottom:10px;opacity:0;animation:fadeChalk .4s ease both .4s">
138
+
139
+ {%- if diagram_content is defined and diagram_content -%}
140
+ <div style="width:100%;height:100%;display:flex;align-items:center;justify-content:center;">
141
+ {{ diagram_content | safe }}
142
+ </div>
143
+ {%- else -%}
144
+ <div style="width:100%;height:100%;border:2px dashed rgba(245,240,232,.25);border-radius:6px;display:flex;flex-direction:column;align-items:center;justify-content:center;gap:10px;background:rgba(245,240,232,.03);filter:url(#chalk-filter);">
145
+ <svg width="72" height="60" viewBox="0 0 72 60" fill="none" xmlns="http://www.w3.org/2000/svg">
146
+ <line x1="8" y1="6" x2="8" y2="52" stroke="rgba(245,240,232,.3)" stroke-width="1.8" stroke-linecap="round"/>
147
+ <line x1="8" y1="52" x2="66" y2="52" stroke="rgba(245,240,232,.3)" stroke-width="1.8" stroke-linecap="round"/>
148
+ <path d="M10 44 C18 34,24 20,36 16 C46 13,54 15,60 24" stroke="rgba(160,196,244,.4)" stroke-width="2" fill="none" stroke-linecap="round"/>
149
+ <circle cx="36" cy="16" r="3.5" fill="rgba(247,224,106,.4)"/>
150
+ </svg>
151
+ <span style="font-family:'Patrick Hand',cursive;font-size:10px;color:rgba(245,240,232,.3);letter-spacing:2px;text-transform:uppercase;">diagram slot</span>
152
+ <span style="font-family:'Indie Flower',cursive;font-size:11px;color:rgba(245,240,232,.18);text-align:center;padding:0 20px;">pass <span style="color:rgba(160,196,244,.4)">diagram_content</span> to fill this area</span>
153
+ </div>
154
+ {%- endif -%}
155
+
156
+ </div>{# end diagram slot #}
157
+
158
+
159
+ </div>{# end board #}
160
+
161
+ {# Chalk tray #}
162
+ <div class="chalk-tray">
163
+ <div class="chalk-stick" style="width:54px;background:#f5f0e8"></div>
164
+ <div class="chalk-stick" style="width:40px;background:#f7e06a"></div>
165
+ <div class="chalk-stick" style="width:28px;background:#f4a0b0"></div>
166
+ <div class="chalk-stick" style="width:20px;background:#a0c4f4"></div>
167
+ <div class="chalk-stick" style="width:14px;background:#f4b87a"></div>
168
+ </div>
169
+
170
+ </div>{# end frame #}
171
+
172
+ </body>
173
+ </html>
voices/app.txt ADDED
File without changes