Spaces:

mariapar
/

hypervideo

Running

App Files Files Community

mariapar committed on May 28, 2025

Commit

76160db

verified ·

1 Parent(s): 7129f76

Upload 4 files

Browse files

Files changed (4) hide show

app.py +12 -0
app_core.py +189 -0
packages.txt +1 -0
requirements.txt +4 -0

app.py ADDED Viewed

	@@ -0,0 +1,12 @@

+import gradio as gr
+from app_core import generate_video
+demo = gr.Interface(
+    fn=generate_video,
+    inputs=gr.Textbox(lines=14, label="Paste your lesson text here"),
+    outputs=gr.Video(label="Generated MP4"),
+    title="AI Presentation Generator"
+)
+if __name__ == "__main__":
+    demo.launch()

app_core.py ADDED Viewed

	@@ -0,0 +1,189 @@

+"""
+app_core.py
+-----------
+Pure-Python module that turns raw text into an MP4:
+  text  ➜  outline  ➜  HTML+PNG  ➜  TTS (OpenAI)  ➜  MP4
+"""
+# -------- standard libs --------
+import os, json, subprocess, shutil, textwrap, tempfile
+from pathlib import Path
+from datetime import datetime
+# -------- third-party --------
+from openai import OpenAI               #  ≥ 1.33.0
+from pydub import AudioSegment
+from playwright.sync_api import sync_playwright   # headless screenshots
+# ------------------------------------------------------------------
+# 0.  GLOBAL CONFIG  (edit if you wish)
+# ------------------------------------------------------------------
+# Chat model used for both the slide outline and the narration script.
+MODEL_CHAT = "gpt-4o-mini"      # outline + narration
+MODEL_TTS  = "tts-1"            # OpenAI text-to-speech model
+VOICE_ID   = "alloy"            # voice name sent with every TTS request
+MAX_SLIDES = 10                 # slide cap quoted in the outline prompt
+AUDIO_SPEED = 1.5               # 1.0 = normal, 1.5 ≈ 50 % faster (ffmpeg atempo factor)
+# Shared API client, created at import time; the key is read from the
+# OPENAI_API_KEY environment variable.
+client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+# ------------------------------------------------------------------
+# 1.  OUTLINE PROMPT
+# ------------------------------------------------------------------
+PROMPT_JSON = textwrap.dedent(f"""
+You are a presentation-outliner. (json!)
+• Maximum slides: {MAX_SLIDES} (incl. title)
+• First slide = "title".
+• Prefer "list" slides whenever possible (≤ 5 bullets).
+• Allowed types: "list" | "text" | "quote" | "code"
+• Preserve ``` code ``` verbatim.
+Return **only**
+{{
+  "slides":[
+    {{ "slide_idx":N, "type":"…", "title":"…", "body":… }},
+    …
+  ]}
+}}
+""").strip()
+def text_to_outline(raw_text: str) -> list:
+    rsp = client.chat.completions.create(
+        model        = MODEL_CHAT,
+        temperature  = 0.3,
+        response_format = {"type":"json_object"},
+        messages=[
+            {"role":"system","content":PROMPT_JSON},
+            {"role":"user",  "content":raw_text}
+        ])
+    return json.loads(rsp.choices[0].message.content)["slides"]
+# ------------------------------------------------------------------
+# 2.  HTML SLIDE TEMPLATE  (white bg, brand colours)
+# ------------------------------------------------------------------
+# Page template for a single 1280x720 slide. slide_to_html() fills it in with
+# str.format(), so literal CSS braces are doubled and {content} is the only
+# substitution field.
+HTML_BASE = """
+<!DOCTYPE html><html><head><meta charset="utf-8">
+<link href="https://fonts.googleapis.com/css2?family=PT+Root+UI:wght@400;700&display=swap" rel="stylesheet">
+<style>
+ body{{margin:0;width:1280px;height:720px;display:flex;flex-direction:column;
+      justify-content:center;align-items:center;font-family:'PT Root UI',Arial,sans-serif;
+      background:#fff;color:#000}}
+ h1{{font-size:60px;margin:0 0 40px;color:#6C63FF}}
+ p{{font-size:36px;margin:0}}
+ ul{{font-size:34px;margin:0;padding-left:40px;text-align:left}}
+ li{{margin:12px 0}}
+ blockquote{{font-size:40px;font-style:italic;margin:0;border-left:6px solid #6C63FF;padding-left:24px}}
+ pre{{font-size:28px;margin:0;padding:24px;background:#f5f5f5;border-radius:10px;overflow-x:auto}}
+</style></head><body><div style="max-width:1000px;text-align:center">{content}</div></body></html>
+"""
+import html as _h
+def slide_to_html(slide: dict) -> str:
+    t, body = slide["type"], slide["body"]; title=_h.escape(slide["title"])
+    if t=="title":
+        c=f"<h1>{title}</h1>"
+    elif t=="list":
+        items="\n".join(f"<li>{_h.escape(str(i))}</li>" for i in body)
+        c=f"<h1>{title}</h1><ul>{items}</ul>"
+    elif t=="quote":
+        c=f"<blockquote>“{_h.escape(str(body))}”</blockquote>"
+    elif t=="code":
+        code=_h.escape(str(body).strip().lstrip("`").rstrip("`"))
+        c=f"<h1>{title}</h1><pre><code>{code}</code></pre>"
+    else:
+        c=f"<h1>{title}</h1><p>{_h.escape(str(body))}</p>"
+    return HTML_BASE.format(content=c)
+# ------------------------------------------------------------------
+# 3.  NARRATION PROMPT
+# ------------------------------------------------------------------
+# System prompt for narration(). Unlike PROMPT_JSON this is a plain string,
+# not an f-string, so the single braces in the example are literal.
+# NOTE(review): "(json!)" presumably satisfies the API requirement that the
+# prompt mention JSON when response_format is json_object — confirm.
+PROMPT_NARR = textwrap.dedent("""
+You are a friendly narrator. (json!)
+For every slide write 2-3 warm sentences (25–60 words) that match its content.
+Return:
+{ "narration":[ { "slide_idx":N, "voice_text":"..." }, … ] }
+""").strip()
+def narration(raw_text:str, slides:list) -> list:
+    rsp = client.chat.completions.create(
+        model = MODEL_CHAT,
+        temperature = 0.7,
+        response_format={"type":"json_object"},
+        messages=[
+            {"role":"system","content":PROMPT_NARR},
+            {"role":"user",  "content":json.dumps({"raw_text":raw_text,"slides":slides},ensure_ascii=False)}
+        ])
+    return json.loads(rsp.choices[0].message.content)["narration"]
+# ------------------------------------------------------------------
+# 4.  MAIN PIPELINE
+# ------------------------------------------------------------------
+def generate_video(text: str) -> str:
+    """
+    Entry-point for Gradio:
+    returns absolute path to MP4 in a temp directory.
+    """
+    # temp workdir
+    tmp_root = Path(tempfile.mkdtemp(prefix="ai_vid_"))
+    slides_dir = tmp_root/"slides"; audio_dir = tmp_root/"audio"
+    slides_dir.mkdir(); audio_dir.mkdir()
+    # 1. outline
+    slides = text_to_outline(text)
+    # 2. build HTML
+    html_paths=[]
+    for s in slides:
+        p=slides_dir/f"slide_{s['slide_idx']:03}.html"
+        p.write_text(slide_to_html(s),encoding="utf-8"); html_paths.append(p)
+    # 2b. HTML→PNG screenshots
+    with sync_playwright() as p:
+        br=p.chromium.launch()
+        page=br.new_page(viewport={"width":1280,"height":720})
+        png_paths=[]; durations=[]
+        for html in html_paths:
+            page.goto(html.as_uri()); png=html.with_suffix(".png")
+            page.screenshot(path=png); png_paths.append(png)
+        br.close()
+    # rough equal timing: 5 s per slide
+    durations=[5.0]*len(png_paths)
+    # 3. TTS
+    narr = narration(text,slides)
+    wav_paths=[]
+    for item in narr:
+        speech = client.audio.speech.create(
+            model=MODEL_TTS, voice=VOICE_ID,
+            input=item["voice_text"], response_format="wav")
+        w=audio_dir/f"slide_{item['slide_idx']:03}.wav"
+        speech.stream_to_file(w); wav_paths.append(w)
+    combined=AudioSegment.empty()
+    for w in sorted(wav_paths): combined+=AudioSegment.from_file(w)
+    wav=audio_dir/"narration.wav"; combined.export(wav,"wav")
+    # 3b. speed-up
+    if AUDIO_SPEED!=1.0:
+        fast=audio_dir/"_fast.wav"
+        subprocess.run(["ffmpeg","-y","-i",wav,"-filter:a",
+                        f"atempo={AUDIO_SPEED}",fast],check=True)
+        shutil.move(fast,wav)
+        durations=[round(d/AUDIO_SPEED,2) for d in durations]
+    # 4. slides.txt for ffmpeg
+    concat=slides_dir/"slides.txt"
+    with concat.open("w") as f:
+        for img,d in zip(png_paths,durations):
+            f.write(f"file '{img}'\n"); f.write(f"duration {d}\n")
+        f.write(f"file '{png_paths[-1]}'\n")
+    output = tmp_root/"output.mp4"
+    subprocess.run([
+        "ffmpeg","-y","-f","concat","-safe","0","-i",concat,
+        "-i",wav,"-c:v","libx264","-pix_fmt","yuv420p",
+        "-c:a","aac","-shortest",output],check=True)
+    return str(output)

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ ffmpeg

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+openai>=1.33.0
+playwright
+pydub
+gradio