Spaces:

mariapar
/

hypervideo

Running

App Files Files Community

mariapar committed on May 28, 2025

Commit

b245107

verified ·

1 Parent(s): c45af57

Update app_core.py

Browse files

Files changed (1) hide show

app_core.py +254 -117

app_core.py CHANGED Viewed

@@ -1,21 +1,26 @@
-# app_core.py  ────────────────────────────────────────────────────────────
 """
-Однофайловый конвейер:
-  raw text → outline → HTML+PNG → OpenAI-TTS → MP4
-ВАЖНО: PROMPT_JSON и DETAILED_PROMPT оставлены 1-в-1
 """
 # ─ стандартные ─
-import os, json, textwrap, subprocess, tempfile, shutil
 from pathlib import Path
 from datetime import datetime
-# ─ third-party ─
-import openai
 from openai import OpenAI
 from pydub import AudioSegment
 from playwright.sync_api import sync_playwright
 # ─────────────────────────────────────────────────────────────
 # 0. Playwright браузер (устанавливаем 1 раз без sudo)
@@ -26,10 +31,10 @@ if not _pw_flag.exists():
     _pw_flag.touch()
 # ─────────────────────────────────────────────────────────────
-# 1.   П Р О М П Т Ы  (точно как в вашем файле)
 # ─────────────────────────────────────────────────────────────
 PROMPT_JSON = textwrap.dedent("""
-    You are a presentation-outliner.
     The user needs VALID json only — no extra commentary. (json!)
     ✦ Rules
@@ -41,13 +46,13 @@ PROMPT_JSON = textwrap.dedent("""
         (body may stay empty)
       2. Prefer **"list"** whenever possible.
-        • Break sentences into concise bullet-points.
-        • Use "text" only when the content truly cannot be listed.
-        Allowed types:
-          "list"  – array, ≤ 5 items   ← default
-          "text"  – short paragraph
-          "quote" – short quotation or bold statement
-          "code"  – code block, copy verbatim from ``` fences
       3. Preserve every ``` … ``` code block unchanged.
@@ -60,10 +65,11 @@ PROMPT_JSON = textwrap.dedent("""
           }
     Output the json only.
 """).strip()
 DETAILED_PROMPT = textwrap.dedent("""
-  You are a friendly, motivational voice-over writer.
   The user needs VALID json only — no extra commentary. (json!)
   Source:
@@ -71,7 +77,7 @@ DETAILED_PROMPT = textwrap.dedent("""
     • "slides"    — list of slide dictionaries (title, type, body)
   Task for EACH slide in order:
-    • Write **at least two sentences** (≈ 25–60 words total).
     • Use the slide’s visible content **and** extra context from raw_text.
     • Keep a welcoming tone: encourage, explain, or add a useful tip.
     • Mention code or quote briefly (“In this code snippet you’ll see …”).
@@ -83,115 +89,246 @@ DETAILED_PROMPT = textwrap.dedent("""
 """).strip()
 # ─────────────────────────────────────────────────────────────
-# 2.  Вспомогательные функции
 # ─────────────────────────────────────────────────────────────
-client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-def text_to_outline(raw: str) -> list:
-    rsp = client.chat.completions.create(
-        model="gpt-4o",
         temperature=0.3,
-        response_format={"type":"json_object"},
-        messages=[{"role":"system","content":PROMPT_JSON},
-                  {"role":"user",  "content":raw}]
     )
-    return json.loads(rsp.choices[0].message.content)["slides"]
 def build_slide_html(slide: dict) -> str:
-    import html as _h
-    title = _h.escape(slide["title"]); t=slide["type"]; body=slide["body"]
-    if t=="title":
-        content=f"<h1>{title}</h1>"
-    elif t=="list":
-        items="\n".join(f"<li>{_h.escape(str(i))}</li>" for i in body)
-        content=f"<h1>{title}</h1><ul>{items}</ul>"
-    elif t=="quote":
-        content=f"<blockquote>“{_h.escape(str(body))}”</blockquote>"
-    elif t=="code":
-        code=_h.escape(str(body).strip().lstrip("`").rstrip("`"))
-        content=f"<h1>{title}</h1><pre><code>{code}</code></pre>"
-    else:
-        content=f"<h1>{title}</h1><p>{_h.escape(str(body))}</p>"
-    HTML_BASE = """
-    <!DOCTYPE html><html><head><meta charset="utf-8">
-    <link href="https://fonts.googleapis.com/css2?family=PT+Root+UI:wght@400;700&display=swap" rel="stylesheet">
-    <style>
-     body{{margin:0;width:1280px;height:720px;display:flex;flex-direction:column;
-          justify-content:center;align-items:center;font-family:'PT Root UI',Arial,sans-serif;
-          background:#fff;color:#000}}
-     h1{{font-size:60px;margin:0 0 40px;color:#6C63FF}}
-     p{{font-size:36px;margin:0}}
-     ul{{font-size:34px;margin:0;padding-left:40px;text-align:left}}
-     li{{margin:12px 0}}
-     blockquote{{font-size:40px;font-style:italic;margin:0;border-left:6px solid #6C63FF;padding-left:24px}}
-     pre{{font-size:28px;margin:0;padding:24px;background:#f5f5f5;border-radius:10px;overflow-x:auto}}
-    </style></head><body><div style="max-width:1000px;text-align:center">{content}</div></body></html>
-    """
-    return HTML_BASE.format(content=content)
-def make_narration(raw: str, slides: list) -> list:
-    rsp = client.chat.completions.create(
-        model="gpt-4o",
         temperature=0.8,
-        response_format={"type":"json_object"},
-        messages=[{"role":"system","content":DETAILED_PROMPT},
-                  {"role":"user",  "content":json.dumps({"raw_text":raw,"slides":slides},ensure_ascii=False)}]
     )
-    return json.loads(rsp.choices[0].message.content)["narration"]
 # ─────────────────────────────────────────────────────────────
-# 3.  Главная ФУНКЦИЯ для Gradio
 # ─────────────────────────────────────────────────────────────
-def generate_video(text: str) -> str:
-    """Возвращает абсолютный путь к output.mp4"""
-    work = Path(tempfile.mkdtemp(prefix="ppt2vid_"))
-    slides_dir = work/"slides"; audio_dir=work/"audio"
-    slides_dir.mkdir(); audio_dir.mkdir()
-    # 1 outline
-    slides = text_to_outline(text)
-    # 2 HTML + PNG
-    htmls=[]
-    for s in slides:
-        h=slides_dir/f"slide_{s['slide_idx']:03}.html"
-        h.write_text(build_slide_html(s),encoding="utf-8"); htmls.append(h)
-    with sync_playwright() as p:
-        browser=p.chromium.launch()
-        page=browser.new_page(viewport={"width":1280,"height":720})
-        pngs=[]
-        for h in htmls:
-            page.goto(h.as_uri())
-            img=h.with_suffix(".png")
-            page.screenshot(path=img); pngs.append(img)
-        browser.close()
-    durations=[5.0]*len(pngs)          # фикс 5 сек на слайд (без ускорения)
-    # 3 TTS
-    narr=make_narration(text,slides)
-    wavs=[]
-    for n in narr:
-        wav=audio_dir/f"slide_{n['slide_idx']:03}.wav"
-        speech=client.audio.speech.create(
-            model="tts-1",voice="alloy",
-            input=n["voice_text"],response_format="wav")
-        speech.stream_to_file(wav); wavs.append(wav)
-    mix=AudioSegment.empty()
-    for w in sorted(wavs): mix+=AudioSegment.from_file(w)
-    wav=audio_dir/"narration.wav"; mix.export(wav,"wav")
-    # 4 slides.txt + ffmpeg
-    concat=slides_dir/"slides.txt"
-    with concat.open("w") as f:
-        for img,d in zip(sorted(pngs),durations):
-            f.write(f"file '{img}'\n"); f.write(f"duration {d}\n")
-        f.write(f"file '{pngs[-1]}'\n")
-    mp4=work/"output.mp4"
-    subprocess.run(["ffmpeg","-y","-f","concat","-safe","0","-i",concat,
-                    "-i",wav,"-c:v","libx264","-pix_fmt","yuv420p",
-                    "-c:a","aac","-shortest",mp4],check=True)
-    return str(mp4)

 """
+Однофайловый конвейер для Gradio:
+  raw text → outline → HTML+PNG → OpenAI‑TTS → MP4
+ВАЖНО: PROMPT_JSON и DETAILED_PROMPT оставлены 1‑в‑1.
+Запуск:
+    python app_core.py           # локальный запуск Gradio
+    # или
+    import app_core; app_core.main()
 """
 # ─ стандартные ─
+import os, json, textwrap, subprocess, tempfile, shutil, html, asyncio
 from pathlib import Path
 from datetime import datetime
+# ─ third‑party ─
+import openai                      # ≥ 1.33.0
 from openai import OpenAI
 from pydub import AudioSegment
 from playwright.sync_api import sync_playwright
+import gradio as gr
 # ─────────────────────────────────────────────────────────────
 # 0. Playwright браузер (устанавливаем 1 раз без sudo)
     _pw_flag.touch()
 # ─────────────────────────────────────────────────────────────
+# 1. System prompts (оставлены без изменений)
 # ─────────────────────────────────────────────────────────────
 PROMPT_JSON = textwrap.dedent("""
+    You are a presentation‑outliner.
     The user needs VALID json only — no extra commentary. (json!)
     ✦ Rules
         (body may stay empty)
       2. Prefer **"list"** whenever possible.
+      +    • Break sentences into concise bullet‑points.
+      +    • Use "text" only when the content truly cannot be listed.
+      +    Allowed types:
+      +      "list"  – array, ≤ 5 items   ← _default choice_
+      +      "text"  – short paragraph
+      +      "quote" – short quotation or bold statement
+      +      "code"  – code block, copy verbatim from ``` fences
       3. Preserve every ``` … ``` code block unchanged.
           }
     Output the json only.
 """).strip()
 DETAILED_PROMPT = textwrap.dedent("""
+  You are a friendly, motivational voice‑over writer.
   The user needs VALID json only — no extra commentary. (json!)
   Source:
     • "slides"    — list of slide dictionaries (title, type, body)
   Task for EACH slide in order:
+    • Write **at least two sentences** (≈ 25–60 words total).
     • Use the slide’s visible content **and** extra context from raw_text.
     • Keep a welcoming tone: encourage, explain, or add a useful tip.
     • Mention code or quote briefly (“In this code snippet you’ll see …”).
 """).strip()
 # ─────────────────────────────────────────────────────────────
+# 2. HTML/CSS шаблон слайдов (без изменений)
+# ─────────────────────────────────────────────────────────────
# Page template for a single slide.
# It is filled in with str.format(title=..., content=...), so the only
# placeholders are {title} and {content}; every literal CSS brace is doubled
# ({{ / }}) to survive formatting.
# The body is fixed at 1280x720 px, the same size as the viewport used when
# the page is screenshotted.
HTML_BASE = """
<!DOCTYPE html>
<html>
<head>
  <meta charset=\"utf-8\">
  <title>{title}</title>
  <!-- Hyperskill brand‑like styling -->
  <link href=\"https://fonts.googleapis.com/css2?family=PT+Root+UI:wght@400;700&display=swap\" rel=\"stylesheet\">
  <style>
    :root {{
      --grad-from:#4BFFDF;
      --grad-to:#7AB7FE;
      --accent:#6C63FF;
      --text-dark:#000;
      --bg-light:#fff;
    }}
    body {{
      margin:0;
      width:1280px;height:720px;
      display:flex;flex-direction:column;
      justify-content:center;align-items:center;
      font-family:'PT Root UI',Arial,sans-serif;
      color:var(--text-dark);
      background:var(--bg-light);
    }}
    .wrap {{max-width:1000px;text-align:center;padding:0 40px;}}
    h1 {{
      font-size:60px;font-weight:700;margin:0 0 40px;
      color:var(--accent);
    }}
    p {{font-size:36px;margin:0;}}
    ul {{
      font-size:34px;text-align:left;margin:0 auto;padding-left:40px;
    }}
    li {{margin:12px 0;}}
    blockquote {{
      font-size:40px;font-style:italic;margin:0;
      border-left:6px solid var(--accent);padding-left:24px;
    }}
    pre {{
      font-size:28px;line-height:1.35;margin:0;padding:24px;
      background:#f5f5f5;border-radius:10px;text-align:left;overflow-x:auto;
    }}
  </style>
</head>
<body>
  <div class=\"wrap\">
    {content}
  </div>
</body>
</html>
"""
+# ─────────────────────────────────────────────────────────────
+# 3. Функции конвейера
 # ─────────────────────────────────────────────────────────────
# Module-level OpenAI client shared by the chat and text-to-speech calls below.
client = OpenAI()   # the API key is taken from the OPENAI_API_KEY environment variable
def text_to_outline(raw_text: str, model: str = "gpt-4o") -> list:
    """Ask the chat model to turn raw text into a slide outline.

    ``PROMPT_JSON`` is sent as the system message and ``raw_text`` as the
    user message; the reply is requested as a JSON object.

    Args:
        raw_text: Source article or script text.
        model: Name of the chat-completions model to call.

    Returns:
        The value stored under the ``"slides"`` key of the decoded reply.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
        KeyError: If the decoded reply has no ``"slides"`` key.
    """
    conversation = [
        {"role": "system", "content": PROMPT_JSON},
        {"role": "user", "content": raw_text},
    ]
    completion = client.chat.completions.create(
        model=model,
        temperature=0.3,
        response_format={"type": "json_object"},
        messages=conversation,
        max_tokens=2048,
    )
    reply = completion.choices[0].message.content
    return json.loads(reply)["slides"]
 def build_slide_html(slide: dict) -> str:
+    t, body = slide["type"], slide["body"]
+    title   = html.escape(slide["title"])
+    if t == "title":
+        content = f"<h1>{title}</h1>"
+    elif t == "list":
+        items = "\n".join(f"<li>{html.escape(str(it))}</li>" for it in body)
+        content = f"<h1>{title}</h1><ul>{items}</ul>"
+    elif t == "quote":
+        content = f"<blockquote>“{html.escape(str(body))}”</blockquote>"
+    elif t == "code":
+        code = html.escape(str(body).strip().lstrip("`").rstrip("`"))
+        content = f"<h1>{title}</h1><pre><code>{code}</code></pre>"
+    else:                                   # text
+        content = f"<h1>{title}</h1><p>{html.escape(str(body))}</p>"
+    return HTML_BASE.format(title=title, content=content)
def save_html(slides: list, slides_dir: Path) -> list:
    """Write one HTML file per slide into *slides_dir*.

    Each file is named ``slide_NNN.html`` where ``NNN`` is the slide's
    ``"slide_idx"`` zero-padded to three digits, and contains the markup
    returned by ``build_slide_html`` encoded as UTF-8.

    Args:
        slides: Slide dictionaries; each must provide ``"slide_idx"``.
        slides_dir: Existing directory that receives the files.

    Returns:
        Paths of the written files, in the order of *slides*.
    """
    written = []
    for slide in slides:
        target = slides_dir / "slide_{:03}.html".format(slide["slide_idx"])
        target.write_text(build_slide_html(slide), encoding="utf-8")
        written.append(target)
    return written
def html_to_png(html_paths: list) -> list:
    """Screenshot every HTML slide to a PNG stored next to it.

    A single headless Chromium page with a 1280x720 viewport is reused for
    all slides. Each screenshot is saved under the HTML file's name with the
    suffix replaced by ``.png``.

    Args:
        html_paths: ``Path`` objects of the HTML files to render.

    Returns:
        Paths of the PNG files, in the order of *html_paths*.
    """
    png_paths = []
    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page(viewport={"width": 1280, "height": 720})
            for html_file in html_paths:
                page.goto(html_file.as_uri())
                png_path = html_file.with_suffix(".png")
                page.screenshot(path=png_path)
                png_paths.append(png_path)
        finally:
            # Close the browser even when navigation or a screenshot fails.
            browser.close()
    return png_paths
def generate_narration(raw_text: str, slides: list, model: str = "gpt-4o") -> list:
    """Ask the chat model for voice-over text for the given slides.

    The user message is a JSON document with the keys ``"raw_text"`` and
    ``"slides"`` (non-ASCII characters are kept as-is); ``DETAILED_PROMPT``
    is the system message and the reply is requested as a JSON object.

    Args:
        raw_text: The original source text.
        slides: Slide dictionaries produced for that text.
        model: Name of the chat-completions model to call.

    Returns:
        The value stored under the ``"narration"`` key of the decoded reply.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
        KeyError: If the decoded reply has no ``"narration"`` key.
    """
    payload = json.dumps(
        {"raw_text": raw_text, "slides": slides},
        ensure_ascii=False,
    )
    conversation = [
        {"role": "system", "content": DETAILED_PROMPT},
        {"role": "user", "content": payload},
    ]
    completion = client.chat.completions.create(
        model=model,
        temperature=0.8,
        response_format={"type": "json_object"},
        messages=conversation,
        max_tokens=2048,
    )
    reply = completion.choices[0].message.content
    return json.loads(reply)["narration"]
def tts_narration(narration_list: list, audio_dir: Path):
    """Synthesize speech for every narration item and join it into one WAV.

    Items are processed in ascending ``"slide_idx"`` order so that the
    returned durations, the concatenated audio and the alphabetically sorted
    ``slide_NNN`` files all follow the same slide order, even if the model
    returned the narration out of order.

    Args:
        narration_list: Dictionaries with ``"slide_idx"`` and ``"voice_text"``.
        audio_dir: Directory for the audio files; created if missing.

    Returns:
        A ``(final_wav, durations)`` tuple: the path of the combined
        ``narration.wav`` and the per-slide clip lengths in seconds (rounded
        to two decimals), ordered by slide index.
    """
    audio_dir.mkdir(exist_ok=True)
    ordered_items = sorted(narration_list, key=lambda item: item["slide_idx"])

    combined = AudioSegment.empty()
    durations = []
    for item in ordered_items:
        idx, text = item["slide_idx"], item["voice_text"]
        speech = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=text,
            response_format="wav",
        )
        wav_path = audio_dir / f"slide_{idx:03}.wav"
        speech.stream_to_file(wav_path)

        # Decode each clip once and reuse it for both timing and joining.
        clip = AudioSegment.from_file(wav_path)
        durations.append(round(clip.duration_seconds, 2))
        combined += clip

    final_wav = audio_dir / "narration.wav"
    combined.export(final_wav, format="wav")
    return final_wav, durations
def assemble_video(slides_dir: Path, audio_path: Path, durations: list, output_mp4: Path):
    """Combine the slide PNGs and the narration audio into an MP4 with ffmpeg.

    A concat-demuxer list (``slides.txt``) is written into *slides_dir*: one
    ``file``/``duration`` pair per slide in sorted file-name order, followed
    by the last image once more without a duration. ffmpeg then encodes the
    image sequence as H.264 (yuv420p) with AAC audio, stopping at the shorter
    of the two streams.

    Args:
        slides_dir: Directory containing ``slide_*.png`` files.
        audio_path: Narration audio file.
        durations: Display time in seconds for each slide, in the same order
            as the sorted PNG files.
        output_mp4: Destination video path (overwritten if present).

    Returns:
        *output_mp4*.

    Raises:
        RuntimeError: If *slides_dir* holds no ``slide_*.png`` files.
        ValueError: If the number of durations differs from the number of
            PNG files.
        subprocess.CalledProcessError: If ffmpeg exits with an error.
    """
    png_files = sorted(slides_dir.glob("slide_*.png"))
    if not png_files:
        raise RuntimeError("PNG slides not found")
    # zip() would silently drop slides or timings on a mismatch and yield a
    # video that no longer lines up with the narration, so fail loudly.
    if len(durations) != len(png_files):
        raise ValueError(
            f"got {len(durations)} durations for {len(png_files)} PNG slides"
        )

    def concat_entry(path) -> str:
        # Inside single quotes the concat list needs ' written as '\''.
        escaped = str(path).replace("'", "'\\''")
        return f"file '{escaped}'\n"

    lines = []
    for img, dur in zip(png_files, durations):
        lines.append(concat_entry(img))
        lines.append(f"duration {dur}\n")
    lines.append(concat_entry(png_files[-1]))  # repeat last frame

    concat_file = slides_dir / "slides.txt"
    concat_file.write_text("".join(lines), encoding="utf-8")

    ffmpeg_cmd = [
        "ffmpeg", "-y",
        "-f", "concat", "-safe", "0", "-i", str(concat_file),
        "-i", str(audio_path),
        "-c:v", "libx264", "-pix_fmt", "yuv420p",
        "-c:a", "aac", "-shortest",
        str(output_mp4),
    ]
    subprocess.run(ffmpeg_cmd, check=True)
    return output_mp4
 # ─────────────────────────────────────────────────────────────
+# 4. Главная функция конвейера (вызывается из Gradio)
 # ─────────────────────────────────────────────────────────────
def process_pipeline(raw_text: str) -> str:
    """Run the whole pipeline and return the path of the resulting MP4.

    Steps: outline the text into slides, render the slides to HTML and PNG,
    generate narration and synthesize it to audio, then assemble the video.
    All artefacts live in a fresh temporary directory; it is kept on success
    (the returned file is inside it) and removed if any step fails.

    Args:
        raw_text: Article or script text to convert.

    Returns:
        Path of ``output.mp4`` as a string.

    Raises:
        ValueError: If *raw_text* is empty or only whitespace.
    """
    # Reject blank input before any API call or temp directory is made.
    if not raw_text or not raw_text.strip():
        raise ValueError("raw_text is empty: nothing to turn into a presentation")

    run_dir = Path(tempfile.mkdtemp(prefix="ai_presentation_"))
    try:
        slides_dir = run_dir / "slides"
        audio_dir = run_dir / "audio"
        slides_dir.mkdir()

        # 1. GPT -> outline
        slides = text_to_outline(raw_text)

        # 2. outline -> HTML -> PNG
        html_paths = save_html(slides, slides_dir)
        html_to_png(html_paths)

        # 3. GPT -> narration -> TTS
        narration = generate_narration(raw_text, slides)
        wav_path, durations = tts_narration(narration, audio_dir)

        # 4. PNG + WAV -> MP4
        output_mp4 = run_dir / "output.mp4"
        assemble_video(slides_dir, wav_path, durations, output_mp4)
    except Exception:
        # Do not leave half-built run directories behind on failure.
        shutil.rmtree(run_dir, ignore_errors=True)
        raise
    return str(output_mp4)
+# ─────────────────────────────────────────────────────────────
+# 5. Gradio UI
+# ─────────────────────────────────────────────────────────────
def main():
    """Build the Gradio interface around ``process_pipeline`` and launch it.

    The UI has a 20-line text box for the source text and a file component
    for the generated MP4.
    """
    text_input = gr.Textbox(lines=20, label="Raw article / script text")
    file_output = gr.File(label="Generated presentation (MP4)")
    app = gr.Interface(
        fn=process_pipeline,
        inputs=text_input,
        outputs=file_output,
        title="AI Presentation Generator",
        description="One‑click conversion of raw text into narrated video slides.",
    )
    app.launch()


if __name__ == "__main__":
    main()