Spaces:

mariapar
/

hypervideo

Running

File size: 8,455 Bytes

"""
app_core.py
-----------
Function generate_video(text) → absolute path to the finished MP4.

❗ The PROMPT_JSON and DETAILED_PROMPT prompts are copied unchanged
   from your original notebook.
"""

# ── standard library ──────────────────────────────────────────
import os, json, textwrap, subprocess, tempfile
from pathlib import Path
from datetime import datetime

# ── third-party ───────────────────────────────────────────────
from openai import OpenAI               # openai-python ≥1.33.0
from pydub import AudioSegment
from playwright.sync_api import sync_playwright

# ── Playwright browser (downloaded only once) ─────────────────
# A marker file under /tmp records that Chromium has been installed, so the
# install command is skipped on later imports for as long as that file exists.
_pw_flag = Path("/tmp/.pw_chrom_installed")
if not _pw_flag.exists():
    # check=True: a failing install raises CalledProcessError at import time,
    # and the marker file is then not created.
    subprocess.run(["playwright", "install", "chromium"], check=True)
    _pw_flag.touch()

# ── OpenAI client (key is read from the OPENAI_API_KEY env variable) ──
# Shared by the chat-completion helpers and the text-to-speech step below.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# ───────────────────  PROMPT_JSON  ────────────────────────────
# System prompt used by text_to_outline(): asks the model for a JSON object
# {"slides": [...]} of at most 10 slides, the first one being a title slide.
# The text is dedented and stripped before use; do not edit its wording.
PROMPT_JSON = textwrap.dedent(r"""
    You are a presentation-outliner.
    The user needs VALID json only — no extra commentary. (json!)

    ✦ Rules
      0. Total slides ≤ 10 (including the title slide).
        • If the source text is longer, merge or summarise content to stay within 10 slides.

      1. First slide MUST be type "title":
          { "slide_idx":1, "type":"title", "title":"…", "body":"" }
        (body may stay empty)

      2. Prefer **"list"** whenever possible.
        • Break sentences into concise bullet-points.
        • Use "text" only when the content truly cannot be listed.
        Allowed types:
          "list"  – array, ≤ 5 items   ← _default choice_
          "text"  – short paragraph
          "quote" – short quotation or bold statement
          "code"  – code block, copy verbatim from ``` fences

      3. Preserve every ``` … ``` code block unchanged.

      4. Return this exact schema:
          {
            "slides":[
              { "slide_idx":N, "type":"…", "title":"…", "body":… },
              …               // ≤ 9 more objects after the title slide
            ]
          }

    Output the json only.
""").strip()

# ─────────────────  DETAILED_PROMPT  ──────────────────────────
# System prompt used by make_narration(): asks the model for a JSON object
# {"narration": [{"slide_idx": N, "voice_text": "..."}, ...]} with one
# voice-over entry per slide. The user message carries "raw_text" and "slides".
# The text is dedented and stripped before use; do not edit its wording.
DETAILED_PROMPT = textwrap.dedent(r"""
  You are a friendly, motivational voice-over writer.
  The user needs VALID json only — no extra commentary. (json!)

  Source:
    • "raw_text"  — full original article
    • "slides"    — list of slide dictionaries (title, type, body)

  Task for EACH slide in order:
    • Write **at least two sentences** (≈ 25–60 words total).
    • Use the slide’s visible content **and** extra context from raw_text.
    • Keep a welcoming tone: encourage, explain, or add a useful tip.
    • Mention code or quote briefly (“In this code snippet you’ll see …”).
    • First slide  → start with a warm greeting + slide title.
    • Last slide   → quick recap + short friendly goodbye.

  Output exactly:
    { "narration":[ { "slide_idx":N, "voice_text":"..." }, … ] }
""").strip()

# ──────────────────────  helper functions  ────────────────────

def text_to_outline(raw: str) -> list:
    """Ask the chat model to outline *raw* and return the slide list.

    Sends PROMPT_JSON as the system message and *raw* as the user message,
    requests a JSON-object response, and returns the value stored under the
    ``"slides"`` key of the decoded reply.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
        KeyError: If the decoded reply has no ``"slides"`` key.
    """
    conversation = [
        {"role": "system", "content": PROMPT_JSON},
        {"role": "user", "content": raw},
    ]
    completion = client.chat.completions.create(
        model="gpt-4o",
        temperature=0.3,
        response_format={"type": "json_object"},
        messages=conversation,
    )
    # Only the first choice is used.
    payload = json.loads(completion.choices[0].message.content)
    return payload["slides"]

def make_narration(raw: str, slides: list) -> list:
    """Ask the chat model for voice-over text for every slide.

    The user message is a JSON document with two keys, ``"raw_text"`` (the
    source text) and ``"slides"`` (the outline), serialised without ASCII
    escaping. DETAILED_PROMPT is the system message. Returns the value stored
    under the ``"narration"`` key of the decoded reply.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
        KeyError: If the decoded reply has no ``"narration"`` key.
    """
    request_body = json.dumps(
        {"raw_text": raw, "slides": slides},
        ensure_ascii=False,
    )
    conversation = [
        {"role": "system", "content": DETAILED_PROMPT},
        {"role": "user", "content": request_body},
    ]
    completion = client.chat.completions.create(
        model="gpt-4o",
        temperature=0.7,
        response_format={"type": "json_object"},
        messages=conversation,
    )
    # Only the first choice is used.
    payload = json.loads(completion.choices[0].message.content)
    return payload["narration"]

def build_html(slide: dict) -> str:
    """Render one slide dictionary as a standalone 1280x720 HTML page.

    Args:
        slide: Mapping with the keys ``"type"``, ``"title"`` and ``"body"``.
            Missing keys are tolerated: a missing title renders as an empty
            heading, a missing body as empty content, and an unknown or
            missing type is rendered like a ``"text"`` slide.

    Returns:
        The complete HTML document. All slide content is HTML-escaped.

    Layout per type:
        * ``"title"`` – heading only (the body is ignored).
        * ``"list"``  – heading plus one ``<li>`` per item. A string body is
          split into its non-empty lines instead of being iterated character
          by character.
        * ``"quote"`` – the body inside a ``<blockquote>`` (no heading).
        * ``"code"``  – heading plus ``<pre><code>``; surrounding Markdown
          fences, including the language tag on the opening fence line, are
          removed.
        * anything else – heading plus a paragraph.
    """
    import html as _h

    kind = slide.get("type")
    body = slide.get("body")
    # str() guards against a non-string title; None becomes an empty heading.
    title = _h.escape(str(slide.get("title") or ""))
    # Avoid rendering the literal text "None" for an absent body.
    text_body = "" if body is None else str(body)

    if kind == "title":
        cont = f"<h1>{title}</h1>"
    elif kind == "list":
        if body is None:
            entries = []
        elif isinstance(body, str):
            # A plain string would otherwise yield one bullet per character.
            entries = [ln.strip() for ln in body.splitlines() if ln.strip()]
        elif isinstance(body, (list, tuple)):
            entries = body
        else:
            entries = [body]
        items = "\n".join(f"<li>{_h.escape(str(it))}</li>" for it in entries)
        cont = f"<h1>{title}</h1><ul>{items}</ul>"
    elif kind == "quote":
        cont = f"<blockquote>“{_h.escape(text_body)}”</blockquote>"
    elif kind == "code":
        code = text_body.strip()
        if code.startswith("```") and "\n" in code:
            # Drop the whole opening fence line ("```" plus optional language
            # tag) so the tag is not shown as the first line of code.
            code = code.split("\n", 1)[1]
        else:
            code = code.lstrip("`")
        # Remove the closing fence and the blank lines the fences leave behind,
        # keeping the indentation of the first code line intact.
        code = code.rstrip("`").rstrip().lstrip("\n")
        cont = f"<h1>{title}</h1><pre><code>{_h.escape(code)}</code></pre>"
    else:
        cont = f"<h1>{title}</h1><p>{_h.escape(text_body)}</p>"

    # Doubled braces are literal CSS braces; only {content} is substituted.
    HTML_WRAP = """
    <!DOCTYPE html><html><head><meta charset="utf-8">
    <link href="https://fonts.googleapis.com/css2?family=PT+Root+UI:wght@400;700&display=swap" rel="stylesheet">
    <style>
     body{{margin:0;width:1280px;height:720px;display:flex;flex-direction:column;
          justify-content:center;align-items:center;font-family:'PT Root UI',Arial,sans-serif;
          background:#fff;color:#000}}
     h1{{font-size:60px;margin:0 0 40px;color:#6C63FF}}
     p{{font-size:36px;margin:0}}
     ul{{font-size:34px;margin:0;padding-left:40px;text-align:left}}
     li{{margin:12px 0}}
     blockquote{{font-size:40px;font-style:italic;margin:0;border-left:6px solid #6C63FF;padding-left:24px}}
     pre{{font-size:28px;margin:0;padding:24px;background:#f5f5f5;border-radius:10px;overflow-x:auto}}
    </style></head><body><div style="max-width:1000px;text-align:center">{content}</div></body></html>
    """
    return HTML_WRAP.format(content=cont)

# ────────────────────  generate_video  ────────────────────────
def generate_video(text: str) -> str:
    """Turn *text* into a narrated slide-show video and return the MP4 path.

    Entry point for the Gradio app (per the original docstring). Steps:

    1. Ask the model for a slide outline.
    2. Render every slide to HTML and screenshot it to PNG with Chromium.
    3. Ask the model for narration, synthesise it with text-to-speech and
       join the clips into one audio track.
    4. Let ffmpeg combine the images (shown for the length of their clip)
       with the audio track.

    Slides and narration are paired by ``slide_idx`` rather than by list
    position, so missing or reordered narration entries cannot shift the
    audio against the pictures. A slide without narration is shown over a
    short silence.

    Args:
        text: Source text to present.

    Returns:
        Path of the generated ``output.mp4`` inside a fresh temporary
        directory. Intermediate HTML/PNG/WAV files are removed on success;
        the whole directory is removed if anything fails.

    Raises:
        ValueError: If the outline contains no slides.
        subprocess.CalledProcessError: If ffmpeg exits with an error.
    """
    import shutil

    silent_ms = 2000  # screen time for a slide that has no narration

    work = Path(tempfile.mkdtemp(prefix="ppt2vid_"))
    slides_dir = work / "slides"
    audio_dir = work / "audio"
    finished = False
    try:
        slides_dir.mkdir()
        audio_dir.mkdir()

        # 1. outline
        slides = text_to_outline(text)
        if not slides:
            raise ValueError("the outline contains no slides")

        # 2. HTML → PNG
        # Files are numbered by list position: the model-supplied slide_idx
        # may repeat or arrive as a string, which would clobber files or break
        # the zero-padded format spec.
        html_paths = []
        for pos, slide in enumerate(slides, start=1):
            page_file = slides_dir / f"slide_{pos:03}.html"
            page_file.write_text(build_html(slide), encoding="utf-8")
            html_paths.append(page_file)

        pngs = []
        with sync_playwright() as p:
            browser = p.chromium.launch()
            try:
                tab = browser.new_page(viewport={"width": 1280, "height": 720})
                for page_file in html_paths:
                    tab.goto(page_file.as_uri())
                    img = page_file.with_suffix(".png")
                    tab.screenshot(path=img)
                    pngs.append(img)
            finally:
                browser.close()

        # 3. narration + TTS
        # Keys are normalised to str so that 1 and "1" address the same slide;
        # the first entry wins when an index is repeated.
        voice_by_idx = {}
        for entry in make_narration(text, slides):
            voice_by_idx.setdefault(
                str(entry.get("slide_idx")), entry.get("voice_text"))

        combo = AudioSegment.empty()
        durations = []
        for pos, slide in enumerate(slides, start=1):
            voice = voice_by_idx.get(str(slide.get("slide_idx", pos)))
            if voice and str(voice).strip():
                clip_path = audio_dir / f"slide_{pos:03}.wav"
                speech = client.audio.speech.create(
                    model="tts-1", voice="alloy",
                    input=voice, response_format="wav")
                speech.stream_to_file(clip_path)
                clip = AudioSegment.from_file(clip_path)
            else:
                clip = AudioSegment.silent(duration=silent_ms)
            # Each clip is decoded once and reused for both the joined track
            # and the slide's on-screen time.
            combo += clip
            durations.append(clip.duration_seconds)

        narration_wav = audio_dir / "narration.wav"
        combo.export(narration_wav, "wav")

        # 4. ffmpeg concat
        def _entry(path) -> str:
            # A single quote inside a quoted concat path is written as '\''.
            escaped = str(path).replace("'", "'\\''")
            return f"file '{escaped}'\n"

        lines = []
        for img, seconds in zip(pngs, durations):
            lines.append(_entry(img))
            lines.append(f"duration {seconds:.3f}\n")
        # The last image is listed once more, as in the original; presumably
        # so its duration entry takes effect (verify against the ffmpeg
        # concat documentation).
        lines.append(_entry(pngs[-1]))
        concat = slides_dir / "slides.txt"
        concat.write_text("".join(lines), encoding="utf-8")

        mp4 = work / "output.mp4"
        subprocess.run([
            "ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(concat),
            "-i", str(narration_wav), "-c:v", "libx264", "-pix_fmt", "yuv420p",
            "-c:a", "aac", "-shortest", str(mp4)], check=True)

        finished = True
        return str(mp4)
    finally:
        if finished:
            # Only the MP4 is needed by the caller.
            shutil.rmtree(slides_dir, ignore_errors=True)
            shutil.rmtree(audio_dir, ignore_errors=True)
        else:
            shutil.rmtree(work, ignore_errors=True)