hypervideo / app_core.py
mariapar's picture
Update app_core.py
a1c8c28 verified
"""
app_core.py
-----------
Function generate_video(text) → absolute path to the finished MP4.
❗ The PROMPT_JSON and DETAILED_PROMPT prompts were copied unchanged
from your original notebook.
"""
# ── standard library ──────────────────────────────────────────
import os, json, textwrap, subprocess, tempfile
from pathlib import Path
from datetime import datetime
# ── third-party ───────────────────────────────────────────────
from openai import OpenAI # openai-python β‰₯1.33.0
from pydub import AudioSegment
from playwright.sync_api import sync_playwright
# ── Playwright browser (downloaded only once) ─────────────────
_pw_flag = Path("/tmp/.pw_chrom_installed")


def _ensure_chromium(marker: Path) -> None:
    """Install Playwright's Chromium unless *marker* already exists.

    The marker file is created only after the install command succeeds
    (``check=True`` raises on a non-zero exit status before it is touched).
    """
    if marker.exists():
        return
    subprocess.run(["playwright", "install", "chromium"], check=True)
    marker.touch()


_ensure_chromium(_pw_flag)

# ── OpenAI client (the key is read from the OPENAI_API_KEY secret) ──
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
# ─────────────────── PROMPT_JSON ────────────────────────────
# System prompt used by text_to_outline(): asks the model for a JSON outline
# of at most 10 slides. The punctuation below (—, ≤, •) was previously stored
# as mis-encoded byte sequences and has been restored to the intended symbols.
PROMPT_JSON = textwrap.dedent(r"""
You are a presentation-outliner.
The user needs VALID json only — no extra commentary. (json!)
✦ Rules
0. Total slides ≤ 10 (including the title slide).
• If the source text is longer, merge or summarise content to stay within 10 slides.
1. First slide MUST be type "title":
{ "slide_idx":1, "type":"title", "title":"…", "body":"" }
(body may stay empty)
2. Prefer **"list"** whenever possible.
• Break sentences into concise bullet-points.
• Use "text" only when the content truly cannot be listed.
Allowed types:
"list" – array, ≤ 5 items ← _default choice_
"text" – short paragraph
"quote" – short quotation or bold statement
"code" – code block, copy verbatim from ``` fences
3. Preserve every ``` … ``` code block unchanged.
4. Return this exact schema:
{
"slides":[
{ "slide_idx":N, "type":"…", "title":"…", "body":… },
… // ≤ 9 more objects after the title slide
]
}
Output the json only.
""").strip()
# ───────────────── DETAILED_PROMPT ──────────────────────────
# System prompt used by make_narration(): asks the model for one voice-over
# text per slide, returned as JSON. The punctuation below (—, •, ≈, →, “) was
# previously stored as mis-encoded byte sequences and has been restored.
DETAILED_PROMPT = textwrap.dedent(r"""
You are a friendly, motivational voice-over writer.
The user needs VALID json only — no extra commentary. (json!)
Source:
• "raw_text" — full original article
• "slides" — list of slide dictionaries (title, type, body)
Task for EACH slide in order:
• Write **at least two sentences** (≈ 25–60 words total).
• Use the slide’s visible content **and** extra context from raw_text.
• Keep a welcoming tone: encourage, explain, or add a useful tip.
• Mention code or quote briefly (“In this code snippet you’ll see …”).
• First slide → start with a warm greeting + slide title.
• Last slide → quick recap + short friendly goodbye.
Output exactly:
{ "narration":[ { "slide_idx":N, "voice_text":"..." }, … ] }
""").strip()
# ────────────────── helper functions ────────────────────────
def text_to_outline(raw: str) -> list:
    """Turn *raw* text into a slide outline via the chat model.

    PROMPT_JSON is sent as the system message and *raw* as the user message,
    with a JSON-object response format requested. The reply is parsed as JSON
    and the value stored under its "slides" key is returned.
    """
    conversation = [
        {"role": "system", "content": PROMPT_JSON},
        {"role": "user", "content": raw},
    ]
    completion = client.chat.completions.create(
        model="gpt-4o",
        temperature=0.3,
        response_format={"type": "json_object"},
        messages=conversation,
    )
    reply = completion.choices[0].message.content
    outline = json.loads(reply)
    return outline["slides"]
def make_narration(raw: str, slides: list) -> list:
    """Request a voice-over text for each slide from the chat model.

    The user message is a JSON document holding the original text under
    "raw_text" and the outline under "slides" (non-ASCII kept as-is);
    DETAILED_PROMPT is the system message. The reply is parsed as JSON and
    the value stored under its "narration" key is returned.
    """
    payload = json.dumps({"raw_text": raw, "slides": slides}, ensure_ascii=False)
    completion = client.chat.completions.create(
        model="gpt-4o",
        temperature=0.7,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": DETAILED_PROMPT},
            {"role": "user", "content": payload},
        ],
    )
    parsed = json.loads(completion.choices[0].message.content)
    return parsed["narration"]
# Page template for one 1280x720 slide; CSS braces are doubled because the
# string is filled in with str.format(content=...).
_HTML_WRAP = """
<!DOCTYPE html><html><head><meta charset="utf-8">
<link href="https://fonts.googleapis.com/css2?family=PT+Root+UI:wght@400;700&display=swap" rel="stylesheet">
<style>
body{{margin:0;width:1280px;height:720px;display:flex;flex-direction:column;
justify-content:center;align-items:center;font-family:'PT Root UI',Arial,sans-serif;
background:#fff;color:#000}}
h1{{font-size:60px;margin:0 0 40px;color:#6C63FF}}
p{{font-size:36px;margin:0}}
ul{{font-size:34px;margin:0;padding-left:40px;text-align:left}}
li{{margin:12px 0}}
blockquote{{font-size:40px;font-style:italic;margin:0;border-left:6px solid #6C63FF;padding-left:24px}}
pre{{font-size:28px;margin:0;padding:24px;background:#f5f5f5;border-radius:10px;overflow-x:auto}}
</style></head><body><div style="max-width:1000px;text-align:center">{content}</div></body></html>
"""


def _strip_code_fence(text: str) -> str:
    """Return *text* without surrounding ``` fences or a fence language tag."""
    import re

    code = text.strip()
    # An opening fence line may carry a language tag ("```python"); dropping
    # only the backticks would leave that tag as the first line of the code.
    opening = re.match(r"`{3,}[\w+#.-]*[ \t]*\r?\n", code)
    if opening:
        code = code[opening.end():]
    else:
        code = code.lstrip("`")
    return code.rstrip("`").strip("\r\n")


def _list_items(body) -> list:
    """Normalise a list-slide body into a list of bullet items."""
    if isinstance(body, str):
        # A plain string would otherwise be iterated character by character.
        return [line.strip() for line in body.splitlines() if line.strip()]
    if isinstance(body, (list, tuple)):
        return list(body)
    return [body]


def build_html(slide: dict) -> str:
    """Render one slide dictionary as a standalone HTML page.

    Args:
        slide: mapping with "type" ("title", "list", "quote", "code"; anything
            else is rendered as a paragraph), "title" and "body". A missing or
            ``None`` title/body is treated as empty text.

    Returns:
        The complete HTML document; all slide text is HTML-escaped.
    """
    import html as _h

    kind = slide.get("type", "text")
    body = slide.get("body")
    if body is None:
        body = ""
    title = _h.escape(str(slide.get("title") or ""))

    if kind == "title":
        cont = f"<h1>{title}</h1>"
    elif kind == "list":
        items = "\n".join(
            f"<li>{_h.escape(str(it))}</li>" for it in _list_items(body)
        )
        cont = f"<h1>{title}</h1><ul>{items}</ul>"
    elif kind == "quote":
        # Quote slides show only the quotation, wrapped in curly quotes.
        cont = f"<blockquote>“{_h.escape(str(body))}”</blockquote>"
    elif kind == "code":
        code = _h.escape(_strip_code_fence(str(body)))
        cont = f"<h1>{title}</h1><pre><code>{code}</code></pre>"
    else:
        cont = f"<h1>{title}</h1><p>{_h.escape(str(body))}</p>"
    return _HTML_WRAP.format(content=cont)
# ──────────────────── generate_video ────────────────────────
# Screen time given to a slide for which the model returned no narration.
_SILENT_SLIDE_MS = 3000


def _concat_file_line(path: Path) -> str:
    """Format *path* as a ``file`` directive for ffmpeg's concat demuxer."""
    # Inside a single-quoted value a quote must be written as '\'' .
    escaped = str(path).replace("'", "'\\''")
    return f"file '{escaped}'"


def generate_video(text: str) -> str:
    """Render *text* as a narrated slide video and return the MP4 path.

    Called by Gradio. Steps:
      1. ask the model for a slide outline (text_to_outline);
      2. write each slide as HTML (build_html) and screenshot it to PNG;
      3. ask the model for narration (make_narration) and synthesise one WAV
         per slide with the TTS endpoint;
      4. join the PNGs (each shown for the length of its own narration) and
         the combined audio with ffmpeg.

    Slides and narration are matched by "slide_idx", so the picture and its
    audio stay aligned even if the narration list is reordered or incomplete.
    A slide without narration is shown with silence for a fixed time.

    Args:
        text: source article to turn into a presentation.

    Returns:
        Path of ``output.mp4`` inside a fresh temporary directory, as a string.

    Raises:
        ValueError: if the outline contains no slides.
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    work = Path(tempfile.mkdtemp(prefix="ppt2vid_"))
    slides_dir = work / "slides"
    audio_dir = work / "audio"
    slides_dir.mkdir()
    audio_dir.mkdir()

    # 1. outline
    slides = text_to_outline(text)
    if not slides:
        # Fail before any rendering or TTS work is spent on an empty deck.
        raise ValueError("The outline contains no slides; nothing to render.")

    # 2. HTML -> PNG
    png_by_idx = {}
    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page(viewport={"width": 1280, "height": 720})
            for slide in slides:
                idx = int(slide["slide_idx"])
                html_path = slides_dir / f"slide_{idx:03}.html"
                html_path.write_text(build_html(slide), encoding="utf-8")
                page.goto(html_path.as_uri())
                png = html_path.with_suffix(".png")
                page.screenshot(path=png)
                png_by_idx[idx] = png
        finally:
            # Close the browser even when rendering a slide fails.
            browser.close()

    # 3. narration + TTS
    clip_by_idx = {}
    for item in make_narration(text, slides):
        idx = int(item["slide_idx"])
        if idx not in png_by_idx:
            continue  # narration for a slide that was never rendered
        wav = audio_dir / f"slide_{idx:03}.wav"
        speech = client.audio.speech.create(
            model="tts-1", voice="alloy",
            input=item["voice_text"], response_format="wav")
        speech.stream_to_file(wav)
        # Decode once; the same segment gives the duration and the audio.
        clip_by_idx[idx] = AudioSegment.from_file(wav)

    # Walk the slides in index order, building audio and timing together so
    # that each image is paired with the length of its own clip.
    order = sorted(png_by_idx)
    track = AudioSegment.empty()
    concat_lines = []
    for idx in order:
        clip = clip_by_idx.get(idx)
        if clip is None:
            clip = AudioSegment.silent(duration=_SILENT_SLIDE_MS)
        track += clip
        concat_lines.append(_concat_file_line(png_by_idx[idx]))
        concat_lines.append(f"duration {clip.duration_seconds:.3f}")
    # The last image is listed a second time without a duration, as the
    # original script did, so that its duration entry is honoured.
    concat_lines.append(_concat_file_line(png_by_idx[order[-1]]))

    narration_wav = audio_dir / "narration.wav"
    track.export(narration_wav, "wav")

    # 4. ffmpeg concat
    concat = slides_dir / "slides.txt"
    concat.write_text("\n".join(concat_lines) + "\n", encoding="utf-8")
    mp4 = work / "output.mp4"
    subprocess.run(
        ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(concat),
         "-i", str(narration_wav), "-c:v", "libx264", "-pix_fmt", "yuv420p",
         "-c:a", "aac", "-shortest", str(mp4)],
        check=True,
    )
    return str(mp4)