Spaces:
Running
Running
File size: 8,455 Bytes
76160db 275d87e a1c8c28 275d87e a1c8c28 275d87e b354735 275d87e a1c8c28 b354735 76160db a1c8c28 b354735 b245107 a1c8c28 76160db a1c8c28 9a0391f 1806ec2 a1c8c28 1806ec2 76160db a1c8c28 275d87e b245107 275d87e a1c8c28 b245107 a1c8c28 c45af57 a1c8c28 275d87e a1c8c28 275d87e a1c8c28 275d87e a1c8c28 275d87e a1c8c28 275d87e a1c8c28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 |
"""
app_core.py
-----------
Π€ΡΠ½ΠΊΡΠΈΡ generate_video(text) β Π°Π±ΡΠΎΠ»ΡΡΠ½ΡΠΉ ΠΏΡΡΡ ΠΊ Π³ΠΎΡΠΎΠ²ΠΎΠΌΡ MP4.
β ΠΡΠΎΠΌΠΏΡΡ PROMPT_JSON ΠΈ DETAILED_PROMPT ΡΠΊΠΎΠΏΠΈΡΠΎΠ²Π°Π½Ρ Π±Π΅Π· ΠΈΠ·ΠΌΠ΅Π½Π΅Π½ΠΈΠΉ
ΠΈΠ· Π²Π°ΡΠ΅Π³ΠΎ ΠΈΡΡ
ΠΎΠ΄Π½ΠΎΠ³ΠΎ Π½ΠΎΡΡΠ±ΡΠΊΠ°.
"""
# ── standard library ─────────────────────────────────────────
import os, json, textwrap, subprocess, tempfile
from pathlib import Path
from datetime import datetime
# ── third-party ──────────────────────────────────────────────
from openai import OpenAI               # openai-python >= 1.33.0
from pydub import AudioSegment
from playwright.sync_api import sync_playwright
# ── Playwright browser (downloaded only once) ────────────────
# Side effect at import time: install Chromium on the first import of this
# module.  The marker file in /tmp prevents re-downloading on later imports
# (e.g. across Space restarts that keep /tmp).
_pw_flag = Path("/tmp/.pw_chrom_installed")
if not _pw_flag.exists():
    subprocess.run(["playwright", "install", "chromium"], check=True)
    _pw_flag.touch()
# ── OpenAI client (key taken from the secret store) ──────────
# NOTE(review): if OPENAI_API_KEY is unset, api_key is None; the client is
# still constructed and every API call will fail later, at request time.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# ─────────────────── PROMPT_JSON ────────────────────────────
# System prompt for text_to_outline(): asks the model for a strict-JSON
# slide outline (title slide first, at most 10 slides total).
# NOTE(review): the prompt text below contains mis-encoded characters
# (e.g. "β€" where "≤" was intended) inherited from the source file;
# it is kept verbatim here so the runtime request payload is unchanged.
PROMPT_JSON = textwrap.dedent(r"""
You are a presentation-outliner.
The user needs VALID json only β no extra commentary. (json!)
β¦ Rules
0. Total slides β€ 10 (including the title slide).
β’ If the source text is longer, merge or summarise content to stay within 10 slides.
1. First slide MUST be type "title":
{ "slide_idx":1, "type":"title", "title":"β¦", "body":"" }
(body may stay empty)
2. Prefer **"list"** whenever possible.
β’ Break sentences into concise bullet-points.
β’ Use "text" only when the content truly cannot be listed.
Allowed types:
"list" β array, β€ 5 items β _default choice_
"text" β short paragraph
"quote" β short quotation or bold statement
"code" β code block, copy verbatim from ``` fences
3. Preserve every ``` β¦ ``` code block unchanged.
4. Return this exact schema:
{
"slides":[
{ "slide_idx":N, "type":"β¦", "title":"β¦", "body":β¦ },
β¦ // β€ 9 more objects after the title slide
]
}
Output the json only.
""").strip()
# ───────────────── DETAILED_PROMPT ──────────────────────────
# System prompt for make_narration(): asks the model to write a short
# voice-over text for every slide, returned as strict JSON.
# NOTE(review): mis-encoded characters in the text below are kept verbatim
# (see PROMPT_JSON) so the runtime request payload is unchanged.
DETAILED_PROMPT = textwrap.dedent(r"""
You are a friendly, motivational voice-over writer.
The user needs VALID json only β no extra commentary. (json!)
Source:
β’ "raw_text" β full original article
β’ "slides" β list of slide dictionaries (title, type, body)
Task for EACH slide in order:
β’ Write **at least two sentences** (β 25β60 words total).
β’ Use the slideβs visible content **and** extra context from raw_text.
β’ Keep a welcoming tone: encourage, explain, or add a useful tip.
β’ Mention code or quote briefly (βIn this code snippet youβll see β¦β).
β’ First slide β start with a warm greeting + slide title.
β’ Last slide β quick recap + short friendly goodbye.
Output exactly:
{ "narration":[ { "slide_idx":N, "voice_text":"..." }, β¦ ] }
""").strip()
# ────────────────── helper functions ────────────────────────
def text_to_outline(raw: str) -> list:
    """Turn the raw article text into a slide outline.

    Sends *raw* to the model with the PROMPT_JSON system prompt and
    returns the ``slides`` list parsed from its JSON-only reply.
    """
    chat_messages = [
        {"role": "system", "content": PROMPT_JSON},
        {"role": "user", "content": raw},
    ]
    response = client.chat.completions.create(
        model="gpt-4o",
        temperature=0.3,
        response_format={"type": "json_object"},
        messages=chat_messages,
    )
    outline = json.loads(response.choices[0].message.content)
    return outline["slides"]
def make_narration(raw: str, slides: list) -> list:
    """Write per-slide voice-over text for *slides*.

    Sends the full article plus the outline to the model with the
    DETAILED_PROMPT system prompt and returns the parsed ``narration``
    list from its JSON-only reply.
    """
    user_payload = json.dumps(
        {"raw_text": raw, "slides": slides}, ensure_ascii=False)
    response = client.chat.completions.create(
        model="gpt-4o",
        temperature=0.7,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": DETAILED_PROMPT},
            {"role": "user", "content": user_payload},
        ],
    )
    return json.loads(response.choices[0].message.content)["narration"]
def _strip_code_fence(body) -> str:
    """Remove surrounding ``` fences (and an optional language tag) from *body*."""
    text = str(body).strip()
    if text.startswith("```"):
        # The opening fence line may carry a language tag ("```python");
        # drop that whole first line, not just the backticks.
        first_line, _, rest = text.partition("\n")
        text = rest if rest else first_line.strip("`")
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()


def build_html(slide: dict) -> str:
    """Render one slide dict as a complete 1280x720 HTML page.

    Supported slide["type"] values: "title", "list", "quote", "code";
    any other type falls back to a plain paragraph.  All user-supplied
    text is HTML-escaped.
    """
    import html as _h
    t, body = slide["type"], slide["body"]
    title = _h.escape(slide["title"])
    if t == "title":
        cont = f"<h1>{title}</h1>"
    elif t == "list":
        # body is expected to be an iterable of bullet items here
        items = "\n".join(f"<li>{_h.escape(str(it))}</li>" for it in body)
        cont = f"<h1>{title}</h1><ul>{items}</ul>"
    elif t == "quote":
        cont = f"<blockquote>β{_h.escape(str(body))}β</blockquote>"
    elif t == "code":
        # FIX: the previous lstrip("`")/rstrip("`") kept the fence's language
        # tag (e.g. "python") as the first rendered code line.
        code = _h.escape(_strip_code_fence(body))
        cont = f"<h1>{title}</h1><pre><code>{code}</code></pre>"
    else:
        cont = f"<h1>{title}</h1><p>{_h.escape(str(body))}</p>"
    # Doubled braces below are literal CSS braces, escaped for str.format().
    HTML_WRAP = """
<!DOCTYPE html><html><head><meta charset="utf-8">
<link href="https://fonts.googleapis.com/css2?family=PT+Root+UI:wght@400;700&display=swap" rel="stylesheet">
<style>
body{{margin:0;width:1280px;height:720px;display:flex;flex-direction:column;
justify-content:center;align-items:center;font-family:'PT Root UI',Arial,sans-serif;
background:#fff;color:#000}}
h1{{font-size:60px;margin:0 0 40px;color:#6C63FF}}
p{{font-size:36px;margin:0}}
ul{{font-size:34px;margin:0;padding-left:40px;text-align:left}}
li{{margin:12px 0}}
blockquote{{font-size:40px;font-style:italic;margin:0;border-left:6px solid #6C63FF;padding-left:24px}}
pre{{font-size:28px;margin:0;padding:24px;background:#f5f5f5;border-radius:10px;overflow-x:auto}}
</style></head><body><div style="max-width:1000px;text-align:center">{content}</div></body></html>
"""
    return HTML_WRAP.format(content=cont)
# ──────────────────── generate_video ────────────────────────
def generate_video(text: str) -> str:
    """Called by Gradio.  Returns the absolute path to the finished MP4.

    Pipeline: outline (LLM) -> HTML slides -> PNG screenshots (Playwright)
    -> narration (LLM) -> TTS wavs -> one concatenated wav -> ffmpeg
    stitches PNGs + audio into output.mp4 inside a fresh temp directory.
    """
    # Fresh working directory per call; never cleaned up here (left in /tmp).
    work = Path(tempfile.mkdtemp(prefix="ppt2vid_"))
    slides_dir = work / "slides"; audio_dir = work / "audio"
    slides_dir.mkdir(); audio_dir.mkdir()
    # 1. outline
    slides = text_to_outline(text)
    # 2. HTML -> PNG: one zero-padded file per slide so lexical sort == slide order
    html_paths=[]
    for s in slides:
        h=slides_dir/f"slide_{s['slide_idx']:03}.html"
        h.write_text(build_html(s),encoding="utf-8"); html_paths.append(h)
    with sync_playwright() as p:
        br=p.chromium.launch()
        pg=br.new_page(viewport={"width":1280,"height":720})
        pngs=[]
        for h in html_paths:
            pg.goto(h.as_uri())
            img=h.with_suffix(".png"); pg.screenshot(path=img)
            pngs.append(img)
        br.close()
    durations=[]
    # 3. narration + TTS: one wav per narration entry; durations (seconds,
    # rounded to 2 decimals) drive how long each PNG stays on screen.
    narr=make_narration(text,slides)
    wavs=[]
    for n in narr:
        wav=audio_dir/f"slide_{n['slide_idx']:03}.wav"
        speech=client.audio.speech.create(
            model="tts-1", voice="alloy",
            input=n["voice_text"], response_format="wav")
        speech.stream_to_file(wav); wavs.append(wav)
        dur = AudioSegment.from_file(wav).duration_seconds
        durations.append(round(dur,2))
    # Concatenate the per-slide wavs in slide order (lexical sort of the
    # zero-padded names) into a single narration track.
    combo=AudioSegment.empty()
    for w in sorted(wavs):
        combo += AudioSegment.from_file(w)
    wav = audio_dir/"narration.wav"
    combo.export(wav,"wav")
    # 4. ffmpeg concat demuxer list: "file/duration" pairs, plus the last
    # frame repeated so its duration is honoured.
    # NOTE(review): zip() silently truncates if the model returned fewer
    # narration entries than slides — assumes counts/order match; verify.
    concat=slides_dir/"slides.txt"
    with concat.open("w") as f:
        for img,d in zip(sorted(pngs),durations):
            f.write(f"file '{img}'\n"); f.write(f"duration {d}\n")
        f.write(f"file '{pngs[-1]}'\n")
    mp4=work/"output.mp4"
    subprocess.run([
        "ffmpeg","-y","-f","concat","-safe","0","-i",concat,
        "-i",wav,"-c:v","libx264","-pix_fmt","yuv420p",
        "-c:a","aac","-shortest",mp4], check=True)
    return str(mp4)
|