Spaces:

mariapar
/

hypervideo

Sleeping

App Files Files Community

hypervideo / app_core.py

mariapar

Update app_core.py

a1c8c28 verified 7 months ago

raw

history blame contribute delete

8.46 kB

	"""
	app_core.py
	-----------
	Функция generate_video(text) → абсолютный путь к готовому MP4.

	❗ Промпты PROMPT_JSON и DETAILED_PROMPT скопированы без изменений
	из вашего исходного ноутбука.
	"""

	# ── стандартные ───────────────────────────────────────────────
	import os, json, textwrap, subprocess, tempfile
	from pathlib import Path
	from datetime import datetime

	# ── сторонние ─────────────────────────────────────────────────
	from openai import OpenAI # openai-python ≥1.33.0
	from pydub import AudioSegment
	from playwright.sync_api import sync_playwright

	# ── Playwright browser (downloaded only once) ─────────────────
	# A marker file remembers that Chromium has already been fetched, so the
	# download is skipped on later imports for as long as the marker exists.
	# `check=True` makes a failed install raise before the marker is written.
	_pw_flag = Path("/tmp/.pw_chrom_installed")
	if not _pw_flag.exists():
	    subprocess.run(
	        ["playwright", "install", "chromium"],
	        check=True,
	    )
	    _pw_flag.touch()

	# ── OpenAI client (the key is read from the OPENAI_API_KEY secret) ──
	client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

	# ─────────────────── PROMPT_JSON ────────────────────────────
	# System prompt passed by text_to_outline(): it instructs the model to
	# answer with a JSON object holding a "slides" array (title slide first,
	# at most 10 slides, types list/text/quote/code).
	# The literal is a raw string that is dedented and stripped at import time.
	PROMPT_JSON = textwrap.dedent(r"""
	You are a presentation-outliner.
	The user needs VALID json only — no extra commentary. (json!)

	✦ Rules
	0. Total slides ≤ 10 (including the title slide).
	• If the source text is longer, merge or summarise content to stay within 10 slides.

	1. First slide MUST be type "title":
	{ "slide_idx":1, "type":"title", "title":"…", "body":"" }
	(body may stay empty)

	2. Prefer "list" whenever possible.
	• Break sentences into concise bullet-points.
	• Use "text" only when the content truly cannot be listed.
	Allowed types:
	"list" – array, ≤ 5 items ← _default choice_
	"text" – short paragraph
	"quote" – short quotation or bold statement
	"code" – code block, copy verbatim from ``` fences

	3. Preserve every ``` … ``` code block unchanged.

	4. Return this exact schema:
	{
	"slides":[
	{ "slide_idx":N, "type":"…", "title":"…", "body":… },
	… // ≤ 9 more objects after the title slide
	]
	}

	Output the json only.
	""").strip()

	# ───────────────── DETAILED_PROMPT ──────────────────────────
	# System prompt passed by make_narration(): it instructs the model to
	# write voice-over text for every slide and to answer with a JSON object
	# holding a "narration" array of {slide_idx, voice_text} entries.
	# The literal is a raw string that is dedented and stripped at import time.
	DETAILED_PROMPT = textwrap.dedent(r"""
	You are a friendly, motivational voice-over writer.
	The user needs VALID json only — no extra commentary. (json!)

	Source:
	• "raw_text" — full original article
	• "slides" — list of slide dictionaries (title, type, body)

	Task for EACH slide in order:
	• Write at least two sentences (≈ 25–60 words total).
	• Use the slide’s visible content and extra context from raw_text.
	• Keep a welcoming tone: encourage, explain, or add a useful tip.
	• Mention code or quote briefly (“In this code snippet you’ll see …”).
	• First slide → start with a warm greeting + slide title.
	• Last slide → quick recap + short friendly goodbye.

	Output exactly:
	{ "narration":[ { "slide_idx":N, "voice_text":"..." }, … ] }
	""").strip()

	# ────────────────── вспомогательные функции ─────────────────

	def text_to_outline(raw: str) -> list:
	    """Ask the chat model to turn *raw* text into a slide outline.

	    PROMPT_JSON is sent as the system message and *raw* as the user
	    message, with a JSON-object response format requested.

	    Returns:
	        The value stored under the ``"slides"`` key of the parsed reply.
	    """
	    chat_messages = [
	        {"role": "system", "content": PROMPT_JSON},
	        {"role": "user", "content": raw},
	    ]
	    reply = client.chat.completions.create(
	        model="gpt-4o",
	        temperature=0.3,
	        response_format={"type": "json_object"},
	        messages=chat_messages,
	    )
	    outline = json.loads(reply.choices[0].message.content)
	    return outline["slides"]

	def make_narration(raw: str, slides: list) -> list:
	    """Ask the chat model for voice-over text for every slide.

	    The user message is a JSON document with the original text under
	    ``"raw_text"`` and the outline under ``"slides"``; DETAILED_PROMPT is
	    the system message and a JSON-object response format is requested.

	    Returns:
	        The value stored under the ``"narration"`` key of the parsed reply.
	    """
	    user_payload = json.dumps(
	        {"raw_text": raw, "slides": slides},
	        ensure_ascii=False,
	    )
	    reply = client.chat.completions.create(
	        model="gpt-4o",
	        temperature=0.7,
	        response_format={"type": "json_object"},
	        messages=[
	            {"role": "system", "content": DETAILED_PROMPT},
	            {"role": "user", "content": user_payload},
	        ],
	    )
	    parsed = json.loads(reply.choices[0].message.content)
	    return parsed["narration"]

	def _strip_code_fence(raw) -> str:
	    """Return *raw* as text without a surrounding Markdown ``` fence."""
	    code = "" if raw is None else str(raw).strip()
	    if not code.startswith("```"):
	        # Not fenced: keep the code verbatim (backticks may be real code).
	        return code
	    opening, newline, remainder = code.partition("\n")
	    # The opening fence line may carry a language tag ("```python"); drop
	    # the whole line, not just the backticks, so the tag is not rendered
	    # as the first line of the code.
	    code = remainder if newline else opening.lstrip("`")
	    trimmed = code.rstrip()
	    if trimmed.endswith("```"):
	        code = trimmed[:-3]
	    return code.strip("\n")


	def _slide_items(body) -> list:
	    """Normalise the body of a list slide into a list of bullet items."""
	    if body is None:
	        return []
	    if isinstance(body, str):
	        # Iterating a bare string would yield one bullet per character.
	        return [line.strip() for line in body.splitlines() if line.strip()]
	    if isinstance(body, (list, tuple)):
	        return list(body)
	    return [body]


	def _slide_text(body) -> str:
	    """Normalise a title or text/quote body into a single display string."""
	    if body is None:
	        return ""
	    if isinstance(body, (list, tuple)):
	        return " ".join(str(part) for part in body)
	    return str(body)


	def build_html(slide: dict) -> str:
	    """Render one slide dictionary as a standalone 1280x720 HTML page.

	    Args:
	        slide: Outline entry with the keys ``"type"`` (``"title"``,
	            ``"list"``, ``"quote"``, ``"code"``; anything else is rendered
	            as a paragraph), ``"title"`` and ``"body"``. Missing keys fall
	            back to a text slide with empty strings.

	    Returns:
	        A complete HTML document. All slide content is HTML-escaped.
	    """
	    import html as _h

	    kind = slide.get("type", "text")
	    body = slide.get("body", "")
	    title = _h.escape(_slide_text(slide.get("title", "")))

	    if kind == "title":
	        content = f"<h1>{title}</h1>"
	    elif kind == "list":
	        items = "\n".join(
	            f"<li>{_h.escape(str(item))}</li>" for item in _slide_items(body)
	        )
	        content = f"<h1>{title}</h1><ul>{items}</ul>"
	    elif kind == "quote":
	        # Quote slides intentionally show only the quotation, no heading.
	        content = f"<blockquote>“{_h.escape(_slide_text(body))}”</blockquote>"
	    elif kind == "code":
	        code = _h.escape(_strip_code_fence(body))
	        content = f"<h1>{title}</h1><pre><code>{code}</code></pre>"
	    else:
	        content = f"<h1>{title}</h1><p>{_h.escape(_slide_text(body))}</p>"

	    # Doubled braces are literal CSS braces; only {content} is substituted.
	    HTML_WRAP = """
	<!DOCTYPE html><html><head><meta charset="utf-8">
	<link href="https://fonts.googleapis.com/css2?family=PT+Root+UI:wght@400;700&display=swap" rel="stylesheet">
	<style>
	body{{margin:0;width:1280px;height:720px;display:flex;flex-direction:column;
	justify-content:center;align-items:center;font-family:'PT Root UI',Arial,sans-serif;
	background:#fff;color:#000}}
	h1{{font-size:60px;margin:0 0 40px;color:#6C63FF}}
	p{{font-size:36px;margin:0}}
	ul{{font-size:34px;margin:0;padding-left:40px;text-align:left}}
	li{{margin:12px 0}}
	blockquote{{font-size:40px;font-style:italic;margin:0;border-left:6px solid #6C63FF;padding-left:24px}}
	pre{{font-size:28px;margin:0;padding:24px;background:#f5f5f5;border-radius:10px;overflow-x:auto}}
	</style></head><body><div style="max-width:1000px;text-align:center">{content}</div></body></html>
	"""
	    return HTML_WRAP.format(content=content)

	# ──────────────────── generate_video ────────────────────────
	# Screen time (milliseconds) for a slide the model returned no narration for.
	_SILENT_SLIDE_MS = 2000


	def _slide_key(value):
	    """Normalise a slide index (``1``, ``"1"``) into a comparable dict key."""
	    try:
	        return int(value)
	    except (TypeError, ValueError):
	        return str(value)


	def _ordered_slides(slides) -> list:
	    """Return the slides sorted by ``slide_idx``; keep model order if unusable."""
	    try:
	        return sorted(slides, key=lambda s: int(s["slide_idx"]))
	    except (KeyError, TypeError, ValueError):
	        return list(slides)


	def _render_slides(slides: list, slides_dir: Path) -> list:
	    """Write one HTML page per slide and screenshot each one to a PNG.

	    Files are numbered by position so duplicate or malformed ``slide_idx``
	    values cannot overwrite each other. Returns the PNG paths in slide order.
	    """
	    html_paths = []
	    for pos, slide in enumerate(slides, start=1):
	        page_file = slides_dir / f"slide_{pos:03}.html"
	        page_file.write_text(build_html(slide), encoding="utf-8")
	        html_paths.append(page_file)

	    pngs = []
	    with sync_playwright() as pw:
	        browser = pw.chromium.launch()
	        try:
	            page = browser.new_page(viewport={"width": 1280, "height": 720})
	            for page_file in html_paths:
	                page.goto(page_file.as_uri())
	                shot = page_file.with_suffix(".png")
	                page.screenshot(path=shot)
	                pngs.append(shot)
	        finally:
	            # Close the browser even when a page fails to render.
	            browser.close()
	    return pngs


	def _synthesize_audio(slides: list, narration: list, audio_dir: Path) -> tuple:
	    """Create the narration track with exactly one audio clip per slide.

	    Narration entries are matched to slides by ``slide_idx`` rather than by
	    list position, so an out-of-order or incomplete model reply cannot shift
	    audio onto the wrong slide. A slide without narration gets silence.

	    Returns:
	        ``(track_path, durations)`` where ``durations[i]`` is the length in
	        seconds of the clip that belongs to ``slides[i]``.
	    """
	    voice_by_idx = {}
	    for entry in narration or []:
	        if isinstance(entry, dict):
	            # First entry wins if the model repeats a slide index.
	            voice_by_idx.setdefault(
	                _slide_key(entry.get("slide_idx")), entry.get("voice_text")
	            )

	    combined = AudioSegment.empty()
	    durations = []
	    for pos, slide in enumerate(slides, start=1):
	        key = _slide_key(slide.get("slide_idx", pos))
	        voice_text = str(voice_by_idx.get(key) or "").strip()
	        if voice_text:
	            clip_path = audio_dir / f"slide_{pos:03}.wav"
	            speech = client.audio.speech.create(
	                model="tts-1", voice="alloy",
	                input=voice_text, response_format="wav")
	            speech.stream_to_file(clip_path)
	            clip = AudioSegment.from_file(clip_path)
	        else:
	            clip = AudioSegment.silent(duration=_SILENT_SLIDE_MS)
	        # Each clip is decoded once and reused for duration and concatenation.
	        durations.append(clip.duration_seconds)
	        combined += clip

	    track = audio_dir / "narration.wav"
	    combined.export(str(track), format="wav")
	    return track, durations


	def _write_concat_list(list_file: Path, pngs: list, durations: list) -> None:
	    """Write the ffmpeg concat-demuxer script that shows each PNG for its duration."""

	    def quoted(path) -> str:
	        # Inside single quotes ffmpeg needs a literal quote spelled as '\''.
	        return str(path).replace("'", "'\\''")

	    lines = []
	    for img, seconds in zip(pngs, durations):
	        lines.append(f"file '{quoted(img)}'")
	        lines.append(f"duration {seconds:.3f}")
	    # The concat demuxer ignores the duration of the final entry unless the
	    # last file is listed once more.
	    lines.append(f"file '{quoted(pngs[-1])}'")
	    list_file.write_text("\n".join(lines) + "\n", encoding="utf-8")


	def generate_video(text: str) -> str:
	    """Turn *text* into a narrated slide video and return the MP4 path.

	    Called by the Gradio app. Steps: outline the text into slides, render
	    each slide to PNG, generate and synthesize narration, then mux images
	    and audio with ffmpeg. All files are created in a fresh temporary
	    directory that is left in place so the caller can read the result.

	    Args:
	        text: Source article to convert.

	    Returns:
	        Absolute path of the generated MP4 as a string.

	    Raises:
	        ValueError: If *text* is blank or the outline contains no slides.
	        subprocess.CalledProcessError: If ffmpeg exits with an error.
	    """
	    # Reject blank input before any paid API call or temp directory is made.
	    if not text or not text.strip():
	        raise ValueError("text must not be empty")

	    work = Path(tempfile.mkdtemp(prefix="ppt2vid_"))
	    slides_dir = work / "slides"
	    audio_dir = work / "audio"
	    slides_dir.mkdir()
	    audio_dir.mkdir()

	    # 1. outline
	    slides = _ordered_slides(text_to_outline(text))
	    if not slides:
	        raise ValueError("the outline contains no slides")

	    # 2. HTML -> PNG
	    pngs = _render_slides(slides, slides_dir)

	    # 3. narration + TTS
	    narration = make_narration(text, slides)
	    track, durations = _synthesize_audio(slides, narration, audio_dir)

	    # 4. ffmpeg concat
	    concat = slides_dir / "slides.txt"
	    _write_concat_list(concat, pngs, durations)

	    mp4 = work / "output.mp4"
	    subprocess.run([
	        "ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(concat),
	        "-i", str(track), "-c:v", "libx264", "-pix_fmt", "yuv420p",
	        "-c:a", "aac", "-shortest", str(mp4)], check=True)

	    return str(mp4)