Spaces:

mariapar
/

hypervideo

Running

App Files Files Community

mariapar committed on May 28, 2025

Commit

275d87e

verified ·

1 Parent(s): 9a0391f

Update app_core.py

Browse files

Files changed (1) hide show

app_core.py +339 -104

app_core.py CHANGED Viewed

@@ -1,38 +1,65 @@
 """
-Однофайловый конвейер для Gradio:
-  raw text → outline → HTML+PNG → OpenAI‑TTS → MP4
-ВАЖНО: PROMPT_JSON и DETAILED_PROMPT оставлены 1‑в‑1.
-Запуск:
-    python app_core.py           # локальный запуск Gradio
-    # или
-    import app_core; app_core.generate_video(text)
 """
-# ─ стандартные ─
-import os, json, textwrap, subprocess, tempfile, shutil, html, asyncio
 from pathlib import Path
 from datetime import datetime
-# ─ third‑party ─
-import openai                      # ≥ 1.33.0
 from openai import OpenAI
-from pydub import AudioSegment
-from playwright.sync_api import sync_playwright
-import gradio as gr
-# ─────────────────────────────────────────────────────────────
-# 0. Playwright браузер (устанавливаем 1 раз без sudo)
-# ─────────────────────────────────────────────────────────────
-_pw_flag = Path("/tmp/.pw_chromium_installed")
-if not _pw_flag.exists():
-    subprocess.run(["playwright", "install", "chromium"], check=True)
-    _pw_flag.touch()
-# ─────────────────────────────────────────────────────────────
-# 1. System prompts (оставлены без изменений)
-# ─────────────────────────────────────────────────────────────
 PROMPT_JSON = textwrap.dedent("""
     You are a presentation-outliner.
     The user needs VALID json only — no extra commentary. (json!)
@@ -68,38 +95,129 @@ PROMPT_JSON = textwrap.dedent("""
 """).strip()
-DETAILED_PROMPT = textwrap.dedent("""
-  You are a friendly, motivational voice-over writer.
-  The user needs VALID json only — no extra commentary. (json!)
-  Source:
-    • "raw_text"  — full original article
-    • "slides"    — list of slide dictionaries (title, type, body)
-  Task for EACH slide in order:
-    • Write **at least two sentences** (≈ 25–60 words total).
-    • Use the slide’s visible content **and** extra context from raw_text.
-    • Keep a welcoming tone: encourage, explain, or add a useful tip.
-    • Mention code or quote briefly (“In this code snippet you’ll see …”).
-    • First slide  → start with a warm greeting + slide title.
-    • Last slide   → quick recap + short friendly goodbye.
-  Output exactly:
-    { "narration":[ { "slide_idx":N, "voice_text":"..." }, … ] }
-""").strip()
-# ─────────────────────────────────────────────────────────────
-# 2. HTML/CSS шаблон слайдов (без изменений)
-# ─────────────────────────────────────────────────────────────
 HTML_BASE = """
 <!DOCTYPE html>
 <html>
 <head>
-  <meta charset=\"utf-8\">
   <title>{title}</title>
   <!-- Hyperskill brand-like styling -->
-  <link href=\"https://fonts.googleapis.com/css2?family=PT+Root+UI:wght@400;700&display=swap\" rel=\"stylesheet\">
   <style>
     :root {{
@@ -146,83 +264,104 @@ HTML_BASE = """
   </style>
 </head>
 <body>
-  <div class=\"wrap\">
     {content}
   </div>
 </body>
 </html>
 """
-# ─────────────────────────────────────────────────────────────
-# 3. Функции конвейера
-# ─────────────────────────────────────────────────────────────
-client = OpenAI()   # ключ берётся из OPENAI_API_KEY
-def text_to_outline(raw_text: str, model: str = "gpt-4o") -> list:
-    """GPT → список словарей слайдов"""
-    resp = client.chat.completions.create(
-        model=model,
-        temperature=0.3,
-        response_format={"type": "json_object"},
-        messages=[
-            {"role": "system", "content": PROMPT_JSON},
-            {"role": "user",   "content": raw_text}
-        ],
-        max_tokens=2048,
-    )
-    slides = json.loads(resp.choices[0].message.content)["slides"]
-    return slides
 def build_slide_html(slide: dict) -> str:
     t, body = slide["type"], slide["body"]
-    title   = html.escape(slide["title"])
     if t == "title":
         content = f"<h1>{title}</h1>"
     elif t == "list":
-        items = "\n".join(f"<li>{html.escape(str(it))}</li>" for it in body)
         content = f"<h1>{title}</h1><ul>{items}</ul>"
     elif t == "quote":
-        content = f"<blockquote>“{html.escape(str(body))}”</blockquote>"
     elif t == "code":
-        code = html.escape(str(body).strip().lstrip("`").rstrip("`"))
         content = f"<h1>{title}</h1><pre><code>{code}</code></pre>"
-    else:                                   # text
-        content = f"<h1>{title}</h1><p>{html.escape(str(body))}</p>"
     return HTML_BASE.format(title=title, content=content)
-def save_html(slides: list, slides_dir: Path) -> list:
-    html_paths = []
-    for s in slides:
-        f = slides_dir / f"slide_{s['slide_idx']:03}.html"
-        f.write_text(build_slide_html(s), encoding="utf-8")
-        html_paths.append(f)
-    return html_paths
-def html_to_png(html_paths: list):
-    png_paths = []
-    with sync_playwright() as p:
-        browser = p.chromium.launch()
-        page = browser.new_page(viewport={"width":1280, "height":720})
-        for f in html_paths:
-            page.goto(f.as_uri())
             png_path = f.with_suffix(".png")
-            page.screenshot(path=png_path)
-            png_paths.append(png_path)
-        browser.close()
-    return png_paths
-def generate_narration(raw_text: str, slides: list, model: str = "gpt-4o") -> list:
     resp = client.chat.completions.create(
         model=model,
         temperature=0.8,
-        response_format={"type": "json_object"},
         messages=[
             {"role": "system", "content": DETAILED_PROMPT},
             {"role": "user",   "content": json.dumps({
@@ -232,14 +371,110 @@ def generate_narration(raw_text: str, slides: list, model: str = "gpt-4o") -> li
         ],
         max_tokens=2048,
     )
-    return json.loads(resp.choices[0].message.content)["narration"]
-def tts_narration(narration_list: list, audio_dir: Path):
-    audio_dir.mkdir(exist_ok=True)
-    wav_paths, durations = [], []
-    for item in narration_list:
-        idx, text = item["slide_idx"], item["voice_text"]
-        speech = client.audio.speech.create(
-            model="

 """
+app_core.py
+-----------
+Обёртка для Gradio: функция generate_video(text) возвращает
+путь к MP4. Весь ваш существующий код вставляется внутрь
+комментария  # <<< YOUR PIPELINE >>>  без изменений.
+"""
+from pathlib import Path
+import tempfile
+def generate_video(text: str) -> str:
+    """
+    Main entry point for Gradio.
+    Takes the raw text, runs the pipeline script,
+    and is meant to return the absolute path to the generated MP4.
+
+    NOTE(review): in this view the pasted pipeline that follows starts at
+    column 0, so it appears to fall outside this function body, and the
+    trailing ``return`` is separated from it — confirm and re-indent.
+    """
+    # Create a temporary working directory (leave as is unless it must change).
+    work_dir = Path(tempfile.mkdtemp(prefix="ppt2vid_"))
+    # -*- coding: utf-8 -*-
+"""AI presentation Generator.ipynb
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/11RrHdQUWEiajChTVrryhcnbc4LUkgWUA
 """
+# 🐍 0-A. Устанавливаем и импортируем
+!pip install --quiet --upgrade openai playwright
+!playwright install chromium     # нужен для «скриншотов» слайдов позже
+!apt-get -y install ffmpeg       # для финального рендеринга видео
+import os, json, textwrap, openai
 from pathlib import Path
 from datetime import datetime
+RUN_ID   = datetime.now().strftime("%Y%m%d_%H%M%S")      # уникальный ID
+BASE_DIR = Path(f"/content/run_{RUN_ID}")                #
+SLIDES_DIR = BASE_DIR / "slides"
+AUDIO_DIR  = BASE_DIR / "audio"
+SLIDES_DIR.mkdir(parents=True, exist_ok=True)
+AUDIO_DIR.mkdir(parents=True, exist_ok=True)
+print("Current run folder ⇒", BASE_DIR)
+# 0-B. OpenAI key: read it from the OPENAI_API_KEY environment variable.
+# os.getenv() expects the variable NAME. Passing the secret itself (as the
+# previous revision did) always returns None and leaks the key in source.
+openai.api_key = os.getenv("OPENAI_API_KEY")
+print("✓ Environment is ready")
+# 1. Text -> HTML
+!pip install --quiet --upgrade "openai>=1.33.0"
+import os, json, textwrap
 from openai import OpenAI
+# The API key must come from the environment, never from source control.
+# OpenAI() reads OPENAI_API_KEY when it is constructed, so the variable has to
+# be present BEFORE this point; assigning it afterwards has no effect.
+if not os.environ.get("OPENAI_API_KEY"):
+    raise RuntimeError("OPENAI_API_KEY is not set")
+client = OpenAI()
+# System prompt
 PROMPT_JSON = textwrap.dedent("""
     You are a presentation-outliner.
     The user needs VALID json only — no extra commentary. (json!)
 """).strip()
+def text_to_outline(raw_text: str,
+                    model: str = "gpt-4o"):
+    """Ask the chat model for a slide outline of ``raw_text``.
+
+    Sends ``PROMPT_JSON`` as the system message and ``raw_text`` as the user
+    message, requests a JSON-object response, then prints and returns the
+    value stored under the ``"slides"`` key of the decoded reply.
+    """
+    conversation = [
+        {"role": "system", "content": PROMPT_JSON},
+        {"role": "user",   "content": raw_text},
+    ]
+    completion = client.chat.completions.create(
+        model=model,
+        temperature=0.3,
+        response_format={"type": "json_object"},
+        messages=conversation,
+        max_tokens=2048,
+    )
+    # The reply body is a JSON document; only its "slides" entry is used.
+    payload = json.loads(completion.choices[0].message.content)
+    outline = payload["slides"]
+    print("=== SLIDE OUTLINE ===")
+    print(json.dumps(outline, indent=2, ensure_ascii=False))
+    return outline
+# ——— DEMO Text
+demo_text = '''
+Programs in which there's nothing to calculate are quite rare. Therefore, learning to program with numbers is never a bad idea. An even more valuable skill we are about to learn is the processing of user data. With its help, you can create interactive and by far more flexible applications. So let's get started!
+Reading numbers from user input
+Since you have become familiar with the input() function in Python, it's hardly new to you that any data passed to this function is treated as a string. But how should we deal with numerical values? As a general rule, they are explicitly converted to corresponding numerical types:
+integer = int(input())
+floating_point = float(input())
+Pay attention to current best practices: it's crucial not to name your variables as built-in types (say, float or int). Also, we should take into account user mistakes: if a user types an inaccurate input, say, a string 'two' instead of a number 2, a ValueError will occur. At the moment, we won't focus on it; but don't worry, more information about errors is available in a dedicated topic. Now, consider a more detailed and pragmatic example of handling numerical inputs.
+Free air miles
+Imagine you have a credit card with a free air miles bonus program (or maybe you already have one). As a user, you are expected to input the amount of money you spend on average from this card per month. Let's assume that the bonus program gives you 2 free air miles for every dollar you spend. Here's a simple program to figure out when you can travel somewhere for free:
+# the average amount of money per month
+money = int(input("How much money do you spend per month: "))
+# the number of miles per unit of money
+n_miles = 2
+# earned miles
+miles_per_month = money * n_miles
+# the distance between London and Paris
+distance = 215
+# how many months do you need to get
+# a free trip from London to Paris and back
+print(distance * 2 / miles_per_month)
+This program will calculate how many months it takes to travel the selected distance and back.
+Although it is recommended to write messages for users in the input() function, avoid them in our educational programming challenges, otherwise your code may not pass our tests.
+Advanced forms of assignment
+Whenever you use an equal sign =, you actually assign some value to a variable. For that reason, = is typically referred to as an assignment operator. Meanwhile, there are other assignment operators you can use in Python. They are also called compound assignment operators, for they carry out an arithmetic operation and assignment in one step. Have a look at the code snippet below:
+# simple assignment
+number = 10
+number = number + 1  # 11
+This code is equivalent to the following one:
+# compound assignment
+number = 10
+number += 1  # 11
+One can clearly see from the example that the second piece of code is more concise (for it doesn't repeat the variable's name).
+Naturally, similar assignment forms exist for the rest of arithmetic operations: -=, *=, /=, //=, %=, **=. Given the opportunity, use them to save time and effort.
+One possible application of compound assignment comes next.
+Counter variable
+In programming, there is a concept called loop. It is used to repeat some block of code a certain number of times. Pretty often they have special variables called counters alongside them. A counter, as the name presupposes, counts something: how many times a condition is met, how many elements in the sequence, etc. Hence, counters should be integers. Now we are getting to the point: you can use the operators += and -= to increase or decrease the counter respectively.
+Consider this example where a user determines the value by which the counter is increased:
+counter = 1
+step = int(input())  # let it be 3
+counter += step
+print(counter)  # it should be 4
+In case you need only non-negative integers from the user (we are increasing the counter after all!), you can prevent incorrect inputs by using the abs() function. It is a Python built-in function that returns the absolute value of a number (that is, value regardless of its sign). Let's readjust our last program a bit:
+counter = 1
+step = abs(int(input()))  # user types -3
+counter += step
+print(counter)  # it's still 4
+As you can see, thanks to the abs() function we got a positive number.
+For now, it's all right that you do not know much about the mentioned details of errors, loops, and built-in functions in Python. We will catch up and make sure that you know these topics comprehensively. Keep learning!
+Summary
+Thus, we have shed some light on new details about integer arithmetic and the processing of numerical inputs in Python. Feel free to use them in your future projects. In this topic, we discussed:
+how to read numbers from the user input;
+how to assign numbers to variables and use arithmetic operators to assign the result of the calculation;
+what counters are and when they are used.
+'''
+slides = text_to_outline(demo_text)
+from datetime import datetime
+from pathlib import Path
+import html as _h
+# 0. Уникальный каталог для каждого запуска
+RUN_ID   = datetime.now().strftime("%Y%m%d_%H%M%S")
+BASE_DIR = Path(f"/content/run_{RUN_ID}")
+SLIDES_DIR = BASE_DIR / "slides"
+SLIDES_DIR.mkdir(parents=True, exist_ok=True)
+print("Files will be saved to", SLIDES_DIR)
+# 1. Мини-шаблон HTML (белый фон, чёрный текст)
 HTML_BASE = """
 <!DOCTYPE html>
 <html>
 <head>
+  <meta charset="utf-8">
   <title>{title}</title>
   <!-- Hyperskill brand-like styling -->
+  <link href="https://fonts.googleapis.com/css2?family=PT+Root+UI:wght@400;700&display=swap" rel="stylesheet">
   <style>
     :root {{
   </style>
 </head>
 <body>
+  <div class="wrap">
     {content}
   </div>
 </body>
 </html>
 """
 def build_slide_html(slide: dict) -> str:
+    """Render one slide dictionary as a complete HTML page via HTML_BASE.
+
+    Reads ``slide["type"]``, ``slide["body"]`` and ``slide["title"]``. Every
+    piece of text inserted into the markup is HTML-escaped first. Types other
+    than "title", "list", "quote" and "code" are rendered as a paragraph.
+    """
+    import html as _h
     t, body = slide["type"], slide["body"]
+    title   = _h.escape(slide["title"])
     if t == "title":
         content = f"<h1>{title}</h1>"
     elif t == "list":
+        # body is iterated: one <li> per element, each coerced to str
+        items = "\n".join(f"<li>{_h.escape(str(it))}</li>" for it in body)
         content = f"<h1>{title}</h1><ul>{items}</ul>"
     elif t == "quote":
+        # the quote layout shows only the quoted body, without the title
+        content = f"<blockquote>“{_h.escape(str(body))}”</blockquote>"
     elif t == "code":
+        # trim surrounding whitespace, then any leading/trailing backticks
+        code = _h.escape(str(body).strip().lstrip("`").rstrip("`"))
         content = f"<h1>{title}</h1><pre><code>{code}</code></pre>"
+    else:                                       # any other type: plain text
+        content = f"<h1>{title}</h1><p>{_h.escape(str(body))}</p>"
     return HTML_BASE.format(title=title, content=content)
+# 2. Сохраняем HTML-слайды в новую папку
+html_paths = []
+for s in slides:
+    f = SLIDES_DIR / f"slide_{s['slide_idx']:03}.html"
+    f.write_text(build_slide_html(s), encoding="utf-8")
+    html_paths.append(f)
+    print("saved →", f.name)
+print(f"✓ {len(html_paths)} HTML files saved to {SLIDES_DIR}")
+# ────────────────────────────────────────────────────────────
+# 2. HTML → PNG
+# ──────────────────────────────────��─────────────────────────
+from pathlib import Path
+import html, textwrap
+from playwright.async_api import async_playwright
+html_paths = sorted(SLIDES_DIR.glob("slide_*.html"))
+assert html_paths, "❌ No HTML files found — generate them first!"
+async def render_pngs(paths):
+    """Screenshot every HTML file in ``paths`` to a PNG with the same stem.
+
+    Launches Chromium through Playwright, opens a single page with a
+    1280x720 viewport, loads each path by its ``file://`` URI and saves the
+    screenshot next to it with a ``.png`` suffix. Nothing is returned.
+    """
+    async with async_playwright() as p:
+        browser = await p.chromium.launch()
+        # one page is reused for all slides
+        page = await browser.new_page(viewport={"width":1280,"height":720})
+        for f in paths:
+            await page.goto(f.as_uri())
             png_path = f.with_suffix(".png")
+            await page.screenshot(path=png_path)
+            print("  →", png_path.name)
+        await browser.close()
+await render_pngs(html_paths)   # ← верхнеуровневый await
+print("✓ PNG generation complete — open /content/slides")
+# Demo text -> Comments
+import json, textwrap
+from openai import OpenAI
+client = OpenAI()          # API-ключ уже задан в окружении
+raw_text = demo_text
+DETAILED_PROMPT = textwrap.dedent("""
+  You are a friendly, motivational voice-over writer.
+  The user needs VALID json only — no extra commentary. (json!)
+  Source:
+    • "raw_text"  — full original article
+    • "slides"    — list of slide dictionaries (title, type, body)
+  Task for EACH slide in order:
+    • Write **at least two sentences** (≈ 25–60 words total).
+    • Use the slide’s visible content **and** extra context from raw_text.
+    • Keep a welcoming tone: encourage, explain, or add a useful tip.
+    • Mention code or quote briefly (“In this code snippet you’ll see …”).
+    • First slide  → start with a warm greeting + slide title.
+    • Last slide   → quick recap + short friendly goodbye.
+  Output exactly:
+    { "narration":[ { "slide_idx":N, "voice_text":"..." }, … ] }
+""").strip()
+def generate_detailed_narration(raw_text: str,
+                                slides: list,
+                                model: str = "gpt-4o"):
+    """Return narration list; print for review."""
     resp = client.chat.completions.create(
         model=model,
         temperature=0.8,
+        response_format={ "type": "json_object" },
         messages=[
             {"role": "system", "content": DETAILED_PROMPT},
             {"role": "user",   "content": json.dumps({
         ],
         max_tokens=2048,
     )
+    narration = json.loads(resp.choices[0].message.content)["narration"]
+    print("=== DETAILED NARRATION ===")
+    for n in narration:
+        print(f"[Slide {n['slide_idx']}] {n['voice_text']}\n")
+    return narration
+# ▸ запускаем с текущими raw_text и slides
+narration_list = generate_detailed_narration(raw_text, slides)
+# ──────────────────────────────────────────────────────────────
+# 4. Text-to-Speech  ➜  per-slide WAV  ➜  narration.wav
+# ──────────────────────────────────────────────────────────────
+!pip install --quiet --upgrade --no-cache-dir "openai>=1.33.0"
+!pip install --quiet pydub
+import importlib, json, os, openai
+openai = importlib.reload(openai)
+print("OpenAI SDK version:", openai.__version__)
+from openai import OpenAI
+from pathlib import Path
+from pydub import AudioSegment
+client = OpenAI()                           # ключ берётся из окружения
+# — исходные данные
+assert 'narration_list' in globals(), "narration_list not found"
+print(f"{len(narration_list)} slides to voice.")
+AUDIO_DIR.mkdir(exist_ok=True)
+wav_paths, durations = [], []
+for item in narration_list:
+    idx, text = item["slide_idx"], item["voice_text"]
+    print(f"🔊 Slide {idx}: synthesizing…")
+    # правильный параметр — response_format
+    speech = client.audio.speech.create(
+        model="tts-1",
+        voice="alloy",
+        input=text,
+        response_format="wav"       # ← теперь корректно
+    )
+    wav_path = AUDIO_DIR / f"slide_{idx:03}.wav"
+    speech.stream_to_file(wav_path)
+    wav_paths.append(wav_path)
+    snd = AudioSegment.from_file(wav_path)
+    durations.append(round(snd.duration_seconds, 2))
+print(f"✓ {len(wav_paths)} WAV files saved to {AUDIO_DIR}")
+# — склейка
+combined = AudioSegment.empty()
+for w in sorted(wav_paths):
+    combined += AudioSegment.from_file(w)
+final_wav = AUDIO_DIR / "narration.wav"
+combined.export(final_wav, format="wav")
+print(f"✓ Combined audio saved as {final_wav}")
+# --- отчёт ---
+for i, d in enumerate(durations, 1):
+    print(f"  slide_{i:03}: {d}s")
+print("✓ TTS stage complete — ready for ffmpeg video assembly")
+# === Ячейка: сборка видео (PNG + narration.wav → MP4) =====================
+import subprocess
+from pathlib import Path
+# 0) Проверяем входные файлы
+png_files   = sorted(SLIDES_DIR.glob("slide_*.png"))
+assert png_files, "❌ PNG slides not found"
+assert (AUDIO_DIR / "narration.wav").exists(), "❌ narration.wav missing"
+assert 'durations' in globals(), "❌ durations[] list not found"
+# 1) slides.txt для ffmpeg (лежит в той же папке, что PNG)
+concat_file = SLIDES_DIR / "slides.txt"
+with concat_file.open("w") as f:
+    for img, dur in zip(png_files, durations):
+        f.write(f"file '{img}'\n")
+        f.write(f"duration {dur}\n")
+    f.write(f"file '{png_files[-1]}'\n")      # повторяем последний кадр
+print("✓ slides.txt created →", concat_file)
+# 2) Финальный MP4 в папке текущего запуска
+output_mp4 = BASE_DIR / "output.mp4"          # ← теперь в BASE_DIR
+ffmpeg_cmd = [
+    "ffmpeg", "-y",
+    "-f", "concat", "-safe", "0", "-i", str(concat_file),
+    "-i", str(AUDIO_DIR / "narration.wav"),
+    "-c:v", "libx264", "-pix_fmt", "yuv420p",
+    "-c:a", "aac", "-shortest",
+    str(output_mp4)
+]
+print("🔧 Running ffmpeg …")
+subprocess.run(ffmpeg_cmd, check=True)
+print("✓ Video saved to", output_mp4)
+# ==========================================================================
+    # ↓↓↓ ничего ниже не трогайте
+    return str(output_mp4)