File size: 8,455 Bytes
76160db
275d87e
 
a1c8c28
 
 
 
275d87e
 
a1c8c28
 
275d87e
b354735
275d87e
a1c8c28
 
b354735
 
76160db
a1c8c28
b354735
 
 
 
b245107
a1c8c28
 
76160db
a1c8c28
 
9a0391f
1806ec2
 
 
 
 
 
 
 
 
 
 
a1c8c28
 
 
 
 
 
 
1806ec2
 
 
 
 
 
 
 
 
 
 
 
76160db
 
a1c8c28
 
275d87e
 
 
 
 
 
b245107
275d87e
 
 
 
 
 
 
 
 
 
 
 
a1c8c28
 
 
 
 
 
 
b245107
a1c8c28
 
 
c45af57
a1c8c28
275d87e
a1c8c28
 
 
 
 
 
 
 
 
 
275d87e
a1c8c28
275d87e
a1c8c28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275d87e
a1c8c28
 
 
 
 
 
 
 
275d87e
a1c8c28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
"""
app_core.py
-----------
Ѐункция generate_video(text) β†’ Π°Π±ΡΠΎΠ»ΡŽΡ‚Π½Ρ‹ΠΉ ΠΏΡƒΡ‚ΡŒ ΠΊ Π³ΠΎΡ‚ΠΎΠ²ΠΎΠΌΡƒ MP4.

❗ ΠŸΡ€ΠΎΠΌΠΏΡ‚Ρ‹ PROMPT_JSON ΠΈ DETAILED_PROMPT скопированы Π±Π΅Π· ΠΈΠ·ΠΌΠ΅Π½Π΅Π½ΠΈΠΉ
   ΠΈΠ· вашСго исходного Π½ΠΎΡƒΡ‚Π±ΡƒΠΊΠ°.
"""

# ── standard library ──────────────────────────────────────────
import os, json, textwrap, subprocess, tempfile
from pathlib import Path
from datetime import datetime

# ── third-party ───────────────────────────────────────────────
from openai import OpenAI               # openai-python β‰₯1.33.0
from pydub import AudioSegment
from playwright.sync_api import sync_playwright

# ── Playwright browser (downloaded only once) ────────────────
# A marker file under /tmp records that the install command has already
# succeeded, so the download is skipped for as long as that marker exists.
_pw_flag = Path("/tmp/.pw_chrom_installed")
if not _pw_flag.exists():
    # check=True raises CalledProcessError when the install command fails;
    # the marker is then not written, so the next import tries again.
    subprocess.run(["playwright", "install", "chromium"], check=True)
    _pw_flag.touch()

# ── OpenAI client (the API key is taken from the OPENAI_API_KEY
#    environment variable, i.e. the deployment secret) ─────────
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# ───────────────────  PROMPT_JSON  ────────────────────────────
# System prompt used by text_to_outline(): asks the model to turn the source
# text into a JSON object with a "slides" array (title slide first).
# The literal is runtime text sent to the model and is kept exactly as-is.
PROMPT_JSON = textwrap.dedent(r"""
    You are a presentation-outliner.
    The user needs VALID json only β€” no extra commentary. (json!)

    ✦ Rules
      0. Total slides ≀ 10 (including the title slide).
        β€’ If the source text is longer, merge or summarise content to stay within 10 slides.

      1. First slide MUST be type "title":
          { "slide_idx":1, "type":"title", "title":"…", "body":"" }
        (body may stay empty)

      2. Prefer **"list"** whenever possible.
        β€’ Break sentences into concise bullet-points.
        β€’ Use "text" only when the content truly cannot be listed.
        Allowed types:
          "list"  – array, ≀ 5 items   ← _default choice_
          "text"  – short paragraph
          "quote" – short quotation or bold statement
          "code"  – code block, copy verbatim from ``` fences

      3. Preserve every ``` … ``` code block unchanged.

      4. Return this exact schema:
          {
            "slides":[
              { "slide_idx":N, "type":"…", "title":"…", "body":… },
              …               // ≀ 9 more objects after the title slide
            ]
          }

    Output the json only.
""").strip()

# ─────────────────  DETAILED_PROMPT  ──────────────────────────
# System prompt used by make_narration(): asks the model for per-slide
# voice-over text, returned as a JSON object with a "narration" array.
# The literal is runtime text sent to the model and is kept exactly as-is.
DETAILED_PROMPT = textwrap.dedent(r"""
  You are a friendly, motivational voice-over writer.
  The user needs VALID json only β€” no extra commentary. (json!)

  Source:
    β€’ "raw_text"  β€” full original article
    β€’ "slides"    β€” list of slide dictionaries (title, type, body)

  Task for EACH slide in order:
    β€’ Write **at least two sentences** (β‰ˆ 25–60 words total).
    β€’ Use the slide’s visible content **and** extra context from raw_text.
    β€’ Keep a welcoming tone: encourage, explain, or add a useful tip.
    β€’ Mention code or quote briefly (β€œIn this code snippet you’ll see …”).
    β€’ First slide  β†’ start with a warm greeting + slide title.
    β€’ Last slide   β†’ quick recap + short friendly goodbye.

  Output exactly:
    { "narration":[ { "slide_idx":N, "voice_text":"..." }, … ] }
""").strip()

# ──────────────────  helper functions  ─────────────────

def text_to_outline(raw: str) -> list:
    """Ask the chat model to outline *raw* as slides.

    Sends PROMPT_JSON as the system message and *raw* as the user message,
    requests a JSON-object response, and returns the value stored under the
    "slides" key of the decoded reply.

    Raises:
        json.JSONDecodeError: if the reply is not valid JSON.
        KeyError: if the reply has no "slides" key.
    """
    conversation = [
        {"role": "system", "content": PROMPT_JSON},
        {"role": "user", "content": raw},
    ]
    response = client.chat.completions.create(
        model="gpt-4o",
        temperature=0.3,
        response_format={"type": "json_object"},
        messages=conversation,
    )
    reply_text = response.choices[0].message.content
    outline = json.loads(reply_text)
    return outline["slides"]

def make_narration(raw: str, slides: list) -> list:
    """Ask the chat model for voice-over text for every slide.

    The user message is a JSON document holding the original article under
    "raw_text" and the outline under "slides" (non-ASCII characters are kept
    verbatim); DETAILED_PROMPT is the system message. Returns the value
    stored under the "narration" key of the decoded reply.

    Raises:
        json.JSONDecodeError: if the reply is not valid JSON.
        KeyError: if the reply has no "narration" key.
    """
    request_payload = json.dumps(
        {"raw_text": raw, "slides": slides},
        ensure_ascii=False,
    )
    conversation = [
        {"role": "system", "content": DETAILED_PROMPT},
        {"role": "user", "content": request_payload},
    ]
    response = client.chat.completions.create(
        model="gpt-4o",
        temperature=0.7,
        response_format={"type": "json_object"},
        messages=conversation,
    )
    reply_text = response.choices[0].message.content
    decoded = json.loads(reply_text)
    return decoded["narration"]

def _strip_code_fence(code: str) -> str:
    """Return *code* without its surrounding Markdown ``` fence, if any."""
    code = code.strip()
    if code.startswith("```") and "\n" in code:
        # Multi-line fence: the opening line holds only the backticks and an
        # optional language tag (e.g. ```python), so drop that line entirely
        # instead of leaking the tag into the rendered code.
        code = code.split("\n", 1)[1]
        return code.rstrip("`").rstrip()
    # Unfenced or single-line input: just peel off stray backticks.
    return code.lstrip("`").rstrip("`")


def _as_list_items(body) -> list:
    """Coerce the body of a "list" slide into a list of items."""
    if body is None:
        return []
    if isinstance(body, str):
        # A bare string would otherwise be iterated character by character,
        # producing one bullet per letter; use its non-empty lines instead.
        return [ln.strip() for ln in body.splitlines() if ln.strip()]
    try:
        return list(body)
    except TypeError:
        # Scalar (e.g. a number): render it as a single bullet.
        return [body]


def build_html(slide: dict) -> str:
    """Render one slide dictionary as a standalone 1280x720 HTML page.

    Args:
        slide: mapping with the keys "type", "title" and "body". Recognised
            types are "title", "list", "quote" and "code"; any other (or
            missing) type is rendered as a plain text paragraph. A missing
            or None title/body is treated as empty.

    Returns:
        The complete HTML document as a string. All slide content is
        HTML-escaped before being inserted into the page.
    """
    import html as _h

    kind = slide.get("type", "text")
    body = slide.get("body")
    if body is None:
        body = ""
    # str() guards against a non-string title, `or ""` against None.
    title = _h.escape(str(slide.get("title") or ""))

    if kind == "title":
        cont = f"<h1>{title}</h1>"
    elif kind == "list":
        items = "\n".join(
            f"<li>{_h.escape(str(it))}</li>" for it in _as_list_items(body)
        )
        cont = f"<h1>{title}</h1><ul>{items}</ul>"
    elif kind == "quote":
        # Curly quotes are written as escapes so they cannot be damaged by a
        # source-file encoding mix-up (the title is intentionally not shown).
        cont = f"<blockquote>\u201c{_h.escape(str(body))}\u201d</blockquote>"
    elif kind == "code":
        code = _h.escape(_strip_code_fence(str(body)))
        cont = f"<h1>{title}</h1><pre><code>{code}</code></pre>"
    else:
        cont = f"<h1>{title}</h1><p>{_h.escape(str(body))}</p>"

    # Doubled braces are literal CSS braces; {content} is the only placeholder.
    HTML_WRAP = """
    <!DOCTYPE html><html><head><meta charset="utf-8">
    <link href="https://fonts.googleapis.com/css2?family=PT+Root+UI:wght@400;700&display=swap" rel="stylesheet">
    <style>
     body{{margin:0;width:1280px;height:720px;display:flex;flex-direction:column;
          justify-content:center;align-items:center;font-family:'PT Root UI',Arial,sans-serif;
          background:#fff;color:#000}}
     h1{{font-size:60px;margin:0 0 40px;color:#6C63FF}}
     p{{font-size:36px;margin:0}}
     ul{{font-size:34px;margin:0;padding-left:40px;text-align:left}}
     li{{margin:12px 0}}
     blockquote{{font-size:40px;font-style:italic;margin:0;border-left:6px solid #6C63FF;padding-left:24px}}
     pre{{font-size:28px;margin:0;padding:24px;background:#f5f5f5;border-radius:10px;overflow-x:auto}}
    </style></head><body><div style="max-width:1000px;text-align:center">{content}</div></body></html>
    """
    return HTML_WRAP.format(content=cont)

# ────────────────────  generate_video  ────────────────────────
def generate_video(text: str) -> str:
    """Turn *text* into a narrated slide-show video and return the MP4 path.

    Called by Gradio. The pipeline is:
      1. outline the text into slides (text_to_outline);
      2. render every slide to HTML and screenshot it to PNG with Chromium;
      3. generate narration (make_narration) and synthesise one WAV per slide;
      4. join the PNGs and the combined narration with ffmpeg.

    Images, audio and durations are all paired by ``slide_idx``, so the
    picture stays in sync with the voice even if the narration comes back in
    a different order or does not cover every slide. Slides without narration
    are left out of the video; narration for unknown slides is ignored.

    Args:
        text: the source article.

    Returns:
        Path of the generated ``output.mp4`` inside a fresh temporary
        directory (the directory is not removed, since the caller needs
        the file).

    Raises:
        ValueError: if the outline is empty or no slide received narration.
        subprocess.CalledProcessError: if ffmpeg exits with an error.
    """
    work = Path(tempfile.mkdtemp(prefix="ppt2vid_"))
    slides_dir = work / "slides"
    audio_dir = work / "audio"
    slides_dir.mkdir()
    audio_dir.mkdir()

    # 1. outline
    slides = text_to_outline(text)
    if not slides:
        raise ValueError("the outline contains no slides; nothing to render")

    # 2. HTML -> PNG, keyed by slide index
    html_by_idx = {}
    for s in slides:
        h = slides_dir / f"slide_{s['slide_idx']:03}.html"
        h.write_text(build_html(s), encoding="utf-8")
        html_by_idx[s["slide_idx"]] = h

    png_by_idx = {}
    with sync_playwright() as p:
        br = p.chromium.launch()
        try:
            pg = br.new_page(viewport={"width": 1280, "height": 720})
            for idx, h in html_by_idx.items():
                pg.goto(h.as_uri())
                img = h.with_suffix(".png")
                pg.screenshot(path=img)
                png_by_idx[idx] = img
        finally:
            # Close the browser even when a page fails to render.
            br.close()

    # 3. narration + TTS, keyed by slide index
    narr = make_narration(text, slides)
    audio_by_idx = {}
    for n in narr:
        idx = n["slide_idx"]
        if idx not in png_by_idx:
            # No picture to show for this narration; skip the TTS call.
            continue
        wav = audio_dir / f"slide_{idx:03}.wav"
        speech = client.audio.speech.create(
            model="tts-1", voice="alloy",
            input=n["voice_text"], response_format="wav")
        speech.stream_to_file(wav)
        # Decode once; the segment provides both the duration and the audio.
        audio_by_idx[idx] = AudioSegment.from_file(wav)

    # Single playback order shared by the audio track and the image list.
    order = [idx for idx in sorted(png_by_idx) if idx in audio_by_idx]
    if not order:
        raise ValueError("no slide received narration; nothing to render")

    combo = AudioSegment.empty()
    for idx in order:
        combo += audio_by_idx[idx]
    wav = audio_dir / "narration.wav"
    combo.export(wav, "wav")

    # 4. ffmpeg concat
    concat = slides_dir / "slides.txt"
    with concat.open("w") as f:
        for idx in order:
            duration = round(audio_by_idx[idx].duration_seconds, 2)
            f.write(f"file '{png_by_idx[idx]}'\n")
            f.write(f"duration {duration}\n")
        # The concat demuxer needs the final image listed once more,
        # otherwise the duration of the last entry is not applied.
        f.write(f"file '{png_by_idx[order[-1]]}'\n")

    mp4 = work / "output.mp4"
    subprocess.run([
        "ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", concat,
        "-i", wav, "-c:v", "libx264", "-pix_fmt", "yuv420p",
        "-c:a", "aac", "-shortest", mp4], check=True)

    return str(mp4)