mariapar committed on
Commit
b245107
·
verified ·
1 Parent(s): c45af57

Update app_core.py

Browse files
Files changed (1) hide show
  1. app_core.py +254 -117
app_core.py CHANGED
@@ -1,21 +1,26 @@
1
- # app_core.py ────────────────────────────────────────────────────────────
2
  """
3
- ΠžΠ΄Π½ΠΎΡ„Π°ΠΉΠ»ΠΎΠ²Ρ‹ΠΉ ΠΊΠΎΠ½Π²Π΅ΠΉΠ΅Ρ€:
4
- raw text β†’ outline β†’ HTML+PNG β†’ OpenAI-TTS β†’ MP4
5
 
6
- Π’ΠΠ–ΠΠž: PROMPT_JSON ΠΈ DETAILED_PROMPT оставлСны 1-Π²-1
 
 
 
 
 
7
  """
8
 
9
  # ─ стандартныС ─
10
- import os, json, textwrap, subprocess, tempfile, shutil
11
  from pathlib import Path
12
  from datetime import datetime
13
 
14
- # ─ third-party ─
15
- import openai
16
  from openai import OpenAI
17
  from pydub import AudioSegment
18
  from playwright.sync_api import sync_playwright
 
19
 
20
  # ─────────────────────────────────────────────────────────────
21
  # 0. Playwright Π±Ρ€Π°ΡƒΠ·Π΅Ρ€ (устанавливаСм 1 Ρ€Π°Π· Π±Π΅Π· sudo)
@@ -26,10 +31,10 @@ if not _pw_flag.exists():
26
  _pw_flag.touch()
27
 
28
  # ─────────────────────────────────────────────────────────────
29
- # 1. П Π  О М П Π’ Π« (Ρ‚ΠΎΡ‡Π½ΠΎ ΠΊΠ°ΠΊ Π² вашСм Ρ„Π°ΠΉΠ»Π΅)
30
  # ─────────────────────────────────────────────────────────────
31
  PROMPT_JSON = textwrap.dedent("""
32
- You are a presentation-outliner.
33
  The user needs VALID json only β€” no extra commentary. (json!)
34
 
35
  ✦ Rules
@@ -41,13 +46,13 @@ PROMPT_JSON = textwrap.dedent("""
41
  (body may stay empty)
42
 
43
  2. Prefer **"list"** whenever possible.
44
- β€’ Break sentences into concise bullet-points.
45
- β€’ Use "text" only when the content truly cannot be listed.
46
- Allowed types:
47
- "list" – array, ≀ 5 items ← default
48
- "text" – short paragraph
49
- "quote" – short quotation or bold statement
50
- "code" – code block, copy verbatim from ``` fences
51
 
52
  3. Preserve every ``` … ``` code block unchanged.
53
 
@@ -60,10 +65,11 @@ PROMPT_JSON = textwrap.dedent("""
60
  }
61
 
62
  Output the json only.
 
63
  """).strip()
64
 
65
  DETAILED_PROMPT = textwrap.dedent("""
66
- You are a friendly, motivational voice-over writer.
67
  The user needs VALID json only β€” no extra commentary. (json!)
68
 
69
  Source:
@@ -71,7 +77,7 @@ DETAILED_PROMPT = textwrap.dedent("""
71
  β€’ "slides" β€” list of slide dictionaries (title, type, body)
72
 
73
  Task for EACH slide in order:
74
- β€’ Write **at least two sentences** (β‰ˆ 25–60 words total).
75
  β€’ Use the slide’s visible content **and** extra context from raw_text.
76
  β€’ Keep a welcoming tone: encourage, explain, or add a useful tip.
77
  β€’ Mention code or quote briefly (β€œIn this code snippet you’ll see …”).
@@ -83,115 +89,246 @@ DETAILED_PROMPT = textwrap.dedent("""
83
  """).strip()
84
 
85
  # ─────────────────────────────────────────────────────────────
86
- # 2. Π’ΡΠΏΠΎΠΌΠΎΠ³Π°Ρ‚Π΅Π»ΡŒΠ½Ρ‹Π΅ Ρ„ΡƒΠ½ΠΊΡ†ΠΈΠΈ
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  # ─────────────────────────────────────────────────────────────
88
- client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
89
 
90
- def text_to_outline(raw: str) -> list:
91
- rsp = client.chat.completions.create(
92
- model="gpt-4o",
 
 
93
  temperature=0.3,
94
- response_format={"type":"json_object"},
95
- messages=[{"role":"system","content":PROMPT_JSON},
96
- {"role":"user", "content":raw}]
 
 
 
97
  )
98
- return json.loads(rsp.choices[0].message.content)["slides"]
 
 
99
 
100
  def build_slide_html(slide: dict) -> str:
101
- import html as _h
102
- title = _h.escape(slide["title"]); t=slide["type"]; body=slide["body"]
103
-
104
- if t=="title":
105
- content=f"<h1>{title}</h1>"
106
- elif t=="list":
107
- items="\n".join(f"<li>{_h.escape(str(i))}</li>" for i in body)
108
- content=f"<h1>{title}</h1><ul>{items}</ul>"
109
- elif t=="quote":
110
- content=f"<blockquote>β€œ{_h.escape(str(body))}”</blockquote>"
111
- elif t=="code":
112
- code=_h.escape(str(body).strip().lstrip("`").rstrip("`"))
113
- content=f"<h1>{title}</h1><pre><code>{code}</code></pre>"
114
- else:
115
- content=f"<h1>{title}</h1><p>{_h.escape(str(body))}</p>"
116
-
117
- HTML_BASE = """
118
- <!DOCTYPE html><html><head><meta charset="utf-8">
119
- <link href="https://fonts.googleapis.com/css2?family=PT+Root+UI:wght@400;700&display=swap" rel="stylesheet">
120
- <style>
121
- body{{margin:0;width:1280px;height:720px;display:flex;flex-direction:column;
122
- justify-content:center;align-items:center;font-family:'PT Root UI',Arial,sans-serif;
123
- background:#fff;color:#000}}
124
- h1{{font-size:60px;margin:0 0 40px;color:#6C63FF}}
125
- p{{font-size:36px;margin:0}}
126
- ul{{font-size:34px;margin:0;padding-left:40px;text-align:left}}
127
- li{{margin:12px 0}}
128
- blockquote{{font-size:40px;font-style:italic;margin:0;border-left:6px solid #6C63FF;padding-left:24px}}
129
- pre{{font-size:28px;margin:0;padding:24px;background:#f5f5f5;border-radius:10px;overflow-x:auto}}
130
- </style></head><body><div style="max-width:1000px;text-align:center">{content}</div></body></html>
131
- """
132
- return HTML_BASE.format(content=content)
133
-
134
- def make_narration(raw: str, slides: list) -> list:
135
- rsp = client.chat.completions.create(
136
- model="gpt-4o",
 
 
 
 
 
 
 
 
 
137
  temperature=0.8,
138
- response_format={"type":"json_object"},
139
- messages=[{"role":"system","content":DETAILED_PROMPT},
140
- {"role":"user", "content":json.dumps({"raw_text":raw,"slides":slides},ensure_ascii=False)}]
 
 
 
 
 
 
141
  )
142
- return json.loads(rsp.choices[0].message.content)["narration"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
144
  # ─────────────────────────────────────────────────────────────
145
- # 3. Главная ЀУНКЦИЯ для Gradio
146
  # ─────────────────────────────────────────────────────────────
147
- def generate_video(text: str) -> str:
148
- """Π’ΠΎΠ·Π²Ρ€Π°Ρ‰Π°Π΅Ρ‚ Π°Π±ΡΠΎΠ»ΡŽΡ‚Π½Ρ‹ΠΉ ΠΏΡƒΡ‚ΡŒ ΠΊ output.mp4"""
149
- work = Path(tempfile.mkdtemp(prefix="ppt2vid_"))
150
- slides_dir = work/"slides"; audio_dir=work/"audio"
151
- slides_dir.mkdir(); audio_dir.mkdir()
152
 
153
- # 1 outline
154
- slides = text_to_outline(text)
 
 
 
 
155
 
156
- # 2 HTML + PNG
157
- htmls=[]
158
- for s in slides:
159
- h=slides_dir/f"slide_{s['slide_idx']:03}.html"
160
- h.write_text(build_slide_html(s),encoding="utf-8"); htmls.append(h)
161
- with sync_playwright() as p:
162
- browser=p.chromium.launch()
163
- page=browser.new_page(viewport={"width":1280,"height":720})
164
- pngs=[]
165
- for h in htmls:
166
- page.goto(h.as_uri())
167
- img=h.with_suffix(".png")
168
- page.screenshot(path=img); pngs.append(img)
169
- browser.close()
170
- durations=[5.0]*len(pngs) # фикс 5 сСк Π½Π° слайд (Π±Π΅Π· ускорСния)
171
-
172
- # 3 TTS
173
- narr=make_narration(text,slides)
174
- wavs=[]
175
- for n in narr:
176
- wav=audio_dir/f"slide_{n['slide_idx']:03}.wav"
177
- speech=client.audio.speech.create(
178
- model="tts-1",voice="alloy",
179
- input=n["voice_text"],response_format="wav")
180
- speech.stream_to_file(wav); wavs.append(wav)
181
-
182
- mix=AudioSegment.empty()
183
- for w in sorted(wavs): mix+=AudioSegment.from_file(w)
184
- wav=audio_dir/"narration.wav"; mix.export(wav,"wav")
185
-
186
- # 4 slides.txt + ffmpeg
187
- concat=slides_dir/"slides.txt"
188
- with concat.open("w") as f:
189
- for img,d in zip(sorted(pngs),durations):
190
- f.write(f"file '{img}'\n"); f.write(f"duration {d}\n")
191
- f.write(f"file '{pngs[-1]}'\n")
192
-
193
- mp4=work/"output.mp4"
194
- subprocess.run(["ffmpeg","-y","-f","concat","-safe","0","-i",concat,
195
- "-i",wav,"-c:v","libx264","-pix_fmt","yuv420p",
196
- "-c:a","aac","-shortest",mp4],check=True)
197
- return str(mp4)
 
 
1
  """
2
+ Однофайловый конвейер для Gradio:
3
+ raw text → outline → HTML+PNG → OpenAI‑TTS → MP4
4
 
5
+ ВАЖНО: PROMPT_JSON и DETAILED_PROMPT оставлены 1‑в‑1.
6
+
7
+ Запуск:
8
+ python app_core.py # локальный запуск Gradio
9
+ # или
10
+ import app_core; app_core.main()
11
  """
12
 
13
  # ─ стандартные ─
14
+ import os, json, textwrap, subprocess, tempfile, shutil, html, asyncio
15
  from pathlib import Path
16
  from datetime import datetime
17
 
18
+ # ─ third‑party ─
19
+ import openai # β‰₯β€―1.33.0
20
  from openai import OpenAI
21
  from pydub import AudioSegment
22
  from playwright.sync_api import sync_playwright
23
+ import gradio as gr
24
 
25
  # ─────────────────────────────────────────────────────────────
26
  # 0. Playwright браузер (устанавливаем 1 раз без sudo)
 
31
  _pw_flag.touch()
32
 
33
  # ─────────────────────────────────────────────────────────────
34
+ # 1. System prompts (оставлены без изменений)
35
  # ─────────────────────────────────────────────────────────────
36
  PROMPT_JSON = textwrap.dedent("""
37
+ You are a presentation‑outliner.
38
  The user needs VALID json only β€” no extra commentary. (json!)
39
 
40
  ✦ Rules
 
46
  (body may stay empty)
47
 
48
  2. Prefer **"list"** whenever possible.
49
+ + β€’ Break sentences into concise bullet‑points.
50
+ + β€’ Use "text" only when the content truly cannot be listed.
51
+ + Allowed types:
52
+ + "list" – array, ≀ 5 items ← _default choice_
53
+ + "text" – short paragraph
54
+ + "quote" – short quotation or bold statement
55
+ + "code" – code block, copy verbatim from ``` fences
56
 
57
  3. Preserve every ``` … ``` code block unchanged.
58
 
 
65
  }
66
 
67
  Output the json only.
68
+
69
  """).strip()
70
 
71
  DETAILED_PROMPT = textwrap.dedent("""
72
+ You are a friendly, motivational voice‑over writer.
73
  The user needs VALID json only β€” no extra commentary. (json!)
74
 
75
  Source:
 
77
  β€’ "slides" β€” list of slide dictionaries (title, type, body)
78
 
79
  Task for EACH slide in order:
80
+ β€’ Write **at least two sentences** (β‰ˆβ€―25–60 words total).
81
  β€’ Use the slide’s visible content **and** extra context from raw_text.
82
  β€’ Keep a welcoming tone: encourage, explain, or add a useful tip.
83
  β€’ Mention code or quote briefly (β€œIn this code snippet you’ll see …”).
 
89
  """).strip()
90
 
91
  # ─────────────────────────────────────────────────────────────
92
+ # 2. HTML/CSS шаблон слайдов (без изменений)
93
+ # ─────────────────────────────────────────────────────────────
94
# Page template for a single slide, filled in with str.format().
# The only live fields are {title} and {content}; every literal CSS brace is
# doubled ({{ }}) so format() emits it as a plain brace instead of parsing it
# as a replacement field. The CSS fixes the body at 1280x720.
HTML_BASE = """
<!DOCTYPE html>
<html>
<head>
<meta charset=\"utf-8\">
<title>{title}</title>

<!-- Hyperskill brand‑like styling -->
<link href=\"https://fonts.googleapis.com/css2?family=PT+Root+UI:wght@400;700&display=swap\" rel=\"stylesheet\">

<style>
:root {{
--grad-from:#4BFFDF;
--grad-to:#7AB7FE;
--accent:#6C63FF;
--text-dark:#000;
--bg-light:#fff;
}}

body {{
margin:0;
width:1280px;height:720px;
display:flex;flex-direction:column;
justify-content:center;align-items:center;
font-family:'PT Root UI',Arial,sans-serif;
color:var(--text-dark);
background:var(--bg-light);
}}

.wrap {{max-width:1000px;text-align:center;padding:0 40px;}}

h1 {{
font-size:60px;font-weight:700;margin:0 0 40px;
color:var(--accent);
}}

p {{font-size:36px;margin:0;}}

ul {{
font-size:34px;text-align:left;margin:0 auto;padding-left:40px;
}}
li {{margin:12px 0;}}

blockquote {{
font-size:40px;font-style:italic;margin:0;
border-left:6px solid var(--accent);padding-left:24px;
}}

pre {{
font-size:28px;line-height:1.35;margin:0;padding:24px;
background:#f5f5f5;border-radius:10px;text-align:left;overflow-x:auto;
}}
</style>
</head>
<body>
<div class=\"wrap\">
{content}
</div>
</body>
</html>
"""
155
+
156
+ # ─────────────────────────────────────────────────────────────
157
+ # 3. Функции конвейера
158
  # ─────────────────────────────────────────────────────────────
159
# Module-level OpenAI client shared by the chat-completion and TTS helpers.
client = OpenAI()  # the API key is taken from OPENAI_API_KEY
160
 
161
+
162
def text_to_outline(raw_text: str, model: str = "gpt-4o") -> list:
    """Ask the chat model to turn raw text into a slide outline.

    Sends PROMPT_JSON as the system message and ``raw_text`` as the user
    message, requesting a JSON object, and returns its "slides" entry.

    Args:
        raw_text: Source article / script text.
        model: Chat model name passed to the completions endpoint.

    Returns:
        The list stored under the "slides" key of the model's JSON reply.

    Raises:
        RuntimeError: The reply was cut off at ``max_tokens`` or was empty.
        ValueError: The reply is not valid JSON, or "slides" is not a list.
        KeyError: The reply has no "slides" key.
    """
    resp = client.chat.completions.create(
        model=model,
        temperature=0.3,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": PROMPT_JSON},
            {"role": "user", "content": raw_text},
        ],
        max_tokens=2048,
    )
    choice = resp.choices[0]

    # A reply that hit the max_tokens cap is cut mid-object; report that
    # directly instead of letting json.loads fail with an opaque message.
    if choice.finish_reason == "length":
        raise RuntimeError(
            "Outline response was truncated at max_tokens; shorten the input text"
        )
    if choice.message.content is None:
        raise RuntimeError("Outline response contained no content")

    slides = json.loads(choice.message.content)["slides"]
    if not isinstance(slides, list):
        raise ValueError('Outline response: "slides" must be a list')
    return slides
176
+
177
 
178
def build_slide_html(slide: dict) -> str:
    """Render one outline slide as a complete HTML page.

    Args:
        slide: Slide dictionary with "title", "type" and "body" entries.
            "type" selects the layout ("title", "list", "quote", "code");
            any other value is rendered as a plain paragraph. A missing or
            ``None`` body is treated as empty.

    Returns:
        HTML_BASE filled with the escaped title and the slide markup.
    """
    import re  # local import: only needed to strip code fences

    kind = slide.get("type", "text")
    body = slide.get("body")
    if body is None:
        # Avoid rendering the literal text "None" for slides without a body.
        body = ""
    title = html.escape(str(slide.get("title", "")))

    if kind == "title":
        content = f"<h1>{title}</h1>"
    elif kind == "list":
        # A bare string must become one bullet, not one bullet per character.
        if isinstance(body, (list, tuple)):
            entries = body
        else:
            entries = [body] if body else []
        items = "\n".join(f"<li>{html.escape(str(it))}</li>" for it in entries)
        content = f"<h1>{title}</h1><ul>{items}</ul>"
    elif kind == "quote":
        content = f"<blockquote>“{html.escape(str(body))}”</blockquote>"
    elif kind == "code":
        code = str(body).strip()
        # Drop an opening fence line including its language tag ("```python"),
        # which plain backtick stripping would leave as the first code line.
        code = re.sub(r"\A`{3,}[^\n`]*\n", "", code)
        code = html.escape(code.lstrip("`").rstrip("`"))
        content = f"<h1>{title}</h1><pre><code>{code}</code></pre>"
    else:  # text
        content = f"<h1>{title}</h1><p>{html.escape(str(body))}</p>"

    return HTML_BASE.format(title=title, content=content)
196
+
197
+
198
def save_html(slides: list, slides_dir: Path) -> list:
    """Write one HTML file per slide and return the written paths.

    Args:
        slides: Slide dictionaries; each must carry a "slide_idx" entry that
            is an integer or an integer-like string.
        slides_dir: Target directory; created if it does not exist yet.

    Returns:
        Paths of the written ``slide_NNN.html`` files, in input order.
    """
    slides_dir.mkdir(parents=True, exist_ok=True)

    html_paths = []
    for slide in slides:
        # JSON from the model may carry the index as a string; the ":03"
        # format spec only accepts numbers, so normalise first.
        idx = int(slide["slide_idx"])
        target = slides_dir / f"slide_{idx:03}.html"
        target.write_text(build_slide_html(slide), encoding="utf-8")
        html_paths.append(target)
    return html_paths
205
+
206
+
207
def html_to_png(html_paths: list) -> list:
    """Screenshot each HTML file to a PNG next to it.

    Args:
        html_paths: ``pathlib.Path`` objects of the slide HTML files.

    Returns:
        Paths of the written PNG files (same names with a ``.png`` suffix),
        in input order. An empty input returns an empty list without
        starting a browser.
    """
    if not html_paths:
        return []

    png_paths = []
    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page(viewport={"width": 1280, "height": 720})
            for html_file in html_paths:
                page.goto(html_file.as_uri())
                png_path = html_file.with_suffix(".png")
                page.screenshot(path=png_path)
                png_paths.append(png_path)
        finally:
            # Close the browser even when navigation or a screenshot fails.
            browser.close()
    return png_paths
219
+
220
+
221
def generate_narration(raw_text: str, slides: list, model: str = "gpt-4o") -> list:
    """Ask the chat model for voice-over text for each slide.

    Sends DETAILED_PROMPT as the system message and a JSON document holding
    ``raw_text`` and ``slides`` as the user message.

    Args:
        raw_text: Source article / script text.
        slides: Slide dictionaries produced for the same text.
        model: Chat model name passed to the completions endpoint.

    Returns:
        The list stored under the "narration" key of the model's JSON reply.

    Raises:
        RuntimeError: The reply was cut off at ``max_tokens`` or was empty.
        ValueError: The reply is not valid JSON, or "narration" is not a list.
        KeyError: The reply has no "narration" key.
    """
    payload = json.dumps(
        {"raw_text": raw_text, "slides": slides},
        ensure_ascii=False,
    )
    resp = client.chat.completions.create(
        model=model,
        temperature=0.8,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": DETAILED_PROMPT},
            {"role": "user", "content": payload},
        ],
        max_tokens=2048,
    )
    choice = resp.choices[0]

    # A reply that hit the max_tokens cap is cut mid-object; report that
    # directly instead of letting json.loads fail with an opaque message.
    if choice.finish_reason == "length":
        raise RuntimeError(
            "Narration response was truncated at max_tokens; shorten the input text"
        )
    if choice.message.content is None:
        raise RuntimeError("Narration response contained no content")

    narration = json.loads(choice.message.content)["narration"]
    if not isinstance(narration, list):
        raise ValueError('Narration response: "narration" must be a list')
    return narration
236
+
237
+
238
def tts_narration(narration_list: list, audio_dir: Path):
    """Synthesise one WAV per narration item and join them into one track.

    Args:
        narration_list: Dictionaries with "slide_idx" and "voice_text".
        audio_dir: Directory for the per-slide WAVs and the joined track;
            created if missing.

    Returns:
        Tuple ``(final_wav, durations)``: the path of ``narration.wav`` and
        the per-slide durations in seconds (rounded to 2 decimals), ordered
        by slide index, i.e. the same order the audio is concatenated in.
    """
    audio_dir.mkdir(parents=True, exist_ok=True)

    # Process in slide order so the returned durations line up with the
    # concatenated audio even if the model returned items out of order.
    # int() also accepts indices delivered as strings.
    ordered = sorted(narration_list, key=lambda item: int(item["slide_idx"]))

    durations = []
    combined = AudioSegment.empty()
    for item in ordered:
        idx = int(item["slide_idx"])
        speech = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=item["voice_text"],
            response_format="wav",
        )
        wav_path = audio_dir / f"slide_{idx:03}.wav"
        speech.stream_to_file(wav_path)

        # Decode once and reuse the segment for both duration and joining.
        segment = AudioSegment.from_file(wav_path)
        durations.append(round(segment.duration_seconds, 2))
        combined += segment

    final_wav = audio_dir / "narration.wav"
    combined.export(final_wav, format="wav")

    return final_wav, durations
266
+
267
+
268
def assemble_video(slides_dir: Path, audio_path: Path, durations: list, output_mp4: Path):
    """Combine slide PNGs and the narration track into an MP4 with ffmpeg.

    Writes ``slides.txt`` (ffmpeg concat list) into ``slides_dir`` and runs
    ffmpeg with the concat list and ``audio_path`` as inputs.

    Args:
        slides_dir: Directory containing ``slide_*.png`` files.
        audio_path: Narration audio file.
        durations: Display time in seconds for each slide, in sorted
            slide-file order.
        output_mp4: Destination video path.

    Returns:
        ``output_mp4``.

    Raises:
        RuntimeError: No ``slide_*.png`` files were found.
        ValueError: The number of durations differs from the number of slides.
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
    """
    png_files = sorted(slides_dir.glob("slide_*.png"))
    if not png_files:
        raise RuntimeError("PNG slides not found")
    # zip() would silently drop the surplus side and yield a desynced video.
    if len(durations) != len(png_files):
        raise ValueError(
            f"Got {len(durations)} durations for {len(png_files)} slides"
        )

    def _concat_entry(path: Path) -> str:
        # Absolute path: relative entries are resolved against the list
        # file's directory. A single quote inside the quoted path is
        # written as '\''.
        escaped = str(path.resolve()).replace("'", "'\\''")
        return f"file '{escaped}'\n"

    concat_file = slides_dir / "slides.txt"
    with concat_file.open("w", encoding="utf-8") as f:
        for img, dur in zip(png_files, durations):
            f.write(_concat_entry(img))
            f.write(f"duration {dur}\n")
        f.write(_concat_entry(png_files[-1]))  # repeat last frame

    ffmpeg_cmd = [
        "ffmpeg", "-y",
        "-f", "concat", "-safe", "0", "-i", str(concat_file),
        "-i", str(audio_path),
        "-c:v", "libx264", "-pix_fmt", "yuv420p",
        "-c:a", "aac", "-shortest",
        str(output_mp4),
    ]
    subprocess.run(ffmpeg_cmd, check=True)
    return output_mp4
290
 
291
  # ─────────────────────────────────────────────────────────────
292
+ # 4. Главная функция конвейера (вызывается из Gradio)
293
  # ─────────────────────────────────────────────────────────────
 
 
 
 
 
294
 
295
def process_pipeline(raw_text: str) -> str:
    """Run the whole pipeline and return the path of the resulting MP4.

    Stages: outline from the chat model, HTML + PNG slides, narration text
    and TTS audio, then ffmpeg assembly. All artefacts live in a fresh
    temporary run directory.

    Args:
        raw_text: Source article / script text.

    Returns:
        Path of ``output.mp4`` inside the run directory, as a string.

    Raises:
        ValueError: ``raw_text`` is empty or whitespace only.
    """
    # Reject empty input before spending API calls on it.
    if not raw_text or not raw_text.strip():
        raise ValueError("Input text is empty")

    run_dir = Path(tempfile.mkdtemp(prefix="ai_presentation_"))
    try:
        slides_dir = run_dir / "slides"
        audio_dir = run_dir / "audio"
        slides_dir.mkdir()

        # 1. GPT -> outline
        slides = text_to_outline(raw_text)

        # 2. outline -> HTML -> PNG
        html_paths = save_html(slides, slides_dir)
        html_to_png(html_paths)

        # 3. GPT -> narration -> TTS
        narration = generate_narration(raw_text, slides)
        wav_path, durations = tts_narration(narration, audio_dir)

        # 4. PNG + WAV -> MP4
        output_mp4 = run_dir / "output.mp4"
        assemble_video(slides_dir, wav_path, durations, output_mp4)
    except Exception:
        # Do not leave a half-built run directory behind when a stage fails.
        shutil.rmtree(run_dir, ignore_errors=True)
        raise

    return str(output_mp4)
318
+
319
+ # ─────────────────────────────────────────────────────────────
320
+ # 5. Gradio UI
321
+ # ─────────────────────────────────────────────────────────────
322
+
323
def main():
    """Build the Gradio interface around process_pipeline and launch it."""
    text_input = gr.Textbox(lines=20, label="Raw article / script text")
    video_output = gr.File(label="Generated presentation (MP4)")
    gr.Interface(
        fn=process_pipeline,
        inputs=text_input,
        outputs=video_output,
        title="AI Presentation Generator",
        description="One‑click conversion of raw text into narrated video slides.",
    ).launch()


if __name__ == "__main__":
    main()