mariapar committed on
Commit
76160db
·
verified ·
1 Parent(s): 7129f76

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +12 -0
  2. app_core.py +189 -0
  3. packages.txt +1 -0
  4. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from app_core import generate_video
3
+
4
# Single-page Gradio UI: the user pastes lesson text, `generate_video` runs the
# whole pipeline, and the resulting MP4 is shown in a video player.
demo = gr.Interface(
    title="AI Presentation Generator",
    fn=generate_video,
    inputs=gr.Textbox(label="Paste your lesson text here", lines=14),
    outputs=gr.Video(label="Generated MP4"),
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
app_core.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ app_core.py
3
+ -----------
4
+ Pure-Python module that turns raw text into an MP4:
5
+
6
+ text ➜ outline ➜ HTML+PNG ➜ TTS (OpenAI) ➜ MP4
7
+ """
8
+
9
+ # -------- standard libs --------
10
+ import os, json, subprocess, shutil, textwrap, tempfile
11
+ from pathlib import Path
12
+ from datetime import datetime
13
+
14
+ # -------- third-party --------
15
+ from openai import OpenAI # ≥ 1.33.0
16
+ from pydub import AudioSegment
17
+ from playwright.sync_api import sync_playwright # headless screenshots
18
+
19
+ # ------------------------------------------------------------------
20
+ # 0. GLOBAL CONFIG (edit if you wish)
21
+ # ------------------------------------------------------------------
22
# --- speech settings ---------------------------------------------------
MODEL_TTS = "tts-1"          # OpenAI text-to-speech model
VOICE_ID = "alloy"           # voice preset passed to the TTS endpoint
AUDIO_SPEED = 1.5            # tempo factor: 1.0 = normal, 1.5 ≈ 50 % faster

# --- chat settings -----------------------------------------------------
MODEL_CHAT = "gpt-4o-mini"   # used for both the outline and the narration text
MAX_SLIDES = 10              # slide limit quoted in the outline prompt

# Shared API client; the key is read from the environment when the module
# is imported.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
29
+
30
+ # ------------------------------------------------------------------
31
+ # 1. OUTLINE PROMPT
32
+ # ------------------------------------------------------------------
33
# System prompt for the outline request.
#
# This is an f-string, so every literal JSON brace must be doubled; only
# {MAX_SLIDES} is interpolated.  The previous text ended the array with a
# single "}" ("]}" followed by "}}"), which is a SyntaxError in an f-string
# and prevented the module from being imported at all.
PROMPT_JSON = textwrap.dedent(f"""
    You are a presentation-outliner. (json!)

    • Maximum slides: {MAX_SLIDES} (incl. title)
    • First slide = "title".
    • Prefer "list" slides whenever possible (≤ 5 bullets).
    • Allowed types: "list" | "text" | "quote" | "code"
    • Preserve ``` code ``` verbatim.

    Return **only**
    {{
      "slides":[
        {{ "slide_idx":N, "type":"…", "title":"…", "body":… }},

      ]
    }}
""").strip()
50
+
51
def text_to_outline(raw_text: str) -> list:
    """
    Ask the chat model to split *raw_text* into a slide outline.

    Sends PROMPT_JSON as the system message and the raw text as the user
    message, requesting a JSON-object response.

    Returns:
        The list stored under the "slides" key of the reply, truncated to
        MAX_SLIDES entries (the prompt only *asks* the model to respect the
        limit, so it is enforced here as well).

    Raises:
        ValueError: if the reply is empty, is not valid JSON, or "slides"
            is not a list.
        KeyError: if the reply has no "slides" key.
    """
    rsp = client.chat.completions.create(
        model=MODEL_CHAT,
        temperature=0.3,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": PROMPT_JSON},
            {"role": "user", "content": raw_text},
        ],
    )

    content = rsp.choices[0].message.content
    if not content:
        # content can be None/empty; json.loads(None) would raise an
        # unhelpful TypeError.
        raise ValueError("Outline model returned an empty response.")

    slides = json.loads(content)["slides"]
    if not isinstance(slides, list):
        raise ValueError('Outline response: "slides" must be a list.')
    return slides[:MAX_SLIDES]
61
+
62
+ # ------------------------------------------------------------------
63
+ # 2. HTML SLIDE TEMPLATE (white bg, brand colours)
64
+ # ------------------------------------------------------------------
65
# Page shell shared by every slide: a 1280x720 white canvas with brand colours.
# The template is filled with str.format, so CSS braces are doubled and
# {content} is the only placeholder.
HTML_BASE = """
<!DOCTYPE html><html><head><meta charset="utf-8">
<link href="https://fonts.googleapis.com/css2?family=PT+Root+UI:wght@400;700&display=swap" rel="stylesheet">
<style>
body{{margin:0;width:1280px;height:720px;display:flex;flex-direction:column;
justify-content:center;align-items:center;font-family:'PT Root UI',Arial,sans-serif;
background:#fff;color:#000}}
h1{{font-size:60px;margin:0 0 40px;color:#6C63FF}}
p{{font-size:36px;margin:0}}
ul{{font-size:34px;margin:0;padding-left:40px;text-align:left}}
li{{margin:12px 0}}
blockquote{{font-size:40px;font-style:italic;margin:0;border-left:6px solid #6C63FF;padding-left:24px}}
pre{{font-size:28px;margin:0;padding:24px;background:#f5f5f5;border-radius:10px;overflow-x:auto}}
</style></head><body><div style="max-width:1000px;text-align:center">{content}</div></body></html>
"""

import html as _h


def _strip_code_fence(raw: str) -> str:
    """Remove a surrounding Markdown code fence (and its language tag)."""
    text = raw.strip()
    if text.startswith("```"):
        text = text[3:]
        opener, newline, rest = text.partition("\n")
        tag = opener.strip()
        # A bare word such as "python" or "c++" on the opening-fence line is
        # a language tag, not code; drop that whole line.
        if newline and all(ch.isalnum() or ch in "+#-_." for ch in tag):
            text = rest
    # Drop remaining backticks at either end, then the newlines that sat
    # next to the fence; indentation of the first code line is preserved.
    return text.strip("`").strip("\n")


def slide_to_html(slide: dict) -> str:
    """
    Render one outline entry as a complete HTML page.

    Args:
        slide: dict with optional keys "type" ("title", "list", "quote",
            "code"; anything else is rendered as a text slide), "title"
            and "body".  Missing keys are tolerated because the outline
            comes from a language model and, for example, title slides
            often carry no body.

    Returns:
        HTML_BASE with the slide markup substituted for {content}.  All
        slide text is HTML-escaped.
    """
    kind = slide.get("type", "text")
    body = slide.get("body")
    title = _h.escape(str(slide.get("title") or ""))
    # Avoid rendering the literal text "None" when the body is absent.
    text = "" if body is None else str(body)

    if kind == "title":
        content = f"<h1>{title}</h1>"
    elif kind == "list":
        if body is None:
            entries = []
        elif isinstance(body, (list, tuple)):
            entries = body
        else:
            # A lone string is one bullet, not one bullet per character.
            entries = [body]
        items = "\n".join(f"<li>{_h.escape(str(entry))}</li>" for entry in entries)
        content = f"<h1>{title}</h1><ul>{items}</ul>"
    elif kind == "quote":
        content = f"<blockquote>“{_h.escape(text)}”</blockquote>"
    elif kind == "code":
        code = _h.escape(_strip_code_fence(text))
        content = f"<h1>{title}</h1><pre><code>{code}</code></pre>"
    else:
        content = f"<h1>{title}</h1><p>{_h.escape(text)}</p>"
    return HTML_BASE.format(content=content)
97
+
98
+ # ------------------------------------------------------------------
99
+ # 3. NARRATION PROMPT
100
+ # ------------------------------------------------------------------
101
# System prompt for the narration request: asks for a short spoken text per
# slide, returned as a JSON object keyed by "narration".
PROMPT_NARR = "\n".join((
    "You are a friendly narrator. (json!)",
    "",
    "For every slide write 2-3 warm sentences (25–60 words) that match its content.",
    "Return:",
    '{ "narration":[ { "slide_idx":N, "voice_text":"..." }, … ] }',
))
108
+
109
def narration(raw_text: str, slides: list) -> list:
    """
    Ask the chat model for a short spoken text for each slide.

    The user message is a JSON document holding both the original text and
    the slide outline, so the model can see what each slide contains.

    Args:
        raw_text: the lesson text supplied by the user.
        slides: the outline, as returned by text_to_outline().

    Returns:
        A list of dicts, each with "slide_idx" and a non-empty
        "voice_text".  Entries missing either field are dropped, since the
        caller indexes both keys and feeds "voice_text" to the TTS call.

    Raises:
        ValueError: if the reply is empty, is not valid JSON, or
            "narration" is not a list.
        KeyError: if the reply has no "narration" key.
    """
    payload = json.dumps({"raw_text": raw_text, "slides": slides}, ensure_ascii=False)
    rsp = client.chat.completions.create(
        model=MODEL_CHAT,
        temperature=0.7,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": PROMPT_NARR},
            {"role": "user", "content": payload},
        ],
    )

    content = rsp.choices[0].message.content
    if not content:
        raise ValueError("Narration model returned an empty response.")

    items = json.loads(content)["narration"]
    if not isinstance(items, list):
        raise ValueError('Narration response: "narration" must be a list.')
    return [
        item for item in items
        if isinstance(item, dict) and "slide_idx" in item and item.get("voice_text")
    ]
119
+
120
+ # ------------------------------------------------------------------
121
+ # 4. MAIN PIPELINE
122
+ # ------------------------------------------------------------------
123
def generate_video(text: str) -> str:
    """
    Entry-point for Gradio: turn lesson text into a narrated slideshow.

    Pipeline: outline -> HTML slides -> PNG screenshots -> TTS per slide ->
    one audio track -> MP4.  Each slide stays on screen for exactly as long
    as its own narration clip, so picture and sound remain in sync.

    Requires the `ffmpeg` executable on PATH and a Chromium build usable by
    Playwright.

    Args:
        text: raw lesson text pasted by the user.

    Returns:
        Absolute path to the MP4, located in a fresh temp directory.

    Raises:
        ValueError: if *text* is blank or the outline contains no slides.
        subprocess.CalledProcessError: if an ffmpeg invocation fails.
    """
    if not text or not text.strip():
        raise ValueError("Please paste some lesson text first.")

    tmp_root = Path(tempfile.mkdtemp(prefix="ai_vid_"))
    try:
        output = _build_video(text, tmp_root)
    except Exception:
        # Nothing useful is left behind by a failed run; don't leak the dir.
        shutil.rmtree(tmp_root, ignore_errors=True)
        raise
    return str(output)


def _build_video(text: str, tmp_root: Path) -> Path:
    """Run the whole pipeline inside *tmp_root* and return the MP4 path."""
    slides_dir = tmp_root / "slides"
    audio_dir = tmp_root / "audio"
    slides_dir.mkdir()
    audio_dir.mkdir()

    # 1. outline
    slides = text_to_outline(text)
    if not slides:
        raise ValueError("The outline step produced no slides.")

    # 2. slides -> HTML -> PNG
    png_paths = _render_slides(slides, slides_dir)

    # 3. one audio clip per slide, in slide order
    clips = _synthesize_clips(narration(text, slides), slides, audio_dir)
    combined = AudioSegment.empty()
    for clip in clips:
        combined += clip
    wav = audio_dir / "narration.wav"
    combined.export(str(wav), format="wav")

    # 3b. speed-up (atempo changes tempo without changing pitch)
    if AUDIO_SPEED != 1.0:
        fast = audio_dir / "_fast.wav"
        subprocess.run(
            ["ffmpeg", "-y", "-i", str(wav), "-filter:a",
             f"atempo={AUDIO_SPEED}", str(fast)],
            check=True,
        )
        shutil.move(str(fast), str(wav))

    # len(AudioSegment) is in milliseconds; the tempo change shortens every
    # clip by the same factor.
    durations = [round(len(clip) / 1000 / AUDIO_SPEED, 3) for clip in clips]

    # 4. slides.txt for ffmpeg's concat demuxer
    concat = slides_dir / "slides.txt"
    with concat.open("w", encoding="utf-8") as f:
        for img, seconds in zip(png_paths, durations):
            f.write(f"file '{_concat_quote(img)}'\n")
            f.write(f"duration {seconds}\n")
        # The demuxer ignores the duration of the final entry unless the
        # last file is listed once more.
        f.write(f"file '{_concat_quote(png_paths[-1])}'\n")

    output = tmp_root / "output.mp4"
    subprocess.run(
        ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(concat),
         "-i", str(wav), "-c:v", "libx264", "-pix_fmt", "yuv420p",
         "-c:a", "aac", "-shortest", str(output)],
        check=True,
    )

    # Only the MP4 is needed from here on.
    shutil.rmtree(slides_dir, ignore_errors=True)
    shutil.rmtree(audio_dir, ignore_errors=True)
    return output


def _render_slides(slides: list, slides_dir: Path) -> list:
    """Write each slide as HTML and screenshot it; return the PNG paths."""
    html_paths = []
    # Files are numbered by position rather than by the model-supplied
    # slide_idx, which may be a string, repeat, or be missing.
    for pos, slide in enumerate(slides, start=1):
        html_path = slides_dir / f"slide_{pos:03}.html"
        html_path.write_text(slide_to_html(slide), encoding="utf-8")
        html_paths.append(html_path)

    png_paths = []
    with sync_playwright() as pw:
        browser = pw.chromium.launch()
        try:
            page = browser.new_page(viewport={"width": 1280, "height": 720})
            for html_path in html_paths:
                page.goto(html_path.as_uri())
                png_path = html_path.with_suffix(".png")
                page.screenshot(path=png_path)
                png_paths.append(png_path)
        finally:
            # Close the browser even when a page fails to load or render.
            browser.close()
    return png_paths


def _synthesize_clips(narr: list, slides: list, audio_dir: Path) -> list:
    """Return one AudioSegment per slide, in slide order."""
    silent_ms = 5000  # on-screen time for a slide that has no narration

    voice_by_idx = {}
    for item in narr:
        idx = item.get("slide_idx")
        if idx is not None:
            # Keys are compared as strings so 1 and "1" match.
            voice_by_idx.setdefault(str(idx), str(item.get("voice_text") or ""))

    clips = []
    for pos, slide in enumerate(slides, start=1):
        idx = slide.get("slide_idx")
        voice_text = voice_by_idx.get(str(idx), "").strip() if idx is not None else ""
        if not voice_text:
            clips.append(AudioSegment.silent(duration=silent_ms))
            continue
        speech = client.audio.speech.create(
            model=MODEL_TTS, voice=VOICE_ID,
            input=voice_text, response_format="wav")
        clip_path = audio_dir / f"slide_{pos:03}.wav"
        speech.stream_to_file(clip_path)
        clips.append(AudioSegment.from_file(str(clip_path)))
    return clips


def _concat_quote(path: Path) -> str:
    """Escape single quotes for a quoted path in an ffmpeg concat list."""
    return str(path).replace("'", "'\\''")
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ openai>=1.33.0
2
+ playwright
3
+ pydub
4
+ gradio