wahab5763 committed on
Commit 22ceee6 · verified · 1 Parent(s): 3ac66aa

Create app.py

Files changed (1)
  1. app.py +222 -0
app.py ADDED
@@ -0,0 +1,222 @@
+ # app.py
+
+ import os
+ import re
+ import gradio as gr
+ import torch
+ from torch import cuda
+ import whisper
+ from PyPDF2 import PdfReader
+ from diffusers import StableDiffusionPipeline
+ from gtts import gTTS
+ from moviepy.editor import (
+     ImageClip,
+     AudioFileClip,
+     TextClip,
+     CompositeVideoClip,
+     concatenate_videoclips
+ )
+ from moviepy.video.fx.all import resize
+
+ ######################################
+ # 1) SETUP AND MODEL LOADING
+ ######################################
+
+ # Check for GPU
+ device = "cuda" if cuda.is_available() else "cpu"
+ print(f"Using device: {device}")
+
+ # Load Stable Diffusion
+ pipe = StableDiffusionPipeline.from_pretrained(
+     "stabilityai/stable-diffusion-2",
+     torch_dtype=torch.float16 if device == "cuda" else torch.float32
+ )
+
+ # (Optional) memory optimizations for low VRAM
+ pipe.enable_attention_slicing()
+ if device == "cuda":
+     # Sequential CPU offload manages device placement itself (and requires
+     # accelerate), so it replaces pipe.to("cuda"); on CPU-only machines it
+     # would raise an error, hence the guard.
+     pipe.enable_sequential_cpu_offload()
+ else:
+     pipe.to(device)
+
+ # Load Whisper (not actually used here for transcription, but included if needed)
+ whisper_model = whisper.load_model("small")
+
+ # Make output folders
+ os.makedirs("images", exist_ok=True)
+ os.makedirs("videos", exist_ok=True)
+
+
+ ######################################
+ # 2) CORE PDF-TO-VIDEO FUNCTION
+ ######################################
+
+ def unify_text_no_newlines(text):
+     """Replace any sequence of whitespace/newlines with a single space."""
+     return re.sub(r"\s+", " ", text).strip()
+
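+ # Quick illustration (hypothetical input):
+ #   unify_text_no_newlines("one\ntwo   three") -> "one two three"
+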
+ def split_into_sentences(text):
+     """Split text into sentences by period. Adjust to your needs."""
+     parts = re.split(r'\.\s*', text)
+     # Clean them up
+     sentences = [p.strip() for p in parts if p.strip()]
+     return sentences
+
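+ # Quick illustration (hypothetical input):
+ #   split_into_sentences("Hi there. Bye.") -> ["Hi there", "Bye"]
+ # Naive period-splitting also breaks on abbreviations such as "e.g.".
+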
+ def repeating_zoom(t, base=1.0, amplitude=0.1, period=4.0):
+     """
+     Continuously zoom in/out in a triangular wave:
+     - base=1.0 => no zoom at the center
+     - amplitude=0.1 => up to 1.1, down to 1.0, etc.
+     - period=4s => every 4s completes one in/out cycle
+     """
+     cp = (t % period) / period
+     if cp < 0.5:
+         # 0..0.5 => scale from base..(base+amplitude)
+         up = cp / 0.5  # in [0..1]
+         scale = base + amplitude * up
+     else:
+         # 0.5..1 => scale from (base+amplitude)..base
+         down = 1 - ((cp - 0.5) / 0.5)
+         scale = base + amplitude * down
+     return max(0.01, scale)
+
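+ # Sample values with the defaults (base=1.0, amplitude=0.1, period=4.0):
+ #   t=0 -> 1.00, t=1 -> 1.05, t=2 -> 1.10, t=3 -> 1.05, then the cycle repeats.
+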
+ def add_subtitles(video_clip, text, duration):
+     """Overlay word-by-word subtitles at the bottom."""
+     words = text.split()
+     if not words:
+         return video_clip
+
+     word_duration = duration / len(words)
+     subclips = []
+     for i, w in enumerate(words):
+         start_t = i * word_duration
+         txt_clip = (
+             TextClip(
+                 w, fontsize=36, color='white',
+                 # method='label' sizes itself to the text; method='caption'
+                 # would require an explicit size argument.
+                 font='Arial', bg_color='black', method='label'
+             )
+             .set_start(start_t)
+             .set_duration(word_duration)
+             .set_position(("center", "bottom"))
+         )
+         subclips.append(txt_clip)
+     final = CompositeVideoClip([video_clip, *subclips])
+     return final.set_duration(duration)
+
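+ # Note: words are spread uniformly across the clip, so subtitle timing is
+ # approximate rather than aligned to the actual TTS audio.
+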
+ def process_pdf_to_video(pdf_file_path):
+     """
+     1) Extract text from PDF (remove newlines).
+     2) Split into sentences.
+     3) For each sentence, generate image, TTS, clip.
+     4) Concatenate final video.
+     5) Return final MP4 path.
+     """
+     # 1) Extract text
+     reader = PdfReader(pdf_file_path)
+     raw_text = []
+     for page in reader.pages:
+         page_text = page.extract_text() or ""
+         raw_text.append(page_text)
+     text = unify_text_no_newlines(" ".join(raw_text))
+
+     # 2) Split sentences
+     sentences = split_into_sentences(text)
+     if not sentences:
+         raise ValueError("No text found in PDF.")
+
+     # Basic Ghibli prompt
+     base_prompt = "Ghibli-style art, soft lighting, whimsical characters, serene environment"
+     clips = []
+
+     # 3) Generate data for each sentence
+     for idx, sentence in enumerate(sentences):
+         if not sentence:
+             continue
+
+         # Prompt for Stable Diffusion
+         prompt = f"{base_prompt}, {sentence}"
+         # Generate image
+         image = pipe(
+             prompt=prompt,
+             num_inference_steps=20
+         ).images[0]
+         img_path = f"images/clip_{idx+1}.png"
+         image.save(img_path)
+
+         # TTS
+         audio_path = f"videos/tts_{idx+1}.mp3"
+         tts = gTTS(sentence, lang='en')
+         tts.save(audio_path)
+
+         # Create clip
+         audio_clip = AudioFileClip(audio_path)
+         duration = audio_clip.duration
+         if duration < 0.1:
+             continue
+
+         img_clip = ImageClip(img_path).set_duration(duration)
+
+         # Apply indefinite zoom in/out
+         zoom_clip = img_clip.fx(
+             resize,
+             lambda t: repeating_zoom(t, base=1.0, amplitude=0.1, period=4.0)
+         ).set_audio(audio_clip)
+
+         # Add subtitles
+         final_clip = add_subtitles(zoom_clip, sentence, duration)
+         clips.append(final_clip)
+
+     # 4) Concatenate all
+     if not clips:
+         raise ValueError("No valid clips generated.")
+
+     combined = concatenate_videoclips(clips, method="compose")
+     # Resize to 1280x720
+     combined_16_9 = combined.resize((1280, 720))
+
+     # 5) Write out final MP4
+     final_path = "videos/final_video.mp4"
+     combined_16_9.write_videofile(final_path, fps=24, codec="libx264")
+     return final_path
+
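+ # Example (hypothetical path): process_pdf_to_video("story.pdf") renders
+ # every sentence and returns "videos/final_video.mp4".
+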
+ ######################################
+ # 3) GRADIO INTERFACE
+ ######################################
+
+ def generate_video_from_pdf(pdf_file):
+     """
+     Called by Gradio. Depending on the Gradio version, pdf_file is either a
+     tempfile-like object with a .name attribute or a plain filepath string.
+     """
+     if not pdf_file:
+         raise gr.Error("No PDF uploaded.")
+     try:
+         pdf_path = pdf_file.name if hasattr(pdf_file, "name") else pdf_file
+         final_video_path = process_pdf_to_video(pdf_path)
+         return final_video_path  # Gradio displays the video when given the path
+     except Exception as e:
+         # A plain string can't be rendered by the Video output, so surface
+         # failures through Gradio's error mechanism instead.
+         raise gr.Error(str(e))
+
+
+ # Build the Gradio UI
+ with gr.Blocks() as demo:
+     gr.Markdown("# PDF to Ghibli-Style Video")
+     pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
+     generate_btn = gr.Button("Generate Video")
+     video_output = gr.Video(label="Output Video")
+
+     # When the button is clicked, call generate_video_from_pdf
+     generate_btn.click(
+         fn=generate_video_from_pdf,
+         inputs=pdf_input,
+         outputs=video_output
+     )
+
+ # Launch the Gradio app
+ def start_app():
+     # Note: on Hugging Face Spaces, a plain demo.launch() is typically enough.
+     demo.launch(server_name="0.0.0.0", server_port=7860)
+
+ if __name__ == "__main__":
+     start_app()