Create app.py
app.py
ADDED
@@ -0,0 +1,222 @@
# app.py

import os
import re
import gradio as gr
import torch
from torch import cuda
from math import isclose
import whisper
from PyPDF2 import PdfReader
from PIL import Image
from diffusers import StableDiffusionPipeline
from gtts import gTTS
from moviepy.editor import (
    ImageClip,
    AudioFileClip,
    TextClip,
    CompositeVideoClip,
    concatenate_videoclips
)
from moviepy.video.fx.all import resize

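# Dependencies assumed by the imports above (PyPI names, not pinned here):
#   torch, diffusers, transformers, accelerate, gradio, openai-whisper,
#   PyPDF2, Pillow, gTTS, moviepy
# MoviePy also expects the ffmpeg binary, and TextClip additionally needs ImageMagick.
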
######################################
# 1) SETUP AND MODEL LOADING
######################################

# Check for GPU
device = "cuda" if cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load Stable Diffusion
pipe = StableDiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32
)
pipe.to(device)

# (Optional) memory optimizations for low VRAM
pipe.enable_attention_slicing()
if device == "cuda":
    # Sequential CPU offload needs a CUDA device (and accelerate); skip it on CPU-only runs.
    pipe.enable_sequential_cpu_offload()

# Load Whisper (not actually used here for transcription, but included if needed)
whisper_model = whisper.load_model("small")

# Make output folders
os.makedirs("images", exist_ok=True)
os.makedirs("videos", exist_ok=True)


######################################
# 2) CORE PDF-TO-VIDEO FUNCTION
######################################

def unify_text_no_newlines(text):
    """Replace any sequence of whitespace/newlines with a single space."""
    return re.sub(r"\s+", " ", text).strip()

def split_into_sentences(text):
    """Split text into sentences by period. Adjust to your needs."""
    parts = re.split(r'\.\s*', text)
    # Clean them up
    sentences = [p.strip() for p in parts if p.strip()]
    return sentences

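# Example of the splitting behaviour: unify_text_no_newlines("It  rained.\nWe stayed in.")
# returns "It rained. We stayed in.", and split_into_sentences on that string gives
# ["It rained", "We stayed in"]; note that abbreviations such as "Dr." would also
# trigger a split with this simple period-based rule.
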
def repeating_zoom(t, base=1.0, amplitude=0.1, period=4.0):
    """
    Continuously zoom in and back out in a triangular wave:
      - base=1.0      => no zoom at the start/end of each cycle
      - amplitude=0.1 => scales up to 1.1 at the middle of the cycle
      - period=4.0    => one full in/out cycle every 4 seconds
    """
    cp = (t % period) / period
    if cp < 0.5:
        # first half of the cycle: scale rises from base to base + amplitude
        up = cp / 0.5  # in [0, 1]
        scale = base + amplitude * up
    else:
        # second half of the cycle: scale falls from base + amplitude back to base
        down = 1 - ((cp - 0.5) / 0.5)
        scale = base + amplitude * down
    return max(0.01, scale)

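# With the defaults (base=1.0, amplitude=0.1, period=4.0) the scale factor over one
# cycle is: t=0 -> 1.00, t=1 -> 1.05, t=2 -> 1.10, t=3 -> 1.05, t=4 -> 1.00, i.e. a
# gentle 10% zoom in and back out every four seconds of each clip.
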
def add_subtitles(video_clip, text, duration):
    """Overlay word-by-word subtitles at the bottom."""
    words = text.split()
    if not words:
        return video_clip

    word_duration = duration / len(words)
    subclips = []
    for i, w in enumerate(words):
        start_t = i * word_duration
        # MoviePy's 'caption' method requires an explicit size=(w, h); for a single
        # word the default 'label' method sizes itself. TextClip relies on ImageMagick.
        txt_clip = (
            TextClip(
                w, fontsize=36, color='white',
                font='Arial', bg_color='black', method='label'
            )
            .set_start(start_t)
            .set_duration(word_duration)
            .set_position(("center", "bottom"))
        )
        subclips.append(txt_clip)
    final = CompositeVideoClip([video_clip, *subclips])
    return final.set_duration(duration)

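# Timing note: gTTS does not expose per-word timestamps, so each word is simply shown
# for duration / len(words) seconds; that keeps the subtitles roughly in step with the
# narration, but it is an approximation rather than forced alignment.
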
def process_pdf_to_video(pdf_file_path):
    """
    1) Extract text from PDF (remove newlines).
    2) Split into sentences.
    3) For each sentence, generate image, TTS, clip.
    4) Concatenate final video.
    5) Return final MP4 path.
    """
    # 1) Extract text
    reader = PdfReader(pdf_file_path)
    raw_text = []
    for page in reader.pages:
        page_text = page.extract_text() or ""
        raw_text.append(page_text)
    text = unify_text_no_newlines(" ".join(raw_text))

    # 2) Split sentences
    sentences = split_into_sentences(text)
    if not sentences:
        raise ValueError("No text found in PDF.")

    # Basic Ghibli prompt
    base_prompt = "Ghibli-style art, soft lighting, whimsical characters, serene environment"
    clips = []

    # 3) Generate data for each sentence
    for idx, sentence in enumerate(sentences):
        if not sentence:
            continue

        # Prompt for Stable Diffusion
        prompt = f"{base_prompt}, {sentence}"
        # Generate image
        image = pipe(
            prompt=prompt,
            num_inference_steps=20
        ).images[0]
        img_path = f"images/clip_{idx+1}.png"
        image.save(img_path)

        # TTS
        audio_path = f"videos/tts_{idx+1}.mp3"
        tts = gTTS(sentence, lang='en')
        tts.save(audio_path)

        # Create Clip
        audio_clip = AudioFileClip(audio_path)
        duration = audio_clip.duration
        if duration < 0.1:
            continue

        img_clip = ImageClip(img_path).set_duration(duration)

        # Apply indefinite zoom in/out
        zoom_clip = img_clip.fx(
            resize,
            lambda t: repeating_zoom(t, base=1.0, amplitude=0.1, period=4.0)
        ).set_audio(audio_clip)

        # Add subtitles
        final_clip = add_subtitles(zoom_clip, sentence, duration)
        clips.append(final_clip)

    # 4) Concatenate all
    if not clips:
        raise ValueError("No valid clips generated.")

    combined = concatenate_videoclips(clips, method="compose")
    # Resize to 1280x720
    combined_16_9 = combined.resize((1280, 720))

    # 5) Write out final MP4
    final_path = "videos/final_video.mp4"
    combined_16_9.write_videofile(final_path, fps=24, codec="libx264")
    return final_path


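# Rough shape of a direct call (hypothetical file name):
#   process_pdf_to_video("story.pdf")  ->  "videos/final_video.mp4"
# Each sentence becomes one clip (a Stable Diffusion frame, a gTTS voice-over, the
# repeating zoom, and word-by-word subtitles); the clips are concatenated at 1280x720.
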
######################################
# 3) GRADIO INTERFACE
######################################

def generate_video_from_pdf(pdf_file):
    """
    This is the function called by Gradio.
    pdf_file is a Gradio 'tempfile' object whose .name references the local path.
    """
    if not pdf_file:
        return "No PDF uploaded."
    try:
        final_video_path = process_pdf_to_video(pdf_file.name)
        return final_video_path  # Gradio can display the video if we return the path
    except Exception as e:
        return f"Error: {str(e)}"


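# Caveat: on failure the handler returns a plain error string, but the output component
# below is gr.Video, which expects a file path, so the message will not render as a video;
# raising gr.Error inside the handler is one way Gradio surfaces errors instead.
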
# Build the Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# PDF to Ghibli-Style Video")
    pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
    generate_btn = gr.Button("Generate Video")
    video_output = gr.Video(label="Output Video")

    # When the button is clicked, call generate_video_from_pdf
    generate_btn.click(
        fn=generate_video_from_pdf,
        inputs=pdf_input,
        outputs=video_output
    )

# Launch the Gradio app
def start_app():
    # Note: on Hugging Face Spaces a plain demo.launch() is usually sufficient;
    # the explicit host and port here mainly matter for local or container runs.
    demo.launch(server_name="0.0.0.0", server_port=7860)

if __name__ == "__main__":
    start_app()
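
# Local usage (assuming ffmpeg, and ImageMagick for TextClip, are installed):
#   python app.py
# then open http://localhost:7860, the address implied by the server_name and
# server_port passed to demo.launch() above.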