# Unity / app.py — "Dark Money" AI shorts generator (Gradio app)
# Author: Yashu1064, commit 8da4163 ("Create app.py", verified)
# Standard library
import os
import random
import textwrap

# Third-party
import gradio as gr
import numpy as np
import torch
from diffusers import StableDiffusionPipeline
from moviepy.editor import (
    AudioFileClip,
    CompositeAudioClip,
    ImageClip,
    concatenate_videoclips,
)
from moviepy.video.fx.all import resize
from PIL import Image, ImageDraw
from TTS.api import TTS
# Everything runs on CPU (free Spaces tier — no GPU available).
device = "cpu"

# Load SDXL (lighter config for CPU)
# NOTE(review): float32 on CPU is very slow for SDXL; each image will take
# minutes. Loaded once at import time so every request reuses the pipeline.
pipe = StableDiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float32,
    use_safetensors=True
).to(device)

# Load Coqui TTS
# Single-speaker English Tacotron2 model; progress bar disabled for server logs.
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
def generate_script(prompt):
    """Return the script for the reel: four hook lines in random order.

    NOTE(review): the *prompt* argument is currently ignored — the script
    always comes from the same fixed pool of hook lines; only their order
    varies per call.
    """
    hook_lines = [
        "They laugh when you start.",
        "They doubt your discipline.",
        "They mock your silence.",
        "But they fear your results.",
    ]
    # random.sample over the full pool yields a uniform random permutation
    # without mutating the source list.
    return random.sample(hook_lines, len(hook_lines))
def generate_image(text, index, height=1024, width=576):
    """Render one 9:16 portrait scene image for *text* and save it as PNG.

    Parameters
    ----------
    text : str
        Scene line, appended to a fixed cinematic style prompt.
    index : int
        Scene number, used to build a unique output filename.
    height, width : int
        Output resolution; defaults preserve the original 576x1024
        portrait frame (generalized from the previous hard-coded values).

    Returns
    -------
    str
        Path of the saved image, ``scene_<index>.png``.
    """
    prompt = f"dark cinematic, luxury wealth, night city, dramatic lighting, 9:16 portrait, {text}"
    # `pipe` is the module-level StableDiffusionPipeline loaded at import time.
    image = pipe(prompt, height=height, width=width).images[0]
    path = f"scene_{index}.png"
    image.save(path)
    return path
def add_subtitle(image_path, text):
    """Burn *text* as a white subtitle near the bottom of the image.

    BUG FIX: the previous version drew the whole line in one call with no
    wrapping, so long hook lines ran off the right edge of the 576-px-wide
    portrait frame. The text is now wrapped before drawing.

    Parameters
    ----------
    image_path : str
        Path of the scene PNG to annotate (left untouched on disk).
    text : str
        Subtitle text.

    Returns
    -------
    str
        Path of the new annotated file, ``<original>_text.png``.
    """
    img = Image.open(image_path)
    draw = ImageDraw.Draw(img)
    w, h = img.size
    # Conservative wrap width for PIL's small default bitmap font;
    # draw.text renders the embedded newlines as separate lines.
    wrapped = textwrap.fill(text, width=40)
    draw.text((40, h - 180), wrapped, fill="white")
    new_path = image_path.replace(".png", "_text.png")
    img.save(new_path)
    return new_path
def create_kenburns_clip(image_path, duration=5):
    """Turn a still image into a *duration*-second clip with a slow zoom.

    The image is first normalized to 1280 px tall, then a Ken Burns style
    zoom-in (5% per second) is applied over the clip's lifetime.
    """
    def slow_zoom(t):
        # Scale grows linearly: 1.0 at t=0, 1.25 after 5 seconds.
        return 1 + 0.05 * t

    base = ImageClip(image_path).set_duration(duration).resize(height=1280)
    return base.fx(resize, slow_zoom)
def generate_voice(script, file_path="voice.wav"):
    """Synthesize the whole script as one narration track.

    Parameters
    ----------
    script : list[str]
        Scene lines; joined with spaces and read as a single take.
    file_path : str
        Output WAV path (generalized from the previous hard-coded name;
        the default keeps the original "voice.wav" behavior).

    Returns
    -------
    str
        The path of the written audio file.
    """
    full_text = " ".join(script)
    # `tts` is the module-level Coqui TTS model loaded at import time.
    tts.tts_to_file(text=full_text, file_path=file_path)
    return file_path
def create_video(image_paths, voice_path):
    """Assemble the final reel: Ken Burns clips + narration (+ optional music).

    BUG FIX: the original loaded and attenuated ``music.mp3`` but never
    attached it — the rendered video had voice-only audio in both branches.
    The music is now actually mixed under the narration.

    Parameters
    ----------
    image_paths : list[str]
        Scene images; one 5-second clip is built from each, in order.
    voice_path : str
        WAV file with the narration track.

    Returns
    -------
    str
        Path of the rendered MP4, "final_dark_money.mp4".
    """
    clips = [create_kenburns_clip(img) for img in image_paths]
    final = concatenate_videoclips(clips, method="compose")

    voice = AudioFileClip(voice_path)
    if os.path.exists("music.mp3"):
        # Mix background music at 30% volume under the faded-in narration,
        # then clamp the mixed track to the video's duration.
        bg_music = AudioFileClip("music.mp3").volumex(0.3)
        mixed = CompositeAudioClip([voice.audio_fadein(1), bg_music])
        final = final.set_audio(mixed.set_duration(final.duration))
    else:
        final = final.set_audio(voice)

    output_path = "final_dark_money.mp4"
    final.write_videofile(output_path, fps=24)
    return output_path
def generate_video(prompt):
    """End-to-end pipeline: script -> images -> subtitles -> voice -> video.

    Returns the path of the rendered MP4 for Gradio to display.
    """
    scenes = generate_script(prompt)
    # One subtitled scene image per script line, in script order.
    images = [
        add_subtitle(generate_image(scene, i), scene)
        for i, scene in enumerate(scenes)
    ]
    voice = generate_voice(scenes)
    return create_video(images, voice)
# Gradio UI: one text prompt in, one rendered vertical video out.
iface = gr.Interface(
    fn=generate_video,
    inputs=gr.Textbox(label="Enter Dark Money Motivation Prompt"),
    outputs=gr.Video(label="Generated Reel"),
    title="🔥 Dark Money AI Shorts Generator PRO"
)
# NOTE(review): launches at import time; an `if __name__ == "__main__":`
# guard would be safer if this module is ever imported elsewhere.
iface.launch()