import gradio as gr
from gtts import gTTS
from moviepy.editor import *
from diffusers import StableDiffusionPipeline
import torch
import os
import random

# Ensure output folders exist
os.makedirs("outputs/images", exist_ok=True)
os.makedirs("outputs/audio", exist_ok=True)
os.makedirs("outputs/videos", exist_ok=True)
os.makedirs("bg_music", exist_ok=True)

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the Stable Diffusion pipeline once at startup.
# fp16 only on GPU — fp16 inference on CPU is unsupported/extremely slow.
sd_pipeline = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32
).to(device)


def get_next_index(folder, prefix, ext):
    """Return the next free numeric suffix for files named '<prefix>_<n><ext>' in *folder*.

    Non-numeric suffixes are ignored; returns 1 for an empty folder.
    """
    files = [f for f in os.listdir(folder) if f.startswith(prefix) and f.endswith(ext)]
    if not files:
        return 1
    nums = [int(f.split("_")[-1].replace(ext, ""))
            for f in files
            if f.split("_")[-1].replace(ext, "").isdigit()]
    return max(nums, default=0) + 1


def pick_random_music():
    """Pick a random .mp3 from bg_music/.

    Returns (path, basename), or (None, None) when the folder has no .mp3 files.
    """
    files = [f"bg_music/{f}" for f in os.listdir("bg_music") if f.endswith(".mp3")]
    if not files:
        return None, None
    choice = random.choice(files)
    return choice, os.path.basename(choice)


def _render_video(text, img_path, voice_path, chosen_music, out_path, duration=10):
    """Compose background image + caption text + voice (+ optional bg music) into *out_path*.

    Shared by generate_visual_affirmation() and reroll_music() so the assembly
    logic lives in exactly one place. Closes all moviepy clips afterwards to
    release the underlying ffmpeg readers/file handles.
    """
    txt_clip = (TextClip(text, fontsize=50, color="white",
                         size=(720, 480), method="caption")
                .set_duration(duration)
                .set_position("center"))
    img_clip = ImageClip(img_path).set_duration(duration).resize((720, 480))
    audio = AudioFileClip(voice_path)
    final_audio = audio
    bg_audio = None
    if chosen_music:
        # Background track ducked to 30% so the voice stays intelligible.
        bg_audio = AudioFileClip(chosen_music).volumex(0.3)
        final_audio = CompositeAudioClip([audio, bg_audio])
    video = CompositeVideoClip([img_clip, txt_clip.set_audio(final_audio)],
                               size=(720, 480))
    try:
        video.write_videofile(out_path, fps=24, codec="libx264", audio_codec="aac")
    finally:
        # moviepy clips hold ffmpeg subprocesses / file handles; close them
        # even if the write fails, to avoid leaking one per generation.
        video.close()
        audio.close()
        if bg_audio is not None:
            bg_audio.close()


# -------------------------------
# Main Generator
# -------------------------------
def generate_visual_affirmation(text, prompt, bg_music=None):
    """Generate an affirmation video: SD background image + gTTS voice + optional music.

    Returns the 9-tuple wired to the Gradio outputs:
    (video, video-download, audio-download, image-download, music label,
     text state, prompt state, voice-path state, image-path state).
    """
    img_index = get_next_index("outputs/images", "bg", ".png")
    voice_index = get_next_index("outputs/audio", "voice", ".mp3")
    video_index = get_next_index("outputs/videos", "affirmation", ".mp4")

    img_path = f"outputs/images/bg_{img_index}.png"
    voice_path = f"outputs/audio/voice_{voice_index}.mp3"
    out_path = f"outputs/videos/affirmation_{video_index}.mp4"

    # Generate Image
    image = sd_pipeline(prompt).images[0]
    image.save(img_path)

    # Generate Voice
    tts = gTTS(text=text, lang="en")
    tts.save(voice_path)

    # Pick Music
    chosen_name = "No background music"
    if bg_music:
        chosen_music = bg_music
        chosen_name = os.path.basename(bg_music)
    else:
        chosen_music, picked_name = pick_random_music()
        # Bug fix: keep the "No background music" label when the bg_music
        # folder is empty instead of displaying "None".
        if chosen_music is not None:
            chosen_name = picked_name

    # Build Video
    _render_video(text, img_path, voice_path, chosen_music, out_path)

    # Return + store for re-roll
    return (out_path, out_path, voice_path, img_path,
            f"🎶 Background Music: {chosen_name}",
            text, prompt, voice_path, img_path)


# -------------------------------
# Re-roll Background Music
# -------------------------------
def reroll_music(text, prompt, voice_path, img_path):
    """Rebuild the last video with a freshly-picked random background track.

    Reuses the stored text, voice and image; returns
    (video, video-download, music label) for the Gradio outputs.
    """
    video_index = get_next_index("outputs/videos", "affirmation", ".mp4")
    out_path = f"outputs/videos/affirmation_{video_index}.mp4"

    # Pick new music (may be (None, None) if the folder is empty).
    chosen_music, chosen_name = pick_random_music()
    if chosen_music is None:
        # Bug fix: avoid showing "🎶 Background Music: None".
        chosen_name = "No background music"

    # Build new video via the shared renderer.
    _render_video(text, img_path, voice_path, chosen_music, out_path)

    return out_path, out_path, f"🎶 Background Music: {chosen_name}"


# -------------------------------
# Gradio UI
# -------------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🎨 Visual Affirmations Generator with Re-roll Music")

    with gr.Row():
        with gr.Column():
            text_in = gr.Textbox(label="Your Affirmation")
            prompt_in = gr.Textbox(label="Visual Prompt (e.g. sunset over ocean)")
            music_in = gr.Audio(label="Optional Background Music (leave empty to auto-pick)",
                                type="filepath")
            btn = gr.Button("Generate Visual Video")
            reroll_btn = gr.Button("Re-roll Music 🎶")
        with gr.Column():
            out_video = gr.Video(label="Generated Video")
            download_video = gr.File(label="Download Video")
            download_audio = gr.File(label="Download Audio")
            download_image = gr.File(label="Download Image")
            music_info = gr.Label(label="Music Info")

    # Hidden states (keep text/prompt/audio/img for re-roll)
    text_state = gr.State()
    prompt_state = gr.State()
    voice_state = gr.State()
    img_state = gr.State()

    btn.click(
        fn=generate_visual_affirmation,
        inputs=[text_in, prompt_in, music_in],
        outputs=[out_video, download_video, download_audio, download_image,
                 music_info, text_state, prompt_state, voice_state, img_state]
    )

    reroll_btn.click(
        fn=reroll_music,
        inputs=[text_state, prompt_state, voice_state, img_state],
        outputs=[out_video, download_video, music_info]
    )

if __name__ == "__main__":
    demo.launch()