"""Streamlit app that turns a text topic into a narrated slideshow video.

Pipeline: a causal language model drafts a script, Stable Diffusion renders
one image per scene, a TTS model narrates the script, and MoviePy stitches
the images and the narration into an MP4 file.
"""

import os
import tempfile
import base64
from pathlib import Path

import streamlit as st
import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
from diffusers import StableDiffusionPipeline
from TTS.api import TTS
import cv2
import numpy as np
from PIL import Image
# Kept as a star import (and kept last, as in the original file) so that the
# set of names it brings into scope is unchanged. The code below relies on
# ImageClip, AudioFileClip and concatenate_videoclips from it.
from moviepy.editor import *  # noqa: F401,F403


def _extract_scenes(script):
    """Split *script* on the ``"Scene:"`` marker and drop empty fragments."""
    fragments = (part.strip() for part in script.split("Scene:"))
    return [fragment for fragment in fragments if fragment]


class VideoGenerator:
    """Bundle the text, image and speech models used to build a video.

    Every artifact produced by the methods below (scene images, narration,
    final video) is written into ``self.temp_dir``, a temporary directory
    created when the instance is constructed.
    """

    def __init__(self):
        """Load the three models and create the working directory."""
        # Half precision is only used on a GPU; without CUDA fall back to
        # CPU/float32 instead of failing on a hard-coded ``.to("cuda")``.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        dtype = torch.float16 if self.device == "cuda" else torch.float32

        # Initialize text generation model
        self.text_model = AutoModelForCausalLM.from_pretrained(
            "facebook/opt-1.3b",
            torch_dtype=dtype,
            device_map="auto",
        )
        self.text_tokenizer = AutoTokenizer.from_pretrained("facebook/opt-1.3b")

        # Initialize image generation model
        self.image_generator = StableDiffusionPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5",
            torch_dtype=dtype,
        ).to(self.device)

        # Initialize TTS model
        self.tts = TTS(
            model_name="tts_models/en/ljspeech/tacotron2-DDC",
            progress_bar=False,
        )

        # Create temp directory
        self.temp_dir = Path(tempfile.mkdtemp())

    def generate_script(self, prompt: str) -> str:
        """Generate a video script about *prompt* with the text model.

        Args:
            prompt: Topic supplied by the user.

        Returns:
            The text produced by the model, without the instruction prompt
            that was fed to it.
        """
        inputs = self.text_tokenizer(
            "Generate a detailed video script with facts about: "
            f"{prompt}. \nInclude scene descriptions.",
            return_tensors="pt",
        ).to(self.text_model.device)  # follow device_map="auto" placement

        outputs = self.text_model.generate(
            **inputs,  # passes the attention mask along with input_ids
            max_length=500,
            do_sample=True,  # temperature is ignored under greedy decoding
            temperature=0.7,
            num_return_sequences=1,
        )

        # A causal LM returns prompt + continuation; keep only the new tokens
        # so the instruction text is not narrated or rendered as a scene.
        prompt_length = inputs["input_ids"].shape[-1]
        continuation = outputs[0][prompt_length:]
        return self.text_tokenizer.decode(continuation, skip_special_tokens=True)

    def generate_scene_images(self, scene_descriptions):
        """Render one image per scene description with Stable Diffusion.

        Args:
            scene_descriptions: Iterable of text prompts, one per scene.

        Returns:
            List of paths to the saved ``scene_<index>.png`` files, in the
            same order as the descriptions.
        """
        image_paths = []
        for index, description in enumerate(scene_descriptions):
            image = self.image_generator(description).images[0]
            destination = self.temp_dir / f"scene_{index}.png"
            image.save(destination)
            image_paths.append(destination)
        return image_paths

    def generate_voiceover(self, script: str) -> Path:
        """Synthesize *script* to ``voiceover.wav`` and return its path."""
        audio_path = self.temp_dir / "voiceover.wav"
        self.tts.tts_to_file(script, file_path=str(audio_path))
        return audio_path

    def create_video(self, image_paths, audio_path, duration_per_image=5):
        """Combine the scene images and the narration into an MP4 file.

        Args:
            image_paths: Paths of the images to show, in order.
            audio_path: Path of the narration audio file.
            duration_per_image: Seconds each image is displayed.

        Returns:
            Path of the written ``output_video.mp4``.

        Raises:
            ValueError: If *image_paths* is empty.
        """
        if not image_paths:
            raise ValueError("At least one scene image is required to build a video.")

        clips = [
            ImageClip(str(image_path)).set_duration(duration_per_image)
            for image_path in image_paths
        ]
        output_path = self.temp_dir / "output_video.mp4"

        audio = AudioFileClip(str(audio_path))
        try:
            # Adjust video duration to match audio
            video = concatenate_videoclips(clips).set_duration(audio.duration)
            final_video = video.set_audio(audio)
            final_video.write_videofile(str(output_path), fps=24)
        finally:
            # Release the audio reader even if encoding fails.
            audio.close()

        return output_path


def main():
    """Render the Streamlit page and run the generation pipeline on demand."""
    st.set_page_config(page_title="AI Video Generator", layout="wide")
    st.title("🎬 AI Text-to-Video Generator")

    # Initialize session state: the models are loaded once per session.
    if "video_generator" not in st.session_state:
        st.session_state.video_generator = VideoGenerator()
    generator = st.session_state.video_generator

    # Input section
    st.header("Enter Your Topic")
    text_input = st.text_area(
        "What would you like to create a video about?",
        height=100,
        placeholder="Example: Explain the process of photosynthesis in plants...",
    )

    # Generation settings
    st.header("Video Settings")
    col1, col2 = st.columns(2)
    with col1:
        video_length = st.slider("Approximate video length (seconds)", 30, 300, 60)
    with col2:
        # NOTE: the selected style is collected but not used by the pipeline.
        style = st.selectbox(
            "Video style",
            ["Educational", "Documentary", "Engaging", "Professional"],
        )

    # Generate button
    if not st.button("🎥 Generate Video"):
        return

    topic = text_input.strip() if text_input else ""
    if not topic:
        st.warning("Please enter some text to generate a video!")
        return

    with st.spinner("🤖 Generating your video..."):
        # Top-level boundary: any pipeline failure is reported in the UI.
        try:
            # Progress bar
            progress_bar = st.progress(0)
            progress_text = st.empty()

            # Generate script
            progress_text.text("Generating script...")
            script = generator.generate_script(topic)
            progress_bar.progress(25)

            # Extract scene descriptions
            progress_text.text("Processing scenes...")
            scenes = _extract_scenes(script)
            if not scenes:
                # Avoids a division by zero below and an empty narration.
                raise ValueError(
                    "The model returned an empty script; please try another topic."
                )
            progress_bar.progress(40)

            # Generate images
            progress_text.text("Creating visuals...")
            image_paths = generator.generate_scene_images(scenes)
            progress_bar.progress(60)

            # Generate voiceover
            progress_text.text("Generating voiceover...")
            audio_path = generator.generate_voiceover(script)
            progress_bar.progress(80)

            # Create video
            progress_text.text("Composing final video...")
            video_path = generator.create_video(
                image_paths,
                audio_path,
                duration_per_image=video_length / len(scenes),
            )
            progress_bar.progress(100)
            progress_text.text("Video generation complete!")

            # Display results
            st.header("Generated Content")

            # Show script
            with st.expander("📝 Generated Script"):
                st.write(script)

            # Show video; read_bytes() opens and closes the file itself.
            st.header("🎥 Your Video")
            video_bytes = Path(video_path).read_bytes()
            st.video(video_bytes)

            # Download button
            st.download_button(
                label="Download Video",
                data=video_bytes,
                file_name="generated_video.mp4",
                mime="video/mp4",
            )
        except Exception as e:
            st.error(f"An error occurred: {e}")


if __name__ == "__main__":
    main()