# Hugging Face Space: shaheerawan3/VisionCraft-AI
# (Space status when captured: Sleeping; file size: 11,006 bytes)

import streamlit as st
from pathlib import Path
import torch
from transformers import pipeline
from PIL import Image, ImageDraw, ImageFont
import tempfile
import os
from moviepy.editor import *
import numpy as np
from gtts import gTTS
import textwrap
from concurrent.futures import ThreadPoolExecutor
import io
import unicodedata
import re

class FastVideoGenerator:
    """Build a narrated, single-slide style video from a generated script.

    The pipeline is: generate a script with ``distilgpt2``, render frames
    with Pillow (script text plus a progress bar), synthesise narration with
    gTTS, and mux both with MoviePy. All intermediate and output files are
    written to a per-instance temporary directory (``self.temp_dir``).
    """

    # Typographic punctuation -> ASCII. Applied BEFORE the ASCII encode step
    # in clean_text(); afterwards these characters would already be dropped.
    _PUNCTUATION_TABLE = str.maketrans({
        '\u2013': '-',    # en dash
        '\u2014': '-',    # em dash
        '\u2018': "'",    # left single curly quote
        '\u2019': "'",    # right single curly quote
        '\u201c': '"',    # left double curly quote
        '\u201d': '"',    # right double curly quote
        '\u2026': '...',  # ellipsis
    })

    # Fonts tried in order; the second is a common fallback on Linux hosts
    # where Arial is not installed.
    _FONT_CANDIDATES = ("arial.ttf", "DejaVuSans.ttf")

    def __init__(self):
        """Load the text model and font, and create the working directory."""
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Initialize text generation with efficient model (shared, cached)
        self.text_generator = self._load_text_generator(
            0 if self.device == "cuda" else -1
        )

        # Create temp directory
        self.temp_dir = Path(tempfile.mkdtemp())

        # Theme colors with opacity for better text visibility
        self.themes = {
            'Professional': {
                'bg': (245, 245, 245),
                'text': (33, 33, 33),
                'accent': (0, 102, 204),
                'overlay': (255, 255, 255, 180)
            },
            'Creative': {
                'bg': (255, 240, 245),
                'text': (51, 51, 51),
                'accent': (255, 64, 129),
                'overlay': (255, 255, 255, 180)
            },
            'Educational': {
                'bg': (240, 249, 255),
                'text': (25, 25, 25),
                'accent': (0, 151, 167),
                'overlay': (255, 255, 255, 180)
            }
        }

        # Pre-load font
        self.font = self._load_font(40)

        # Matches runs of non-ASCII characters (kept for existing users of
        # this attribute; clean_text() does not need it).
        self.text_cleaner = re.compile(r'[^\x00-\x7F]+')

    @staticmethod
    @st.cache_resource
    def _load_text_generator(device_index):
        """Return the shared ``distilgpt2`` text-generation pipeline.

        Cached as a Streamlit resource so the model is loaded once per
        process instead of once per instance / per script request.
        """
        return pipeline(
            'text-generation',
            model='distilgpt2',
            device=device_index
        )

    @staticmethod
    def _load_font(size):
        """Return the first available TrueType font, else Pillow's default."""
        for font_name in FastVideoGenerator._FONT_CANDIDATES:
            try:
                return ImageFont.truetype(font_name, size)
            except OSError:
                # Font file not found / unreadable: try the next candidate.
                continue
        return ImageFont.load_default()

    @staticmethod
    def clean_text(text):
        """Return an ASCII-only version of ``text``.

        Typographic dashes, quotes and ellipses are mapped to ASCII
        equivalents first; the text is then NFKD-normalised so accented
        letters keep their base letter, and any remaining non-ASCII
        characters are dropped. Empty or ``None`` input yields ``''``.
        """
        if not text:
            return ''
        text = text.translate(FastVideoGenerator._PUNCTUATION_TABLE)
        text = unicodedata.normalize('NFKD', text)
        return text.encode('ascii', 'ignore').decode('ascii')

    @staticmethod
    @st.cache_data
    def generate_script_cached(prompt, style, length, temperature=0.7):
        """Generate (and cache) a cleaned script for ``prompt``.

        Args:
            prompt: Topic entered by the user.
            style: One of ``'Professional'``, ``'Creative'``, ``'Educational'``.
            length: Target video length in seconds; also scales ``max_length``.
            temperature: Sampling temperature passed to the model.

        Returns:
            The generated text with the instruction prefix removed and
            reduced to ASCII. Falls back to the cleaned topic when the model
            adds nothing after the instruction.

        Raises:
            KeyError: If ``style`` is not a known style.
        """
        style_prompts = {
            'Professional': "Write a clear, professional video script about:",
            'Creative': "Write an engaging, creative video script about:",
            'Educational': "Write an informative educational video script about:"
        }

        prompt = FastVideoGenerator.clean_text(prompt)
        instruction = (
            f"{style_prompts[style]} {prompt}. Make it {length} seconds long."
        )

        # A transformers pipeline is a plain callable, not a context manager,
        # so it is fetched from the shared cache and called directly.
        generator = FastVideoGenerator._load_text_generator(
            0 if torch.cuda.is_available() else -1
        )
        # NOTE(review): max_length counts the instruction tokens as well, so
        # a very long topic leaves little room for new text - confirm whether
        # max_new_tokens is the intended knob.
        output = generator(
            instruction,
            max_length=min(length * 3, 1000),
            num_return_sequences=1,
            do_sample=True,  # explicit, so that `temperature` takes effect
            temperature=temperature
        )

        script = output[0]['generated_text']
        if script.startswith(instruction):
            # Drop the whole instruction, not just the style prefix, so the
            # narration does not read "... Make it 60 seconds long."
            script = script[len(instruction):]
        else:
            script = script.replace(style_prompts[style], '')

        script = FastVideoGenerator.clean_text(script).strip()
        return script or prompt

    def create_frame_fast(self, text, theme, frame_number, total_frames, size=(1280, 720)):
        """Render one RGB frame: centred text on a panel plus a progress bar.

        Args:
            text: Text to display (cleaned to ASCII before rendering).
            theme: Mapping with ``'bg'``, ``'text'``, ``'accent'`` colours and
                an optional RGBA ``'overlay'`` colour for the text panel.
            frame_number: Zero-based index of the frame being rendered.
            total_frames: Total number of frames, used for the progress bar.
            size: ``(width, height)`` of the frame in pixels.

        Returns:
            A ``numpy`` array of shape ``(height, width, 3)`` and dtype uint8.
        """
        # Clean text before rendering
        text = self.clean_text(text)
        width, height = size

        img = Image.new('RGB', (width, height), tuple(theme['bg']))

        # Wrap text for better presentation
        wrapped_text = textwrap.fill(text, width=50)

        if wrapped_text:
            # Text and its translucent panel go on an RGBA layer that is then
            # composited onto the frame (previously the layer was discarded,
            # so no text ever appeared in the video).
            layer = Image.new('RGBA', (width, height), (0, 0, 0, 0))
            layer_draw = ImageDraw.Draw(layer)

            left, top, right, bottom = layer_draw.textbbox(
                (0, 0), wrapped_text, font=self.font
            )
            # Subtract the bbox offset so the inked area is what is centred.
            text_x = (width - (right - left)) // 2 - left
            text_y = (height - (bottom - top)) // 2 - top

            panel_colour = theme.get('overlay')
            if panel_colour is not None:
                pad = 20
                layer_draw.rectangle(
                    [
                        text_x + left - pad,
                        text_y + top - pad,
                        text_x + right + pad,
                        text_y + bottom + pad,
                    ],
                    fill=tuple(panel_colour),
                )
            layer_draw.text(
                (text_x, text_y), wrapped_text, fill=theme['text'], font=self.font
            )
            img = Image.alpha_composite(img.convert('RGBA'), layer).convert('RGB')

        # Add progress bar, scaled from the original 1280x720 layout
        progress = frame_number / max(total_frames, 1)
        progress = min(max(progress, 0.0), 1.0)
        bar_left = width * 140 // 1280
        bar_right = width * 1140 // 1280
        bar_top = height * 650 // 720
        bar_bottom = height * 660 // 720
        filled = int((bar_right - bar_left) * progress)

        draw = ImageDraw.Draw(img)
        draw.rectangle([bar_left, bar_top, bar_right, bar_bottom], fill=(200, 200, 200))
        if filled > 0:
            draw.rectangle(
                [bar_left, bar_top, bar_left + filled, bar_bottom],
                fill=theme['accent'],
            )

        return np.array(img)

    def generate_audio_chunks(self, script, chunk_size=1000):
        """Synthesise narration for ``script`` as a list of MP3 files.

        The script is cleaned to ASCII and split into chunks of at most
        ``chunk_size`` characters; each chunk is saved under ``self.temp_dir``.

        Returns:
            Paths of the MP3 files that were written, in narration order.
            Empty when the script contains no speakable text.

        Raises:
            Exception: Whatever gTTS raises if a chunk still fails after the
                retry with more aggressively cleaned text.
        """
        # Clean text before TTS
        script = self.clean_text(script)
        audio_paths = []

        for i, chunk in enumerate(textwrap.wrap(script, chunk_size)):
            chunk_path = self.temp_dir / f"audio_chunk_{i}.mp3"
            try:
                gTTS(text=chunk, lang='en', slow=False).save(str(chunk_path))
            except Exception:
                # If TTS fails, try once more with further cleaning
                cleaned_chunk = re.sub(r'[^a-zA-Z0-9\s.,!?-]', '', chunk).strip()
                if not cleaned_chunk:
                    # Nothing speakable left in this chunk; skip it rather
                    # than fail again on empty text.
                    continue
                gTTS(text=cleaned_chunk, lang='en', slow=False).save(str(chunk_path))
            audio_paths.append(chunk_path)

        return audio_paths

    def create_optimized_video(self, script, theme, duration=30):
        """Render the video for ``script`` and return the output file path.

        Frames are encoded to a silent temporary file while narration is
        synthesised on a worker thread; the two are then combined. If no
        narration could be produced, the silent video is returned as-is.

        Args:
            script: Text shown in the frames and spoken as narration.
            theme: Key into ``self.themes``.
            duration: Video length in seconds.

        Returns:
            ``Path`` to ``output_video.mp4`` inside ``self.temp_dir``.

        Raises:
            KeyError: If ``theme`` is not a known theme.
        """
        fps = 24
        total_frames = duration * fps
        theme_colours = self.themes[theme]

        # Create frames efficiently
        def make_frame(t):
            return self.create_frame_fast(
                script,
                theme_colours,
                int(t * fps),
                total_frames
            )

        output_path = self.temp_dir / "output_video.mp4"
        temp_video = self.temp_dir / "temp_video.mp4"

        silent_clip = VideoClip(make_frame, duration=duration)
        audio_clips = []
        final_audio = None
        video = None
        final_clip = None

        try:
            # Generate audio in background while processing video
            with ThreadPoolExecutor(max_workers=1) as executor:
                future_audio = executor.submit(self.generate_audio_chunks, script)

                # Write video without audio first
                silent_clip.write_videofile(
                    str(temp_video),
                    fps=fps,
                    codec='libx264',
                    audio=False,
                    preset='ultrafast'
                )
                audio_paths = future_audio.result()

            if not audio_paths:
                # No narration: the silent render already is the result.
                temp_video.replace(output_path)
                return output_path

            # Opened one by one so already-open clips are closed on failure.
            for path in audio_paths:
                audio_clips.append(AudioFileClip(str(path)))
            final_audio = concatenate_audioclips(audio_clips)

            # Combine video and audio
            video = VideoFileClip(str(temp_video))
            if final_audio.duration > video.duration:
                # Keep the narration from running past the end of the video.
                final_audio = final_audio.subclip(0, video.duration)
            final_clip = video.set_audio(final_audio)
            final_clip.write_videofile(str(output_path), fps=fps, codec='libx264')

            return output_path
        finally:
            # Cleanup: release readers/ffmpeg handles even on failure.
            for resource in (final_clip, video, final_audio, *audio_clips, silent_clip):
                if resource is not None:
                    resource.close()

def main():
    """Streamlit entry point.

    Configures the page, shows the sidebar settings and the topic box, and -
    when the generate button is pressed with a non-empty topic - creates the
    script and video and presents them in two tabs. Any exception raised
    during generation is reported on the page instead of propagating.
    """
    st.set_page_config(
        page_title="⚡ Fast Video Generator",
        layout="wide",
        initial_sidebar_state="expanded"
    )

    # Custom CSS for the button and the progress bar
    custom_css = """
        <style>
        .stButton>button {
            width: 100%;
            height: 3em;
            background-color: #FF4B4B;
            color: white;
        }
        .stProgress > div > div > div > div {
            background-color: #FF4B4B;
        }
        </style>
    """
    st.markdown(custom_css, unsafe_allow_html=True)

    # One generator per browser session
    if 'video_generator' not in st.session_state:
        st.session_state.video_generator = FastVideoGenerator()

    # --- Sidebar settings -------------------------------------------------
    sidebar = st.sidebar
    sidebar.title("🎮 Video Settings")

    theme = sidebar.selectbox(
        "Theme Style",
        ["Professional", "Creative", "Educational"],
        help="Choose the visual style of your video"
    )

    duration = sidebar.slider(
        "Duration (seconds)",
        min_value=30,
        max_value=300,
        value=60,
        step=30,
        help="Videos up to 5 minutes supported"
    )

    # The selected value is not read anywhere in this function.
    quality = sidebar.select_slider(
        "Generation Speed",
        options=["High Quality", "Balanced", "Fast"],
        value="Balanced",
        help="Faster generation may reduce video quality"
    )

    # --- Main area --------------------------------------------------------
    st.title("⚡ Fast Video Generator")
    st.markdown("Create longer videos with optimized performance!")

    text_input = st.text_area(
        "Video Topic",
        height=100,
        placeholder="Enter your topic here..."
    )

    # Nothing more to do until the user asks for a video.
    if not st.button("🎬 Generate Video", use_container_width=True):
        return

    if not text_input:
        st.warning("⚠️ Please enter a topic first!")
        return

    try:
        progress = st.progress(0)
        status_box = st.empty()

        # Step 1: script generation
        status_box.text("✍️ Creating script...")
        script = FastVideoGenerator.generate_script_cached(
            text_input, theme, duration
        )
        progress.progress(30)

        # Step 2: video creation
        status_box.text("🎨 Generating video...")
        generator = st.session_state.video_generator
        video_path = generator.create_optimized_video(script, theme, duration)
        progress.progress(100)
        status_box.text("✨ Video ready!")

        # Step 3: show the results
        video_tab, script_tab = st.tabs(["📽️ Video", "📝 Script"])
        video_file = str(video_path)

        with video_tab:
            st.video(video_file)
            with open(video_file, 'rb') as handle:
                st.download_button(
                    "⬇️ Download Video",
                    handle,
                    file_name="generated_video.mp4",
                    mime="video/mp4"
                )

        with script_tab:
            st.markdown("### Generated Script")
            st.write(script)

    except Exception as exc:
        st.error(f"💥 Error: {str(exc)}")
        st.error("Please try again with different settings")

# Run the Streamlit app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()