# VisionCraft-AI / app.py
# shaheerawan3's picture
# Update app.py
# ee4b9c0 verified
# raw
# history blame
# 11 kB
import streamlit as st
from pathlib import Path
import torch
from transformers import pipeline
from PIL import Image, ImageDraw, ImageFont
import tempfile
import os
from moviepy.editor import *
import numpy as np
from gtts import gTTS
import textwrap
from concurrent.futures import ThreadPoolExecutor
import io
import unicodedata
import re
class FastVideoGenerator:
    """Generate a narrated, single-slide video from a text prompt.

    The pipeline is: generate a script with ``distilgpt2``, render frames
    with Pillow (script text plus a progress bar), synthesize narration with
    gTTS, and mux both with MoviePy. All intermediate and final files live in
    a per-instance temporary directory (``self.temp_dir``).
    """

    # Typographic punctuation mapped to ASCII look-alikes. These characters
    # have no NFKD decomposition, so without this table the ASCII encode step
    # in ``clean_text`` would silently delete them.
    _PUNCTUATION_TABLE = str.maketrans({
        '\u2013': '-',    # en dash
        '\u2014': '-',    # em dash
        '\u2018': "'",    # left single curly quote
        '\u2019': "'",    # right single curly quote
        '\u201c': '"',    # left double curly quote
        '\u201d': '"',    # right double curly quote
        '\u2026': '...',  # ellipsis
    })

    def __init__(self):
        """Load the text model, create the scratch directory, themes and font."""
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Initialize text generation with efficient model
        self.text_generator = pipeline(
            'text-generation',
            model='distilgpt2',
            device=0 if self.device == "cuda" else -1
        )

        # Create temp directory
        self.temp_dir = Path(tempfile.mkdtemp())

        # Theme colors; 'overlay' is an RGBA panel drawn behind the text
        # for better text visibility.
        self.themes = {
            'Professional': {
                'bg': (245, 245, 245),
                'text': (33, 33, 33),
                'accent': (0, 102, 204),
                'overlay': (255, 255, 255, 180)
            },
            'Creative': {
                'bg': (255, 240, 245),
                'text': (51, 51, 51),
                'accent': (255, 64, 129),
                'overlay': (255, 255, 255, 180)
            },
            'Educational': {
                'bg': (240, 249, 255),
                'text': (25, 25, 25),
                'accent': (0, 151, 167),
                'overlay': (255, 255, 255, 180)
            }
        }

        # Pre-load font; fall back to Pillow's built-in font when Arial is
        # not installed (truetype raises OSError if the file cannot be read).
        try:
            self.font = ImageFont.truetype("arial.ttf", 40)
        except OSError:
            self.font = ImageFont.load_default()

        # Matches runs of non-ASCII characters.
        self.text_cleaner = re.compile(r'[^\x00-\x7F]+')

    @staticmethod
    def clean_text(text):
        """Return *text* reduced to plain ASCII.

        Typographic dashes, quotes and ellipses are first mapped to ASCII
        equivalents; the text is then NFKD-normalized so accented letters
        decompose into base letter + combining mark, and finally every
        remaining non-ASCII character is dropped.
        """
        # Replace before stripping: once the ASCII encode has run there is
        # nothing left for the replacements to match.
        text = text.translate(FastVideoGenerator._PUNCTUATION_TABLE)
        text = unicodedata.normalize('NFKD', text)
        return text.encode('ascii', 'ignore').decode('ascii')

    @staticmethod
    @st.cache_data
    def generate_script_cached(prompt, style, length, temperature=0.7):
        """Generate (and cache) a cleaned video script.

        Args:
            prompt: Topic of the video.
            style: One of ``'Professional'``, ``'Creative'``, ``'Educational'``.
            length: Target duration in seconds; also bounds the token budget
                (``min(length * 3, 1000)``).
            temperature: Sampling temperature passed to the generator.

        Returns:
            The generated text with the style instruction removed, as ASCII.

        Raises:
            KeyError: If *style* is not a known style.
        """
        style_prompts = {
            'Professional': "Write a clear, professional video script about:",
            'Creative': "Write an engaging, creative video script about:",
            'Educational': "Write an informative educational video script about:"
        }
        instruction = style_prompts[style]
        prompt = FastVideoGenerator.clean_text(prompt)

        # A transformers pipeline is a plain callable, not a context manager,
        # so it must not be used in a ``with`` statement.
        generator = pipeline('text-generation', model='distilgpt2')
        output = generator(
            f"{instruction} {prompt}. Make it {length} seconds long.",
            max_length=min(length * 3, 1000),
            num_return_sequences=1,
            # Without sampling enabled the temperature argument is ignored.
            do_sample=True,
            temperature=temperature
        )

        script = output[0]['generated_text']
        script = script.replace(instruction, '').strip()
        return FastVideoGenerator.clean_text(script)

    def create_frame_fast(self, text, theme, frame_number, total_frames, size=(1280, 720)):
        """Render one video frame as an RGB ``numpy`` array.

        Args:
            text: Text to show, centered and wrapped at 50 characters.
            theme: Mapping with ``'bg'``, ``'text'``, ``'accent'`` colors and
                an optional RGBA ``'overlay'`` color for the text panel.
            frame_number: Index of this frame, used for the progress bar.
            total_frames: Total number of frames in the video.
            size: ``(width, height)`` of the frame in pixels.

        Returns:
            ``numpy.ndarray`` of shape ``(height, width, 3)``.
        """
        # Clean text before rendering
        text = self.clean_text(text)
        width, height = size

        canvas = Image.new('RGB', size, tuple(theme['bg'])).convert('RGBA')

        # Wrap text for better presentation and center its bounding box
        wrapped_text = textwrap.fill(text, width=50)
        left, top, right, bottom = ImageDraw.Draw(canvas).textbbox(
            (0, 0), wrapped_text, font=self.font
        )
        text_x = (width - (right - left)) // 2
        text_y = (height - (bottom - top)) // 2

        # Draw the text (and its translucent panel) on a separate layer so
        # the panel's alpha is blended with the background when composited.
        text_layer = Image.new('RGBA', size, (0, 0, 0, 0))
        layer_draw = ImageDraw.Draw(text_layer)
        overlay = theme.get('overlay')
        if wrapped_text and overlay is not None:
            padding = 20
            layer_draw.rectangle(
                [
                    text_x + left - padding,
                    text_y + top - padding,
                    text_x + right + padding,
                    text_y + bottom + padding,
                ],
                fill=tuple(overlay),
            )
        layer_draw.text((text_x, text_y), wrapped_text, fill=theme['text'], font=self.font)

        # The layer must be composited; otherwise the frame has no text.
        img = Image.alpha_composite(canvas, text_layer).convert('RGB')
        draw = ImageDraw.Draw(img)

        # Progress bar, laid out relative to a 1280x720 reference frame so
        # the default size keeps the original 140..1140 x 650..660 geometry.
        if total_frames > 0:
            progress = min(max(frame_number / total_frames, 0.0), 1.0)
        else:
            progress = 1.0
        bar_left = 140 * width // 1280
        bar_right = 1140 * width // 1280
        bar_top = 650 * height // 720
        bar_bottom = 660 * height // 720
        bar_width = int((bar_right - bar_left) * progress)
        draw.rectangle([bar_left, bar_top, bar_right, bar_bottom], fill=(200, 200, 200))
        draw.rectangle([bar_left, bar_top, bar_left + bar_width, bar_bottom], fill=theme['accent'])

        return np.array(img)

    def generate_audio_chunks(self, script, chunk_size=1000):
        """Synthesize narration for *script* as a list of MP3 files.

        The script is cleaned, split into chunks of at most *chunk_size*
        characters, and each chunk is saved to ``self.temp_dir``.

        Returns:
            Paths of the audio files that were written, in script order.
            Empty when the script contains no speakable text.
        """
        # Clean text before TTS
        script = self.clean_text(script)

        audio_paths = []
        for i, chunk in enumerate(textwrap.wrap(script, chunk_size)):
            chunk_path = self.temp_dir / f"audio_chunk_{i}.mp3"
            try:
                gTTS(text=chunk, lang='en', slow=False).save(str(chunk_path))
            except Exception:
                # If TTS fails, try once more with further cleaning; a second
                # failure propagates to the caller.
                cleaned_chunk = re.sub(r'[^a-zA-Z0-9\s.,!?-]', '', chunk).strip()
                if not cleaned_chunk:
                    # Nothing speakable left in this chunk.
                    continue
                gTTS(text=cleaned_chunk, lang='en', slow=False).save(str(chunk_path))
            audio_paths.append(chunk_path)
        return audio_paths

    def create_optimized_video(self, script, theme, duration=30):
        """Render the video for *script* and return the path of the MP4.

        Frames are encoded to a silent temporary file while narration is
        synthesized on a worker thread; the two are then muxed together.

        Args:
            script: Text shown in the frames and read aloud.
            theme: Key into ``self.themes``.
            duration: Video length in seconds.

        Returns:
            ``pathlib.Path`` of ``output_video.mp4`` inside ``self.temp_dir``.
        """
        fps = 24
        total_frames = duration * fps
        theme_colors = self.themes[theme]

        def make_frame(t):
            return self.create_frame_fast(script, theme_colors, int(t * fps), total_frames)

        output_path = self.temp_dir / "output_video.mp4"
        temp_video = self.temp_dir / "temp_video.mp4"

        # Every clip opened below is tracked here so it is released even
        # when encoding or TTS fails part-way through.
        open_clips = [VideoClip(make_frame, duration=duration)]
        try:
            # Generate audio in background while processing video
            with ThreadPoolExecutor(max_workers=1) as executor:
                future_audio = executor.submit(self.generate_audio_chunks, script)
                # Write video without audio first
                open_clips[0].write_videofile(
                    str(temp_video),
                    fps=fps,
                    codec='libx264',
                    audio=False,
                    preset='ultrafast'
                )
                audio_paths = future_audio.result()

            if not audio_paths:
                # No narration: the silent render already is the result.
                temp_video.replace(output_path)
                return output_path

            audio_clips = []
            for path in audio_paths:
                audio_clip = AudioFileClip(str(path))
                open_clips.append(audio_clip)
                audio_clips.append(audio_clip)
            final_audio = concatenate_audioclips(audio_clips)
            open_clips.append(final_audio)

            # Combine video and audio
            video = VideoFileClip(str(temp_video))
            open_clips.append(video)
            final_clip = video.set_audio(final_audio)
            open_clips.append(final_clip)
            final_clip.write_videofile(str(output_path), fps=fps, codec='libx264')
        finally:
            # Cleanup, most recently opened first
            for opened in reversed(open_clips):
                opened.close()

        return output_path
def main():
    """Run the Streamlit UI: collect settings, generate and show the video."""
    st.set_page_config(
        page_title="⚡ Fast Video Generator",
        layout="wide",
        initial_sidebar_state="expanded"
    )

    # Custom CSS
    st.markdown("""
        <style>
        .stButton>button {
            width: 100%;
            height: 3em;
            background-color: #FF4B4B;
            color: white;
        }
        .stProgress > div > div > div > div {
            background-color: #FF4B4B;
        }
        </style>
    """, unsafe_allow_html=True)

    # Keep one generator per session so the model is not reloaded on
    # every Streamlit rerun.
    if 'video_generator' not in st.session_state:
        st.session_state.video_generator = FastVideoGenerator()

    with st.sidebar:
        st.title("🎮 Video Settings")
        theme = st.selectbox(
            "Theme Style",
            ["Professional", "Creative", "Educational"],
            help="Choose the visual style of your video"
        )
        duration = st.slider(
            "Duration (seconds)",
            min_value=30,
            max_value=300,
            value=60,
            step=30,
            help="Videos up to 5 minutes supported"
        )
        # NOTE(review): this value is collected but not passed to the
        # generator anywhere in this function.
        quality = st.select_slider(
            "Generation Speed",
            options=["High Quality", "Balanced", "Fast"],
            value="Balanced",
            help="Faster generation may reduce video quality"
        )

    st.title("⚡ Fast Video Generator")
    st.markdown("Create longer videos with optimized performance!")

    text_input = st.text_area(
        "Video Topic",
        height=100,
        placeholder="Enter your topic here..."
    )

    if st.button("🎬 Generate Video", use_container_width=True):
        if text_input:
            try:
                progress_bar = st.progress(0)
                status = st.empty()

                # Script generation
                status.text("✍️ Creating script...")
                script = FastVideoGenerator.generate_script_cached(
                    text_input, theme, duration
                )
                progress_bar.progress(30)

                # Video creation
                status.text("🎨 Generating video...")
                video_path = st.session_state.video_generator.create_optimized_video(
                    script, theme, duration
                )
                progress_bar.progress(100)
                status.text("✨ Video ready!")

                # Display results
                tab1, tab2 = st.tabs(["📽️ Video", "📝 Script"])
                with tab1:
                    st.video(str(video_path))
                    with open(str(video_path), 'rb') as f:
                        st.download_button(
                            "⬇️ Download Video",
                            f,
                            file_name="generated_video.mp4",
                            mime="video/mp4"
                        )
                with tab2:
                    st.markdown("### Generated Script")
                    st.write(script)
            except Exception as e:
                # Top-level UI boundary: report any failure to the user
                # instead of crashing the app.
                st.error(f"💥 Error: {str(e)}")
                st.error("Please try again with different settings")
        else:
            st.warning("⚠️ Please enter a topic first!")
# Launch the UI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()