# textvideo / streamlit_app.py
# Provenance (from the Hugging Face Space file viewer): uploaded by nandan133,
# commit 76b5f5c ("Update streamlit_app.py", verified).
# streamlit_app.py
import streamlit as st
import torch
from diffusers import CogVideoXPipeline
from diffusers.utils import export_to_video
from pathlib import Path
# ---- App-wide setup --------------------------------------------------------
# set_page_config must be the first Streamlit call executed in the script.
st.set_page_config(page_title="Text-to-Video Generator", page_icon="๐ŸŽฌ", layout="wide")
# Scratch directory for rendered MP4s. NOTE(review): presumably running on a
# Hugging Face Space where /tmp is ephemeral; generated files are never
# cleaned up within a session — confirm disk usage is acceptable.
TEMP_DIR = Path("/tmp/video_gen")
TEMP_DIR.mkdir(exist_ok=True, parents=True)
@st.cache_resource
def load_model(model_name):
    """Fetch the CogVideoX pipeline for *model_name* and cache it per session.

    Returns the configured pipeline on success, or None on failure (the
    error is shown in the UI rather than raised, so callers must check).
    """
    try:
        with st.spinner(f"Loading {model_name}... First load may take 10-15 minutes."):
            pipeline = CogVideoXPipeline.from_pretrained(
                model_name,
                torch_dtype=torch.float16,
                cache_dir="/tmp/huggingface_cache",
            )
            if torch.cuda.is_available():
                # Memory savers on GPU hosts: park idle submodules on the CPU
                # and decode the VAE in slices/tiles so long clips fit in VRAM.
                pipeline.enable_model_cpu_offload()
                pipeline.enable_vae_slicing()
                pipeline.enable_vae_tiling()
            return pipeline
    except Exception as exc:
        st.error(f"Error loading model: {str(exc)}")
        return None
# ---- Page header -----------------------------------------------------------
st.title("๐ŸŽฌ AI Video Generator")
st.markdown("Generate videos from text descriptions")
# ---- Sidebar: model choice + generation settings ---------------------------
with st.sidebar:
    st.header("๐Ÿค– Model Selection")
    # Display label -> Hugging Face repo id.
    # NOTE(review): the I2V checkpoint is an image-to-video model; loading it
    # through the text-only CogVideoXPipeline in load_model() will likely
    # fail or misbehave — confirm, or gate it behind an image uploader.
    model_options = {
        "CogVideoX-5B (Recommended)": "THUDM/CogVideoX-5b",
        "CogVideoX-2B (Faster)": "THUDM/CogVideoX-2b",
        "CogVideoX-5B-I2V (Image to Video)": "THUDM/CogVideoX-5b-I2V",
    }
    selected_model_name = st.selectbox(
        "Choose Model",
        options=list(model_options.keys()),
        index=0
    )
    selected_model = model_options[selected_model_name]
    st.divider()
    st.header("โš™๏ธ Generation Settings")
    # Slider args: (label, min, max, default[, step]).
    num_frames = st.slider("Frames", 16, 49, 49, help="More frames = longer video")
    num_inference_steps = st.slider("Quality Steps", 20, 100, 50, help="More steps = better quality")
    guidance_scale = st.slider("Guidance Scale", 1.0, 20.0, 6.0, 0.5)
    # -1 is the "no fixed seed" sentinel checked later in the generate flow.
    seed = st.number_input("Seed (-1 = random)", value=-1, step=1)
    fps = st.slider("FPS", 4, 16, 8)
# ---- Environment check: a CUDA GPU is mandatory ----------------------------
gpu_col, ver_col = st.columns(2)
with gpu_col:
    # Guard clause: without CUDA the pipeline cannot run, so halt the script
    # here (st.stop() ends this rerun; nothing below executes).
    if not torch.cuda.is_available():
        st.error("โŒ No GPU found! This requires CUDA GPU.")
        st.stop()
    st.success(f"โœ… GPU: {torch.cuda.get_device_name(0)}")
    # total_memory is in bytes; convert to GiB for display.
    vram = torch.cuda.get_device_properties(0).total_memory / 1024**3
    st.info(f"๐Ÿ’พ VRAM: {vram:.1f} GB")
    if vram < 16:
        st.warning("โš ๏ธ Low VRAM. Use CogVideoX-2B or reduce frames.")
with ver_col:
    st.info(f"๐Ÿ”ฅ PyTorch: {torch.__version__}")
    st.info(f"๐ŸŽฎ CUDA: {torch.version.cuda if torch.cuda.is_available() else 'N/A'}")
# ---- Prompt input ----------------------------------------------------------
# The text area is backed by session_state key "prompt_text" so the example
# buttons below can actually change the displayed text. The previous pattern
# (overwriting the local `prompt` variable after the widget had rendered)
# used the example for generation but left the stale default text visible in
# the widget, and only for a single rerun.
st.subheader("๐Ÿ“ Your Video Description")
if "prompt_text" not in st.session_state:
    st.session_state.prompt_text = (
        "A cat walking through a beautiful garden, photorealistic, 4k, cinematic"
    )
# Apply an example picked on the previous run BEFORE the widget is
# instantiated — Streamlit forbids writing a widget's key afterwards.
if "selected_prompt" in st.session_state:
    st.session_state.prompt_text = st.session_state.pop("selected_prompt")
prompt = st.text_area(
    "Describe your video",
    key="prompt_text",
    height=120,
    help="Be descriptive for best results"
)
# ---- Example prompts -------------------------------------------------------
with st.expander("๐Ÿ’ก Example Prompts"):
    examples = [
        "A cat walking on the grass, realistic style, high quality",
        "A panda playing guitar in a bamboo forest, cinematic lighting",
        "Waves crashing on a beach at sunset, aerial view, 4k",
        "A futuristic car driving through a neon city at night",
        "Flowers blooming in time-lapse, macro photography, vibrant colors",
    ]
    cols = st.columns(2)
    for i, example in enumerate(examples):
        with cols[i % 2]:
            # Stash the choice and rerun; it is written into the text area at
            # the top of the next run.
            if st.button(f"๐Ÿ“‹ {example[:30]}...", key=f"ex_{i}", use_container_width=True):
                st.session_state.selected_prompt = example
                st.rerun()
# ---- Generation flow -------------------------------------------------------
if st.button("๐ŸŽฌ Generate Video", type="primary", use_container_width=True):
    # Empty/whitespace-only prompts abort this rerun immediately.
    if not prompt.strip():
        st.error("Please enter a prompt!")
        st.stop()
    # Load model (cached across reruns by st.cache_resource; returns None on
    # failure, which load_model already reported to the UI).
    pipe = load_model(selected_model)
    if pipe is None:
        st.error("Failed to load model!")
        st.stop()
    # Progress widgets. Progress is coarse: the pipeline call reports no
    # incremental updates here, so the bar jumps 20 -> 80 around it.
    progress_bar = st.progress(0)
    status = st.empty()
    try:
        status.info("๐ŸŽจ Preparing generation...")
        progress_bar.progress(10)
        # Fixed seed -> dedicated CUDA generator for reproducibility;
        # -1 means "let the pipeline pick" (generator=None).
        if seed != -1:
            generator = torch.Generator(device="cuda").manual_seed(int(seed))
        else:
            generator = None
        status.info(f"๐ŸŽฌ Generating {num_frames} frames... This may take 5-10 minutes...")
        progress_bar.progress(20)
        # Run the diffusion pipeline; inference_mode disables autograd
        # bookkeeping. .frames[0] is the first (only) video in the batch.
        with torch.inference_mode():
            video_frames = pipe(
                prompt=prompt,
                num_frames=num_frames,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                generator=generator,
            ).frames[0]
        progress_bar.progress(80)
        status.info("๐Ÿ’พ Encoding video...")
        # Encode frames to an MP4 in the scratch dir.
        # NOTE(review): hash() is salted per process (PYTHONHASHSEED), so the
        # same prompt+seed yields different filenames across restarts — use
        # hashlib if stable names are wanted. Also, seed=-1 reuses one name
        # per prompt, silently overwriting earlier random-seed results.
        output_path = TEMP_DIR / f"video_{abs(hash(prompt + str(seed)))}.mp4"
        export_to_video(video_frames, str(output_path), fps=fps)
        progress_bar.progress(100)
        status.success("โœ… Generation complete!")
        # Display results: video on the left, run parameters on the right.
        st.balloons()
        col1, col2 = st.columns([3, 1])
        with col1:
            st.video(str(output_path))
        with col2:
            st.markdown("### ๐Ÿ“Š Info")
            st.write(f"**Frames:** {num_frames}")
            st.write(f"**Steps:** {num_inference_steps}")
            st.write(f"**Guidance:** {guidance_scale}")
            st.write(f"**Seed:** {seed if seed != -1 else 'Random'}")
            st.write(f"**FPS:** {fps}")
            st.write(f"**Duration:** ~{num_frames/fps:.1f}s")
        # Offer the encoded file for download.
        with open(output_path, "rb") as f:
            st.download_button(
                "๐Ÿ“ฅ Download MP4",
                f,
                file_name=f"generated_{seed if seed != -1 else 'random'}.mp4",
                mime="video/mp4",
                use_container_width=True
            )
        # Echo the exact model/prompt used for this result.
        with st.expander("๐Ÿ“ Generation Details"):
            st.write(f"**Model:** {selected_model_name}")
            st.write(f"**Prompt:** {prompt}")
    except torch.cuda.OutOfMemoryError:
        # Clear progress UI before showing OOM-specific remediation advice.
        progress_bar.empty()
        status.empty()
        st.error("โŒ Out of GPU memory! Try:\n- Reduce number of frames\n- Use CogVideoX-2B model\n- Lower inference steps")
    except Exception as e:
        # Catch-all boundary for the UI: show the message and the traceback.
        progress_bar.empty()
        status.empty()
        st.error(f"โŒ Error: {str(e)}")
        st.exception(e)
# Footer
st.markdown("---")
st.markdown(
"""
<div style='text-align: center; color: #666;'>
<p>Built with Streamlit โ€ข Powered by CogVideoX & Hugging Face ๐Ÿค—</p>
</div>
""",
unsafe_allow_html=True
)