# AI_VID / video_generator.py
# Hugging Face Space file (author: arif670, commit 16d1003 verified).
# NOTE: the lines above were page residue from a web copy-paste and were not
# valid Python; they are preserved here as comments.
import os
import numpy as np
import logging
from PIL import Image
from moviepy.editor import ImageSequenceClip, AudioFileClip
from moviepy.config import change_settings
import torch
# Module-level logging: INFO level, logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Point moviepy at the ImageMagick binary explicitly.
# NOTE(review): assumes ImageMagick is installed at /usr/bin/convert on the
# target host (typical for Debian-based containers) — confirm for deployment.
change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
def generate_video_pipeline(
    prompt: str,
    text_to_image_model,
    image_to_video_model,
    tts_model,
    output_dir: str = "outputs",
    duration: int = 5,
    fps: int = 24
):
    """Generate an mp4 video (with optional TTS narration) from a text prompt.

    Pipeline: text -> still image -> video frames -> (optional) audio -> mp4.

    Args:
        prompt: Text prompt driving image, video and speech generation.
        text_to_image_model: Callable pipeline; ``model(prompt).images[0]``
            must yield a PIL image.
        image_to_video_model: Pipeline with a ``.device`` attribute whose call
            result exposes ``.frames``.
        tts_model: Object with ``generate(prompt)`` returning an audio object
            supporting ``save(path)``; falsy to skip the audio track.
        output_dir: Directory for intermediate and final files.
        duration: Target clip length in seconds.
        fps: Frames per second of the output video.

    Returns:
        Path to the rendered ``output.mp4``.

    Raises:
        Re-raises any exception from the underlying models or moviepy after
        logging it; intermediate files and clips are cleaned up in all cases.
    """
    os.makedirs(output_dir, exist_ok=True)
    image_path = None
    audio_path = None
    clip = None
    audio_clip = None
    try:
        # Text -> image. Diffusers-style pipelines return a result object
        # whose .images list holds PIL images. (The previous code called
        # image.float() when the video pipeline ran on CPU, but PIL images
        # have no .float() method — that branch always raised AttributeError,
        # so the conversion is removed.)
        image_result = text_to_image_model(prompt)
        image = image_result.images[0]
        image_path = os.path.join(output_dir, "frame.png")
        image.save(image_path)

        # Image -> video frames, with the RNG seeded on the pipeline's device.
        video_frames = image_to_video_model(
            prompt,
            image=image,
            num_frames=int(duration * fps),
            num_inference_steps=25,
            generator=torch.Generator(
                device=image_to_video_model.device
            )
        ).frames
        # Newer diffusers pipelines return frames batched as [[frame, ...]];
        # unwrap the single batch so np.array() sees individual frames.
        if video_frames and isinstance(video_frames[0], (list, tuple)):
            video_frames = video_frames[0]

        video_path = os.path.join(output_dir, "output.mp4")
        clip = ImageSequenceClip(
            [np.array(frame) for frame in video_frames],
            fps=fps
        )

        # Optional narration track.
        if tts_model and prompt:
            audio = tts_model.generate(prompt)
            audio_path = os.path.join(output_dir, "audio.wav")
            audio.save(audio_path)
            audio_clip = AudioFileClip(audio_path)
            clip = clip.set_audio(audio_clip)

        clip.write_videofile(video_path,
                             codec="libx264",
                             audio_codec="aac",
                             logger=None)
        return video_path
    except Exception as e:
        logger.error(f"Generation failed: {str(e)}")
        raise
    finally:
        # Close moviepy clips first so their ffmpeg readers release the files
        # we are about to delete (the originals leaked these handles).
        for c in (audio_clip, clip):
            if c is not None:
                try:
                    c.close()
                except Exception as e:
                    logger.warning(f"Cleanup failed: {str(e)}")
        # Best-effort removal of intermediate artifacts.
        for path in (image_path, audio_path):
            if path and os.path.exists(path):
                try:
                    os.remove(path)
                except Exception as e:
                    logger.warning(f"Cleanup failed: {str(e)}")