| import os |
| import numpy as np |
| import logging |
| from PIL import Image |
| from moviepy.editor import ImageSequenceClip, AudioFileClip |
| from moviepy.config import change_settings |
| import torch |
|
|
# Module-level setup: configure root logging at INFO and create a
# module-scoped logger for this pipeline.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Point moviepy at the ImageMagick binary (needed for text/clip effects).
# NOTE(review): path is Linux-specific — confirm it exists on the deployment host.
change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
|
|
def generate_video_pipeline(
    prompt: str,
    text_to_image_model,
    image_to_video_model,
    tts_model,
    output_dir: str = "outputs",
    duration: int = 5,
    fps: int = 24,
):
    """Generate a narrated MP4 video from a text prompt.

    Pipeline: prompt -> still image (text_to_image_model) -> frame sequence
    (image_to_video_model) -> optional TTS narration (tts_model) -> MP4 via
    moviepy.

    Args:
        prompt: Text used for image generation, video conditioning, and
            (when ``tts_model`` is given) narration.
        text_to_image_model: Callable pipeline; ``model(prompt).images[0]``
            must yield a saveable image (diffusers-style — TODO confirm).
        image_to_video_model: Image-to-video pipeline exposing ``.device``
            and returning an object with a ``.frames`` attribute.
        tts_model: Optional TTS model whose ``generate(text)`` result
            supports ``.save(path)``; pass ``None``/falsy to skip audio.
        output_dir: Directory for intermediate and final files (created if
            absent).
        duration: Target clip length in seconds.
        fps: Frames per second of the output video.

    Returns:
        Path to the written ``output.mp4``.

    Raises:
        Re-raises any exception from the models or moviepy after logging it.
        Intermediate files (frame.png, audio.wav) are always removed and
        moviepy readers closed in the ``finally`` block.
    """
    os.makedirs(output_dir, exist_ok=True)
    image_path = None
    audio_path = None
    clip = None
    audio_clip = None

    try:
        # 1. Text -> image.
        image_result = text_to_image_model(prompt)
        image = image_result.images[0]

        # FIX: the original called `image.float()` when the video model ran
        # on CPU, but `image` is a PIL image (from `.images[0]`), which has
        # no `.float()` — that branch always raised AttributeError. Dtype
        # handling belongs to the pipeline itself, so the call was removed.

        image_path = os.path.join(output_dir, "frame.png")
        image.save(image_path)

        # 2. Image -> video frames.
        video_frames = image_to_video_model(
            prompt,
            image=image,
            num_frames=int(duration * fps),
            num_inference_steps=25,
            generator=torch.Generator(
                device=image_to_video_model.device
            ),
        ).frames
        # Some diffusers pipelines return `.frames` as a batched nested list
        # ([[frame, ...]]); unwrap the single batch so ImageSequenceClip
        # receives a flat frame list.
        if video_frames and isinstance(video_frames[0], (list, tuple)):
            video_frames = video_frames[0]

        # 3. Assemble the video clip.
        video_path = os.path.join(output_dir, "output.mp4")
        clip = ImageSequenceClip(
            [np.array(frame) for frame in video_frames],
            fps=fps,
        )

        # 4. Optional narration track.
        if tts_model and prompt:
            audio = tts_model.generate(prompt)
            audio_path = os.path.join(output_dir, "audio.wav")
            audio.save(audio_path)
            # Keep a handle so the reader can be closed in `finally`
            # (otherwise the ffmpeg reader leaks and the wav file may stay
            # locked during cleanup on Windows).
            audio_clip = AudioFileClip(audio_path)
            clip = clip.set_audio(audio_clip)

        clip.write_videofile(video_path,
                             codec="libx264",
                             audio_codec="aac",
                             logger=None)
        return video_path

    except Exception as e:
        logger.error(f"Generation failed: {str(e)}")
        raise

    finally:
        # Close moviepy readers BEFORE deleting their backing files.
        for resource in (audio_clip, clip):
            if resource is not None:
                try:
                    resource.close()
                except Exception as e:
                    logger.warning(f"Cleanup failed: {str(e)}")
        # Remove intermediates whether or not generation succeeded.
        for path in [image_path, audio_path]:
            if path and os.path.exists(path):
                try:
                    os.remove(path)
                except Exception as e:
                    logger.warning(f"Cleanup failed: {str(e)}")