# DeepDream-MLX / dream_video.py
# Uploaded by NickMystic via huggingface_hub (commit 2dd52ce, verified)
#!/usr/bin/env python3
import argparse
import os
import time
import numpy as np
import mlx.core as mx
import scipy.ndimage as nd
from PIL import Image
from dream import deepdream, load_image, deprocess, get_weights_path
from mlx_googlenet import GoogLeNet
from mlx_resnet50 import ResNet50
from mlx_vgg16 import VGG16
from mlx_vgg19 import VGG19
from mlx_alexnet import AlexNet
def run_video_dream(args):
    """Generate DeepDream video frames with a progressive zoom.

    Each frame is produced by running ``deepdream`` on the previous
    frame after it has been zoomed in by ``args.zoom_factor`` and
    center-cropped back to the original resolution -- the classic
    "infinite zoom" effect. Frames are written as JPEGs into
    ``args.output_dir``.

    Args:
        args: Parsed CLI namespace (see the ``__main__`` block) carrying
            the input path, output directory, frame count, zoom factor,
            model selection, and the DeepDream hyper-parameters that are
            forwarded verbatim to ``deepdream``.
    """
    print(f"--- DeepDream Video Generator ---")
    print(f"Model: {args.model}")
    print(f"Zoom: {args.zoom_factor}")
    print(f"Frames: {args.frames}")

    # 1. Load model. Unknown model names fall back to GoogLeNet,
    # matching the original if/elif chain's behavior.
    model_table = {
        "vgg16": (VGG16, ["relu4_3"]),
        "vgg19": (VGG19, ["relu4_4"]),
        "resnet50": (ResNet50, ["layer4_2"]),
        "alexnet": (AlexNet, ["relu5"]),
        "googlenet": (GoogLeNet, ["inception4c"]),
    }
    model_cls, default_layers = model_table.get(
        args.model, (GoogLeNet, ["inception4c"])
    )
    model = model_cls()

    weights = get_weights_path(args.model, args.weights)
    if not os.path.exists(weights):
        print(f"Error: Weights {weights} not found.")
        return
    print(f"Loading weights: {weights}")
    model.load_npz(weights)

    # 2. Prepare input image (optionally resized to args.width).
    img_np = load_image(args.input, args.width)

    # 3. Prepare output dir. exist_ok avoids the check-then-create race
    # of the original os.path.exists() guard.
    os.makedirs(args.output_dir, exist_ok=True)

    current_img = img_np.astype(np.float32)

    # 4. Dream / save / zoom loop.
    for i in range(args.frames):
        start_t = time.time()
        dreamed = deepdream(
            model,
            current_img,
            layers=args.layers or default_layers,
            steps=args.steps,
            lr=args.lr,
            num_octaves=args.octaves,
            scale=args.scale,
            jitter=args.jitter,
            smoothing=args.smoothing,
        )

        # Save frame (dreamed is an (H, W, 3) uint8 array).
        frame_name = f"frame_{i:04d}.jpg"
        out_path = os.path.join(args.output_dir, frame_name)
        Image.fromarray(dreamed).save(out_path)

        elapsed = time.time() - start_t
        print(f"Frame {i+1}/{args.frames}: {frame_name} ({elapsed:.2f}s)")

        # Transform for the next frame: scale up by zoom_factor, then
        # center-crop back to the original size. Skipped after the last
        # frame since there is nothing left to feed.
        if i < args.frames - 1:
            # Work in float to avoid uint8 precision loss during zoom.
            next_input = dreamed.astype(np.float32)

            # order=1 is bilinear -- fast and sufficient here. Zoom H
            # and W only; keep the channel axis untouched (factor 1).
            zf = args.zoom_factor
            next_input = nd.zoom(next_input, (zf, zf, 1), order=1)

            h_new, w_new, _ = next_input.shape
            h_orig, w_orig, _ = img_np.shape
            # Clamp to 0 so a zoom_factor < 1 cannot yield negative
            # slice starts (zoom-out is not supported; the frame would
            # just come out smaller than the original).
            start_h = max(0, (h_new - h_orig) // 2)
            start_w = max(0, (w_new - w_orig) // 2)
            current_img = next_input[start_h:start_h + h_orig,
                                     start_w:start_w + w_orig, :]

    print(f"\nDone! Frames saved to {args.output_dir}/\n")
    print(f"To create video (requires ffmpeg):")
    print(f"ffmpeg -framerate 15 -i {args.output_dir}/frame_%04d.jpg -c:v libx264 -pix_fmt yuv420p video.mp4")
def build_parser():
    """Build the CLI argument parser.

    Extracted from the ``__main__`` guard so the CLI surface can be
    tested without running the dream loop.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", required=True)
    parser.add_argument("--output_dir", default="frames")
    parser.add_argument("--frames", type=int, default=30)
    parser.add_argument("--zoom_factor", type=float, default=1.05)
    # Shared dream args
    parser.add_argument("--width", type=int, default=None)
    parser.add_argument("--model", default="googlenet")
    parser.add_argument("--weights", default=None)
    # BUG FIX: nargs was "+ " (trailing space) -- an invalid nargs value
    # that makes argparse blow up as soon as --layers is actually used.
    parser.add_argument("--layers", nargs="+")
    parser.add_argument("--steps", type=int, default=5)  # Fewer steps for video usually smoother
    parser.add_argument("--lr", type=float, default=0.05)
    parser.add_argument("--octaves", type=int, default=2)  # Fewer octaves for speed
    parser.add_argument("--scale", type=float, default=1.4)
    parser.add_argument("--jitter", type=int, default=32)
    parser.add_argument("--smoothing", type=float, default=0.5)
    return parser


if __name__ == "__main__":
    run_video_dream(build_parser().parse_args())