|
|
|
|
|
import argparse |
|
|
import os |
|
|
import time |
|
|
import numpy as np |
|
|
import mlx.core as mx |
|
|
import scipy.ndimage as nd |
|
|
from PIL import Image |
|
|
from dream import deepdream, load_image, deprocess, get_weights_path |
|
|
from mlx_googlenet import GoogLeNet |
|
|
from mlx_resnet50 import ResNet50 |
|
|
from mlx_vgg16 import VGG16 |
|
|
from mlx_vgg19 import VGG19 |
|
|
from mlx_alexnet import AlexNet |
|
|
|
|
|
def _zoom_center_crop(frame, zoom_factor, out_h, out_w):
    """Zoom *frame* spatially and return its central ``out_h`` x ``out_w`` window.

    The first two axes are scaled by *zoom_factor* with linear interpolation
    (``order=1``); the third axis is left at scale 1. The crop offsets are
    computed from the zoomed size, so the caller must pass a factor that keeps
    the zoomed frame at least as large as the requested window.
    """
    zoomed = nd.zoom(frame, (zoom_factor, zoom_factor, 1), order=1)
    zoomed_h, zoomed_w, _ = zoomed.shape
    top = (zoomed_h - out_h) // 2
    left = (zoomed_w - out_w) // 2
    return zoomed[top:top + out_h, left:left + out_w, :]


def run_video_dream(args):
    """Generate a sequence of DeepDream frames with a feedback zoom.

    Each frame is produced by ``deepdream`` from the previous frame's output,
    which is zoomed by ``args.zoom_factor`` and center-cropped back to the
    size of the loaded input image. Frames are written to ``args.output_dir``
    as ``frame_0000.jpg``, ``frame_0001.jpg``, ...

    Args:
        args: Namespace-like object providing ``model``, ``weights``,
            ``input``, ``width``, ``output_dir``, ``frames``, ``zoom_factor``,
            ``layers``, ``steps``, ``lr``, ``octaves``, ``scale``, ``jitter``
            and ``smoothing``.

    Returns:
        None. Problems with the zoom factor or a missing weights file are
        reported on stdout and the function returns early without rendering.
    """
    print("--- DeepDream Video Generator ---")
    print(f"Model: {args.model}")
    print(f"Zoom: {args.zoom_factor}")
    print(f"Frames: {args.frames}")

    # A factor below 1 makes the zoomed frame smaller than the target window:
    # the crop offsets go negative, the slice wraps around from the end of the
    # array, and the working image silently collapses to a few pixels.
    # ``not >=`` (rather than ``<``) also rejects NaN.
    if not args.zoom_factor >= 1.0:
        print(f"Error: zoom_factor must be >= 1.0, got {args.zoom_factor}.")
        return

    # Model name -> (constructor, default layers). Any unrecognised name falls
    # back to GoogLeNet, as before.
    known_models = {
        "vgg16": (VGG16, ["relu4_3"]),
        "vgg19": (VGG19, ["relu4_4"]),
        "resnet50": (ResNet50, ["layer4_2"]),
        "alexnet": (AlexNet, ["relu5"]),
    }
    model_cls, default_layers = known_models.get(
        args.model, (GoogLeNet, ["inception4c"])
    )
    model = model_cls()

    weights = get_weights_path(args.model, args.weights)
    if not os.path.exists(weights):
        print(f"Error: Weights {weights} not found.")
        return

    print(f"Loading weights: {weights}")
    model.load_npz(weights)

    img_np = load_image(args.input, args.width)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(args.output_dir, exist_ok=True)

    # Every cropped frame is cut back to the loaded image's height and width.
    target_h, target_w, _ = img_np.shape
    layers = args.layers or default_layers
    current_img = img_np.astype(np.float32)

    for i in range(args.frames):
        start_t = time.perf_counter()

        dreamed = deepdream(
            model,
            current_img,
            layers=layers,
            steps=args.steps,
            lr=args.lr,
            num_octaves=args.octaves,
            scale=args.scale,
            jitter=args.jitter,
            smoothing=args.smoothing,
        )

        frame_name = f"frame_{i:04d}.jpg"
        out_path = os.path.join(args.output_dir, frame_name)
        # NOTE(review): assumes deepdream returns an array PIL can wrap
        # directly (e.g. uint8 HxWx3) -- confirm against dream.deepdream.
        Image.fromarray(dreamed).save(out_path)

        elapsed = time.perf_counter() - start_t
        print(f"Frame {i+1}/{args.frames}: {frame_name} ({elapsed:.2f}s)")

        # The last frame needs no successor, so skip the zoom for it.
        if i < args.frames - 1:
            current_img = _zoom_center_crop(
                dreamed.astype(np.float32), args.zoom_factor, target_h, target_w
            )

    print(f"\nDone! Frames saved to {args.output_dir}/\n")
    print("To create video (requires ffmpeg):")
    print(f"ffmpeg -framerate 15 -i {args.output_dir}/frame_%04d.jpg -c:v libx264 -pix_fmt yuv420p video.mp4")
|
|
|
|
|
|
|
|
def build_parser():
    """Build the command-line parser for the DeepDream video script.

    Returns:
        argparse.ArgumentParser: parser whose namespace carries every
        attribute that ``run_video_dream`` reads.
    """
    parser = argparse.ArgumentParser()

    # Input / output and animation controls.
    parser.add_argument("--input", required=True,
                        help="Path of the starting image.")
    parser.add_argument("--output_dir", default="frames",
                        help="Directory that receives the rendered frames.")
    parser.add_argument("--frames", type=int, default=30,
                        help="Number of frames to render.")
    parser.add_argument("--zoom_factor", type=float, default=1.05,
                        help="Zoom applied to each frame before it is "
                             "center-cropped and fed back in.")

    # Image loading and model selection.
    parser.add_argument("--width", type=int, default=None,
                        help="Width forwarded to load_image().")
    parser.add_argument("--model", default="googlenet",
                        help="Model name: vgg16, vgg19, resnet50, alexnet; "
                             "anything else selects GoogLeNet.")
    parser.add_argument("--weights", default=None,
                        help="Weights override forwarded to get_weights_path().")
    # nargs must be exactly "+": any other string (such as "+ " with a
    # trailing space) is rejected by argparse when the argument is added.
    parser.add_argument("--layers", nargs="+",
                        help="One or more layer names; defaults depend on "
                             "the selected model.")

    # Values forwarded unchanged to deepdream().
    parser.add_argument("--steps", type=int, default=5,
                        help="Forwarded to deepdream() as steps.")
    parser.add_argument("--lr", type=float, default=0.05,
                        help="Forwarded to deepdream() as lr.")
    parser.add_argument("--octaves", type=int, default=2,
                        help="Forwarded to deepdream() as num_octaves.")
    parser.add_argument("--scale", type=float, default=1.4,
                        help="Forwarded to deepdream() as scale.")
    parser.add_argument("--jitter", type=int, default=32,
                        help="Forwarded to deepdream() as jitter.")
    parser.add_argument("--smoothing", type=float, default=0.5,
                        help="Forwarded to deepdream() as smoothing.")
    return parser


if __name__ == "__main__":
    args = build_parser().parse_args()
    run_video_dream(args)
|
|
|