Upload folder using huggingface_hub
Browse files- .gitignore +12 -0
- README.md +168 -0
- config.json +30 -0
- dream.py +358 -0
- export_googlenet_npz.py +21 -0
- export_resnet50_npz.py +23 -0
- export_vgg16_npz.py +23 -0
- export_vgg19_npz.py +23 -0
- googlenet_mlx.npz +3 -0
- inference.py +76 -0
- mlx_googlenet.py +147 -0
- mlx_resnet50.py +153 -0
- mlx_vgg16.py +91 -0
- mlx_vgg19.py +104 -0
- requirements.txt +4 -0
- resnet50_mlx.npz +3 -0
- tf_inception_v1.py +79 -0
- vgg16_mlx.npz +3 -0
- vgg19_mlx.npz +3 -0
.gitignore
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
venv/
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.DS_Store
|
| 4 |
+
*.jpg
|
| 5 |
+
*.png
|
| 6 |
+
*.gif
|
| 7 |
+
!assets/
|
| 8 |
+
!input/
|
| 9 |
+
*.jpg
|
| 10 |
+
venv/
|
| 11 |
+
pics/
|
| 12 |
+
Agents.md
|
README.md
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
model_name: DeepDream-MLX
|
| 3 |
+
model_description: Native, hardware-accelerated DeepDream for Apple Silicon.
|
| 4 |
+
language: en
|
| 5 |
+
library_name: mlx
|
| 6 |
+
license: apache-2.0
|
| 7 |
+
tags:
|
| 8 |
+
- mlx
|
| 9 |
+
- computer-vision
|
| 10 |
+
- art
|
| 11 |
+
- generative
|
| 12 |
+
- deepdream
|
| 13 |
+
pipeline_tag: image-to-image
|
| 14 |
+
---
|
| 15 |
+
# DeepDream-MLX
|
| 16 |
+
|
| 17 |
+
<img src="assets/deepdream_header.jpg" alt="DeepDream Header" width="100%"/>
|
| 18 |
+
|
| 19 |
+
**Status:** Fast. Native.
|
| 20 |
+
**Vibe:** 2015 Hallucinations // 2025 Silicon.
|
| 21 |
+
|
| 22 |
+
## ⚡️ Instant Gratification
|
| 23 |
+
|
| 24 |
+
```bash
|
| 25 |
+
# 1. Install Dependencies
|
| 26 |
+
pip install mlx numpy pillow scipy
|
| 27 |
+
|
| 28 |
+
# 2. Dream (VGG16 Default)
|
| 29 |
+
python dream.py --input love.jpg
|
| 30 |
+
|
| 31 |
+
# 3. Dream (All Models)
|
| 32 |
+
python dream.py --input love.jpg --model all
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
## 🔮 The Lineage
|
| 36 |
+
|
| 37 |
+
VGG and GoogLeNet: Cousins from the 2012 Big Bang. One went **Deep**, the other went **Wide**. We ported them all.
|
| 38 |
+
|
| 39 |
+
```text
|
| 40 |
+
╔═════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╗
|
| 41 |
+
║ THE CONVOLUTIONAL ANCESTRY ║
|
| 42 |
+
╠═════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╣
|
| 43 |
+
║ ║
|
| 44 |
+
║ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ║
|
| 45 |
+
║ ┃ LeNet-5 (1998) ┃ (The Grandfather) ║
|
| 46 |
+
║ ┗━━━━━━━━━━━━┳━━━━━━━━━━━━━┛ ║
|
| 47 |
+
║ │ ║
|
| 48 |
+
║ ▼ ║
|
| 49 |
+
║ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ║
|
| 50 |
+
║ ┃ AlexNet (2012) ┃ (The Ignition) ║
|
| 51 |
+
║ ┗━━━━━━━━━━━━┳━━━━━━━━━━━━━┛ ║
|
| 52 |
+
║ │ ║
|
| 53 |
+
║ ╔══════════════════╩════════════════════════════════════════════════════════════════════════════════╗ ║
|
| 54 |
+
║ ║ ║ ║
|
| 55 |
+
║ ▼ ▼ ▼ ║
|
| 56 |
+
║ ║
|
| 57 |
+
║ ╔══════════════════════════════════╗ ╔══════════════════════════════════╗ ╔═════════════════════════════════╗ ║
|
| 58 |
+
║ ║ THE OXFORD BRANCH ║ ║ THE GOOGLE BRANCH ║ ║ THE RESIDUAL REVOLUTION ║ ║
|
| 59 |
+
║ ║ (Philosophy: "Deeper") ║ ║ (Philosophy: "Wider") ║ ║ (Philosophy: "Identity") ║ ║
|
| 60 |
+
║ ╚═════════════════╦════════════════╝ ╚═════════════════╦════════════════╝ ╚════════════════════╦════════════╝ ║
|
| 61 |
+
║ │ │ │ ║
|
| 62 |
+
║ ┌─────────┴─────────┐ │ │ ║
|
| 63 |
+
║ │ │ │ │ ║
|
| 64 |
+
║ ┏━━━━▼━━━━┓ ┏━━━━▼━━━━┓ ┏━━━━▼━━━━┓ ┏━━━━▼━━━━┓ ║
|
| 65 |
+
║ ┃ VGG16 ┃ ┃ VGG19 ┃ ┃Inception┃ ┃ ResNet ┃ ║
|
| 66 |
+
║ ┃ ┃ ┃ ┃ ┃ V1 ┃ ┃ 50 ┃ ║
|
| 67 |
+
║ ┗━━━━┳━━━━┛ ┗━━━━┳━━━━┛ ┗━━━━┳━━━━┛ ┗━━━━┳━━━━┛ ║
|
| 68 |
+
║ │ │ │ │ ║
|
| 69 |
+
║ (The Painter) (The Stylist) (The Hallucinator) (The Modernist) ║
|
| 70 |
+
║ │ │ │ │ ║
|
| 71 |
+
║ ▼ ▼ ▼ ▼ ║
|
| 72 |
+
║ vgg16_mlx.npz vgg19_mlx.npz googlenet_mlx.npz resnet50_mlx.npz ║
|
| 73 |
+
║ ║
|
| 74 |
+
╚═════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╝
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
## 🧠 The Models
|
| 78 |
+
|
| 79 |
+
* **VGG16:** General purpose image features.
|
| 80 |
+
* **GoogLeNet (InceptionV1):** The classic DeepDream model.
|
| 81 |
+
* **VGG19:** Deeper VGG features.
|
| 82 |
+
* **ResNet50:** Modern deep features.
|
| 83 |
+
|
| 84 |
+
## 🧪 Recipes
|
| 85 |
+
|
| 86 |
+
Copy-paste these to get the exact looks from the header.
|
| 87 |
+
|
| 88 |
+
### 1. Classic Inception Patterns (GoogLeNet)
|
| 89 |
+
*This setup targets various Inception layers for recognizable DeepDream shapes.*
|
| 90 |
+
|
| 91 |
+
```bash
|
| 92 |
+
python dream.py --input love.jpg \
|
| 93 |
+
--model googlenet \
|
| 94 |
+
--steps 22 \
|
| 95 |
+
--lr 0.061 \
|
| 96 |
+
--octaves 4 \
|
| 97 |
+
--scale 1.8 \
|
| 98 |
+
--jitter 26 \
|
| 99 |
+
--smoothing 0.08 \
|
| 100 |
+
--layers inception3a inception4e inception5b
|
| 101 |
+
```
|
| 102 |
+
|
| 103 |
+
### 2. Rich Textures (VGG16)
|
| 104 |
+
*A VGG16 run for detailed, painterly results.*
|
| 105 |
+
|
| 106 |
+
```bash
|
| 107 |
+
python dream.py --input love.jpg \
|
| 108 |
+
--model vgg16 \
|
| 109 |
+
--steps 24 \
|
| 110 |
+
--lr 0.07 \
|
| 111 |
+
--octaves 4 \
|
| 112 |
+
--scale 1.8 \
|
| 113 |
+
--jitter 36 \
|
| 114 |
+
--smoothing 0.19 \
|
| 115 |
+
--layers relu4_2
|
| 116 |
+
```
|
| 117 |
+
|
| 118 |
+
### 3. Layered Patterns (VGG19)
|
| 119 |
+
*A VGG19 run for complex, stylized outputs.*
|
| 120 |
+
|
| 121 |
+
```bash
|
| 122 |
+
python dream.py --input love.jpg \
|
| 123 |
+
--model vgg19 \
|
| 124 |
+
--steps 14 \
|
| 125 |
+
--lr 0.045 \
|
| 126 |
+
--octaves 2 \
|
| 127 |
+
--scale 1.5 \
|
| 128 |
+
--jitter 27 \
|
| 129 |
+
--smoothing 0.41 \
|
| 130 |
+
--layers relu5_2
|
| 131 |
+
```
|
| 132 |
+
|
| 133 |
+
### 4. Different VGG16 Vision
|
| 134 |
+
*Another VGG16 setting, exploring alternative features.*
|
| 135 |
+
|
| 136 |
+
```bash
|
| 137 |
+
python dream.py --input love.jpg \
|
| 138 |
+
--model vgg16 \
|
| 139 |
+
--steps 24 \
|
| 140 |
+
--lr 0.069 \
|
| 141 |
+
--octaves 4 \
|
| 142 |
+
--scale 1.8 \
|
| 143 |
+
--jitter 10 \
|
| 144 |
+
--smoothing 0.41 \
|
| 145 |
+
--layers relu5_1
|
| 146 |
+
```
|
| 147 |
+
|
| 148 |
+
### 5. Sharp Abstract Forms (ResNet50)
|
| 149 |
+
*Modern features from ResNet50 for distinct, edgy results.*
|
| 150 |
+
|
| 151 |
+
```bash
|
| 152 |
+
python dream.py --input love.jpg \
|
| 153 |
+
--model resnet50 \
|
| 154 |
+
--steps 22 \
|
| 155 |
+
--lr 0.13 \
|
| 156 |
+
--octaves 4 \
|
| 157 |
+
--scale 2 \
|
| 158 |
+
--jitter 83 \
|
| 159 |
+
--smoothing 0.47 \
|
| 160 |
+
--layers layer3_2 layer3_5
|
| 161 |
+
```
|
| 162 |
+
|
| 163 |
+
## 💾 Weight Conversion
|
| 164 |
+
|
| 165 |
+
We took 10-year-old model weights from PyTorch/Torchvision (often based on original Caffe implementations) and converted them directly into optimized MLX `.npz` arrays. Our custom `export_*.py` scripts handle this. This brings these classic architectures to **Apple Silicon**, clean and efficient.
|
| 166 |
+
|
| 167 |
+
---
|
| 168 |
+
*NickMystic*
|
config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "DeepDream-MLX-Models",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"GoogleNet",
|
| 5 |
+
"VGG16",
|
| 6 |
+
"VGG19"
|
| 7 |
+
],
|
| 8 |
+
"model_type": "feature-extractor",
|
| 9 |
+
"framework": "mlx",
|
| 10 |
+
"task_specific_params": {
|
| 11 |
+
"deepdream": {
|
| 12 |
+
"description": "Models converted for DeepDream applications on Apple Silicon using MLX.",
|
| 13 |
+
"input_image_size": [224, 224],
|
| 14 |
+
"num_channels": 3,
|
| 15 |
+
"image_channel_order": "HWC",
|
| 16 |
+
"image_mean": [0.485, 0.456, 0.406],
|
| 17 |
+
"image_std": [0.229, 0.224, 0.225]
|
| 18 |
+
}
|
| 19 |
+
},
|
| 20 |
+
"license": "other",
|
| 21 |
+
"tags": [
|
| 22 |
+
"deepdream",
|
| 23 |
+
"mlx",
|
| 24 |
+
"computer-vision",
|
| 25 |
+
"googlenet",
|
| 26 |
+
"vgg16",
|
| 27 |
+
"vgg19",
|
| 28 |
+
"feature-extraction"
|
| 29 |
+
]
|
| 30 |
+
}
|
dream.py
ADDED
|
@@ -0,0 +1,358 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import os
|
| 3 |
+
import time
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
import mlx.core as mx
|
| 7 |
+
import mlx.nn as nn
|
| 8 |
+
import numpy as np
|
| 9 |
+
import scipy.ndimage as nd
|
| 10 |
+
from mlx_resnet50 import ResNet50
|
| 11 |
+
from PIL import Image
|
| 12 |
+
|
| 13 |
+
from mlx_googlenet import GoogLeNet
|
| 14 |
+
from mlx_vgg16 import VGG16
|
| 15 |
+
from mlx_vgg19 import VGG19
|
| 16 |
+
|
| 17 |
+
IMAGENET_MEAN = mx.array([0.485, 0.456, 0.406])
|
| 18 |
+
IMAGENET_STD = mx.array([0.229, 0.224, 0.225])
|
| 19 |
+
LOWER_IMAGE_BOUND = (-IMAGENET_MEAN / IMAGENET_STD).reshape(1, 1, 1, 3)
|
| 20 |
+
UPPER_IMAGE_BOUND = ((1.0 - IMAGENET_MEAN) / IMAGENET_STD).reshape(1, 1, 1, 3)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def load_image(path, target_width=None):
    """Read an image as RGB; optionally scale it to *target_width* keeping aspect ratio.

    Returns an HWC uint8 numpy array.
    """
    img = Image.open(path).convert("RGB")
    if target_width:
        width, height = img.size
        # Preserve the aspect ratio when rescaling to the requested width.
        new_height = int(height * (target_width / width))
        img = img.resize((target_width, new_height), Image.LANCZOS)
    return np.array(img)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def preprocess(img_np):
    """Convert an HWC uint8 image to a normalized NHWC float32 MLX tensor.

    Scales pixels to [0, 1], applies ImageNet mean/std normalization, and
    prepends a batch dimension.
    """
    scaled = mx.array(img_np, dtype=mx.float32) / 255.0
    normalized = (scaled - IMAGENET_MEAN) / IMAGENET_STD
    return normalized[None, ...]  # NHWC
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def deprocess(x):
    """Invert `preprocess`: drop the batch dim, undo ImageNet normalization,
    clip to [0, 1], and return an HWC uint8 numpy image."""
    pixels = x[0] * IMAGENET_STD + IMAGENET_MEAN
    pixels = mx.clip(pixels, 0.0, 1.0)
    pixels = (pixels * 255.0).astype(mx.uint8)
    return np.array(pixels)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def resize_bilinear(x, new_h, new_w):
    """Bilinearly resize an NHWC tensor to (new_h, new_w).

    Uses a single `scipy.ndimage.zoom` call over the whole 4-D array: a zoom
    factor of 1.0 on the batch and channel axes leaves those axes untouched,
    so this is equivalent to the previous per-(batch, channel) Python loop
    but avoids N*C separate MLX<->NumPy round-trips and MLX slice assignment.

    Args:
        x: NHWC MLX array.
        new_h, new_w: target spatial size.

    Returns:
        NHWC MLX array of shape (N, new_h, new_w, C).
    """
    _, h, w, _ = x.shape
    zoomed = nd.zoom(np.array(x), zoom=(1.0, new_h / h, new_w / w, 1.0), order=1)
    return mx.array(zoomed)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def gaussian_kernel(sigma, truncate=4.0, fixed_radius=None):
    """Build a normalized 1D Gaussian kernel.

    The kernel radius is *fixed_radius* when given (useful for keeping a
    static shape), otherwise derived from *truncate* * *sigma*.
    """
    if fixed_radius is None:
        radius = int(truncate * sigma + 0.5)
    else:
        radius = fixed_radius

    offsets = mx.arange(-radius, radius + 1)
    weights = mx.exp(-0.5 * (offsets / sigma) ** 2)
    # Normalize so the taps sum to 1 (blur preserves overall brightness).
    return weights / weights.sum()
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def gaussian_blur_2d(x, sigma, fixed_radius=None):
    """Gaussian-blur an NHWC tensor via two separable 1D depthwise convolutions."""
    taps = gaussian_kernel(sigma, fixed_radius=fixed_radius).astype(x.dtype)
    n_taps = taps.shape[0]
    channels = x.shape[-1]

    # One filter per channel: horizontal (1 x k) and vertical (k x 1) shapes.
    horiz = mx.repeat(taps.reshape(1, 1, n_taps, 1), channels, axis=0)
    vert = mx.repeat(taps.reshape(1, n_taps, 1, 1), channels, axis=0)

    half = n_taps // 2  # "same" padding along the blurred axis

    x = mx.conv2d(x, horiz, stride=1, padding=(0, half), groups=channels)
    x = mx.conv2d(x, vert, stride=1, padding=(half, 0), groups=channels)
    return x
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def smooth_gradients(grad, sigma, fixed_radius=None):
    """Average three Gaussian blurs of *grad* at sigma multipliers 0.5, 1, and 2
    (a small blur cascade), using native MLX ops."""
    blurred = [
        gaussian_blur_2d(grad, sigma * mult, fixed_radius=fixed_radius)
        for mult in (0.5, 1.0, 2.0)
    ]
    total = blurred[0]
    for extra in blurred[1:]:
        total = total + extra
    return total / len(blurred)
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def get_pyramid_shapes(base_shape, num_octaves, scale):
    """Return (h, w) sizes for each octave, smallest first, ending at base_shape.

    Level ``num_octaves - 1`` is the base resolution; each earlier level is
    smaller by a factor of *scale*. Sizes are clamped to at least 1 pixel.
    """
    h, w = base_shape
    return [
        (
            max(1, int(round(h * scale ** (level - num_octaves + 1)))),
            max(1, int(round(w * scale ** (level - num_octaves + 1)))),
        )
        for level in range(num_octaves)
    ]
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def deepdream(
    model,
    img_np,
    layers,
    steps,
    lr,
    num_octaves,
    scale,
    jitter=32,
    smoothing=0.5,
    guide_img_np=None,
):
    """Run multi-octave DeepDream gradient ascent on an image.

    Args:
        model: network exposing ``forward_with_endpoints(x) -> (out, {name: act})``.
        img_np: HWC uint8 input image.
        layers: endpoint names whose activations are maximized.
        steps: gradient-ascent iterations per octave.
        lr: step size applied to the standardized gradient.
        num_octaves: number of pyramid levels (smallest processed first).
        scale: size ratio between consecutive octaves.
        jitter: max random roll in pixels applied before each step (undone after).
        smoothing: base sigma offset for gradient smoothing.
        guide_img_np: optional guide image; when given, the loss is the mean
            product of activations with the guide's activations instead of
            the activations' own energy.

    Returns:
        HWC uint8 numpy array of the dreamed image.
    """
    img = preprocess(img_np)
    base_h, base_w = img.shape[1:3]
    # Octave sizes, smallest first, ending at the input resolution.
    pyramid_shapes = get_pyramid_shapes((base_h, base_w), num_octaves, scale)

    for level, (nh, nw) in enumerate(pyramid_shapes):
        img = resize_bilinear(img, nh, nw)

        # Guide activations are recomputed at each octave's resolution so the
        # endpoint shapes match those of the dreamed image.
        guide_features = {}
        if guide_img_np is not None:
            guide_resized = resize_bilinear(preprocess(guide_img_np), nh, nw)
            _, guide_features = model.forward_with_endpoints(guide_resized)

        def loss_fn(x):
            # Mean activation energy (or guide correlation) averaged over layers.
            endpoints = model.forward_with_endpoints(x)[1]
            loss = mx.zeros(())
            for name in layers:
                act = endpoints[name]
                if guide_img_np is not None:
                    guide_act = guide_features[name]
                    loss = loss + mx.mean(act * guide_act)
                else:
                    loss = loss + mx.mean(act * act)
            return loss / len(layers)

        # Calculate max radius needed for static compilation
        # (sigma varies per step; using one fixed kernel radius keeps shapes
        # constant inside the mx.compile'd step).
        max_effective_sigma = 2.0 * (2.0 + smoothing)
        fixed_radius = int(4.0 * max_effective_sigma + 0.5)

        @mx.compile
        def update_step(x, sigma):
            loss, grads = mx.value_and_grad(loss_fn)(x)
            # Smooth, then standardize (zero-mean, unit-std) the gradient
            # before taking the ascent step.
            g = smooth_gradients(grads, sigma, fixed_radius=fixed_radius)
            g = g - mx.mean(g)
            g = g / (mx.std(g) + 1e-8)
            x = x + lr * g
            # Clamp to the valid normalized-image range.
            x = mx.minimum(mx.maximum(x, LOWER_IMAGE_BOUND), UPPER_IMAGE_BOUND)
            return x, loss

        for it in range(steps):
            # Random spatial jitter (circular roll) before the step; rolled
            # back afterwards so the image stays aligned.
            ox, oy = np.random.randint(-jitter, jitter + 1, 2)
            rolled = mx.roll(mx.roll(img, ox, axis=1), oy, axis=2)

            # Smoothing sigma ramps up over the course of the octave.
            sigma_val = ((it + 1) / steps) * 2.0 + smoothing

            rolled, loss = update_step(rolled, mx.array(sigma_val))

            img = mx.roll(mx.roll(rolled, -ox, axis=1), -oy, axis=2)

    return deprocess(img)
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
def run_dream_for_model(model_name, args, img_np):
    """Instantiate *model_name*, run DeepDream on *img_np*, and save a JPEG.

    Settings come from the CLI *args*; for the VGG models an optional notebook
    preset (``--preset``) overrides them. A missing weights file is reported
    and the model is skipped rather than raising.
    """
    print(f"--- Running DeepDream with {model_name} ---")

    # Notebook presets. They use VGG layer names, so (as before) they are
    # applied only to the VGG16/VGG19 models.
    PRESETS = {
        "nb14": {
            "layers": ["relu3_3"],
            "steps": 10,
            "lr": 0.06,
            "octaves": 6,
            "scale": 1.4,
            "jitter": 32,
            "smoothing": 0.5,
        },
        "nb20": {
            "layers": ["relu4_2"],
            "steps": 10,
            "lr": 0.06,
            "octaves": 6,
            "scale": 1.4,
            "jitter": 32,
            "smoothing": 0.5,
        },
        "nb28": {
            "layers": ["relu5_3"],
            "steps": 10,
            "lr": 0.06,
            "octaves": 6,
            "scale": 1.4,
            "jitter": 32,
            "smoothing": 0.5,
        },
    }

    # Per-model table: (factory, default weights file, default layers,
    # whether the notebook presets apply). Replaces four copy-pasted branches.
    MODEL_TABLE = {
        "vgg16": (VGG16, "vgg16_mlx.npz", ["relu4_3"], True),
        "vgg19": (VGG19, "vgg19_mlx.npz", ["relu4_4"], True),
        "resnet50": (ResNet50, "resnet50_mlx.npz", ["layer4_2"], False),
        "googlenet": (
            GoogLeNet,
            "googlenet_mlx.npz",
            ["inception3b", "inception4c", "inception4d"],
            False,
        ),
    }
    # Unknown names fall back to GoogLeNet, matching the original else-branch.
    factory, default_weights, default_layers, supports_presets = MODEL_TABLE.get(
        model_name, MODEL_TABLE["googlenet"]
    )
    model = factory()
    weights = args.weights or default_weights

    # Start from the CLI values; a preset (VGG models only) overrides them all.
    settings = {
        "layers": args.layers,
        "steps": args.steps,
        "lr": args.lr,
        "octaves": args.octaves,
        "scale": args.scale,
        "jitter": args.jitter,
        "smoothing": args.smoothing,
    }
    if supports_presets and args.preset and args.preset in PRESETS:
        settings.update(PRESETS[args.preset])

    if not os.path.exists(weights):
        print(f"Error: Weights NPZ not found: {weights}. Skipping {model_name}.")
        return

    model.load_npz(weights)

    guide_img_np = None
    if args.guide:
        print(f"Using guide image: {args.guide}")
        guide_img_np = load_image(args.guide, args.width)

    start_time = time.time()
    start_timestamp = datetime.now()

    dreamed = deepdream(
        model,
        img_np,
        layers=settings["layers"] or default_layers,
        steps=settings["steps"],
        lr=settings["lr"],
        num_octaves=settings["octaves"],
        scale=settings["scale"],
        jitter=settings["jitter"],
        smoothing=settings["smoothing"],
        guide_img_np=guide_img_np,
    )

    elapsed = time.time() - start_time

    if args.output:
        out = args.output
    else:
        # e.g. love_dream_vgg16_12.34s_0131_142530.jpg
        base_name = os.path.splitext(os.path.basename(args.input))[0]
        formatted_time = f"{elapsed:.2f}s"
        formatted_date = start_timestamp.strftime("%m%d")
        formatted_timestamp = start_timestamp.strftime("%H%M%S")
        out = f"{base_name}_dream_{model_name}_{formatted_time}_{formatted_date}_{formatted_timestamp}.jpg"

    Image.fromarray(dreamed).save(out)
    print(f"Saved {out}\n")
|
| 302 |
+
|
| 303 |
+
|
| 304 |
+
def parse_args():
    """Build and parse the DeepDream command line.

    Several flags keep legacy aliases that share a ``dest`` with the primary
    option; registration order is significant (the first-registered default
    wins for a shared dest), so it is preserved here.
    """
    parser = argparse.ArgumentParser(description="DeepDream with MLX (Compiled)")

    # Input / output paths
    parser.add_argument("--input", required=True, help="Input image path")
    parser.add_argument("--output", help="Output image path (optional)")
    parser.add_argument("--guide", help="Guide image for guided dreaming")

    parser.add_argument("--width", type=int, default=None, help="Resize input to width (maintains aspect ratio)")
    parser.add_argument("--img_width", type=int, help="Alias for --width", dest="width")  # Alias

    # Model selection
    parser.add_argument(
        "--model",
        choices=["vgg16", "vgg19", "googlenet", "resnet50", "all"],
        default="vgg16",
        help="Model to use. 'all' runs all models.",
    )
    parser.add_argument("--preset", choices=["nb14", "nb20", "nb28"], help="VGG16 presets")

    # Dream dynamics
    parser.add_argument("--layers", nargs="+", help="Layers to maximize")
    parser.add_argument("--steps", type=int, default=10, help="Gradient ascent steps per octave")
    parser.add_argument("--lr", type=float, default=0.09, help="Learning rate (step size)")

    parser.add_argument("--octaves", type=int, default=4, help="Number of image octaves")
    parser.add_argument("--pyramid_size", type=int, dest="octaves", help="Alias for --octaves")  # Alias

    parser.add_argument("--scale", type=float, default=1.8, help="Octave scale factor")
    parser.add_argument("--pyramid_ratio", type=float, dest="scale", help="Alias for --scale")  # Alias
    parser.add_argument("--octave_scale", type=float, dest="scale", help="Alias for --scale")  # Alias

    parser.add_argument("--jitter", type=int, default=32, help="Jitter amount (pixels)")

    parser.add_argument("--smoothing", type=float, default=0.5, help="Gradient smoothing strength")
    parser.add_argument("--smoothing_coefficient", type=float, dest="smoothing", help="Alias for --smoothing")  # Alias

    parser.add_argument("--weights", help="Custom weights path")

    return parser.parse_args()
|
| 340 |
+
|
| 341 |
+
|
| 342 |
+
def main():
    """CLI entry point: parse arguments, load the image, dispatch per model."""
    args = parse_args()
    img_np = load_image(args.input, args.width)

    if args.model != 'all':
        run_dream_for_model(args.model, args, img_np)
        return

    # 'all' runs every model in sequence; each picks its own output name.
    if args.output:
        print("Warning: --output argument ignored because --model='all' was selected.")
        args.output = None
    for name in ["vgg16", "vgg19", "googlenet", "resnet50"]:
        run_dream_for_model(name, args, img_np)
|
| 355 |
+
|
| 356 |
+
|
| 357 |
+
if __name__ == "__main__":
|
| 358 |
+
main()
|
export_googlenet_npz.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Export torchvision GoogLeNet (Inception V1) weights to an .npz for MLX.
|
| 3 |
+
Run this in a PyTorch+torchvision env:
|
| 4 |
+
python export_googlenet_npz.py
|
| 5 |
+
It writes models/googlenet_mlx.npz
|
| 6 |
+
"""
|
| 7 |
+
import os
|
| 8 |
+
import numpy as np
|
| 9 |
+
import torch
|
| 10 |
+
from torchvision.models import googlenet, GoogLeNet_Weights
|
| 11 |
+
|
| 12 |
+
def main():
    """Fetch pretrained GoogLeNet weights and dump them to models/googlenet_mlx.npz."""
    net = googlenet(weights=GoogLeNet_Weights.IMAGENET1K_V1)
    tensors = {name: tensor.cpu().numpy() for name, tensor in net.state_dict().items()}
    os.makedirs("models", exist_ok=True)
    out_path = os.path.join("models", "googlenet_mlx.npz")
    np.savez(out_path, **tensors)
    print(f"Saved {out_path} with {len(tensors)} tensors.")
|
| 19 |
+
|
| 20 |
+
if __name__ == "__main__":
|
| 21 |
+
main()
|
export_resnet50_npz.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Export torchvision ResNet50 weights to an .npz for MLX.
|
| 3 |
+
Run this in a PyTorch+torchvision env:
|
| 4 |
+
python export_resnet50_npz.py
|
| 5 |
+
It writes models/resnet50_mlx.npz
|
| 6 |
+
"""
|
| 7 |
+
import os
|
| 8 |
+
import numpy as np
|
| 9 |
+
import torch
|
| 10 |
+
from torchvision.models import resnet50, ResNet50_Weights
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def main():
    """Fetch pretrained ResNet50 weights and dump them to models/resnet50_mlx.npz."""
    net = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
    tensors = {name: tensor.cpu().numpy() for name, tensor in net.state_dict().items()}
    os.makedirs("models", exist_ok=True)
    out_path = os.path.join("models", "resnet50_mlx.npz")
    np.savez(out_path, **tensors)
    print(f"Saved {out_path} with {len(tensors)} tensors.")
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
if __name__ == "__main__":
|
| 23 |
+
main()
|
export_vgg16_npz.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Export torchvision VGG16 weights to an .npz for MLX.
|
| 3 |
+
Run this in a PyTorch+torchvision env:
|
| 4 |
+
python export_vgg16_npz.py
|
| 5 |
+
It writes models/vgg16_mlx.npz
|
| 6 |
+
"""
|
| 7 |
+
import os
|
| 8 |
+
import numpy as np
|
| 9 |
+
import torch
|
| 10 |
+
from torchvision.models import vgg16, VGG16_Weights
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def main():
    """Fetch pretrained VGG16 weights and dump them to models/vgg16_mlx.npz."""
    net = vgg16(weights=VGG16_Weights.IMAGENET1K_V1)
    tensors = {name: tensor.cpu().numpy() for name, tensor in net.state_dict().items()}
    os.makedirs("models", exist_ok=True)
    out_path = os.path.join("models", "vgg16_mlx.npz")
    np.savez(out_path, **tensors)
    print(f"Saved {out_path} with {len(tensors)} tensors.")
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
if __name__ == "__main__":
|
| 23 |
+
main()
|
export_vgg19_npz.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Export torchvision VGG19 weights to an .npz for MLX.
|
| 3 |
+
Run this in a PyTorch+torchvision env:
|
| 4 |
+
python export_vgg19_npz.py
|
| 5 |
+
It writes models/vgg19_mlx.npz
|
| 6 |
+
"""
|
| 7 |
+
import os
|
| 8 |
+
import numpy as np
|
| 9 |
+
import torch
|
| 10 |
+
from torchvision.models import vgg19, VGG19_Weights
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def main():
    """Fetch pretrained VGG19 weights and dump them to models/vgg19_mlx.npz."""
    net = vgg19(weights=VGG19_Weights.IMAGENET1K_V1)
    tensors = {name: tensor.cpu().numpy() for name, tensor in net.state_dict().items()}
    os.makedirs("models", exist_ok=True)
    out_path = os.path.join("models", "vgg19_mlx.npz")
    np.savez(out_path, **tensors)
    print(f"Saved {out_path} with {len(tensors)} tensors.")
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
if __name__ == "__main__":
|
| 23 |
+
main()
|
googlenet_mlx.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:835f92d1b0cf9c4f2977b59603f03f0e96ffb9e055a668e77b45aea166e14c14
|
| 3 |
+
size 26661322
|
inference.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import mlx.core as mx
|
| 2 |
+
import numpy as np
|
| 3 |
+
from PIL import Image
|
| 4 |
+
from mlx_googlenet import GoogLeNet
|
| 5 |
+
# from mlx_vgg16 import VGG16 # Uncomment to use VGG16
|
| 6 |
+
# from mlx_vgg19 import VGG19 # Uncomment to use VGG19
|
| 7 |
+
|
| 8 |
+
def preprocess_image(image_path: str, target_size=(224, 224)):
    """
    Loads and preprocesses an image for MLX models.
    Resizes to *target_size*, applies ImageNet mean/std normalization, and
    returns a batched (1, H, W, C) MLX array.
    """
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    pil_img = Image.open(image_path).convert("RGB").resize(target_size)
    pixels = np.array(pil_img, dtype=np.float32) / 255.0  # Scale to [0, 1]

    # Normalize, then add the batch dimension and hand off to MLX.
    pixels = (pixels - mean) / std
    return mx.array(pixels[np.newaxis, ...])
|
| 26 |
+
|
| 27 |
+
def main():
    """Demo driver: preprocess one image and run GoogLeNet over it."""
    # A 224x224 image named 'dummy_input.png' is expected next to this
    # script; e.g. create one with ImageMagick:
    #   convert -size 224x224 xc:black dummy_input.png
    input_image_path = "dummy_input.png"

    # --- Load and preprocess image ---
    try:
        input_image = preprocess_image(input_image_path)
    except FileNotFoundError:
        print(f"Error: Input image '{input_image_path}' not found.")
        print("Please create a dummy_input.png or replace the path with an existing image.")
        return
    print(f"Preprocessed image shape: {input_image.shape}")

    # --- Load GoogleNet model and weights ---
    print("Loading GoogleNet model...")
    model = GoogLeNet()
    try:
        model.load_npz("googlenet_mlx.npz")
    except FileNotFoundError:
        print("Error: googlenet_mlx.npz not found.")
        print("Ensure 'googlenet_mlx.npz' is in the same directory as this script.")
        return
    print("GoogleNet weights loaded successfully.")

    # --- Perform inference ---
    print("Performing inference...")
    # GoogLeNet's __call__ yields a dict of activations (for DeepDream),
    # not class logits.
    activations = model(input_image)
    print("Inference complete.")

    # --- Display some output ---
    print("\nGoogleNet Activations (Layer Names and Shapes):")
    for name, tensor in activations.items():
        print(f"  {name}: {tensor.shape}")

    # VGG16/VGG19 can be driven the same way: construct the model,
    # call load_npz with the matching .npz, then call it on the image.


if __name__ == "__main__":
    main()
|
mlx_googlenet.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Minimal GoogLeNet (Inception V1) in MLX, up to inception4e.
|
| 3 |
+
Loads weights from a torchvision-exported npz (see export_googlenet_npz.py).
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import mlx.core as mx
|
| 7 |
+
import mlx.nn as nn
|
| 8 |
+
import numpy as np
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def _conv_bn(in_ch, out_ch, kernel_size, stride=1, padding=0):
    """Conv2d (bias-free) -> BatchNorm -> ReLU building block."""
    conv = nn.Conv2d(
        in_ch,
        out_ch,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        bias=False,
    )
    # eps/momentum mirror torchvision's BatchNorm-variant GoogLeNet.
    norm = nn.BatchNorm(out_ch, eps=1e-3, momentum=0.1)
    return nn.Sequential(conv, norm, nn.ReLU())
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class Inception(nn.Module):
    """GoogLeNet Inception block: four parallel branches, channel-concatenated.

    Following the torchvision reference implementation, the nominal
    "5x5" branch actually uses a 3x3 convolution.
    """

    def __init__(self, in_ch, ch1, ch3r, ch3, ch5r, ch5, pool_proj):
        super().__init__()
        # Branch 1: plain 1x1 conv.
        self.branch1 = _conv_bn(in_ch, ch1, 1)

        # Branch 2: 1x1 reduce followed by 3x3.
        self.branch2_1 = _conv_bn(in_ch, ch3r, 1)
        self.branch2_2 = _conv_bn(ch3r, ch3, 3, padding=1)

        # Branch 3: 1x1 reduce followed by 3x3 (not 5x5; see class docstring).
        self.branch3_1 = _conv_bn(in_ch, ch5r, 1)
        self.branch3_2 = _conv_bn(ch5r, ch5, 3, padding=1)

        # Branch 4: 3x3 max-pool followed by 1x1 projection.
        self.branch4_pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.branch4_2 = _conv_bn(in_ch, pool_proj, 1)

    def __call__(self, x):
        outputs = [
            self.branch1(x),
            self.branch2_2(self.branch2_1(x)),
            self.branch3_2(self.branch3_1(x)),
            self.branch4_2(self.branch4_pool(x)),
        ]
        # Concatenate along channels (last axis in MLX's NHWC layout).
        return mx.concatenate(outputs, axis=-1)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
class GoogLeNet(nn.Module):
    """GoogLeNet (Inception V1), BatchNorm variant, for DeepDream.

    ``__call__`` returns a dict of inception-block activations rather
    than logits; weights come from a torchvision-exported npz
    (see export_googlenet_npz.py).
    """

    def __init__(self):
        super().__init__()
        # Stem: 7x7/2 conv, pool, 1x1 conv, 3x3 conv, pool.
        self.conv1 = _conv_bn(3, 64, 7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)

        self.conv2 = _conv_bn(64, 64, 1)
        self.conv3 = _conv_bn(64, 192, 3, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)

        # Inception stages; channel splits match the torchvision model.
        self.inception3a = Inception(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = Inception(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)

        self.inception4a = Inception(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = Inception(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = Inception(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = Inception(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = Inception(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.inception5a = Inception(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = Inception(832, 384, 192, 384, 48, 128, 128)

    def forward_with_endpoints(self, x):
        """Forward pass; returns (final_activation, {block_name: activation}).

        # NOTE(review): input is presumably NHWC (MLX convention, as
        # produced by inference.preprocess_image) — confirm at call sites.
        """
        endpoints = {}
        x = self.conv1(x)
        x = self.maxpool1(x)

        x = self.conv2(x)
        x = self.conv3(x)
        x = self.maxpool2(x)

        # Each inception block's output is recorded before pooling.
        x = self.inception3a(x)
        endpoints["inception3a"] = x
        x = self.inception3b(x)
        endpoints["inception3b"] = x
        x = self.maxpool3(x)

        x = self.inception4a(x)
        endpoints["inception4a"] = x
        x = self.inception4b(x)
        endpoints["inception4b"] = x
        x = self.inception4c(x)
        endpoints["inception4c"] = x
        x = self.inception4d(x)
        endpoints["inception4d"] = x
        x = self.inception4e(x)
        endpoints["inception4e"] = x
        x = self.maxpool4(x)

        x = self.inception5a(x)
        endpoints["inception5a"] = x
        x = self.inception5b(x)
        endpoints["inception5b"] = x
        return x, endpoints

    def __call__(self, x):
        # DeepDream only needs the intermediate activations.
        _, endpoints = self.forward_with_endpoints(x)
        return endpoints

    def load_npz(self, path: str):
        """Load weights from a torchvision GoogLeNet state_dict saved as npz."""
        data = np.load(path)

        def to_mlx_weight(w):
            # PyTorch Conv2d weights are (out_channels, in_channels, kH, kW)
            # MLX expects channel-last filters: (out_channels, kH, kW, in_channels)
            return np.transpose(w, (0, 2, 3, 1)) if w.ndim == 4 else w

        def load_conv_bn(prefix, seq_mod: nn.Sequential):
            # Each _conv_bn Sequential is [Conv2d, BatchNorm, ReLU].
            conv = seq_mod.layers[0]
            bn = seq_mod.layers[1]
            conv.weight = mx.array(to_mlx_weight(data[f"{prefix}.conv.weight"]))
            bn.weight = mx.array(data[f"{prefix}.bn.weight"])
            bn.bias = mx.array(data[f"{prefix}.bn.bias"])
            bn.running_mean = mx.array(data[f"{prefix}.bn.running_mean"])
            bn.running_var = mx.array(data[f"{prefix}.bn.running_var"])

        load_conv_bn("conv1", self.conv1)
        load_conv_bn("conv2", self.conv2)
        load_conv_bn("conv3", self.conv3)

        def load_inception(prefix, module: Inception):
            # torchvision stores branch2/branch3 as two-element Sequentials
            # and branch4 as [MaxPool, conv]; hence the .0/.1 suffixes.
            load_conv_bn(f"{prefix}.branch1", module.branch1)
            load_conv_bn(f"{prefix}.branch2.0", module.branch2_1)
            load_conv_bn(f"{prefix}.branch2.1", module.branch2_2)
            load_conv_bn(f"{prefix}.branch3.0", module.branch3_1)
            load_conv_bn(f"{prefix}.branch3.1", module.branch3_2)
            load_conv_bn(f"{prefix}.branch4.1", module.branch4_2)

        load_inception("inception3a", self.inception3a)
        load_inception("inception3b", self.inception3b)
        load_inception("inception4a", self.inception4a)
        load_inception("inception4b", self.inception4b)
        load_inception("inception4c", self.inception4c)
        load_inception("inception4d", self.inception4d)
        load_inception("inception4e", self.inception4e)
        load_inception("inception5a", self.inception5a)
        load_inception("inception5b", self.inception5b)
|
mlx_resnet50.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResNet50 in MLX for DeepDream.
|
| 3 |
+
Loads weights from a torchvision-exported npz (see export_resnet50_npz.py).
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import mlx.core as mx
|
| 7 |
+
import mlx.nn as nn
|
| 8 |
+
import numpy as np
|
| 9 |
+
|
| 10 |
+
class Bottleneck(nn.Module):
    """ResNet bottleneck: 1x1 reduce -> 3x3 -> 1x1 expand, plus skip connection."""

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm(planes, eps=1e-5, momentum=0.1)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm(planes, eps=1e-5, momentum=0.1)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm(planes * self.expansion, eps=1e-5, momentum=0.1)
        self.relu = nn.ReLU()
        # Optional projection applied to the shortcut when shapes differ.
        self.downsample = downsample

    def __call__(self, x):
        # Shortcut path, projected if a downsample module was supplied.
        shortcut = self.downsample(x) if self.downsample is not None else x

        # Main path: two activated conv-bn stages, then an un-activated one.
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Residual add, then the final activation.
        return self.relu(out + shortcut)
|
| 45 |
+
|
| 46 |
+
class ResNet(nn.Module):
    """ResNet backbone exposing per-block activations for DeepDream.

    ``forward_with_endpoints`` returns the final feature map plus a dict
    of intermediate activations; ``load_npz`` fills weights from a
    torchvision-exported npz (see export_resnet50_npz.py).
    """

    def __init__(self, block, layers):
        super().__init__()
        # Running input-channel count, advanced by _make_layer.
        self.inplanes = 64

        # Initial layers
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm(self.inplanes, eps=1e-5, momentum=0.1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks.

        Only the first block carries the stride and, when the shapes
        change, a 1x1 conv + BN projection for the shortcut path.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm(planes * block.expansion, eps=1e-5, momentum=0.1),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward_with_endpoints(self, x):
        """Forward pass; returns (final_activation, {name: activation}).

        Endpoints cover the stem ('conv1'), every block ('layerN_i'),
        and each stage's output ('layerN').
        """
        endpoints = {}

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        endpoints['conv1'] = x

        x = self.maxpool(x)

        # Layer 1
        for i, layer in enumerate(self.layer1.layers):
            x = layer(x)
            endpoints[f'layer1_{i}'] = x
        endpoints['layer1'] = x

        # Layer 2
        for i, layer in enumerate(self.layer2.layers):
            x = layer(x)
            endpoints[f'layer2_{i}'] = x
        endpoints['layer2'] = x

        # Layer 3
        for i, layer in enumerate(self.layer3.layers):
            x = layer(x)
            endpoints[f'layer3_{i}'] = x
        endpoints['layer3'] = x

        # Layer 4
        for i, layer in enumerate(self.layer4.layers):
            x = layer(x)
            endpoints[f'layer4_{i}'] = x
        endpoints['layer4'] = x

        return x, endpoints

    def load_npz(self, path: str):
        """Load weights from a torchvision ResNet state_dict saved as npz."""
        data = np.load(path)

        def to_mlx_weight(w):
            # PyTorch convs are (O, I, kH, kW); MLX wants (O, kH, kW, I).
            return np.transpose(w, (0, 2, 3, 1)) if w.ndim == 4 else w

        def load_bn(prefix, bn):
            bn.weight = mx.array(data[f"{prefix}.weight"])
            bn.bias = mx.array(data[f"{prefix}.bias"])
            bn.running_mean = mx.array(data[f"{prefix}.running_mean"])
            bn.running_var = mx.array(data[f"{prefix}.running_var"])

        def load_conv(prefix, conv):
            conv.weight = mx.array(to_mlx_weight(data[f"{prefix}.weight"]))

        # Initial layers
        load_conv("conv1", self.conv1)
        load_bn("bn1", self.bn1)

        def load_layer(prefix, layer_mod):
            # npz keys follow torchvision's "layerN.i.convM"/"layerN.i.bnM".
            for i, block in enumerate(layer_mod.layers):
                block_prefix = f"{prefix}.{i}"
                load_conv(f"{block_prefix}.conv1", block.conv1)
                load_bn(f"{block_prefix}.bn1", block.bn1)
                load_conv(f"{block_prefix}.conv2", block.conv2)
                load_bn(f"{block_prefix}.bn2", block.bn2)
                load_conv(f"{block_prefix}.conv3", block.conv3)
                load_bn(f"{block_prefix}.bn3", block.bn3)

                if block.downsample is not None:
                    load_conv(f"{block_prefix}.downsample.0", block.downsample.layers[0])
                    load_bn(f"{block_prefix}.downsample.1", block.downsample.layers[1])

        load_layer("layer1", self.layer1)
        load_layer("layer2", self.layer2)
        load_layer("layer3", self.layer3)
        load_layer("layer4", self.layer4)
|
| 151 |
+
|
| 152 |
+
def ResNet50():
    """Construct a ResNet-50 (Bottleneck blocks in a [3, 4, 6, 3] layout)."""
    return ResNet(Bottleneck, [3, 4, 6, 3])
|
mlx_vgg16.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
VGG16 in MLX with endpoints for relu1_2, relu2_2, relu3_3, relu4_2, relu4_3,
|
| 3 |
+
relu5_2, relu5_3. Loads weights from a torchvision-exported npz
|
| 4 |
+
(see export_vgg16_npz.py).
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import mlx.core as mx
|
| 8 |
+
import mlx.nn as nn
|
| 9 |
+
import numpy as np
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def _conv(in_ch, out_ch, kernel_size=3, padding=1):
    """Biased 3x3 'same' convolution, the standard VGG building block."""
    return nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=padding, bias=True)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class VGG16(nn.Module):
    """VGG16 feature extractor (torchvision `features` layout) for DeepDream.

    ``__call__`` returns a dict of named ReLU endpoints. Weights load
    from an npz exported by export_vgg16_npz.py; list indices mirror
    torchvision's ``vgg16().features`` so "features.<idx>.*" npz keys
    map directly onto ``self.layers[<idx>]``.
    """

    def __init__(self):
        super().__init__()
        self.layers = [
            _conv(3, 64),     # 0 conv1_1
            nn.ReLU(),
            _conv(64, 64),    # 2 conv1_2
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            _conv(64, 128),   # 5 conv2_1
            nn.ReLU(),
            _conv(128, 128),  # 7 conv2_2
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            _conv(128, 256),  # 10 conv3_1
            nn.ReLU(),
            _conv(256, 256),  # 12 conv3_2
            nn.ReLU(),
            _conv(256, 256),  # 14 conv3_3
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            _conv(256, 512),  # 17 conv4_1
            nn.ReLU(),
            _conv(512, 512),  # 19 conv4_2
            nn.ReLU(),
            _conv(512, 512),  # 21 conv4_3
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            _conv(512, 512),  # 24 conv5_1
            nn.ReLU(),
            _conv(512, 512),  # 26 conv5_2
            nn.ReLU(),
            _conv(512, 512),  # 28 conv5_3
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]

        # Named endpoints -> index of the producing layer in self.layers.
        self.endpoint_indices = {
            "relu1_2": 3,
            "relu2_2": 8,
            "relu3_3": 15,
            "relu4_1": 18,
            "relu4_2": 20,
            "relu4_3": 22,
            "relu5_1": 25,
            "relu5_2": 27,
            "relu5_3": 29,
        }

    def forward_with_endpoints(self, x):
        """Run the network; returns (final_activation, {name: activation})."""
        # Invert the endpoint map once so each layer does an O(1) lookup
        # instead of scanning every endpoint name at every layer.
        index_to_name = {i: name for name, i in self.endpoint_indices.items()}
        endpoints = {}
        for idx, layer in enumerate(self.layers):
            x = layer(x)
            name = index_to_name.get(idx)
            if name is not None:
                endpoints[name] = x
        return x, endpoints

    def __call__(self, x):
        # DeepDream only needs the named activations.
        _, endpoints = self.forward_with_endpoints(x)
        return endpoints

    def load_npz(self, path: str):
        """Load conv weights/biases from a torchvision-exported npz."""
        data = np.load(path)

        def to_mlx_weight(w):
            # PyTorch convs are (O, I, kH, kW); MLX wants (O, kH, kW, I).
            return np.transpose(w, (0, 2, 3, 1)) if w.ndim == 4 else w

        conv_indices = [0, 2, 5, 7, 10, 12, 14, 17, 19, 21, 24, 26, 28]
        for idx in conv_indices:
            conv = self.layers[idx]
            weight_key = f"features.{idx}.weight"
            bias_key = f"features.{idx}.bias"
            conv.weight = mx.array(to_mlx_weight(data[weight_key]))
            conv.bias = mx.array(data[bias_key])
|
mlx_vgg19.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
VGG19 in MLX with endpoints for common DeepDream layers.
|
| 3 |
+
Loads weights from a torchvision-exported npz (see export_vgg19_npz.py).
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import mlx.core as mx
|
| 7 |
+
import mlx.nn as nn
|
| 8 |
+
import numpy as np
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def _conv(in_ch, out_ch, kernel_size=3, padding=1):
    """Biased 3x3 'same' convolution, the standard VGG building block."""
    return nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=padding, bias=True)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class VGG19(nn.Module):
    """VGG19 feature extractor with named DeepDream endpoints.

    ``__call__`` returns a dict of ReLU activations. Weights load from
    an npz exported by export_vgg19_npz.py; list indices mirror
    torchvision's ``vgg19().features`` so "features.<idx>.*" npz keys
    map directly onto ``self.layers[<idx>]``.
    """

    def __init__(self):
        super().__init__()
        # Mirrors torchvision.models.vgg19(features) layout
        self.layers = [
            _conv(3, 64),     # 0 conv1_1
            nn.ReLU(),
            _conv(64, 64),    # 2 conv1_2
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            _conv(64, 128),   # 5 conv2_1
            nn.ReLU(),
            _conv(128, 128),  # 7 conv2_2
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            _conv(128, 256),  # 10 conv3_1
            nn.ReLU(),
            _conv(256, 256),  # 12 conv3_2
            nn.ReLU(),
            _conv(256, 256),  # 14 conv3_3
            nn.ReLU(),
            _conv(256, 256),  # 16 conv3_4
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            _conv(256, 512),  # 19 conv4_1
            nn.ReLU(),
            _conv(512, 512),  # 21 conv4_2
            nn.ReLU(),
            _conv(512, 512),  # 23 conv4_3
            nn.ReLU(),
            _conv(512, 512),  # 25 conv4_4
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            _conv(512, 512),  # 28 conv5_1
            nn.ReLU(),
            _conv(512, 512),  # 30 conv5_2
            nn.ReLU(),
            _conv(512, 512),  # 32 conv5_3
            nn.ReLU(),
            _conv(512, 512),  # 34 conv5_4
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]

        # Named endpoints -> index of the producing layer in self.layers.
        self.endpoint_indices = {
            "relu1_2": 3,
            "relu2_2": 8,
            "relu3_2": 13,
            "relu3_3": 15,
            "relu3_4": 17,
            "relu4_1": 20,
            "relu4_2": 22,
            "relu4_3": 24,
            "relu4_4": 26,
            "relu5_1": 29,
            "relu5_2": 31,
            "relu5_3": 33,
            "relu5_4": 35,
        }

    def forward_with_endpoints(self, x):
        """Run the network; returns (final_activation, {name: activation})."""
        # Invert the endpoint map once so each layer does an O(1) lookup
        # instead of scanning every endpoint name at every layer.
        index_to_name = {i: name for name, i in self.endpoint_indices.items()}
        endpoints = {}
        for idx, layer in enumerate(self.layers):
            x = layer(x)
            name = index_to_name.get(idx)
            if name is not None:
                endpoints[name] = x
        return x, endpoints

    def __call__(self, x):
        # DeepDream only needs the named activations.
        _, endpoints = self.forward_with_endpoints(x)
        return endpoints

    def load_npz(self, path: str):
        """Load conv weights/biases from a torchvision-exported npz."""
        data = np.load(path)

        def to_mlx_weight(w):
            # PyTorch convs are (O, I, kH, kW); MLX wants (O, kH, kW, I).
            return np.transpose(w, (0, 2, 3, 1)) if w.ndim == 4 else w

        conv_indices = [0, 2, 5, 7, 10, 12, 14, 16, 19, 21, 23, 25, 28, 30, 32, 34]
        for idx in conv_indices:
            conv = self.layers[idx]
            weight_key = f"features.{idx}.weight"
            bias_key = f"features.{idx}.bias"
            conv.weight = mx.array(to_mlx_weight(data[weight_key]))
            conv.bias = mx.array(data[bias_key])
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
mlx
|
| 2 |
+
numpy
|
| 3 |
+
Pillow
|
| 4 |
+
scipy
|
resnet50_mlx.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d25d75e904c01e308ef81b57ab48756056d7154b0360a700deb3c22ad9207188
|
| 3 |
+
size 102530262
|
tf_inception_v1.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""TF-Slim InceptionV1 forward callable for TF2 (no KerasTensor issues)."""
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from typing import Iterable, Tuple, Callable, List
|
| 5 |
+
|
| 6 |
+
import tensorflow as tf
|
| 7 |
+
import tf_slim as slim
|
| 8 |
+
from tf_slim.nets import inception_v1
|
| 9 |
+
|
| 10 |
+
WEIGHTS_URL = "http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz"
|
| 11 |
+
DEFAULT_LAYER_NAMES = (
|
| 12 |
+
"Mixed_4b",
|
| 13 |
+
"Mixed_4c",
|
| 14 |
+
"Mixed_4d",
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def _download_checkpoint_if_needed(weights_path: str = None) -> str:
    """Return a path to the InceptionV1 checkpoint, downloading if needed.

    If ``weights_path`` is given, it is validated and returned as-is.
    Otherwise the TF-Slim checkpoint tarball is fetched into the Keras
    cache and the extracted .ckpt path is returned.

    Raises:
        FileNotFoundError: If the given or downloaded checkpoint is missing.
    """
    if weights_path:
        if not os.path.exists(weights_path):
            raise FileNotFoundError(f"Weights path does not exist: {weights_path}")
        return weights_path

    tar_path = tf.keras.utils.get_file(
        origin=WEIGHTS_URL,
        fname=os.path.basename(WEIGHTS_URL),
        extract=True,
        cache_dir=os.path.expanduser("~/.keras"),
    )
    # NOTE(review): assumes get_file extracts alongside the tarball into
    # a directory named after the archive — confirm against the installed
    # Keras version, whose extraction layout has changed across releases.
    ckpt_dir = os.path.join(os.path.dirname(tar_path), "inception_v1_2016_08_28")
    ckpt_path = os.path.join(ckpt_dir, "inception_v1.ckpt")
    if not os.path.exists(ckpt_path):
        raise FileNotFoundError(f"Checkpoint not found after download: {ckpt_path}")
    return ckpt_path
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _preprocess_fn(x: tf.Tensor) -> tf.Tensor:
    """Match TF-Slim InceptionV1 preprocessing: scale to [-1, 1]."""
    scaled = tf.cast(x, tf.float32) / 127.5
    return scaled - 1.0
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def build_inception_v1_callable(
    layer_names: Iterable[str] = DEFAULT_LAYER_NAMES, weights_path: str = None
) -> Tuple[Callable[[tf.Tensor], List[tf.Tensor]], Callable[[tf.Tensor], tf.Tensor]]:
    """
    Build a TF2-compatible forward callable over TF-Slim's InceptionV1.

    Returns:
        forward_fn: callable taking NHWC float tensor -> list of endpoints
        preprocess_fn: preprocessing callable
    """

    layer_names = tuple(layer_names)
    scope_name = "InceptionV1"

    @tf.function
    def forward_fn(x: tf.Tensor) -> List[tf.Tensor]:
        # AUTO_REUSE lets repeated traces share the same slim variables
        # instead of erroring on re-declaration.
        with tf.compat.v1.variable_scope(scope_name, reuse=tf.compat.v1.AUTO_REUSE):
            with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
                _, endpoints = inception_v1.inception_v1(
                    x,
                    num_classes=1001,
                    is_training=False,
                    spatial_squeeze=False,
                )
        return [endpoints[name] for name in layer_names]

    # Build variables by a dummy call
    _ = forward_fn(tf.zeros([1, 224, 224, 3], dtype=tf.float32))

    ckpt_path = _download_checkpoint_if_needed(weights_path)
    # Map checkpoint variable names (sans ":0" suffix) onto the slim-created
    # variables so a TF2 Checkpoint can restore a TF1-style checkpoint.
    var_list = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope=scope_name
    )
    name_map = {v.name.split(":")[0]: v for v in var_list}
    ckpt = tf.train.Checkpoint(**name_map)
    # expect_partial(): the checkpoint holds extra slots (e.g. logits we
    # don't request) — suppress unrestored-value warnings.
    ckpt.restore(ckpt_path).expect_partial()

    return forward_fn, _preprocess_fn
|
| 79 |
+
|
vgg16_mlx.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d1d8874dae6011833ea67e5e3613c8575ec61eac7af3ca4b49a22e8c85ad8bd
|
| 3 |
+
size 553438706
|
vgg19_mlx.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c56855e8cf3337ad42a45b15545c8dfb60aaed23d57295101e9f1e9cb1a3429
|
| 3 |
+
size 574679086
|