Upload folder using huggingface_hub

Browse files

Files changed (12) hide show

README.md +3 -4
dream.py +111 -43
export_models.py +93 -0
googlenet_mlx_int8.npz +3 -0
mlx_googlenet.py +26 -9
mlx_resnet50.py +18 -7
mlx_vgg16.py +16 -4
mlx_vgg19.py +16 -4
quantize_experiment.py +182 -0
resnet50_mlx_int8.npz +3 -0
vgg16_mlx_int8.npz +3 -0
vgg19_mlx_int8.npz +3 -0

README.md CHANGED Viewed

@@ -162,7 +162,7 @@ python dream.py --input love.jpg \
 ## 💾 Weight Conversion & Efficiency
-We didn't just wrap existing libs. We wrote custom exporters (`export_*.py`) to rip weights from standard PyTorch/Torchvision archives and serialize them into optimized MLX `.npz` arrays.
 ### 50% Smaller Weights (FP16)
 We now support **Float16** (Half-Precision) weights by default. This cuts model size in half with zero visual loss for DeepDreaming.
@@ -186,9 +186,8 @@ You need to fine-tune the base model on a new dataset.
 **Current Workflow:**
 1.  Train your model in PyTorch (standard ImageNet training or custom dataset).
 2.  Save the `.pth` checkpoint.
-3.  Modify our `export_*.py` scripts to load your custom checkpoint.
-4.  Export to `.npz`.
-5.  Dream.
 *A dedicated `train_dream.py` script is on the roadmap.*

 ## 💾 Weight Conversion & Efficiency
+We didn't just wrap existing libs. We wrote a custom exporter (`export_models.py`) to rip weights from standard PyTorch/Torchvision archives and serialize them into optimized MLX `.npz` arrays.
 ### 50% Smaller Weights (FP16)
 We now support **Float16** (Half-Precision) weights by default. This cuts model size in half with zero visual loss for DeepDreaming.
 **Current Workflow:**
 1.  Train your model in PyTorch (standard ImageNet training or custom dataset).
 2.  Save the `.pth` checkpoint.
+3.  Adapt `export_models.py` to load your custom checkpoint (as shipped it loads torchvision's pretrained ImageNet weights) and export to an MLX `.npz`.
+4.  Dream.
 *A dedicated `train_dream.py` script is on the roadmap.*

dream.py CHANGED Viewed

@@ -7,10 +7,10 @@ import mlx.core as mx
 import mlx.nn as nn
 import numpy as np
 import scipy.ndimage as nd
-from mlx_resnet50 import ResNet50
 from PIL import Image
 from mlx_googlenet import GoogLeNet
 from mlx_vgg16 import VGG16
 from mlx_vgg19 import VGG19
@@ -62,7 +62,7 @@ def gaussian_kernel(sigma, truncate=4.0, fixed_radius=None):
         radius = fixed_radius
     else:
         radius = int(truncate * sigma + 0.5)
     x = mx.arange(-radius, radius + 1)
     kernel = mx.exp(-0.5 * (x / sigma) ** 2)
     kernel = kernel / kernel.sum()
@@ -75,14 +75,14 @@ def gaussian_blur_2d(x, sigma, fixed_radius=None):
     kernel = kernel.astype(x.dtype)
     k_size = kernel.shape[0]
     C = x.shape[-1]
     k_x = kernel.reshape(1, 1, k_size, 1)
     k_x = mx.repeat(k_x, C, axis=0)
     k_y = kernel.reshape(1, k_size, 1, 1)
     k_y = mx.repeat(k_y, C, axis=0)
     pad = k_size // 2
     x = mx.conv2d(x, k_x, stride=1, padding=(0, pad), groups=C)
     x = mx.conv2d(x, k_y, stride=1, padding=(pad, 0), groups=C)
     return x
@@ -94,7 +94,7 @@ def smooth_gradients(grad, sigma, fixed_radius=None):
     smoothed = []
     for s in sigmas:
         smoothed.append(gaussian_blur_2d(grad, s, fixed_radius=fixed_radius))
     g_total = smoothed[0]
     for i in range(1, len(smoothed)):
         g_total = g_total + smoothed[i]
@@ -135,7 +135,7 @@ def deepdream(
         if guide_img_np is not None:
             guide_resized = resize_bilinear(preprocess(guide_img_np), nh, nw)
             _, guide_features = model.forward_with_endpoints(guide_resized)
         def loss_fn(x):
             endpoints = model.forward_with_endpoints(x)[1]
             loss = mx.zeros(())
@@ -165,35 +165,79 @@ def deepdream(
         for it in range(steps):
             ox, oy = np.random.randint(-jitter, jitter + 1, 2)
             rolled = mx.roll(mx.roll(img, ox, axis=1), oy, axis=2)
             sigma_val = ((it + 1) / steps) * 2.0 + smoothing
             rolled, loss = update_step(rolled, mx.array(sigma_val))
             img = mx.roll(mx.roll(rolled, -ox, axis=1), -oy, axis=2)
     return deprocess(img)
 def get_weights_path(model_name, explicit_path=None):
     if explicit_path:
         return explicit_path
-    # 1. Try bf16 (Efficient)
     bf16_path = f"{model_name}_mlx_bf16.npz"
     if os.path.exists(bf16_path):
         return bf16_path
-    # 2. Try standard float32
     fp32_path = f"{model_name}_mlx.npz"
     if os.path.exists(fp32_path):
         return fp32_path
-    return fp32_path # Default fallback for error message
 def run_dream_for_model(model_name, args, img_np):
     print(f"--- Running DeepDream with {model_name} ---")
     # ... (PRESETS dict remains here) ...
     # Notebook presets
     PRESETS = {
@@ -249,7 +293,7 @@ def run_dream_for_model(model_name, args, img_np):
             current_scale = p["scale"]
             current_jitter = p["jitter"]
             current_smoothing = p["smoothing"]
     elif model_name == "vgg19":
         model = VGG19()
         weights = get_weights_path("vgg19", args.weights)
@@ -263,13 +307,13 @@ def run_dream_for_model(model_name, args, img_np):
             current_scale = p["scale"]
             current_jitter = p["jitter"]
             current_smoothing = p["smoothing"]
     elif model_name == "resnet50":
         model = ResNet50()
         weights = get_weights_path("resnet50", args.weights)
         default_layers = ["layer4_2"]
-    else: # googlenet
         model = GoogLeNet()
         weights = get_weights_path("googlenet", args.weights)
         default_layers = ["inception3b", "inception4c", "inception4d"]
@@ -277,7 +321,7 @@ def run_dream_for_model(model_name, args, img_np):
     if not os.path.exists(weights):
         print(f"Error: Weights NPZ not found: {weights}. Skipping {model_name}.")
         return
     print(f"Loading weights from: {weights}")
     model.load_npz(weights)
@@ -301,10 +345,10 @@ def run_dream_for_model(model_name, args, img_np):
         smoothing=current_smoothing,
         guide_img_np=guide_img_np,
     )
     end_time = time.time()
     elapsed = end_time - start_time
     if args.output:
         out = args.output
     else:
@@ -323,10 +367,17 @@ def parse_args():
     p.add_argument("--input", required=True, help="Input image path")
     p.add_argument("--output", help="Output image path (optional)")
     p.add_argument("--guide", help="Guide image for guided dreaming")
-    p.add_argument("--width", type=int, default=None, help="Resize input to width (maintains aspect ratio)")
-    p.add_argument("--img_width", type=int, help="Alias for --width", dest="width") # Alias
     p.add_argument(
         "--model",
         choices=["vgg16", "vgg19", "googlenet", "resnet50", "all"],
@@ -334,25 +385,40 @@ def parse_args():
         help="Model to use. 'all' runs all models.",
     )
     p.add_argument("--preset", choices=["nb14", "nb20", "nb28"], help="VGG16 presets")
     p.add_argument("--layers", nargs="+", help="Layers to maximize")
-    p.add_argument("--steps", type=int, default=10, help="Gradient ascent steps per octave")
     p.add_argument("--lr", type=float, default=0.09, help="Learning rate (step size)")
     p.add_argument("--octaves", type=int, default=4, help="Number of image octaves")
-    p.add_argument("--pyramid_size", type=int, dest="octaves", help="Alias for --octaves") # Alias
     p.add_argument("--scale", type=float, default=1.8, help="Octave scale factor")
-    p.add_argument("--pyramid_ratio", type=float, dest="scale", help="Alias for --scale") # Alias
-    p.add_argument("--octave_scale", type=float, dest="scale", help="Alias for --scale") # Alias
     p.add_argument("--jitter", type=int, default=32, help="Jitter amount (pixels)")
-    p.add_argument("--smoothing", type=float, default=0.5, help="Gradient smoothing strength")
-    p.add_argument("--smoothing_coefficient", type=float, dest="smoothing", help="Alias for --smoothing") # Alias
     p.add_argument("--weights", help="Custom weights path")
     return p.parse_args()
@@ -360,11 +426,13 @@ def main():
     args = parse_args()
     img_np = load_image(args.input, args.width)
-    if args.model == 'all':
         models = ["vgg16", "vgg19", "googlenet", "resnet50"]
         if args.output:
-            print("Warning: --output argument ignored because --model='all' was selected.")
-            args.output = None
         for m in models:
             run_dream_for_model(m, args, img_np)
     else:
@@ -372,4 +440,4 @@ def main():
 if __name__ == "__main__":
-    main()

 import mlx.nn as nn
 import numpy as np
 import scipy.ndimage as nd
 from PIL import Image
 from mlx_googlenet import GoogLeNet
+from mlx_resnet50 import ResNet50
 from mlx_vgg16 import VGG16
 from mlx_vgg19 import VGG19
         radius = fixed_radius
     else:
         radius = int(truncate * sigma + 0.5)
     x = mx.arange(-radius, radius + 1)
     kernel = mx.exp(-0.5 * (x / sigma) ** 2)
     kernel = kernel / kernel.sum()
     kernel = kernel.astype(x.dtype)
     k_size = kernel.shape[0]
     C = x.shape[-1]
     k_x = kernel.reshape(1, 1, k_size, 1)
     k_x = mx.repeat(k_x, C, axis=0)
     k_y = kernel.reshape(1, k_size, 1, 1)
     k_y = mx.repeat(k_y, C, axis=0)
     pad = k_size // 2
     x = mx.conv2d(x, k_x, stride=1, padding=(0, pad), groups=C)
     x = mx.conv2d(x, k_y, stride=1, padding=(pad, 0), groups=C)
     return x
     smoothed = []
     for s in sigmas:
         smoothed.append(gaussian_blur_2d(grad, s, fixed_radius=fixed_radius))
     g_total = smoothed[0]
     for i in range(1, len(smoothed)):
         g_total = g_total + smoothed[i]
         if guide_img_np is not None:
             guide_resized = resize_bilinear(preprocess(guide_img_np), nh, nw)
             _, guide_features = model.forward_with_endpoints(guide_resized)
         def loss_fn(x):
             endpoints = model.forward_with_endpoints(x)[1]
             loss = mx.zeros(())
         for it in range(steps):
             ox, oy = np.random.randint(-jitter, jitter + 1, 2)
             rolled = mx.roll(mx.roll(img, ox, axis=1), oy, axis=2)
             sigma_val = ((it + 1) / steps) * 2.0 + smoothing
             rolled, loss = update_step(rolled, mx.array(sigma_val))
             img = mx.roll(mx.roll(rolled, -ox, axis=1), -oy, axis=2)
     return deprocess(img)
 def get_weights_path(model_name, explicit_path=None):
     if explicit_path:
         return explicit_path
+    # 1. Try int8 (Maximum Efficiency / Smallest)
+    int8_path = f"{model_name}_mlx_int8.npz"
+    if os.path.exists(int8_path):
+        return int8_path
+    # 2. Try bf16 (Standard Efficient)
     bf16_path = f"{model_name}_mlx_bf16.npz"
     if os.path.exists(bf16_path):
         return bf16_path
+    # 3. Try standard float32
     fp32_path = f"{model_name}_mlx.npz"
     if os.path.exists(fp32_path):
         return fp32_path
+    return int8_path # Return preferred default for error message context
 def run_dream_for_model(model_name, args, img_np):
     print(f"--- Running DeepDream with {model_name} ---")
     # ... (PRESETS dict remains here) ...
     # Notebook presets
     PRESETS = {
             current_scale = p["scale"]
             current_jitter = p["jitter"]
             current_smoothing = p["smoothing"]
     elif model_name == "vgg19":
         model = VGG19()
         weights = get_weights_path("vgg19", args.weights)
             current_scale = p["scale"]
             current_jitter = p["jitter"]
             current_smoothing = p["smoothing"]
     elif model_name == "resnet50":
         model = ResNet50()
         weights = get_weights_path("resnet50", args.weights)
         default_layers = ["layer4_2"]
+    else:  # googlenet
         model = GoogLeNet()
         weights = get_weights_path("googlenet", args.weights)
         default_layers = ["inception3b", "inception4c", "inception4d"]
     if not os.path.exists(weights):
         print(f"Error: Weights NPZ not found: {weights}. Skipping {model_name}.")
         return
     print(f"Loading weights from: {weights}")
     model.load_npz(weights)
         smoothing=current_smoothing,
         guide_img_np=guide_img_np,
     )
     end_time = time.time()
     elapsed = end_time - start_time
     if args.output:
         out = args.output
     else:
     p.add_argument("--input", required=True, help="Input image path")
     p.add_argument("--output", help="Output image path (optional)")
     p.add_argument("--guide", help="Guide image for guided dreaming")
+    p.add_argument(
+        "--width",
+        type=int,
+        default=None,
+        help="Resize input to width (maintains aspect ratio)",
+    )
+    p.add_argument(
+        "--img_width", type=int, help="Alias for --width", dest="width"
+    )  # Alias
     p.add_argument(
         "--model",
         choices=["vgg16", "vgg19", "googlenet", "resnet50", "all"],
         help="Model to use. 'all' runs all models.",
     )
     p.add_argument("--preset", choices=["nb14", "nb20", "nb28"], help="VGG16 presets")
     p.add_argument("--layers", nargs="+", help="Layers to maximize")
+    p.add_argument(
+        "--steps", type=int, default=10, help="Gradient ascent steps per octave"
+    )
     p.add_argument("--lr", type=float, default=0.09, help="Learning rate (step size)")
     p.add_argument("--octaves", type=int, default=4, help="Number of image octaves")
+    p.add_argument(
+        "--pyramid_size", type=int, dest="octaves", help="Alias for --octaves"
+    )  # Alias
     p.add_argument("--scale", type=float, default=1.8, help="Octave scale factor")
+    p.add_argument(
+        "--pyramid_ratio", type=float, dest="scale", help="Alias for --scale"
+    )  # Alias
+    p.add_argument(
+        "--octave_scale", type=float, dest="scale", help="Alias for --scale"
+    )  # Alias
     p.add_argument("--jitter", type=int, default=32, help="Jitter amount (pixels)")
+    p.add_argument(
+        "--smoothing", type=float, default=0.5, help="Gradient smoothing strength"
+    )
+    p.add_argument(
+        "--smoothing_coefficient",
+        type=float,
+        dest="smoothing",
+        help="Alias for --smoothing",
+    )  # Alias
     p.add_argument("--weights", help="Custom weights path")
     return p.parse_args()
     args = parse_args()
     img_np = load_image(args.input, args.width)
+    if args.model == "all":
         models = ["vgg16", "vgg19", "googlenet", "resnet50"]
         if args.output:
+            print(
+                "Warning: --output argument ignored because --model='all' was selected."
+            )
+            args.output = None
         for m in models:
             run_dream_for_model(m, args, img_np)
     else:
 if __name__ == "__main__":
+    main()

export_models.py ADDED Viewed

	@@ -0,0 +1,93 @@

+"""
+Unified export script for converting PyTorch models to MLX .npz format.
+Supports VGG16, VGG19, GoogLeNet, and ResNet50.
+Handles float32, float16 (the CLI default; written with the legacy `_bf16` file suffix), and int8-quantized exports.
+Usage:
+    python export_models.py --model all --dtype float16
+    python export_models.py --model vgg16
+"""
+import argparse
+import os
+import numpy as np
+import torch
+import torchvision.models as models
def get_model_info(model_name):
    """Return the torchvision constructor and pretrained weights for a model.

    Args:
        model_name: One of "vgg16", "vgg19", "googlenet" or "resnet50".

    Returns:
        A ``(constructor, weights)`` tuple, where ``constructor`` is the
        ``torchvision.models`` factory function and ``weights`` is that
        model's ``IMAGENET1K_V1`` weights enum member.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    # (CLI name, constructor attribute, weights-enum attribute), all looked
    # up on the ``models`` module only once a name has matched.
    supported = (
        ("vgg16", "vgg16", "VGG16_Weights"),
        ("vgg19", "vgg19", "VGG19_Weights"),
        ("googlenet", "googlenet", "GoogLeNet_Weights"),
        ("resnet50", "resnet50", "ResNet50_Weights"),
    )
    for name, builder_attr, weights_attr in supported:
        if model_name == name:
            builder = getattr(models, builder_attr)
            pretrained = getattr(models, weights_attr).IMAGENET1K_V1
            return builder, pretrained
    raise ValueError(f"Unknown model: {model_name}")
def _quantize_symmetric_int8(values):
    """Quantize an array to int8 with a single symmetric per-tensor scale.

    Args:
        values: NumPy float array to quantize.

    Returns:
        ``(quantized, scale)`` where ``quantized`` is an ``int8`` array in
        ``[-127, 127]`` and ``quantized * scale`` approximates ``values``.
    """
    v_max = np.max(np.abs(values))
    # An all-zero tensor would give a zero scale; use 1.0 to avoid dividing
    # by zero (the quantized values are all zero either way).
    scale = 1.0 if v_max == 0 else v_max / 127.0
    # Round to the nearest integer. A bare astype(np.int8) truncates toward
    # zero, which biases every weight toward zero and doubles the worst-case
    # quantization error. The clip is a guard against float round-off.
    quantized = np.clip(np.rint(values / scale), -127, 127).astype(np.int8)
    return quantized, scale


def export_model(model_name, dtype="float32"):
    """Export one torchvision model's state dict to ``<model>_mlx<suffix>.npz``.

    Args:
        model_name: Model key accepted by ``get_model_info``.
        dtype: Storage format.
            * ``"float32"`` - stored as float32, no file suffix.
            * ``"float16"`` / ``"bf16"`` / ``"half"`` - stored as float16;
              the file keeps the legacy ``_bf16`` suffix.
            * ``"int8"`` - tensors whose key contains ``"weight"`` and that
              have at least two dimensions are stored as ``<key>_int8`` plus
              a float16 ``<key>_scale``; everything else is stored as float16.
              The file suffix is ``_int8``.
            Any other value behaves like ``"float32"``.

    Side effects:
        Writes the ``.npz`` file into the current working directory and
        prints the resulting size next to the approximate float32 size.
    """
    print(f"Exporting {model_name} ({dtype})...")
    model_fn, weights = get_model_info(model_name)
    model = model_fn(weights=weights)
    model.eval()
    state = model.state_dict()

    target_type = np.float32
    suffix = ""
    quantize_int8 = False
    if dtype in ("float16", "bf16", "half"):
        target_type = np.float16
        suffix = "_bf16"  # Keep legacy suffix for compatibility with dream.py logic
    elif dtype == "int8":
        target_type = np.float16  # Base type for scales/biases
        suffix = "_int8"
        quantize_int8 = True

    converted_state = {}
    for k, v in state.items():
        v_np = v.cpu().detach().numpy()
        if quantize_int8 and "weight" in k and v_np.ndim >= 2:
            v_int8, scale = _quantize_symmetric_int8(v_np)
            converted_state[f"{k}_int8"] = v_int8
            converted_state[f"{k}_scale"] = np.array(scale).astype(target_type)
        else:
            converted_state[k] = v_np.astype(target_type)

    out_name = f"{model_name}_mlx{suffix}.npz"
    np.savez(out_name, **converted_state)

    # Baseline assumes 4 bytes per element, i.e. an all-float32 checkpoint.
    original_size = sum(v.numel() * 4 for v in state.values()) / (1024 * 1024)
    new_size = os.path.getsize(out_name) / (1024 * 1024)
    print(f"✅ Saved {out_name}")
    print(f"   Size: {new_size:.1f} MB (Original: ~{original_size:.1f} MB)")
def main():
    """Parse the command line and export every requested model.

    ``--model`` selects one model or ``all`` (the default); ``--dtype``
    selects the storage format passed through to ``export_model``
    (default ``float16``).
    """
    all_models = ["vgg16", "vgg19", "googlenet", "resnet50"]

    parser = argparse.ArgumentParser(description="Export PyTorch models to MLX")
    parser.add_argument(
        "--model",
        choices=[*all_models, "all"],
        default="all",
    )
    parser.add_argument(
        "--dtype",
        choices=["float32", "float16", "bf16", "int8"],
        default="float16",
        help="Output data type",
    )
    args = parser.parse_args()

    if args.model == "all":
        selected = all_models
    else:
        selected = [args.model]

    for name in selected:
        export_model(name, args.dtype)
+if __name__ == "__main__":
+    main()

googlenet_mlx_int8.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f0fb7a656a2a69cfbbd42804d38e475d09eade67681fb813b9e8f78f1930da22
+size 6791204

mlx_googlenet.py CHANGED Viewed

@@ -110,19 +110,36 @@ class GoogLeNet(nn.Module):
     def load_npz(self, path: str):
         data = np.load(path)
-        def to_mlx_weight(w):
-            # PyTorch Conv2d weights are (out_channels, in_channels, kH, kW)
-            # MLX expects channel-last filters: (out_channels, kH, kW, in_channels)
-            return np.transpose(w, (0, 2, 3, 1)) if w.ndim == 4 else w
         def load_conv_bn(prefix, seq_mod: nn.Sequential):
             conv = seq_mod.layers[0]
             bn = seq_mod.layers[1]
-            conv.weight = mx.array(to_mlx_weight(data[f"{prefix}.conv.weight"]))
-            bn.weight = mx.array(data[f"{prefix}.bn.weight"])
-            bn.bias = mx.array(data[f"{prefix}.bn.bias"])
-            bn.running_mean = mx.array(data[f"{prefix}.bn.running_mean"])
-            bn.running_var = mx.array(data[f"{prefix}.bn.running_var"])
         load_conv_bn("conv1", self.conv1)
         load_conv_bn("conv2", self.conv2)

     def load_npz(self, path: str):
         data = np.load(path)
+        def load_weight(key, target_module, param_name="weight", transpose=False):
+            # Check for standard float16/32 key
+            if key in data:
+                w = data[key]
+            # Check for int8 quantized key
+            elif f"{key}_int8" in data:
+                w_int8 = data[f"{key}_int8"]
+                scale = data[f"{key}_scale"]
+                # Dequantize
+                w = w_int8.astype(scale.dtype) * scale
+            else:
+                raise ValueError(f"Missing key {key} (or {key}_int8) in npz")
+            # Transpose for Conv2d weights if needed (PyTorch [O,I,H,W] -> MLX [O,H,W,I])
+            if transpose and w.ndim == 4:
+                w = np.transpose(w, (0, 2, 3, 1))
+            # Assign to module
+            target_module[param_name] = mx.array(w)
         def load_conv_bn(prefix, seq_mod: nn.Sequential):
             conv = seq_mod.layers[0]
             bn = seq_mod.layers[1]
+            load_weight(f"{prefix}.conv.weight", conv, transpose=True)
+            load_weight(f"{prefix}.bn.weight", bn)
+            load_weight(f"{prefix}.bn.bias", bn, param_name="bias")
+            load_weight(f"{prefix}.bn.running_mean", bn, param_name="running_mean")
+            load_weight(f"{prefix}.bn.running_var", bn, param_name="running_var")
         load_conv_bn("conv1", self.conv1)
         load_conv_bn("conv2", self.conv2)

mlx_resnet50.py CHANGED Viewed

@@ -114,17 +114,28 @@ class ResNet(nn.Module):
     def load_npz(self, path: str):
         data = np.load(path)
-        def to_mlx_weight(w):
-            return np.transpose(w, (0, 2, 3, 1)) if w.ndim == 4 else w
         def load_bn(prefix, bn):
-            bn.weight = mx.array(data[f"{prefix}.weight"])
-            bn.bias = mx.array(data[f"{prefix}.bias"])
-            bn.running_mean = mx.array(data[f"{prefix}.running_mean"])
-            bn.running_var = mx.array(data[f"{prefix}.running_var"])
         def load_conv(prefix, conv):
-            conv.weight = mx.array(to_mlx_weight(data[f"{prefix}.weight"]))
         # Initial layers
         load_conv("conv1", self.conv1)

     def load_npz(self, path: str):
         data = np.load(path)
+        def load_weight(key, transpose=False):
+            if key in data:
+                w = data[key]
+            elif f"{key}_int8" in data:
+                w_int8 = data[f"{key}_int8"]
+                scale = data[f"{key}_scale"]
+                w = w_int8.astype(scale.dtype) * scale
+            else:
+                raise ValueError(f"Missing key {key} in npz")
+            if transpose and w.ndim == 4:
+                w = np.transpose(w, (0, 2, 3, 1))
+            return mx.array(w)
         def load_bn(prefix, bn):
+            bn.weight = load_weight(f"{prefix}.weight")
+            bn.bias = load_weight(f"{prefix}.bias")
+            bn.running_mean = load_weight(f"{prefix}.running_mean")
+            bn.running_var = load_weight(f"{prefix}.running_var")
         def load_conv(prefix, conv):
+            conv.weight = load_weight(f"{prefix}.weight", transpose=True)
         # Initial layers
         load_conv("conv1", self.conv1)

mlx_vgg16.py CHANGED Viewed

@@ -79,13 +79,25 @@ class VGG16(nn.Module):
     def load_npz(self, path: str):
         data = np.load(path)
-        def to_mlx_weight(w):
-            return np.transpose(w, (0, 2, 3, 1)) if w.ndim == 4 else w
         conv_indices = [0, 2, 5, 7, 10, 12, 14, 17, 19, 21, 24, 26, 28]
         for idx in conv_indices:
             conv = self.layers[idx]
             weight_key = f"features.{idx}.weight"
             bias_key = f"features.{idx}.bias"
-            conv.weight = mx.array(to_mlx_weight(data[weight_key]))
-            conv.bias = mx.array(data[bias_key])

     def load_npz(self, path: str):
         data = np.load(path)
+        def load_weight(key, transpose=False):
+            if key in data:
+                w = data[key]
+            elif f"{key}_int8" in data:
+                w_int8 = data[f"{key}_int8"]
+                scale = data[f"{key}_scale"]
+                w = w_int8.astype(scale.dtype) * scale
+            else:
+                raise ValueError(f"Missing key {key} in npz")
+            if transpose and w.ndim == 4:
+                w = np.transpose(w, (0, 2, 3, 1))
+            return mx.array(w)
         conv_indices = [0, 2, 5, 7, 10, 12, 14, 17, 19, 21, 24, 26, 28]
         for idx in conv_indices:
             conv = self.layers[idx]
             weight_key = f"features.{idx}.weight"
             bias_key = f"features.{idx}.bias"
+            conv.weight = load_weight(weight_key, transpose=True)
+            conv.bias = load_weight(bias_key)

mlx_vgg19.py CHANGED Viewed

@@ -92,13 +92,25 @@ class VGG19(nn.Module):
     def load_npz(self, path: str):
         data = np.load(path)
-        def to_mlx_weight(w):
-            return np.transpose(w, (0, 2, 3, 1)) if w.ndim == 4 else w
         conv_indices = [0, 2, 5, 7, 10, 12, 14, 16, 19, 21, 23, 25, 28, 30, 32, 34]
         for idx in conv_indices:
             conv = self.layers[idx]
             weight_key = f"features.{idx}.weight"
             bias_key = f"features.{idx}.bias"
-            conv.weight = mx.array(to_mlx_weight(data[weight_key]))
-            conv.bias = mx.array(data[bias_key])

     def load_npz(self, path: str):
         data = np.load(path)
+        def load_weight(key, transpose=False):
+            if key in data:
+                w = data[key]
+            elif f"{key}_int8" in data:
+                w_int8 = data[f"{key}_int8"]
+                scale = data[f"{key}_scale"]
+                w = w_int8.astype(scale.dtype) * scale
+            else:
+                raise ValueError(f"Missing key {key} in npz")
+            if transpose and w.ndim == 4:
+                w = np.transpose(w, (0, 2, 3, 1))
+            return mx.array(w)
         conv_indices = [0, 2, 5, 7, 10, 12, 14, 16, 19, 21, 23, 25, 28, 30, 32, 34]
         for idx in conv_indices:
             conv = self.layers[idx]
             weight_key = f"features.{idx}.weight"
             bias_key = f"features.{idx}.bias"
+            conv.weight = load_weight(weight_key, transpose=True)
+            conv.bias = load_weight(bias_key)

quantize_experiment.py ADDED Viewed

	@@ -0,0 +1,182 @@

+import mlx.core as mx
+import mlx.nn as nn
+import numpy as np
+from mlx_googlenet import GoogLeNet
+import os
def main():
    """Quantize ``googlenet_mlx_bf16.npz`` to int8 and report the size change.

    Arrays whose key contains ``"weight"`` and that have at least two
    dimensions are stored as ``<key>_int8`` (int8 values) plus ``<key>_scale``
    (a float16 scalar), so a loader can rebuild them as
    ``int8_weight.astype(float16) * scale``. All other arrays are stored as
    float16. The result is written to ``googlenet_mlx_int8.npz`` in the
    current directory and a size summary is printed.
    """
    print("--- Attempting Extreme Quantization (4-bit / 8-bit) ---")
    # Load standard model. The model object is not used below; quantization
    # works on the raw .npz so the flat key names used by load_npz are kept.
    model = GoogLeNet()
    model.load_npz("googlenet_mlx_bf16.npz")
    print("Original Weights Loaded.")
    print("\nStrategy: Quantize weights to INT8 (Storage Optimization)")

    compressed_state = {}
    total_original = 0
    total_compressed = 0

    # Use the NpzFile as a context manager so the archive is closed afterwards.
    with np.load("googlenet_mlx_bf16.npz") as data:
        for k in data.files:
            v = mx.array(data[k])
            # Heuristic for conv/linear weights: key contains "weight" and
            # the array has at least two dimensions.
            if "weight" in k and v.ndim >= 2:
                v_max = mx.max(mx.abs(v))
                # Scale to range [-127, 127]; fall back to 1.0 for an
                # all-zero tensor to avoid dividing by zero.
                scale = v_max / 127.0
                scale = mx.where(scale == 0, 1.0, scale)
                # Round to nearest before the cast: astype(int8) alone
                # truncates toward zero, which biases weights toward zero and
                # doubles the worst-case error. Clip guards float round-off.
                v_int8 = mx.clip(mx.round(v / scale), -127, 127).astype(mx.int8)

                # Save components
                compressed_state[f"{k}_int8"] = np.array(v_int8)
                compressed_state[f"{k}_scale"] = np.array(scale.astype(mx.float16))

                total_original += v.nbytes
                total_compressed += v_int8.nbytes + 2  # + one float16 scale
            else:
                # Save as is (float16)
                compressed_state[k] = np.array(v.astype(mx.float16))
                total_original += v.nbytes
                total_compressed += v.nbytes

    out_name = "googlenet_mlx_int8.npz"
    np.savez(out_name, **compressed_state)
    print(f"\n✅ Saved {out_name}")
    print(f"   Original Size: {total_original / (1024*1024):.2f} MB")
    print(f"   Quantized Size: {total_compressed / (1024*1024):.2f} MB")
    print(f"   Reduction: {100 * (1 - total_compressed/total_original):.1f}%")
+if __name__ == "__main__":
+    main()

resnet50_mlx_int8.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e1ab804f8257e78f03244ea033cdd55ed6b285317cf444c04234b3ce1d0e3961
+size 25822834

vgg16_mlx_int8.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17f8012268ac3cb74fd3c8ce5d243970b13141492b2e0e84fab1924a786ec25f
+size 138384160

vgg19_mlx_int8.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:13309dd1b75cf316b0025db0c5791d6a89c654145a5f3a486f488b4bcd822b93
+size 143697608