StephanST committed on
Commit
b96bc6b
·
verified ·
1 Parent(s): 4407ed8

Upload folder using huggingface_hub

Browse files
so400m/cider-w8a8-g128/README.md ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ license_name: nvidia-open-model-license
4
+ base_model: nvidia/C-RADIOv4-SO400M
5
+ library_name: mlx
6
+ pipeline_tag: image-feature-extraction
7
+ tags:
8
+ - mlx
9
+ - c-radio
10
+ - vision
11
+ - embeddings
12
+ - quantized
13
+ - cider
14
+ - apple-silicon
15
+ ---
16
+
17
+ # C-RADIOv4-SO400M MLX Cider W8A8 g128
18
+
19
+ Balanced Apple M5+ W8A8 runtime bundle for `nvidia/C-RADIOv4-SO400M`.
20
+
21
+ Implementation repository:
22
+
23
+ https://github.com/stephansturges/c-radio_v4_MLX
24
+
25
+ ## Source
26
+
27
+ - Upstream model: https://huggingface.co/nvidia/C-RADIOv4-SO400M
28
+ - Upstream revision: `c0457f5dc26ca145f954cd4fc5bb6114e5705ad8`
29
+ - Local bundle path: `bundles/c-radiov4-so400m-cider-w8a8-g128`
30
+
31
+ ## Format
32
+
33
+ - Runtime: MLX plus Cider
34
+ - Quantization: W8A8, int8 activations, int8 weights, group size 128
35
+ - Required hardware: Apple M5 or newer
36
+ - Required package: https://github.com/Mininglamp-AI/cider
37
+ - Bundle size observed locally: 480 MiB (503,601,048 bytes)
38
+
39
+ ## Measured Accuracy
40
+
41
+ Against local bf16 MLX at `512x512`:
42
+
43
+ | Data | Summary cosine | Spatial cosine |
44
+ | --- | ---: | ---: |
45
+ | Smoke image | 0.998630 | 0.998837 |
46
+ | 12 WALDO crops mean/min | 0.998808 / 0.998460 | 0.999269 / 0.998657 |
47
+
48
+ ## Measured Speed
49
+
50
+ Apple M5 Max, `mlx==0.31.2`, Cider `0.7.0`, compiled forward, no output materialization:
51
+
52
+ | Resolution | Batch | p50 latency | Throughput |
53
+ | ---: | ---: | ---: | ---: |
54
+ | 256x256 | 1 | 10.2 ms | 98.4 images/s |
55
+ | 256x256 | 4 | 26.5 ms | 150.8 images/s |
56
+ | 512x512 | 1 | 31.3 ms | 32.0 images/s |
57
+ | 512x512 | 4 | 112.8 ms | 35.5 images/s |
58
+
59
+ ## Usage
60
+
61
+ ```sh
62
+ cradio-mlx embed \
63
+ --backend mlx-so400m \
64
+ --checkpoint /path/to/this/bundle \
65
+ --image image.jpg \
66
+ --image-size 512 \
67
+ --dtype bfloat16 \
68
+ --save-npz embedding.npz
69
+ ```
70
+
71
+ ## License
72
+
73
+ The implementation code in `c-radio_v4_MLX` is MIT licensed. The model weights and this
74
+ converted bundle are governed by NVIDIA's Open Model License Agreement.
so400m/cider-w8a8-g128/config.json ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "adaptor_configs": {},
3
+ "adaptor_names": null,
4
+ "architectures": [
5
+ "RADIOModel"
6
+ ],
7
+ "args": {
8
+ "aa": null,
9
+ "amp": true,
10
+ "amp_dtype": "bfloat16",
11
+ "amp_impl": "native",
12
+ "aug_repeats": 0,
13
+ "aug_splits": 0,
14
+ "auto_workload_inspector": false,
15
+ "bn_eps": null,
16
+ "bn_momentum": null,
17
+ "cache_dir": null,
18
+ "channels_last": false,
19
+ "checkpoint_folder": null,
20
+ "checkpoint_hist": 10,
21
+ "chk_keep_forever": 50,
22
+ "class_map": "",
23
+ "clip_grad": 4000.0,
24
+ "clip_mode": "norm",
25
+ "cls_token_per_teacher": true,
26
+ "coco_annotations_file": "/datasets/coco2017-adlsa/annotations/captions_val2017.json",
27
+ "coco_image_dir": "/datasets/coco2017-adlsa/val2017",
28
+ "color_jitter": 0.4,
29
+ "cooldown_epochs": 0,
30
+ "cpe_max_size": 2048,
31
+ "cpe_num_registers": null,
32
+ "crd_loss": false,
33
+ "crd_loss_weight": 0.8,
34
+ "crop_pct": null,
35
+ "cutmix": 0.0,
36
+ "cutmix_minmax": null,
37
+ "dataset_download": false,
38
+ "debug_full_knn": false,
39
+ "decay_epochs": 90,
40
+ "decay_milestones": [
41
+ 90,
42
+ 180,
43
+ 270
44
+ ],
45
+ "decay_rate": 0.1,
46
+ "depchain": true,
47
+ "detect_anomaly": false,
48
+ "dist_bn": "reduce",
49
+ "dist_norm_weight": 0.0,
50
+ "distributed": true,
51
+ "drop": 0.0,
52
+ "drop_block": null,
53
+ "drop_connect": null,
54
+ "drop_path": null,
55
+ "dtype": "float32",
56
+ "epoch": 293,
57
+ "epoch_repeats": 0.0,
58
+ "eval": false,
59
+ "eval_metric": "knn_top1",
60
+ "eval_teacher": false,
61
+ "eval_teacher_only": false,
62
+ "eval_throughput": false,
63
+ "fast_norm": false,
64
+ "fd_loss_fn": "MSE",
65
+ "feature_normalization": "PHI_STANDARDIZE",
66
+ "feature_summarizer": "cls_token",
67
+ "feature_upscale_factor": null,
68
+ "force_disable_damp": false,
69
+ "force_disable_spectral_reparam": false,
70
+ "force_new_wandb_id": false,
71
+ "force_spectral_reparam": false,
72
+ "freeze_bn": false,
73
+ "fsdp": true,
74
+ "full_equivariance": false,
75
+ "fuser": "",
76
+ "gp": null,
77
+ "grad_accum_steps": 1,
78
+ "grad_checkpointing": false,
79
+ "head_init_bias": null,
80
+ "head_init_scale": null,
81
+ "head_lr": null,
82
+ "head_warmup": 3,
83
+ "head_weight_decay": 0.03,
84
+ "hflip": 0.5,
85
+ "img_size": null,
86
+ "in_chans": null,
87
+ "initial_checkpoint": null,
88
+ "input_size": null,
89
+ "interpolation": "",
90
+ "layer_decay": null,
91
+ "local_rank": 0,
92
+ "log_interval": 50,
93
+ "log_mlflow": false,
94
+ "log_teacher_timings": true,
95
+ "log_train_metrics_per_epoch": true,
96
+ "log_train_metrics_per_log_interval": true,
97
+ "log_wandb": true,
98
+ "loss_auto_balance": false,
99
+ "lr_base": 0.1,
100
+ "lr_base_scale": "",
101
+ "lr_base_size": 256,
102
+ "lr_cycle_decay": 0.5,
103
+ "lr_cycle_limit": 1,
104
+ "lr_cycle_mul": 1.0,
105
+ "lr_k_decay": 1.0,
106
+ "lr_noise": null,
107
+ "lr_noise_pct": 0.67,
108
+ "lr_noise_std": 1.0,
109
+ "mean": null,
110
+ "mesa": {
111
+ "gaussian_kl": false,
112
+ "lambda": 0.1,
113
+ "ohem": true,
114
+ "shift_equivariance": {
115
+ "a": 0.5,
116
+ "b": 1.0
117
+ },
118
+ "start_epoch": 150
119
+ },
120
+ "min_lr": 0.0001,
121
+ "mixup": 0.0,
122
+ "mixup_mode": "batch",
123
+ "mixup_off_epoch": 0,
124
+ "mixup_prob": 1.0,
125
+ "mixup_switch_prob": 0.5,
126
+ "mlp_hidden_size": 1520,
127
+ "mlp_num_inner": 2,
128
+ "mlp_version": "v2",
129
+ "model": "vit_so400m_patch16_224",
130
+ "model_kwargs": {},
131
+ "model_norm": false,
132
+ "momentum": 0.9,
133
+ "no_custom_validation": false,
134
+ "no_ddp_bb": true,
135
+ "no_knn": false,
136
+ "no_prefetcher": false,
137
+ "no_resume_opt": false,
138
+ "no_save_checkpoint": false,
139
+ "no_val": false,
140
+ "num_classes": null,
141
+ "on_demand_workload_inspector": false,
142
+ "one_logger_app_tag": "",
143
+ "one_logger_is_baseline": false,
144
+ "one_logger_run_name": "",
145
+ "onelogger": null,
146
+ "opt_betas": null,
147
+ "opt_eps": null,
148
+ "overfit": false,
149
+ "patience_epochs": 10,
150
+ "perf_test_no_aug": false,
151
+ "perf_test_no_decode": false,
152
+ "perf_test_no_io": false,
153
+ "perf_test_only_dataloader": false,
154
+ "perf_test_simple_aug": false,
155
+ "pin_mem": false,
156
+ "prefetcher": true,
157
+ "pretrained": false,
158
+ "processed_neck_outputs": null,
159
+ "profile_train_exit_after_profiling": false,
160
+ "profile_train_export_chrome_trace": true,
161
+ "profile_train_export_csv": false,
162
+ "profile_train_iterations": 0,
163
+ "qradio": false,
164
+ "qradio_max_tokens": 512,
165
+ "qradio_min_tokens": 32,
166
+ "qradio_patch_token_mask_initial_ratio": 0.95,
167
+ "qradio_progressive_2d": false,
168
+ "qradio_quantizer": null,
169
+ "qradio_ramp_alpha": 1.5,
170
+ "rank": 0,
171
+ "ratio": [
172
+ 0.75,
173
+ 1.3333333333333333
174
+ ],
175
+ "recount": 1,
176
+ "recovery_interval": 0,
177
+ "register_multiple": 10,
178
+ "remode": "pixel",
179
+ "reprob": 0.0,
180
+ "reset_loss_state": true,
181
+ "resplit": false,
182
+ "sample_tracking": false,
183
+ "save_images": false,
184
+ "scale": [
185
+ 0.5,
186
+ 1.0
187
+ ],
188
+ "sched": "cosine",
189
+ "seed": 42,
190
+ "shift_equivariance": false,
191
+ "smoothing": 0.1,
192
+ "source_tracking": false,
193
+ "spectral_heads": false,
194
+ "spectral_reparam": false,
195
+ "spectral_weight_decay": null,
196
+ "split_bn": false,
197
+ "start_epoch": null,
198
+ "std": null,
199
+ "stream_teachers": false,
200
+ "student_intermediate_indices": null,
201
+ "student_load_skip_state_dict_keys_regex": null,
202
+ "student_reinit_model_layers_regex": null,
203
+ "student_strict_load_ignore_mismatched_shape_keys_regex": null,
204
+ "student_strict_load_ignore_missing_keys_regex": null,
205
+ "student_strict_load_ignore_unexpected_keys_regex": null,
206
+ "student_strict_load_state_dict": false,
207
+ "sync_bn": false,
208
+ "sync_resolutions_across_ranks": true,
209
+ "synchronize_step": false,
210
+ "teachers": [
211
+ {
212
+ "model": "siglip2-g-384",
213
+ "name": "siglip2-g",
214
+ "spatial_mlp_version": "attn",
215
+ "type": "siglip2",
216
+ "use_summary": true
217
+ },
218
+ {
219
+ "model": "dinov3_vit7b16",
220
+ "name": "dino_v3_7b",
221
+ "type": "dino_v3",
222
+ "use_summary": true
223
+ },
224
+ {
225
+ "model": "default",
226
+ "name": "sam3",
227
+ "type": "sam3",
228
+ "use_summary": false
229
+ }
230
+ ],
231
+ "timing_warmup_iters": 20,
232
+ "tokenizer_kwargs": {},
233
+ "tokenizer_type": null,
234
+ "tome": null,
235
+ "torchcompile": null,
236
+ "torchscript": false,
237
+ "train_interpolation": "random",
238
+ "train_split": "train",
239
+ "tta": 0,
240
+ "untie_neck_weights": false,
241
+ "use_coco": false,
242
+ "use_multi_epochs_loader": false,
243
+ "val_ema_only": false,
244
+ "val_split": "val",
245
+ "vflip": 0.0,
246
+ "vitdet_version": 1,
247
+ "wandb_entity": "",
248
+ "wandb_id": "",
249
+ "wandb_job_type": "",
250
+ "wandb_name": "",
251
+ "wandb_project": "",
252
+ "wandb_tags": null,
253
+ "warmup_lr": 1e-05,
254
+ "warmup_prefix": false,
255
+ "worker_seeding": "all",
256
+ "workers": 8,
257
+ "workload_inspector_analyze_nsys_traces": false,
258
+ "workload_inspector_baseline_start_iter": 1500,
259
+ "workload_inspector_major_slowdown_p95_factor": 10.0,
260
+ "workload_inspector_minor_slowdown_p95_factor": 3.0,
261
+ "workload_inspector_no_slowdown_check": false,
262
+ "workload_inspector_simulate_slowdown_num_times": 1,
263
+ "workload_inspector_simulate_slowdown_start_iter": null,
264
+ "world_size": 256
265
+ },
266
+ "auto_map": {
267
+ "AutoConfig": "hf_model.RADIOConfig",
268
+ "AutoModel": "hf_model.RADIOModel"
269
+ },
270
+ "feature_normalizer_config": null,
271
+ "inter_feature_normalizer_config": null,
272
+ "max_resolution": 2048,
273
+ "patch_size": 16,
274
+ "preferred_resolution": [
275
+ 512,
276
+ 512
277
+ ],
278
+ "torch_dtype": "float32",
279
+ "transformers_version": "4.51.3",
280
+ "version": "c-radio_v4-so400m",
281
+ "vitdet_window_size": null
282
+ }
so400m/cider-w8a8-g128/manifest.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-22T11:44:07Z",
3
+ "dtype": "bfloat16",
4
+ "extra": {
5
+ "conversion_state": "quantized_self_contained",
6
+ "quantization_stats": {
7
+ "copied_tensors": 221,
8
+ "padded_features": 1296,
9
+ "padded_tensors": 27,
10
+ "quantized_tensors": 109
11
+ },
12
+ "source_bundle": "bundles/c-radiov4-so400m-bf16",
13
+ "weights_file": "model.safetensors"
14
+ },
15
+ "license": "nvidia-open-model-license",
16
+ "manifest_version": 1,
17
+ "max_resolution": 2048,
18
+ "model_id": "nvidia/C-RADIOv4-SO400M",
19
+ "patch_size": 16,
20
+ "preferred_resolution": 512,
21
+ "quantization": {
22
+ "activation_bits": 8,
23
+ "bits": 8,
24
+ "group_size": 128,
25
+ "mode": "cider-w8a8",
26
+ "runtime": "cider",
27
+ "scheme": "symmetric_per_group",
28
+ "state": "packed_w8a8_runtime",
29
+ "weight_bits": 8
30
+ },
31
+ "revision": "c0457f5dc26ca145f954cd4fc5bb6114e5705ad8",
32
+ "source_files": {
33
+ "README.md": "af7881b8207e7060b8df5b8df4df4e87e628faffc188b494836bbec086233b33",
34
+ "config.json": "6e49e911ff6e980125c208b10ed4b51bb4a92061862cbb8947473f1b33f88fd4",
35
+ "model.safetensors": "aa289c27ae07dca0a21850217afc04430baf2a983c4be5d27b2dcd24347d4dab",
36
+ "preprocessor_config.json": "6ca88d165592015b02a183d26a20fc96748e837d1cb21bde5f611141aca593cf"
37
+ },
38
+ "variant": "so400m"
39
+ }
so400m/cider-w8a8-g128/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa289c27ae07dca0a21850217afc04430baf2a983c4be5d27b2dcd24347d4dab
3
+ size 503601048
so400m/cider-w8a8-g128/preprocessor_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 512,
4
+ "width": 512
5
+ },
6
+ "do_center_crop": false,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": false,
9
+ "do_rescale": true,
10
+ "do_resize": false,
11
+ "image_processor_type": "CLIPImageProcessor",
12
+ "processor_class": "CLIPProcessor",
13
+ "resample": 3,
14
+ "size": {
15
+ "shortest_edge": 512
16
+ }
17
+ }