| """ | |
| One-time ONNX conversion + dynamic INT8 quantization. | |
| Run locally: | |
| python scripts/convert_to_onnx.py | |
| Produces: | |
| onnx_models/siglip_vision_int8.onnx | |
| onnx_models/dinov2_int8.onnx | |
| Fix: attn_implementation="eager" disables scaled_dot_product_attention, | |
| which the legacy PyTorch ONNX exporter cannot trace (TypeError on Sqrt/scale). | |
| """ | |
import os
from pathlib import Path

import torch
import torch.nn as nn
from onnxruntime.quantization import quantize_dynamic, QuantType

OUT_DIR = Path("onnx_models")
OUT_DIR.mkdir(exist_ok=True)
def export_siglip():
    print("Exporting SigLIP vision encoder...")
    from transformers import SiglipVisionModel

    model = SiglipVisionModel.from_pretrained(
        "google/siglip-base-patch16-224",
        attn_implementation="eager",  # disables SDPA — required for ONNX export
    ).eval()

    # Wrap the model so the exported graph has a single tensor output
    # (the pooled image embedding) instead of a ModelOutput dataclass.
    class SigLIPWrapper(nn.Module):
        def __init__(self, m):
            super().__init__()
            self.m = m

        def forward(self, pixel_values):
            return self.m(pixel_values=pixel_values).pooler_output

    wrapper = SigLIPWrapper(model).eval()
    dummy = torch.randn(1, 3, 224, 224)

    # Sanity-check the wrapper in PyTorch before exporting.
    with torch.no_grad():
        test = wrapper(dummy)
    print(f"  Forward pass OK — output shape: {test.shape}")

    fp32_path = OUT_DIR / "siglip_vision.onnx"
    with torch.no_grad():
        torch.onnx.export(
            wrapper, dummy, str(fp32_path),
            input_names=["pixel_values"],
            output_names=["image_embeds"],
            dynamic_axes={"pixel_values": {0: "batch"}, "image_embeds": {0: "batch"}},
            opset_version=14,
            do_constant_folding=True,
        )
    print(f"  fp32 saved ({fp32_path.stat().st_size // 1024 // 1024} MB)")

    # Dynamic quantization: weights are stored as INT8 (roughly 4x smaller)
    # and activations are quantized on the fly, so no calibration data is needed.
    int8_path = OUT_DIR / "siglip_vision_int8.onnx"
    quantize_dynamic(str(fp32_path), str(int8_path), weight_type=QuantType.QInt8)
    print(f"  INT8 saved ({int8_path.stat().st_size // 1024 // 1024} MB)")
    os.remove(fp32_path)  # drop the fp32 intermediate, keep only INT8
def export_dinov2():
    print("\nExporting DINOv2...")
    from transformers import AutoModel

    model = AutoModel.from_pretrained(
        "facebook/dinov2-base",
        attn_implementation="eager",  # same fix as above
    ).eval()

    # Wrap the model to return only the CLS token features as a plain tensor.
    class DINOv2Wrapper(nn.Module):
        def __init__(self, m):
            super().__init__()
            self.m = m

        def forward(self, pixel_values):
            return self.m(pixel_values=pixel_values).last_hidden_state[:, 0, :]

    wrapper = DINOv2Wrapper(model).eval()
    dummy = torch.randn(1, 3, 224, 224)

    with torch.no_grad():
        test = wrapper(dummy)
    print(f"  Forward pass OK — output shape: {test.shape}")

    fp32_path = OUT_DIR / "dinov2.onnx"
    with torch.no_grad():
        torch.onnx.export(
            wrapper, dummy, str(fp32_path),
            input_names=["pixel_values"],
            output_names=["cls_features"],
            dynamic_axes={"pixel_values": {0: "batch"}, "cls_features": {0: "batch"}},
            opset_version=14,
            do_constant_folding=True,
        )
    print(f"  fp32 saved ({fp32_path.stat().st_size // 1024 // 1024} MB)")

    int8_path = OUT_DIR / "dinov2_int8.onnx"
    quantize_dynamic(str(fp32_path), str(int8_path), weight_type=QuantType.QInt8)
    print(f"  INT8 saved ({int8_path.stat().st_size // 1024 // 1024} MB)")
    os.remove(fp32_path)  # drop the fp32 intermediate, keep only INT8
if __name__ == "__main__":
    print(f"PyTorch {torch.__version__}")
    export_siglip()
    export_dinov2()
    print("\nDone. Commit onnx_models/*.onnx to your Space repo.")
    for f in sorted(OUT_DIR.glob("*.onnx")):
        print(f"  {f.name} ({f.stat().st_size // 1024 // 1024} MB)")