"""Export the IndoBERT sentiment classifier to a quantized ONNX model.

This is a one-off conversion script. Run it once with::

    python convert_model.py

When it finishes, the ``model/onnx/`` folder holds the quantized ONNX
model, which can be loaded directly in the browser via ONNX Runtime Web.
"""

import json
import os
import shutil
from pathlib import Path

import torch
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

MODEL_ID = "mdhugol/indonesia-bert-sentiment-classification"
OUTPUT_DIR = Path("./model")
ONNX_DIR = OUTPUT_DIR / "onnx"

# Start from an empty ONNX directory so files from an earlier run cannot
# linger next to the new model.
if ONNX_DIR.exists():
    shutil.rmtree(ONNX_DIR)
ONNX_DIR.mkdir(parents=True, exist_ok=True)

# --- Step 1: load tokenizer, model and config from the same checkpoint -----
print(f"[1/5] Loading model: {MODEL_ID}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
config = AutoConfig.from_pretrained(MODEL_ID)
model.eval()  # inference mode for the export below

# --- Step 2: build one example batch to drive the export -------------------
print("[2/5] Creating dummy input for ONNX export...")
sample_sentence = "Ini adalah contoh kalimat untuk testing"
encode_options = {
    "return_tensors": "pt",
    "padding": "max_length",
    "max_length": 128,
    "truncation": True,
}
encoded = tokenizer(sample_sentence, **encode_options)

# --- Step 3: export the PyTorch model to ONNX ------------------------------
print("[3/5] Exporting to ONNX (with embedded weights)...")
raw_onnx_path = str(ONNX_DIR / "model_raw.onnx")

# The three graph inputs, in the order they are handed to the model.
graph_inputs = ["input_ids", "attention_mask", "token_type_ids"]
example_args = tuple(encoded[name] for name in graph_inputs)

# Every input is dynamic in batch size and sequence length; the logits
# output is dynamic in batch size only.
axis_spec = {name: {0: "batch_size", 1: "sequence"} for name in graph_inputs}
axis_spec["logits"] = {0: "batch_size"}

# Export with opset 14 and constant folding enabled; gradients are not
# needed, so the export runs under no_grad.
with torch.no_grad():
    torch.onnx.export(
        model,
        example_args,
        raw_onnx_path,
        input_names=graph_inputs,
        output_names=["logits"],
        dynamic_axes=axis_spec,
        opset_version=14,
        do_constant_folding=True,
    )

# Remember the unquantized size so the compression ratio can be reported
# after quantization.
raw_size = os.path.getsize(raw_onnx_path)
print(f" Raw ONNX size: {raw_size/1024/1024:.1f} MB")

# --- Step 4: quantization --------------------------------------------------
print("[4/5] Quantizing to int8 (dynamic quantization)...")
# onnxruntime is imported only here, after the export has completed.
from onnxruntime.quantization import quantize_dynamic, QuantType
# Dynamically quantize the exported model, writing it next to the raw file.
quant_onnx_path = str(ONNX_DIR / "model_quantized.onnx")
quantize_dynamic(
    raw_onnx_path,
    quant_onnx_path,
    weight_type=QuantType.QUInt8,
)

quant_size = os.path.getsize(quant_onnx_path)
print(f" Quantized ONNX size: {quant_size/1024/1024:.1f} MB")
print(f" Compression ratio: {raw_size/quant_size:.1f}x")

# Only the quantized model is kept: delete the raw export, then give the
# quantized file its final name, model.onnx.
os.remove(raw_onnx_path)
final_path = str(ONNX_DIR / "model.onnx")
os.rename(quant_onnx_path, final_path)

# Delete any external-data files (*.data) found in the ONNX directory.
for data_file in ONNX_DIR.glob("*.data"):
    os.remove(data_file)

# --- Step 5: tokenizer and config ------------------------------------------
print("[5/5] Saving tokenizer and config files...")

# Tokenizer files go to the top-level output directory.
tokenizer.save_pretrained(str(OUTPUT_DIR))

# Write config.json with the label mapping replaced by the sentiment names.
# Class index follows the position in this tuple.
sentiment_labels = ("Positif", "Netral", "Negatif")
config_data = config.to_dict()
config_data["id2label"] = {
    str(index): label for index, label in enumerate(sentiment_labels)
}
config_data["label2id"] = {
    label: index for index, label in enumerate(sentiment_labels)
}
with open(str(OUTPUT_DIR / "config.json"), "w") as config_file:
    json.dump(config_data, config_file, indent=2)

print(f"\nDone! Model saved to '{OUTPUT_DIR}/'")

# Summarize every file under the output directory; sizes above 1 MiB are
# shown in MB, everything else in KB.
print("\nFiles created:")
output_root = str(OUTPUT_DIR)
for folder, _subdirs, filenames in os.walk(output_root):
    for filename in sorted(filenames):
        full_path = os.path.join(folder, filename)
        size_bytes = os.path.getsize(full_path)
        relative_name = os.path.relpath(full_path, output_root)
        if size_bytes > 1024 * 1024:
            size_label = f"{size_bytes/1024/1024:.1f} MB"
        else:
            size_label = f"{size_bytes/1024:.1f} KB"
        print(f" {relative_name} ({size_label})")

print("\nYou can now run the website with just: npx serve . -p 7860")