# sentimeter / convert_model.py
# (Hugging Face Hub viewer residue — author: rhmnsae, commit message: "fix", revision 004817c)
"""
One-time script to convert IndoBERT sentiment model to ONNX format (quantized).
Run this once: python convert_model.py
After conversion, the 'model/onnx/' folder will contain the quantized ONNX model
that can be loaded directly in the browser via ONNX Runtime Web.
"""
import os
import json
import torch
import shutil
from pathlib import Path
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
# --- Configuration -----------------------------------------------------
# Hub id of the Indonesian BERT sentiment checkpoint to convert.
MODEL_ID = "mdhugol/indonesia-bert-sentiment-classification"
OUTPUT_DIR = Path("./model")
ONNX_DIR = OUTPUT_DIR / "onnx"

# Start from a clean slate: discard any previous ONNX export before
# recreating the output directory tree.
if ONNX_DIR.exists():
    shutil.rmtree(ONNX_DIR)
ONNX_DIR.mkdir(parents=True, exist_ok=True)
print(f"[1/5] Loading model: {MODEL_ID}")
config = AutoConfig.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
model.eval()  # inference mode for a deterministic export

print("[2/5] Creating dummy input for ONNX export...")
# One fixed-length sample (padded/truncated to 128 tokens) so the
# exporter has concrete tensors to trace the graph with.
dummy_text = "Ini adalah contoh kalimat untuk testing"
inputs = tokenizer(
    dummy_text,
    return_tensors="pt",
    padding="max_length",
    max_length=128,
    truncation=True,
)
print("[3/5] Exporting to ONNX (with embedded weights)...")
raw_onnx_path = str(ONNX_DIR / "model_raw.onnx")

# All three token-level inputs share the same dynamic batch/sequence
# axes; the logits only have a dynamic batch dimension.
token_axes = {0: "batch_size", 1: "sequence"}
with torch.no_grad():
    torch.onnx.export(
        model,
        (inputs["input_ids"], inputs["attention_mask"], inputs["token_type_ids"]),
        raw_onnx_path,
        input_names=["input_ids", "attention_mask", "token_type_ids"],
        output_names=["logits"],
        dynamic_axes={
            "input_ids": dict(token_axes),
            "attention_mask": dict(token_axes),
            "token_type_ids": dict(token_axes),
            "logits": {0: "batch_size"},
        },
        opset_version=14,  # weights embedded in the file (no external data)
        do_constant_folding=True,
    )

raw_size = os.path.getsize(raw_onnx_path)
print(f" Raw ONNX size: {raw_size/1024/1024:.1f} MB")
print("[4/5] Quantizing to int8 (dynamic quantization)...")
# Imported here (not at the top) so a missing onnxruntime only fails at
# this stage, after the export already succeeded.
from onnxruntime.quantization import quantize_dynamic, QuantType

quant_onnx_path = str(ONNX_DIR / "model_quantized.onnx")
# Dynamic quantization: weights stored as uint8, activations quantized
# on the fly at inference time.
quantize_dynamic(raw_onnx_path, quant_onnx_path, weight_type=QuantType.QUInt8)

quant_size = os.path.getsize(quant_onnx_path)
print(f" Quantized ONNX size: {quant_size/1024/1024:.1f} MB")
print(f" Compression ratio: {raw_size/quant_size:.1f}x")
# Keep only the quantized artifact, published under the filename the
# browser-side loader expects ("model.onnx").
os.remove(raw_onnx_path)
final_path = str(ONNX_DIR / "model.onnx")
os.rename(quant_onnx_path, final_path)

# Drop any stray external-weight shards the exporter may have produced;
# they are useless now that the raw model is gone.
for shard in ONNX_DIR.glob("*.data"):
    shard.unlink()
print("[5/5] Saving tokenizer and config files...")
# Tokenizer files (vocab, tokenizer_config, ...) go next to the model.
tokenizer.save_pretrained(str(OUTPUT_DIR))

# Rewrite the label mapping so class ids 0/1/2 surface as the Indonesian
# labels Positif/Netral/Negatif in the exported config.
config_data = config.to_dict()
config_data["id2label"] = {"0": "Positif", "1": "Netral", "2": "Negatif"}
config_data["label2id"] = {"Positif": 0, "Netral": 1, "Negatif": 2}
with (OUTPUT_DIR / "config.json").open("w") as cfg_file:
    json.dump(config_data, cfg_file, indent=2)
def _human_size(num_bytes):
    """Format a byte count: MB with one decimal above 1 MiB, else KB."""
    if num_bytes > 1024 * 1024:
        return f"{num_bytes/1024/1024:.1f} MB"
    return f"{num_bytes/1024:.1f} KB"


print(f"\nDone! Model saved to '{OUTPUT_DIR}/'")
print("\nFiles created:")
# Walk the output tree and list every produced file with a readable size.
# (Original crammed this into one conditional-expression print that
# computed os.path.relpath twice and left the walk's `dirs` unused.)
for root, _dirs, files in os.walk(str(OUTPUT_DIR)):
    for filename in sorted(files):
        path = os.path.join(root, filename)
        rel = os.path.relpath(path, str(OUTPUT_DIR))
        print(f" {rel} ({_human_size(os.path.getsize(path))})")
print("\nYou can now run the website with just: npx serve . -p 7860")