# sentimeter / convert_model.py
# (Hugging Face Hub viewer residue — author: rhmnsae, commit message: "fix", revision 004817c)
"""
One-time script to convert IndoBERT sentiment model to ONNX format (quantized).
Run this once: python convert_model.py
After conversion, the 'model/onnx/' folder will contain the quantized ONNX model
that can be loaded directly in the browser via ONNX Runtime Web.
"""
import os
import json
import torch
import shutil
from pathlib import Path
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
# --- Configuration -----------------------------------------------------
# Hub id of the Indonesian BERT sentiment checkpoint to convert.
MODEL_ID = "mdhugol/indonesia-bert-sentiment-classification"
OUTPUT_DIR = Path("./model")
ONNX_DIR = OUTPUT_DIR / "onnx"

# Start from a clean slate: discard any previous ONNX export before
# recreating the output directory tree.
if ONNX_DIR.exists():
    shutil.rmtree(ONNX_DIR)
ONNX_DIR.mkdir(parents=True, exist_ok=True)
print(f"[1/5] Loading model: {MODEL_ID}")
config = AutoConfig.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
model.eval()  # inference mode for a deterministic export

print("[2/5] Creating dummy input for ONNX export...")
# One fixed-length sample (padded/truncated to 128 tokens) so the
# exporter has concrete tensors to trace the graph with.
dummy_text = "Ini adalah contoh kalimat untuk testing"
inputs = tokenizer(
    dummy_text,
    return_tensors="pt",
    padding="max_length",
    max_length=128,
    truncation=True,
)
print("[3/5] Exporting to ONNX (with embedded weights)...")
raw_onnx_path = str(ONNX_DIR / "model_raw.onnx")

# All three token-level inputs share the same dynamic batch/sequence
# axes; the logits only have a dynamic batch dimension.
token_axes = {0: "batch_size", 1: "sequence"}
with torch.no_grad():
    torch.onnx.export(
        model,
        (inputs["input_ids"], inputs["attention_mask"], inputs["token_type_ids"]),
        raw_onnx_path,
        input_names=["input_ids", "attention_mask", "token_type_ids"],
        output_names=["logits"],
        dynamic_axes={
            "input_ids": dict(token_axes),
            "attention_mask": dict(token_axes),
            "token_type_ids": dict(token_axes),
            "logits": {0: "batch_size"},
        },
        opset_version=14,  # weights embedded in the file (no external data)
        do_constant_folding=True,
    )

raw_size = os.path.getsize(raw_onnx_path)
print(f" Raw ONNX size: {raw_size/1024/1024:.1f} MB")
print("[4/5] Quantizing to int8 (dynamic quantization)...")
# Imported here (not at the top) so a missing onnxruntime only fails at
# this stage, after the export already succeeded.
from onnxruntime.quantization import quantize_dynamic, QuantType

quant_onnx_path = str(ONNX_DIR / "model_quantized.onnx")
# Dynamic quantization: weights stored as uint8, activations quantized
# on the fly at inference time.
quantize_dynamic(raw_onnx_path, quant_onnx_path, weight_type=QuantType.QUInt8)

quant_size = os.path.getsize(quant_onnx_path)
print(f" Quantized ONNX size: {quant_size/1024/1024:.1f} MB")
print(f" Compression ratio: {raw_size/quant_size:.1f}x")
# Keep only the quantized artifact, published under the filename the
# browser-side loader expects ("model.onnx").
os.remove(raw_onnx_path)
final_path = str(ONNX_DIR / "model.onnx")
os.rename(quant_onnx_path, final_path)

# Drop any stray external-weight shards the exporter may have produced;
# they are useless now that the raw model is gone.
for shard in ONNX_DIR.glob("*.data"):
    shard.unlink()
print("[5/5] Saving tokenizer and config files...")
# Tokenizer files (vocab, tokenizer_config, ...) go next to the model.
tokenizer.save_pretrained(str(OUTPUT_DIR))

# Rewrite the label mapping so class ids 0/1/2 surface as the Indonesian
# labels Positif/Netral/Negatif in the exported config.
config_data = config.to_dict()
config_data["id2label"] = {"0": "Positif", "1": "Netral", "2": "Negatif"}
config_data["label2id"] = {"Positif": 0, "Netral": 1, "Negatif": 2}
with (OUTPUT_DIR / "config.json").open("w") as cfg_file:
    json.dump(config_data, cfg_file, indent=2)
def _human_size(num_bytes):
    """Format a byte count: MB with one decimal above 1 MiB, else KB."""
    if num_bytes > 1024 * 1024:
        return f"{num_bytes/1024/1024:.1f} MB"
    return f"{num_bytes/1024:.1f} KB"


print(f"\nDone! Model saved to '{OUTPUT_DIR}/'")
print("\nFiles created:")
# Walk the output tree and list every produced file with a readable size.
# (Original crammed this into one conditional-expression print that
# computed os.path.relpath twice and left the walk's `dirs` unused.)
for root, _dirs, files in os.walk(str(OUTPUT_DIR)):
    for filename in sorted(files):
        path = os.path.join(root, filename)
        rel = os.path.relpath(path, str(OUTPUT_DIR))
        print(f" {rel} ({_human_size(os.path.getsize(path))})")
print("\nYou can now run the website with just: npx serve . -p 7860")