# news-whisper-api / backend / models / export_mt5.py
# Author: Devang1290 — commit 2cb327c
# feat: deploy News Whisper on-demand search API (FastAPI + Docker)
#!/usr/bin/env python3
"""
Export mT5_multilingual_XLSum to ONNX Format
=============================================
Converts the csebuetnlp/mT5_multilingual_XLSum model from PyTorch to ONNX format
for efficient CPU inference in the Hindi summarization pipeline.
This model is mT5-base fine-tuned on XL-Sum (45 languages including Hindi news).
It produces significantly better Hindi summaries than vanilla mT5-small.
The exported model is saved to: models/mt5_onnx/
This needs to be run ONCE before using the Hindi pipeline.
Disk space required: ~2.3 GB
Time: ~5 minutes (first run only)
Usage:
python backend/models/export_mt5.py
"""
import sys
from pathlib import Path
from optimum.onnxruntime import ORTModelForSeq2SeqLM
from transformers import AutoTokenizer
# Hugging Face model id: mT5-base fine-tuned on the XL-Sum dataset (45 languages).
MODEL_ID = "csebuetnlp/mT5_multilingual_XLSum"
# Export destination: <repo-root>/models/mt5_onnx — three levels up from
# backend/models/export_mt5.py.
OUTPUT_DIR = Path(__file__).parent.parent.parent / "models" / "mt5_onnx"
def main():
    """Export MODEL_ID and its tokenizer to ONNX format under OUTPUT_DIR.

    Idempotent: the (slow) export is skipped only when a previous run actually
    completed, i.e. when .onnx files are already present in OUTPUT_DIR.
    Exits with status 1 if the export produced no .onnx files.
    """
    print(f"Exporting {MODEL_ID} to ONNX → {OUTPUT_DIR}")
    # Skip only when a prior export actually produced ONNX graphs — checking
    # bare directory existence would let a failed/partial run (mkdir succeeded,
    # export crashed) permanently block every retry.
    if OUTPUT_DIR.exists() and any(OUTPUT_DIR.glob("*.onnx")):
        print("Output directory already contains ONNX models. Skipping export.")
        return
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    # Single API call — Optimum handles tied weights, ONNX export, configs
    model = ORTModelForSeq2SeqLM.from_pretrained(MODEL_ID, export=True)
    model.save_pretrained(OUTPUT_DIR)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    tokenizer.save_pretrained(OUTPUT_DIR)
    # Verify output: list everything written, then require at least one
    # ONNX graph to be present.
    files = sorted(OUTPUT_DIR.iterdir())
    print(f"\nSaved {len(files)} files to {OUTPUT_DIR}:")
    for f in files:
        size_mb = f.stat().st_size / (1024 * 1024)
        print(f" {f.name:40s} {size_mb:>8.2f} MB")
    onnx_files = list(OUTPUT_DIR.glob("*.onnx"))
    if not onnx_files:
        print("ERROR: No .onnx files were produced!", file=sys.stderr)
        sys.exit(1)
    print(f"\nExport complete. {len(onnx_files)} ONNX model(s) ready.")
# Script entry point: run the export only when invoked directly, not on import.
if __name__ == "__main__":
    main()