multilingual-transliteration / convert_to_ct2.py
HF Deploy
Deploy multilingual transliteration app to Hugging Face Spaces
d14e502
raw
history blame contribute delete
569 Bytes
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
from ctranslate2.converters import TransformersConverter
from transformers import AutoTokenizer
# Source directory of the Transformers model and destination directory for
# the converted CTranslate2 model (both relative to the working directory).
model_dir = "models/translit"
output_dir = "ct2_model"

print("Starting CTranslate2 conversion...")

# Load the slow (use_fast=False) tokenizer once before building the converter.
# The object itself is never used afterwards; per the original author's note
# the load is there to steer clear of the tiktoken code path.
# NOTE(review): whether this preload influences the converter's own tokenizer
# loading is not visible from this script -- confirm.
_slow_tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=False)

# Build the converter for the model directory and write the int8-quantized
# result into output_dir.
# force=True presumably lets an existing output directory be overwritten --
# confirm against the CTranslate2 converter documentation.
converter = TransformersConverter(model_dir)
converter.convert(output_dir, quantization="int8", force=True)

print("✅ Conversion complete! Saved to:", output_dir)