# model.py — uploaded by Arwaaaa (commit 14bda6c)
"""Lazy loader for the tarteel-ai Whisper-tiny Quran ASR model.

All writable storage (the HF hub cache and a local copy of the model) lives
under /tmp so the module works on read-only filesystems (e.g. serverless
containers / HF Spaces).
"""
import os

# Use /tmp for all writable storage.
CACHE_DIR = "/tmp/hf_cache"
LOCAL_SAVE_DIR = "/tmp/whisper-tiny-ar-quran-local"

# Make sure dirs exist before anything tries to write to them.
os.makedirs(CACHE_DIR, exist_ok=True)
os.makedirs(LOCAL_SAVE_DIR, exist_ok=True)

# These must be exported BEFORE `transformers` is imported — the library
# resolves its default cache locations at import time, so setting them
# afterwards (as the original code did) can be silently ignored.
# TRANSFORMERS_CACHE is deprecated in favour of HF_HOME/HF_HUB_CACHE but is
# kept for compatibility with older transformers versions.
os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
os.environ["HF_HOME"] = CACHE_DIR
os.environ["HF_HUB_CACHE"] = CACHE_DIR

from transformers import WhisperProcessor, WhisperForConditionalGeneration

MODEL_NAME = "tarteel-ai/whisper-tiny-ar-quran"

# Module-level singletons populated by load_model(); None until first load.
_model = None
_processor = None
def load_model():
    """Load (and memoize) the Whisper model and processor.

    Returns:
        tuple: ``(_model, _processor)`` — a ``WhisperForConditionalGeneration``
        and its matching ``WhisperProcessor``.

    The first call in a fresh environment downloads the checkpoint into
    ``CACHE_DIR`` and saves a local copy under ``LOCAL_SAVE_DIR``; later
    calls in the same process return the in-memory singletons, and later
    processes reuse the on-disk local copy.
    """
    global _model, _processor
    if _model is not None and _processor is not None:
        return _model, _processor

    # save_pretrained() always writes preprocessor_config.json, so its
    # presence indicates a previously completed local save.
    local_config_file = os.path.join(LOCAL_SAVE_DIR, "preprocessor_config.json")
    if os.path.exists(local_config_file):
        _processor = WhisperProcessor.from_pretrained(LOCAL_SAVE_DIR)
        _model = WhisperForConditionalGeneration.from_pretrained(LOCAL_SAVE_DIR)
    else:
        _processor = WhisperProcessor.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
        _model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
        _model.save_pretrained(LOCAL_SAVE_DIR)
        _processor.save_pretrained(LOCAL_SAVE_DIR)

    # Patch the generation config so generate(language=..., task=...) works.
    # Fine-tuned checkpoints often ship lang_to_id/task_to_id PRESENT but set
    # to None, which hasattr() (used originally) fails to detect; check the
    # value instead of mere attribute existence.
    gen_cfg = _model.generation_config
    if getattr(gen_cfg, "lang_to_id", None) is None:
        # NOTE(review): 50361 does not match the standard multilingual
        # Whisper <|ar|> token id — kept as-is from the original; verify
        # against this checkpoint's tokenizer before relying on it.
        gen_cfg.lang_to_id = {"arabic": 50361}
    if getattr(gen_cfg, "task_to_id", None) is None:
        # NOTE(review): 0 looks like a placeholder rather than the real
        # <|transcribe|> token id — TODO confirm.
        gen_cfg.task_to_id = {"transcribe": 0}
    gen_cfg.language = "arabic"
    gen_cfg.task = "transcribe"

    print(f"✅ Model and processor loaded from: {LOCAL_SAVE_DIR}")
    return _model, _processor