"""Singleton loader for the tarteel-ai Whisper-tiny Quran ASR model.

All writable storage (HF cache + local model copy) is forced under /tmp so
the script works on read-only app filesystems (e.g. serverless containers).
"""

import os

MODEL_NAME = "tarteel-ai/whisper-tiny-ar-quran"

# Use /tmp for all writable storage
CACHE_DIR = "/tmp/hf_cache"
LOCAL_SAVE_DIR = "/tmp/whisper-tiny-ar-quran-local"

# Make sure dirs exist
os.makedirs(CACHE_DIR, exist_ok=True)
os.makedirs(LOCAL_SAVE_DIR, exist_ok=True)

# BUG FIX: these must be set BEFORE importing transformers — the library (and
# huggingface_hub) resolves its cache directories at import time, so the
# original ordering (import first, env vars after) could leave downloads
# pointing at the non-writable default cache.
os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR  # legacy name, kept for older transformers
os.environ["HF_HOME"] = CACHE_DIR
os.environ["HF_HUB_CACHE"] = CACHE_DIR

from transformers import WhisperProcessor, WhisperForConditionalGeneration  # noqa: E402

# Lazily-populated singletons; populated by load_model() on first call.
_model = None
_processor = None


def load_model():
    """Load the Whisper model/processor once and return ``(model, processor)``.

    First call: loads from LOCAL_SAVE_DIR if a saved copy exists, otherwise
    downloads from the Hub (into CACHE_DIR) and saves a local copy for future
    runs. Subsequent calls return the cached module-level singletons.

    Returns:
        tuple[WhisperForConditionalGeneration, WhisperProcessor]
    """
    global _model, _processor
    if _model is not None and _processor is not None:
        return _model, _processor

    local_config_file = os.path.join(LOCAL_SAVE_DIR, "preprocessor_config.json")
    if os.path.exists(local_config_file):
        # A previously saved copy exists — reuse it to avoid re-downloading.
        _processor = WhisperProcessor.from_pretrained(LOCAL_SAVE_DIR)
        _model = WhisperForConditionalGeneration.from_pretrained(LOCAL_SAVE_DIR)
    else:
        _processor = WhisperProcessor.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
        _model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
        _model.save_pretrained(LOCAL_SAVE_DIR)
        _processor.save_pretrained(LOCAL_SAVE_DIR)

    # Patch the generation config so model.generate(language=..., task=...)
    # works on this fine-tune.
    # BUG FIX: the original hard-coded wrong token ids — {"arabic": 50361}
    # (50361 is <|startofprev|> in the multilingual Whisper vocab; <|ar|> is
    # 50272) and {"transcribe": 0}. Derive the real ids from the tokenizer
    # instead, and use the "<|ar|>" key form that Whisper's generate() looks
    # up after mapping the language name.
    # Also use `is None` instead of hasattr(): the attribute often exists on
    # GenerationConfig but is set to None, which hasattr() would miss.
    gen_cfg = _model.generation_config
    tokenizer = _processor.tokenizer
    if getattr(gen_cfg, "lang_to_id", None) is None:
        gen_cfg.lang_to_id = {"<|ar|>": tokenizer.convert_tokens_to_ids("<|ar|>")}
    if getattr(gen_cfg, "task_to_id", None) is None:
        gen_cfg.task_to_id = {
            "transcribe": tokenizer.convert_tokens_to_ids("<|transcribe|>")
        }
    gen_cfg.language = "arabic"
    gen_cfg.task = "transcribe"

    print(f"✅ Model and processor loaded from: {LOCAL_SAVE_DIR}")
    return _model, _processor