# Hugging Face Space status banner captured from the Spaces UI
# ("Spaces: Running") — kept as a comment so this file parses as Python.
import os

# Use /tmp for all writable storage: on Hugging Face Spaces the app
# directory is read-only, so caches and the local model copy live here.
CACHE_DIR = "/tmp/hf_cache"
LOCAL_SAVE_DIR = "/tmp/whisper-tiny-ar-quran-local"

# Make sure dirs exist before anything tries to write to them.
os.makedirs(CACHE_DIR, exist_ok=True)
os.makedirs(LOCAL_SAVE_DIR, exist_ok=True)

# Point every Hugging Face cache variable at /tmp *before* importing
# transformers: the library resolves its cache locations at import time,
# so setting these after the import (as the original code did) can be
# too late. The explicit cache_dir= arguments in load_model() remain as
# a belt-and-braces fallback.
os.environ["HF_HOME"] = CACHE_DIR
os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR  # legacy name, for older transformers
os.environ["HF_HUB_CACHE"] = CACHE_DIR

from transformers import WhisperProcessor, WhisperForConditionalGeneration

# Hub id of the fine-tuned Quran-recitation Whisper checkpoint.
MODEL_NAME = "tarteel-ai/whisper-tiny-ar-quran"

# Lazily-initialized module-level singletons, populated by load_model().
_model = None
_processor = None
def load_model():
    """Load (and memoize) the Whisper model and processor.

    First call: prefer the on-disk copy in LOCAL_SAVE_DIR; if it is not
    there yet, download MODEL_NAME from the Hub into CACHE_DIR and save a
    local copy so later cold starts skip the download. Subsequent calls
    return the cached module-level singletons.

    Returns:
        tuple: (model, processor) — a WhisperForConditionalGeneration and
        a WhisperProcessor, both cached in module globals.
    """
    global _model, _processor
    if _model is not None and _processor is not None:
        return _model, _processor

    # The processor config file is used as the marker that a complete
    # local copy was saved on a previous run.
    local_config_file = os.path.join(LOCAL_SAVE_DIR, "preprocessor_config.json")
    if os.path.exists(local_config_file):
        _processor = WhisperProcessor.from_pretrained(LOCAL_SAVE_DIR)
        _model = WhisperForConditionalGeneration.from_pretrained(LOCAL_SAVE_DIR)
    else:
        _processor = WhisperProcessor.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
        _model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
        # Persist so the next cold start loads from disk instead of the Hub.
        _model.save_pretrained(LOCAL_SAVE_DIR)
        _processor.save_pretrained(LOCAL_SAVE_DIR)

    # Patch the generation config: fine-tuned checkpoints can ship without
    # the language/task id maps that generate() needs once language/task
    # are set below. Use `getattr(...) is None` rather than hasattr():
    # the attribute frequently *exists* but is None, and hasattr() would
    # then skip the patch, leaving generate() to fail anyway.
    # NOTE(review): in the standard multilingual Whisper vocabulary,
    # 50361 is <|startofprev|> (<|ar|> is 50272) and <|transcribe|> is
    # 50359, not 0 — confirm these ids against this checkpoint's
    # tokenizer before relying on them. Values kept as-is here.
    if getattr(_model.generation_config, "lang_to_id", None) is None:
        _model.generation_config.lang_to_id = {"arabic": 50361}
    if getattr(_model.generation_config, "task_to_id", None) is None:
        _model.generation_config.task_to_id = {"transcribe": 0}
    _model.generation_config.language = "arabic"
    _model.generation_config.task = "transcribe"

    print(f"✅ Model and processor loaded from: {LOCAL_SAVE_DIR}")
    return _model, _processor