fariedalfarizi commited on
Commit
5395cd1
·
1 Parent(s): 84bfbd3

Enable Whisper medium cache using /data persistent storage (HF Pro)

Browse files
Files changed (3) hide show
  1. Dockerfile +12 -8
  2. app/services/speech_to_text.py +2 -1
  3. start.sh +1 -0
Dockerfile CHANGED
@@ -21,9 +21,11 @@ RUN pip install --no-cache-dir -r requirements.txt
21
  # Create cache directory for models BEFORE copying code
22
  # This ensures model downloads are cached even when code changes
23
  RUN mkdir -p /.cache && chmod -R 777 /.cache
 
24
  ENV HF_HOME=/.cache
25
- ENV TORCH_HOME=/.cache
26
  ENV XDG_CACHE_HOME=/.cache
 
27
 
28
  # Pre-download models during build (HF Pro with persistent storage)
29
  # These layers will be CACHED and won't rebuild when only code changes
@@ -35,13 +37,15 @@ RUN python -c "from transformers import AutoTokenizer, AutoModelForSequenceClass
35
  AutoModelForSequenceClassification.from_pretrained('Cyberlace/swara-structure-model', cache_dir='/.cache'); \
36
  print('✅ Structure Model cached!')" && chmod -R 777 /.cache
37
 
38
- # 2. Whisper medium will be downloaded on FIRST REQUEST (lazy loading)
39
- # Build OOM with 1.5GB download - HF Space has limited build memory
40
- # First request will take ~2-3 min extra for one-time download
41
- # RUN python -c "import whisper; \
42
- # print('📥 Downloading Whisper medium model (1.5GB)...'); \
43
- # whisper.load_model('medium', download_root='/.cache'); \
44
- # print('✅ Whisper medium cached!')" && chmod -R 777 /.cache
 
 
45
 
46
  # 3. Download Sentence Transformer for Keywords (~420MB)
47
  RUN python -c "from sentence_transformers import SentenceTransformer; \
 
21
  # Create cache directory for models BEFORE copying code
22
  # This ensures model downloads are cached even when code changes
23
  RUN mkdir -p /.cache && chmod -R 777 /.cache
24
+ RUN mkdir -p /data/.cache && chmod -R 777 /data/.cache
25
  ENV HF_HOME=/.cache
26
+ ENV TORCH_HOME=/data/.cache
27
  ENV XDG_CACHE_HOME=/.cache
28
+ ENV WHISPER_CACHE=/data/.cache
29
 
30
  # Pre-download models during build (HF Pro with persistent storage)
31
  # These layers will be CACHED and won't rebuild when only code changes
 
37
  AutoModelForSequenceClassification.from_pretrained('Cyberlace/swara-structure-model', cache_dir='/.cache'); \
38
  print('✅ Structure Model cached!')" && chmod -R 777 /.cache
39
 
40
+ # 2. Download Whisper medium model (~1.5GB)
41
+ # Using /data for HF Pro Persistent Storage (survives restarts)
42
+ RUN mkdir -p /data/.cache && \
43
+ python -c "import whisper, os; \
44
+ os.environ['TORCH_HOME'] = '/data/.cache'; \
45
+ print('📥 Downloading Whisper medium to persistent storage...'); \
46
+ whisper.load_model('medium', download_root='/data/.cache'); \
47
+ print('✅ Whisper medium cached!')" && \
48
+ chmod -R 777 /data/.cache
49
 
50
  # 3. Download Sentence Transformer for Keywords (~420MB)
51
  RUN python -c "from sentence_transformers import SentenceTransformer; \
app/services/speech_to_text.py CHANGED
@@ -29,7 +29,8 @@ class SpeechToTextService:
29
  print(f"💻 Using device: {self.device}")
30
 
31
  # Check if model is already cached
32
- cache_dir = os.environ.get('XDG_CACHE_HOME', '/.cache')
 
33
  model_cache_path = os.path.join(cache_dir, f'{model_name}.pt')
34
 
35
  # Load Whisper model
 
29
  print(f"💻 Using device: {self.device}")
30
 
31
  # Check if model is already cached
32
+ # Use /data/.cache for Whisper (persistent storage on HF Pro)
33
+ cache_dir = os.environ.get('WHISPER_CACHE', '/data/.cache')
34
  model_cache_path = os.path.join(cache_dir, f'{model_name}.pt')
35
 
36
  # Load Whisper model
start.sh CHANGED
@@ -6,6 +6,7 @@ echo "=========================================="
6
 
7
  # Fix cache permissions on startup (in case of permission issues)
8
  chmod -R 777 /.cache 2>/dev/null || true
 
9
 
10
  # Fix OpenMP warning - set proper thread count
11
  export OMP_NUM_THREADS=4
 
6
 
7
  # Fix cache permissions on startup (in case of permission issues)
8
  chmod -R 777 /.cache 2>/dev/null || true
9
+ chmod -R 777 /data/.cache 2>/dev/null || true
10
 
11
  # Fix OpenMP warning - set proper thread count
12
  export OMP_NUM_THREADS=4