Spaces:

froster02
/

BIAF-offASR

Running

App Files Files Community

froster02 committed about 15 hours ago

Commit

6919c1d

1 Parent(s): f7aacd7

feat: optimize for HF Spaces deployment

Browse files

Files changed (4) hide show

Dockerfile +14 -6
backend/app.py +14 -2
backend/download_models.py +14 -0
backend/models.py +59 -17

Dockerfile CHANGED Viewed

@@ -28,19 +28,27 @@ RUN pip install --no-cache-dir -r backend/requirements.txt
 # Copy all source files
 COPY backend/ ./backend/
-# Pre-download and bake models inside the Docker image during the build stage
-# This caches them permanently in the image so they are ready instantly on startup
-RUN python backend/download_models.py backend/models
 # Copy the built React assets from Stage 1 into the backend's static folder
 COPY --from=frontend-builder /app/frontend/dist ./frontend/dist
-# Create a non-root user with UID 1000 (standard for Hugging Face Spaces) and set directory ownerships
-RUN useradd -m -u 1000 user && \
-    chown -R 1000:1000 /app
 # Switch to the non-root user
 USER user
 ENV PATH="/home/user/.local/bin:$PATH"
 # Expose port 7860

 # Copy all source files
 COPY backend/ ./backend/
+# Create a non-root user with UID 1000 (standard for Hugging Face Spaces)
+RUN useradd -m -u 1000 user
+# Set environment variables for model caching in a writable location
+ENV HF_HOME=/app/backend/models/hf_cache
+ENV EASYOCR_MODULE_PATH=/app/backend/models/easyocr
+# Pre-download and bake models inside the Docker image
+# This step runs as root; the directory is created here and ownership is handed to UID 1000 by the chown step below
+RUN mkdir -p /app/backend/models/easyocr && \
+    python backend/download_models.py backend/models
 # Copy the built React assets from Stage 1 into the backend's static folder
 COPY --from=frontend-builder /app/frontend/dist ./frontend/dist
+# Give the non-root user (UID 1000) ownership of /app
+RUN chown -R 1000:1000 /app
 # Switch to the non-root user
 USER user
+ENV HOME=/home/user
 ENV PATH="/home/user/.local/bin:$PATH"
 # Expose port 7860

backend/app.py CHANGED Viewed

@@ -1,11 +1,21 @@
 import os
 import shutil
 import uuid
 from fastapi import FastAPI, UploadFile, File, Form, HTTPException, BackgroundTasks, Depends
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import FileResponse, StreamingResponse
 from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel
 try:
     from langdetect import detect, DetectorFactory
     DetectorFactory.seed = 0
@@ -36,10 +46,12 @@ def clean_temp_folder():
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     # Startup logic
     clean_temp_folder()
-    print("[*] Temporary folder cleared.")
     yield
     # Shutdown logic (if any)
 app = FastAPI(title="Offline Translation API", version="1.0.0", lifespan=lifespan)
@@ -97,7 +109,7 @@ class TTSRequest(BaseModel):
 @app.get("/health")
 def health_check():
-    print("[*] Health check hit")
     return {"status": "healthy"}
 @app.get("/ping")

 import os
 import shutil
 import uuid
+import logging
 from fastapi import FastAPI, UploadFile, File, Form, HTTPException, BackgroundTasks, Depends
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import FileResponse, StreamingResponse
 from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+    handlers=[logging.StreamHandler()]
+)
+logger = logging.getLogger("baif-api")
 try:
     from langdetect import detect, DetectorFactory
     DetectorFactory.seed = 0
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     # Startup logic
+    logger.info("Application starting up...")
     clean_temp_folder()
+    logger.info("Temporary folder cleared.")
     yield
     # Shutdown logic (if any)
+    logger.info("Application shutting down...")
 app = FastAPI(title="Offline Translation API", version="1.0.0", lifespan=lifespan)
 @app.get("/health")
 def health_check():
+    logger.info("Health check hit")
     return {"status": "healthy"}
 @app.get("/ping")

backend/download_models.py CHANGED Viewed

@@ -54,6 +54,20 @@ def download_models(target_dir="./models"):
         except Exception as e:
             print(f"[✗] Error downloading TTS {model_id}: {e}", file=sys.stderr)
     print("\n[✓] All models downloaded successfully and cached for offline use!")
 if __name__ == "__main__":

         except Exception as e:
             print(f"[✗] Error downloading TTS {model_id}: {e}", file=sys.stderr)
+    # 4. EasyOCR Models (Marathi, Hindi, English)
+    print("\n[+] Downloading EasyOCR Models...")
+    try:
+        import easyocr
+        # If EASYOCR_MODULE_PATH is not already set in the environment, point it at <target_dir>/easyocr so the models download to the right place
+        if "EASYOCR_MODULE_PATH" not in os.environ:
+             os.environ["EASYOCR_MODULE_PATH"] = os.path.join(target_dir, "easyocr")
+        # This will trigger the download of models for the specified languages
+        reader = easyocr.Reader(['hi', 'mr', 'en'], gpu=False)
+        print("[✓] Successfully downloaded EasyOCR models")
+    except Exception as e:
+        print(f"[✗] Error downloading EasyOCR models: {e}", file=sys.stderr)
     print("\n[✓] All models downloaded successfully and cached for offline use!")
 if __name__ == "__main__":

backend/models.py CHANGED Viewed

@@ -3,6 +3,7 @@ import torch
 import numpy as np
 import soundfile as sf
 import threading
 from transformers import (
     pipeline,
     AutoModelForSeq2SeqLM,
@@ -12,6 +13,10 @@ from transformers import (
     WhisperForConditionalGeneration
 )
 class ModelManager:
     def __init__(self, cache_dir="./models"):
         self.cache_dir = os.path.abspath(cache_dir)
@@ -30,6 +35,7 @@ class ModelManager:
             self.device = "cpu"
         print(f"[*] ModelManager initialized using device: {self.device} (CI_MODE={self.ci_mode})")
         # Lazy load containers
         self.whisper_pipe = {}
@@ -38,25 +44,45 @@ class ModelManager:
         self.tts_models = {}
         self.tts_tokenizers = {}
     def get_whisper(self, size="base"):
         with self.lock:
             if size not in self.whisper_pipe:
                 model_id = f"openai/whisper-{size}"
                 print(f"[*] Loading STT model {model_id} from {self.cache_dir} on {self.device}...")
-                # Load processor & model from local cache
-                processor = WhisperProcessor.from_pretrained(model_id, cache_dir=self.cache_dir)
-                model = WhisperForConditionalGeneration.from_pretrained(model_id, cache_dir=self.cache_dir)
-                # Pipeline does chunking automatically for long files
-                self.whisper_pipe[size] = pipeline(
-                    "automatic-speech-recognition",
-                    model=model,
-                    tokenizer=processor.tokenizer,
-                    feature_extractor=processor.feature_extractor,
-                    chunk_length_s=30,
-                    device=0 if self.device == "cuda" else (-1 if self.device == "cpu" else "mps")
-                )
             return self.whisper_pipe[size]
     def get_nllb(self):
@@ -64,8 +90,14 @@ class ModelManager:
             if self.nllb_model is None:
                 model_id = "facebook/nllb-200-distilled-600M"
                 print(f"[*] Loading NLLB-200 translation model from {self.cache_dir} on {self.device}...")
-                self.nllb_tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=self.cache_dir)
-                self.nllb_model = AutoModelForSeq2SeqLM.from_pretrained(model_id, cache_dir=self.cache_dir).to(self.device)
             return self.nllb_model, self.nllb_tokenizer
     def get_tts(self, lang):
@@ -81,8 +113,14 @@ class ModelManager:
                     raise ValueError(f"Unsupported TTS language: {lang}")
                 print(f"[*] Loading TTS model for {lang} ({model_id}) on {self.device}...")
-                self.tts_tokenizers[lang] = AutoTokenizer.from_pretrained(model_id, cache_dir=self.cache_dir)
-                self.tts_models[lang] = VitsModel.from_pretrained(model_id, cache_dir=self.cache_dir).to(self.device)
             return self.tts_models[lang], self.tts_tokenizers[lang]
@@ -122,6 +160,7 @@ class ModelManager:
                 stride_length_s=5,
                 generate_kwargs=gen_kwargs
             )
             # Extract segments from chunks
             chunks = result.get("chunks", [])
@@ -193,6 +232,7 @@ class ModelManager:
                 )
             translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
             return translated_text
     def translate_batch(self, texts, src_lang, tgt_lang):
@@ -252,6 +292,7 @@ class ModelManager:
                 )
             translated_texts = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
             # Map back to full results list
             for i, idx in enumerate(non_empty_indices):
@@ -302,5 +343,6 @@ class ModelManager:
             # MMS-TTS models output audio at a 16000 Hz sample rate
             sf.write(output_path, waveform_numpy, samplerate=16000)
             print(f"[✓] TTS audio written to: {output_path}")
             return output_path

 import numpy as np
 import soundfile as sf
 import threading
+import gc
 from transformers import (
     pipeline,
     AutoModelForSeq2SeqLM,
     WhisperForConditionalGeneration
 )
+# Optimize Torch for CPU-only environments like HF Spaces
+if not torch.cuda.is_available():
+    torch.set_num_threads(int(os.cpu_count() or 1))
 class ModelManager:
     def __init__(self, cache_dir="./models"):
         self.cache_dir = os.path.abspath(cache_dir)
             self.device = "cpu"
         print(f"[*] ModelManager initialized using device: {self.device} (CI_MODE={self.ci_mode})")
+        print(f"[*] Cache directory: {self.cache_dir}")
         # Lazy load containers
         self.whisper_pipe = {}
         self.tts_models = {}
         self.tts_tokenizers = {}
+    def _clear_memory(self):
+        """Force garbage collection and clear torch cache if on GPU"""
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        elif self.device == "mps":
+            torch.mps.empty_cache()
     def get_whisper(self, size="base"):
         with self.lock:
             if size not in self.whisper_pipe:
                 model_id = f"openai/whisper-{size}"
                 print(f"[*] Loading STT model {model_id} from {self.cache_dir} on {self.device}...")
+                try:
+                    # Load processor & model from local cache
+                    processor = WhisperProcessor.from_pretrained(model_id, cache_dir=self.cache_dir, local_files_only=True)
+                    model = WhisperForConditionalGeneration.from_pretrained(model_id, cache_dir=self.cache_dir, local_files_only=True)
+                    # Pipeline does chunking automatically for long files
+                    self.whisper_pipe[size] = pipeline(
+                        "automatic-speech-recognition",
+                        model=model,
+                        tokenizer=processor.tokenizer,
+                        feature_extractor=processor.feature_extractor,
+                        chunk_length_s=30,
+                        device=0 if self.device == "cuda" else (-1 if self.device == "cpu" else "mps")
+                    )
+                    print(f"[✓] Whisper-{size} loaded successfully.")
+                except Exception as e:
+                    print(f"[!] Error loading Whisper-{size}: {e}")
+                    # Try without local_files_only as fallback
+                    self.whisper_pipe[size] = pipeline(
+                        "automatic-speech-recognition",
+                        model=model_id,
+                        cache_dir=self.cache_dir,
+                        chunk_length_s=30,
+                        device=0 if self.device == "cuda" else (-1 if self.device == "cpu" else "mps")
+                    )
             return self.whisper_pipe[size]
     def get_nllb(self):
             if self.nllb_model is None:
                 model_id = "facebook/nllb-200-distilled-600M"
                 print(f"[*] Loading NLLB-200 translation model from {self.cache_dir} on {self.device}...")
+                try:
+                    self.nllb_tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=self.cache_dir, local_files_only=True)
+                    self.nllb_model = AutoModelForSeq2SeqLM.from_pretrained(model_id, cache_dir=self.cache_dir, local_files_only=True).to(self.device)
+                    print("[✓] NLLB-200 loaded successfully.")
+                except Exception as e:
+                    print(f"[!] Error loading NLLB-200: {e}")
+                    self.nllb_tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=self.cache_dir)
+                    self.nllb_model = AutoModelForSeq2SeqLM.from_pretrained(model_id, cache_dir=self.cache_dir).to(self.device)
             return self.nllb_model, self.nllb_tokenizer
     def get_tts(self, lang):
                     raise ValueError(f"Unsupported TTS language: {lang}")
                 print(f"[*] Loading TTS model for {lang} ({model_id}) on {self.device}...")
+                try:
+                    self.tts_tokenizers[lang] = AutoTokenizer.from_pretrained(model_id, cache_dir=self.cache_dir, local_files_only=True)
+                    self.tts_models[lang] = VitsModel.from_pretrained(model_id, cache_dir=self.cache_dir, local_files_only=True).to(self.device)
+                    print(f"[✓] TTS model for {lang} loaded successfully.")
+                except Exception as e:
+                    print(f"[!] Error loading TTS for {lang}: {e}")
+                    self.tts_tokenizers[lang] = AutoTokenizer.from_pretrained(model_id, cache_dir=self.cache_dir)
+                    self.tts_models[lang] = VitsModel.from_pretrained(model_id, cache_dir=self.cache_dir).to(self.device)
             return self.tts_models[lang], self.tts_tokenizers[lang]
                 stride_length_s=5,
                 generate_kwargs=gen_kwargs
             )
+            self._clear_memory()
             # Extract segments from chunks
             chunks = result.get("chunks", [])
                 )
             translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
+            self._clear_memory()
             return translated_text
     def translate_batch(self, texts, src_lang, tgt_lang):
                 )
             translated_texts = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
+            self._clear_memory()
             # Map back to full results list
             for i, idx in enumerate(non_empty_indices):
             # MMS-TTS models output audio at a 16000 Hz sample rate
             sf.write(output_path, waveform_numpy, samplerate=16000)
+            self._clear_memory()
             print(f"[✓] TTS audio written to: {output_path}")
             return output_path