Spaces:

Arsive
/

lt_space

Build error

App Files Files Community

Arsive2 committed on Apr 14, 2025

Commit

eb52047

1 Parent(s): fb3dfc3

Updated to use a smaller model

Browse files

Files changed (4) hide show

Dockerfile +5 -1
api_server.py +38 -38
app/models/translation_model.py +156 -86
requirements.txt +3 -1

Dockerfile CHANGED Viewed

@@ -1,4 +1,5 @@
 FROM python:3.10-bullseye
 WORKDIR /app
 # Install system dependencies
@@ -34,6 +35,9 @@ EXPOSE 7860
 # Set environment variables
 ENV PYTHONUNBUFFERED=1
 # Run the API server
-CMD ["uvicorn", "api_server:app", "--host", "0.0.0.0", "--port", "7860"]

 FROM python:3.10-bullseye
 WORKDIR /app
 # Install system dependencies
 # Set environment variables
 ENV PYTHONUNBUFFERED=1
+ENV OMP_NUM_THREADS=4
+ENV MKL_NUM_THREADS=4
+ENV TORCH_CPU_NUM_THREADS=4
 # Run the API server
+CMD ["uvicorn", "api_server:app", "--host", "0.0.0.0", "--port", "7860", "--timeout-keep-alive", "900"]

api_server.py CHANGED Viewed

@@ -6,6 +6,10 @@ import torch
 import os
 import logging
 import uvicorn
 # Configure logging
 logging.basicConfig(
@@ -30,24 +34,13 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# Set environment variables if not already set
-os.environ.setdefault('TRANSFORMERS_CACHE', '/app/.cache')
-os.environ.setdefault('HF_HOME', '/app/.cache')
-os.environ.setdefault('NLTK_DATA', '/app/nltk_data')
-# Create necessary directories with proper permissions
-os.makedirs(os.environ.get('TRANSFORMERS_CACHE'), exist_ok=True)
-os.makedirs(os.environ.get('NLTK_DATA'), exist_ok=True)
 try:
-    from app.models.text_chunker import TextChunker
-    from app.models.html_processor import HTMLProcessor
-    from app.models.translation_model import TranslationModel
-    # Initialize components
-    text_chunker = TextChunker(max_tokens=250, overlap_tokens=30)
-    html_processor = HTMLProcessor()
     model = TranslationModel()
     initialization_error = None
 except Exception as e:
@@ -80,7 +73,7 @@ async def root():
             "message": "Service initialization failed",
             "error": initialization_error
         }
-    return {"status": "ok", "model": "MADLAD-400", "version": "3B"}
 @app.get("/health")
 async def health_check():
@@ -89,12 +82,10 @@ async def health_check():
         "status": "ok" if not initialization_error else "error",
         "error": initialization_error,
         "environment": {
-            "transformers_cache": os.environ.get('TRANSFORMERS_CACHE'),
-            "hf_home": os.environ.get('HF_HOME'),
-            "nltk_data": os.environ.get('NLTK_DATA'),
             "python_version": os.environ.get('PYTHON_VERSION'),
-            "cuda_available": torch.cuda.is_available() if 'torch' in globals() else "Unknown",
-            "device": str(model.device) if 'model' in globals() and hasattr(model, 'device') else "Unknown"
         }
     }
@@ -105,7 +96,10 @@ async def translate_text(request: TranslationRequest):
         raise HTTPException(status_code=500, detail=f"Service not properly initialized: {initialization_error}")
     try:
-        # Get chunks using TextChunker
         chunks = text_chunker.create_chunks(request.text)
         translated_chunks = []
@@ -143,17 +137,23 @@ async def translate_html(request: HTMLTranslationRequest):
         # Process each text fragment individually
         translated_fragments = []
-        for fragment in text_fragments:
-            if not fragment.strip():
-                translated_fragments.append(fragment)
-                continue
-            translated_text = model.translate(
-                fragment,
-                request.source_lang_code,
-                request.target_lang_code
-            )
-            translated_fragments.append(translated_text)
         # Replace the original text with translated text in the HTML structure
         translated_html = html_processor.replace_text(dom_data, translated_fragments)
@@ -179,9 +179,9 @@ async def process_document(
         file_content = await file.read()
         # Process document to extract text
-        extracted_text = model.process_document(
-            file_content,
-            file.filename,
             use_ocr=use_ocr
         )
@@ -191,7 +191,7 @@ async def process_document(
                 detail="No text could be extracted from the document"
             )
-        # Translate the extracted text
         translated_text = model.translate(
             extracted_text,
             source_lang_code,

 import os
 import logging
 import uvicorn
+from app.models.translation_model import TranslationModel
+from app.models.html_processor import HTMLProcessor
+from app.models.text_chunker import TextChunker
+from app.models.document_processor import DocumentProcessor
 # Configure logging
 logging.basicConfig(
     allow_headers=["*"],
 )
+# Initialize model components
 try:
+    # Use the CPU-optimized translation model
     model = TranslationModel()
+    html_processor = HTMLProcessor()
+    text_chunker = TextChunker(max_tokens=250, overlap_tokens=30)
+    document_processor = DocumentProcessor()
     initialization_error = None
 except Exception as e:
             "message": "Service initialization failed",
             "error": initialization_error
         }
+    return {"status": "ok", "model": "OPUS-MT/NLLB-CPU-Optimized", "version": "1.0"}
 @app.get("/health")
 async def health_check():
         "status": "ok" if not initialization_error else "error",
         "error": initialization_error,
         "environment": {
             "python_version": os.environ.get('PYTHON_VERSION'),
+            "cuda_available": torch.cuda.is_available(),
+            "device": str(model.device) if hasattr(model, 'device') else "Unknown",
+            "loaded_models": list(model.opus_mt_models.keys()) if hasattr(model, 'opus_mt_models') else []
         }
     }
         raise HTTPException(status_code=500, detail=f"Service not properly initialized: {initialization_error}")
     try:
+        # Using the OPUS-MT/NLLB hybrid model for more efficient translation
+        logger.info(f"Translating from {request.source_lang_code} to {request.target_lang_code}")
+        # Create chunks using TextChunker for long texts
         chunks = text_chunker.create_chunks(request.text)
         translated_chunks = []
         # Process each text fragment individually
         translated_fragments = []
+        # Process in smaller batches to avoid timeouts
+        batch_size = 10
+        for i in range(0, len(text_fragments), batch_size):
+            batch = text_fragments[i:i+batch_size]
+            for fragment in batch:
+                if not fragment.strip():
+                    translated_fragments.append(fragment)
+                    continue
+                translated_text = model.translate(
+                    fragment,
+                    request.source_lang_code,
+                    request.target_lang_code
+                )
+                translated_fragments.append(translated_text)
         # Replace the original text with translated text in the HTML structure
         translated_html = html_processor.replace_text(dom_data, translated_fragments)
         file_content = await file.read()
         # Process document to extract text
+        extracted_text = document_processor.process_document(
+            file_data=file_content,
+            filename=file.filename,
             use_ocr=use_ocr
         )
                 detail="No text could be extracted from the document"
             )
+        # Translate the extracted text using our more efficient model
         translated_text = model.translate(
             extracted_text,
             source_lang_code,

app/models/translation_model.py CHANGED Viewed

@@ -3,35 +3,37 @@ import logging
 import re
 import os
 from typing import Optional, Dict, Any, List
-from transformers import T5ForConditionalGeneration, T5Tokenizer
 logger = logging.getLogger(__name__)
 class TranslationModel:
     """
-    Model class for handling the translation functionality using MADLAD-400 model
     """
-    def __init__(self, model_name: str = "google/madlad400-3b-mt"):
         """
-        Initialize the translation model.
         Args:
-            model_name: Name of the Hugging Face model to use
         """
-        self.model_name = model_name
-        self.model = None
-        self.tokenizer = None
         self.device = self._get_device()
         self.initialized = False
         self.initialization_error = None
-        # Ensure cache directory exists and is writable
-        cache_dir = os.environ.get('TRANSFORMERS_CACHE', '/app/.cache')
-        os.makedirs(cache_dir, exist_ok=True)
         try:
-            self._load_model()
             self.initialized = True
         except Exception as e:
             self.initialization_error = str(e)
@@ -42,43 +44,91 @@ class TranslationModel:
         if torch.cuda.is_available():
             logger.info("Using CUDA GPU for translation")
             return torch.device("cuda")
-        elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
-            logger.info("Using Apple MPS (Metal) for translation")
-            return torch.device("mps")
         else:
             logger.info("Using CPU for translation")
             return torch.device("cpu")
-    def _load_model(self):
-        """Load the MADLAD-400 3B translation model."""
         try:
-            logger.info(f"Loading translation model: {self.model_name}")
-            self.tokenizer = T5Tokenizer.from_pretrained(
-                self.model_name,
-                cache_dir=os.environ.get('TRANSFORMERS_CACHE', '/app/.cache')
             )
-            # Use torch_dtype=torch.bfloat16 if available for faster inference
-            if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8:
-                logger.info("Using bfloat16 precision for model loading")
-                self.model = T5ForConditionalGeneration.from_pretrained(
-                    self.model_name,
-                    torch_dtype=torch.bfloat16,
-                    cache_dir=os.environ.get('TRANSFORMERS_CACHE', '/app/.cache')
-                )
-            else:
-                dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-                logger.info(f"Using {dtype} precision for model loading")
-                self.model = T5ForConditionalGeneration.from_pretrained(
-                    self.model_name,
-                    torch_dtype=dtype,
-                    cache_dir=os.environ.get('TRANSFORMERS_CACHE', '/app/.cache')
-                )
-            self.model.to(self.device)
-            logger.info(f"Model loaded successfully on {self.device}")
         except Exception as e:
-            logger.error(f"Error loading model: {str(e)}")
             raise
     def translate(self, text: str, source_lang_code: str, target_lang_code: str) -> str:
@@ -97,57 +147,77 @@ class TranslationModel:
             if not self.initialized:
                 raise ValueError(f"Translation model not properly initialized: {self.initialization_error}")
-            # Prepare input with MADLAD-400 format: <2{target_lang}> {source_text}
-            input_text = f"<2{target_lang_code}> {text}"
-            inputs = self.tokenizer(
-                input_text,
-                return_tensors="pt",
-                padding=True,
-                truncation=True,
-                max_length=512
-            )
-            inputs = {k: v.to(self.device) for k, v in inputs.items()}
-            with torch.no_grad():
-                translated = self.model.generate(
-                    **inputs,
-                    max_length=512,
-                    num_beams=5,
-                    early_stopping=True
-                )
-            translated_text = self.tokenizer.batch_decode(
-                translated,
-                skip_special_tokens=True
-            )[0]
             return re.sub(r'\s+', ' ', translated_text).strip()
         except Exception as e:
             logger.error(f"Translation error: {str(e)}")
             raise
-    def process_document(self, file_data: bytes, filename: str, use_ocr: bool = False) -> str:
-        """
-        Process document to extract text using PyMuPDF and optional OCR.
-        Args:
-            file_data: Raw file content
-            filename: Original filename
-            use_ocr: Whether to use OCR for text extraction
-        Returns:
-            Extracted text as string
-        """
-        if not self.initialized:
-            raise ValueError(f"Translation model not properly initialized: {self.initialization_error}")
-        from app.models.document_processor import DocumentProcessor
-        # Initialize document processor
-        doc_processor = DocumentProcessor()
-        # Process document and extract text
-        return doc_processor.process_document(file_data, filename, use_ocr)

 import re
 import os
 from typing import Optional, Dict, Any, List
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from tqdm import tqdm
 logger = logging.getLogger(__name__)
 class TranslationModel:
     """
+    Translation model built from smaller, CPU-friendly models: a per-language-pair OPUS-MT model is tried first, with the distilled NLLB-200 (600M) model as the fallback
     """
+    def __init__(self, model_cache_dir: str = ".cache/models"):
         """
+        Initialize the translation model manager.
         Args:
+            model_cache_dir: Directory to cache downloaded models
         """
+        self.model_cache_dir = model_cache_dir
         self.device = self._get_device()
+        self.opus_mt_models = {}  # Cache for loaded OPUS-MT models
+        self.fallback_model = None
+        self.fallback_tokenizer = None
         self.initialized = False
         self.initialization_error = None
+        # Create cache directory
+        os.makedirs(model_cache_dir, exist_ok=True)
         try:
+            # Nothing is loaded here: OPUS-MT and fallback models are loaded lazily on first use
+            logger.info("TranslationModel initialized - models will be loaded on demand")
             self.initialized = True
         except Exception as e:
             self.initialization_error = str(e)
         if torch.cuda.is_available():
             logger.info("Using CUDA GPU for translation")
             return torch.device("cuda")
         else:
             logger.info("Using CPU for translation")
             return torch.device("cpu")
+    def _get_opus_mt_model_name(self, source_lang_code: str, target_lang_code: str) -> Optional[str]:
+        """Get the appropriate OPUS-MT model name for the language pair."""
+        # OPUS-MT uses different language codes in some cases
+        lang_code_mapping = {
+            'zh': 'zho',
+            'en': 'eng',
+            'ar': 'ara',
+            'fr': 'fra',
+            'de': 'deu',
+            'ru': 'rus',
+            'pt': 'por',
+            'es': 'spa',
+            'it': 'ita',
+            'nl': 'nld',
+            'pl': 'pol',
+            'ja': 'jpn',
+            'ko': 'kor',
+        }
+        source = lang_code_mapping.get(source_lang_code, source_lang_code)
+        target = lang_code_mapping.get(target_lang_code, target_lang_code)
+        # Build the direct source-target model name (no alternative name is tried here)
+        model_name = f"Helsinki-NLP/opus-mt-{source}-{target}"
+        return model_name
+    def _load_opus_mt_model(self, source_lang_code: str, target_lang_code: str):
+        """Load an OPUS-MT model for the specific language pair."""
+        model_name = self._get_opus_mt_model_name(source_lang_code, target_lang_code)
+        # Check if model already loaded
+        key = f"{source_lang_code}-{target_lang_code}"
+        if key in self.opus_mt_models:
+            return self.opus_mt_models[key]
+        try:
+            logger.info(f"Loading OPUS-MT model: {model_name}")
+            # Use half precision on GPU to save memory; on CPU the model is loaded in float32
+            model = AutoModelForSeq2SeqLM.from_pretrained(
+                model_name,
+                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                cache_dir=self.model_cache_dir,
+                low_cpu_mem_usage=True
+            )
+            tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=self.model_cache_dir)
+            model.to(self.device)
+            logger.info(f"OPUS-MT model loaded successfully: {model_name}")
+            # Cache the model
+            self.opus_mt_models[key] = (model, tokenizer)
+            return model, tokenizer
+        except Exception as e:
+            logger.warning(f"Could not load OPUS-MT model {model_name}: {str(e)}")
+            return None
+    def _load_fallback_model(self):
+        """Load the fallback NLLB-200 model for language pairs without OPUS-MT models."""
+        if self.fallback_model is not None:
+            return
         try:
+            # Use the small distilled version for efficiency on CPU
+            model_name = "facebook/nllb-200-distilled-600M"
+            logger.info(f"Loading fallback model: {model_name}")
+            self.fallback_model = AutoModelForSeq2SeqLM.from_pretrained(
+                model_name,
+                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                cache_dir=self.model_cache_dir,
+                low_cpu_mem_usage=True
             )
+            self.fallback_tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=self.model_cache_dir)
+            self.fallback_model.to(self.device)
+            logger.info(f"Fallback model loaded successfully: {model_name}")
         except Exception as e:
+            logger.error(f"Error loading fallback model: {str(e)}")
             raise
     def translate(self, text: str, source_lang_code: str, target_lang_code: str) -> str:
             if not self.initialized:
                 raise ValueError(f"Translation model not properly initialized: {self.initialization_error}")
+            # Try to use OPUS-MT model first (faster and often better quality)
+            opus_mt_result = self._load_opus_mt_model(source_lang_code, target_lang_code)
+            if opus_mt_result:
+                model, tokenizer = opus_mt_result
+                inputs = tokenizer(text, return_tensors="pt", padding=True)
+                inputs = {k: v.to(self.device) for k, v in inputs.items()}
+                with torch.no_grad():
+                    outputs = model.generate(**inputs, max_length=512, num_beams=4, early_stopping=True)
+                translated_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
+                logger.info(f"Translation completed using OPUS-MT model")
+            else:
+                # Fall back to NLLB model
+                logger.info(f"No OPUS-MT model available for {source_lang_code}-{target_lang_code}, using fallback model")
+                self._load_fallback_model()
+                # NLLB uses a specific format for inputs
+                tokenizer = self.fallback_tokenizer
+                model = self.fallback_model
+                # Tokenize the input text (the target language is forced at generation time below)
+                inputs = tokenizer(text, return_tensors="pt", padding=True)
+                inputs = {k: v.to(self.device) for k, v in inputs.items()}
+                # NLLB language codes are like "eng_Latn", "fra_Latn", etc.
+                nllb_source = _get_nllb_code(source_lang_code)
+                nllb_target = _get_nllb_code(target_lang_code)
+                # Force decoder to start with target language token
+                forced_bos_token_id = tokenizer.lang_code_to_id[nllb_target]
+                with torch.no_grad():
+                    outputs = model.generate(
+                        **inputs,
+                        forced_bos_token_id=forced_bos_token_id,
+                        max_length=512,
+                        num_beams=4,
+                        early_stopping=True
+                    )
+                translated_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
+                logger.info(f"Translation completed using fallback NLLB model")
+            # Clean up the output
             return re.sub(r'\s+', ' ', translated_text).strip()
         except Exception as e:
             logger.error(f"Translation error: {str(e)}")
             raise
+def _get_nllb_code(lang_code: str) -> str:
+    """Convert ISO language code to NLLB language code format."""
+    # Mapping for common languages
+    nllb_mapping = {
+        'en': 'eng_Latn',
+        'fr': 'fra_Latn',
+        'es': 'spa_Latn',
+        'de': 'deu_Latn',
+        'it': 'ita_Latn',
+        'pt': 'por_Latn',
+        'nl': 'nld_Latn',
+        'ru': 'rus_Cyrl',
+        'zh': 'zho_Hans',
+        'ar': 'ara_Arab',
+        'hi': 'hin_Deva',
+        'ja': 'jpn_Jpan',
+        'ko': 'kor_Hang',
+    }
+    return nllb_mapping.get(lang_code, f"{lang_code}_Latn")

requirements.txt CHANGED Viewed

@@ -4,10 +4,12 @@ pydantic==1.10.7
 transformers==4.30.2
 sentencepiece==0.1.99
 accelerate==0.20.3
 python-multipart==0.0.6
 pillow==9.5.0
 nltk==3.8.1
 tqdm==4.65.0
 beautifulsoup4==4.12.2
 PyMuPDF==1.22.5
-protobuf==3.20.3

 transformers==4.30.2
 sentencepiece==0.1.99
 accelerate==0.20.3
+optimum==1.8.8
 python-multipart==0.0.6
 pillow==9.5.0
 nltk==3.8.1
 tqdm==4.65.0
 beautifulsoup4==4.12.2
 PyMuPDF==1.22.5
+protobuf==3.20.3
+torch==2.0.1