DIVYA-NSHU99 committed on
Commit
b2c073b
·
verified ·
1 Parent(s): dd15286

Update app/main.py

Browse files
Files changed (1) hide show
  1. app/main.py +32 -8
app/main.py CHANGED
@@ -2,12 +2,18 @@ from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel
3
  from typing import Optional
4
  import os
 
5
  from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
6
 
7
  # Ensure models are cached to the runtime disk
8
  os.environ["HF_HOME"] = "/tmp/.cache/huggingface"
9
 
10
- # Import your analyzer
 
 
 
 
 
11
  from app.src.main import TrademarkAnalyzer
12
  from app.src.linguistic import LinguisticAnalyzer
13
 
@@ -25,27 +31,45 @@ analyzer = TrademarkAnalyzer(descriptive_keywords_path=data_path)
25
  @retry(
26
  stop=stop_after_attempt(3),
27
  wait=wait_exponential(multiplier=1, min=2, max=10),
28
- retry=retry_if_exception_type(Exception) # Catch any exception during warmup
29
  )
30
  def warmup():
31
  """
32
- Pre-download all required models with automatic retries.
33
- This handles transient network issues and corrupted cache.
34
  """
35
- print("Warming up: Attempting to load models...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- # 1. Load spaCy model
38
  print("Loading spaCy model...")
39
  LinguisticAnalyzer._get_nlp()
40
  print("✅ spaCy model loaded.")
41
 
42
- # 2. Preload sentence‑transformer model (embedding)
43
  if hasattr(analyzer, 'embedding') and hasattr(analyzer.embedding, 'model'):
44
  print("Loading embedding model...")
45
  _ = analyzer.embedding.model
46
  print("✅ Embedding model ready.")
47
 
48
- # 3. Preload cross‑encoder model
49
  if hasattr(analyzer, 'cross_encoder') and hasattr(analyzer.cross_encoder, 'model'):
50
  print("Loading cross-encoder model...")
51
  _ = analyzer.cross_encoder.model
 
2
  from pydantic import BaseModel
3
  from typing import Optional
4
  import os
5
+ import nltk
6
  from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
7
 
8
  # Ensure models are cached to the runtime disk
9
  os.environ["HF_HOME"] = "/tmp/.cache/huggingface"
10
 
11
+ # Set NLTK data path (must match Dockerfile ENV)
12
+ nltk_data_path = "/tmp/.cache/nltk"
13
+ os.environ["NLTK_DATA"] = nltk_data_path
14
+ nltk.data.path.append(nltk_data_path)
15
+
16
+ # Import your analyzer (after setting paths)
17
  from app.src.main import TrademarkAnalyzer
18
  from app.src.linguistic import LinguisticAnalyzer
19
 
 
31
  @retry(
32
  stop=stop_after_attempt(3),
33
  wait=wait_exponential(multiplier=1, min=2, max=10),
34
+ retry=retry_if_exception_type(Exception)
35
  )
36
  def warmup():
37
  """
38
+ Pre-download all required models and NLTK data with automatic retries.
 
39
  """
40
+ print("Warming up: Attempting to load models and NLTK data...")
41
+
42
+ # ---- NLTK data ----
43
+ # Download WordNet if missing
44
+ try:
45
+ nltk.data.find('corpora/wordnet')
46
+ print("✅ WordNet already present.")
47
+ except LookupError:
48
+ print("Downloading WordNet...")
49
+ nltk.download('wordnet', download_dir=nltk_data_path)
50
+ print("✅ WordNet downloaded.")
51
+
52
+ # Download Punkt tokenizer (used by sent_tokenize)
53
+ try:
54
+ nltk.data.find('tokenizers/punkt')
55
+ print("✅ Punkt tokenizer already present.")
56
+ except LookupError:
57
+ print("Downloading Punkt tokenizer...")
58
+ nltk.download('punkt', download_dir=nltk_data_path)
59
+ print("✅ Punkt tokenizer downloaded.")
60
 
61
+ # ---- spaCy model ----
62
  print("Loading spaCy model...")
63
  LinguisticAnalyzer._get_nlp()
64
  print("✅ spaCy model loaded.")
65
 
66
+ # ---- Sentence‑transformer embedding model ----
67
  if hasattr(analyzer, 'embedding') and hasattr(analyzer.embedding, 'model'):
68
  print("Loading embedding model...")
69
  _ = analyzer.embedding.model
70
  print("✅ Embedding model ready.")
71
 
72
+ # ---- Cross‑encoder model ----
73
  if hasattr(analyzer, 'cross_encoder') and hasattr(analyzer.cross_encoder, 'model'):
74
  print("Loading cross-encoder model...")
75
  _ = analyzer.cross_encoder.model