Spaces:

Akshay30
/

decipherai-api

Sleeping

App Files Files Community

Akshay30 committed 3 days ago

Commit

7b9f40a

1 Parent(s): 2f4af3f

Add startup diagnostics for model initialization

Browse files

Files changed (4) hide show

__pycache__/app.cpython-312.pyc +0 -0
app.py +68 -99
models/__pycache__/clip_classifier.cpython-312.pyc +0 -0
models/clip_classifier.py +40 -10

__pycache__/app.cpython-312.pyc ADDED Viewed

Binary file (19 kB). View file

app.py CHANGED Viewed

@@ -60,7 +60,6 @@ allowed_origins = os.getenv(
 CORS(app, origins=allowed_origins.split(","))
 # Global components
-import threading
 config = Config()
 groq_client = None
 clip_classifier = None
@@ -69,16 +68,6 @@ script_detector = None
 cuneiform_processor = None
 references = {}
-# Live model preloading status tracking
-model_status = {
-    "status": "loading",
-    "groq": "pending",
-    "clip": "pending",
-    "translator": "pending",
-    "cuneiform": "pending",
-    "script_detector": "pending"
-}
 def load_references():
     """Load references from JSON file"""
@@ -113,77 +102,57 @@ def load_references():
         }
-def initialize_models_async():
-    """Load models sequentially in the background to prevent blocking Flask startup"""
-    global groq_client, clip_classifier, hf_models, script_detector, cuneiform_processor, model_status
     try:
-        print("[INFO] Background model preloading thread started...")
-        # Log GPU Diagnostics
-        log_gpu_info()
-        # Load references first
-        load_references()
-        # Groq
-        model_status["groq"] = "loading"
-        groq_client = GroqClient()
-        model_status["groq"] = "ready" if groq_client.is_available() else "unavailable"
-        print(f"[INFO] Groq client initialization complete: {model_status['groq']}")
-        # CLIP
-        model_status["clip"] = "loading"
-        clip_classifier = CLIPClassifier()
-        model_status["clip"] = "ready" if (clip_classifier and clip_classifier.pipeline is not None) else "failed"
-        print(f"[INFO] CLIP classifier initialization complete: {model_status['clip']}")
-        # HF Translator
-        model_status["translator"] = "loading"
-        hf_models = HuggingFaceModels()
-        model_status["translator"] = "ready" if (hf_models and hf_models.get_translator() is not None) else "failed"
-        print(f"[INFO] Hugging Face models initialization complete: {model_status['translator']}")
-        # Cuneiform Processor
-        model_status["cuneiform"] = "loading"
-        try:
-            print("[INFO] Initializing cuneiform processor...")
-            cuneiform_processor = CuneiformProcessor(
-                groq_client=groq_client,
-                references=references,
-                clip_classifier=clip_classifier
-            )
-            model_status["cuneiform"] = "ready" if cuneiform_processor.cuneiform_available else "unavailable"
-        except Exception as e:
-            print(f"[ERROR] Failed to initialize cuneiform processor: {e}")
-            model_status["cuneiform"] = "failed"
-            cuneiform_processor = None
-        print(f"[INFO] Cuneiform processor initialization complete: {model_status['cuneiform']}")
-        # Script Detection Service
-        model_status["script_detector"] = "loading"
-        script_detector = ScriptDetectionService(
             groq_client=groq_client,
             references=references,
-            clip_classifier=clip_classifier,
-            translator_pipe=hf_models.get_translator(),
-            cuneiform_processor=cuneiform_processor
         )
-        model_status["script_detector"] = "ready"
-        print(f"[INFO] Script detection service initialization complete: {model_status['script_detector']}")
-        model_status["status"] = "ready"
-        print("[SUCCESS] All models initialized successfully in the background")
     except Exception as e:
-        model_status["status"] = "failed"
-        print(f"[ERROR] Critical failure in background model initialization: {e}")
-def initialize_models():
-    """Spawn background thread to load models"""
-    print("[INFO] Spawning background thread for model initialization...")
-    model_status["status"] = "loading"
-    threading.Thread(target=initialize_models_async, daemon=True).start()
 @app.route('/analyze', methods=['POST'])
@@ -192,13 +161,6 @@ def analyze():
     tmp_path = None
     try:
-        # Check if models are fully loaded
-        if model_status["status"] != "ready":
-            return jsonify({
-                "error": "Models are still loading in the background. Please try again in a few moments.",
-                "status": "loading",
-                "models_status": model_status
-            }), 503
         # Validate request
         if 'image' not in request.files:
@@ -411,10 +373,18 @@ def chat():
 @app.route('/health', methods=['GET'])
 def health_check():
-    """Health check endpoint returning real-time load status"""
     return jsonify({
-        "status": "healthy" if model_status["status"] == "ready" else "initializing",
-        "models_status": model_status
     })
@@ -440,33 +410,32 @@ def info():
     })
-# --- Model initialization ---
-# When running under gunicorn (or any WSGI server), __name__ != "__main__",
-# so we initialize models at module level. The gunicorn --preload flag ensures
-# this runs once in the master process before forking workers.
 def _auto_initialize():
-    """Initialize models when running under a WSGI server (gunicorn, waitress, etc.)"""
     if os.getenv("WERKZEUG_RUN_MAIN") == "true":
         # Flask reloader child process — handled by __main__ block
         return
-    print("[INIT] WSGI server detected — initializing models...")
-    initialize_models()
 if __name__ == "__main__":
-    print("[INIT] Starting Ancient Script Recognition System...")
     # Start Flask app
     port = int(os.getenv("PORT", 7860))
     debug = os.getenv("DEBUG", "False").lower() == "true"
-    # Initialize all models (only in child process if debug mode is on to avoid duplicate threads)
     if not debug or os.environ.get("WERKZEUG_RUN_MAIN") == "true":
-        initialize_models()
     else:
-        print("[INFO] Reloader active. Model initialization deferred to child process.")
-    print(f"[INFO] Starting server on port {port}")
     app.run(host="0.0.0.0", port=port, debug=debug)
 else:
     # Running under gunicorn / WSGI

 CORS(app, origins=allowed_origins.split(","))
 # Global components
 config = Config()
 groq_client = None
 clip_classifier = None
 cuneiform_processor = None
 references = {}
 def load_references():
     """Load references from JSON file"""
         }
+def initialize_components():
+    """Initialize lightweight component wrappers synchronously.
+    No heavy model weights are loaded here — all ML models use lazy loading
+    and will download/load on their first inference call. This ensures the
+    app starts instantly on resource-constrained environments like HF Spaces.
+    """
+    global groq_client, clip_classifier, hf_models, script_detector, cuneiform_processor
+    import time as _time
+    _t0 = _time.time()
+    print("[INIT] Initializing components (lazy loading — no model weights loaded yet)...", flush=True)
+    # Log GPU Diagnostics
+    log_gpu_info()
+    # Load references (small JSON file, instant)
+    load_references()
+    # Groq client (API key check only, no model download)
+    groq_client = GroqClient()
+    groq_status = "ready" if groq_client.is_available() else "unavailable"
+    print(f"[INIT] Groq client: {groq_status}", flush=True)
+    # CLIP classifier (lazy — model loads on first classify call)
+    clip_classifier = CLIPClassifier()
+    # HF Translator (lazy — model loads on first translate call)
+    hf_models = HuggingFaceModels()
+    # Cuneiform processor (lazy — CLIP & translator load on first use)
     try:
+        cuneiform_processor = CuneiformProcessor(
             groq_client=groq_client,
             references=references,
+            clip_classifier=clip_classifier
         )
     except Exception as e:
+        print(f"[ERROR] Failed to create cuneiform processor: {e}", flush=True)
+        cuneiform_processor = None
+    # Script detection service (creates processor instances, all lazy)
+    script_detector = ScriptDetectionService(
+        groq_client=groq_client,
+        references=references,
+        clip_classifier=clip_classifier,
+        translator_pipe=hf_models.get_translator(),
+        cuneiform_processor=cuneiform_processor
+    )
+    print(f"[INIT] All components ready in {_time.time()-_t0:.1f}s (models will load on first request)", flush=True)
 @app.route('/analyze', methods=['POST'])
     tmp_path = None
     try:
         # Validate request
         if 'image' not in request.files:
 @app.route('/health', methods=['GET'])
 def health_check():
+    """Health check endpoint — app is always ready, models load lazily on demand"""
+    models_loaded = {
+        "groq": groq_client.is_available() if groq_client else False,
+        "clip": clip_classifier.is_loaded if clip_classifier else False,
+        "translator": hf_models is not None if hf_models else False,
+        "cuneiform": cuneiform_processor is not None if cuneiform_processor else False,
+        "script_detector": script_detector is not None
+    }
     return jsonify({
+        "status": "healthy",
+        "architecture": "lazy_loading",
+        "models_loaded": models_loaded
     })
     })
+# --- Component initialization ---
+# Lightweight init runs synchronously at module level. No heavy model weights
+# are loaded here — all ML models use lazy loading on first inference call.
 def _auto_initialize():
+    """Initialize components when running under a WSGI server (gunicorn, waitress, etc.)"""
     if os.getenv("WERKZEUG_RUN_MAIN") == "true":
         # Flask reloader child process — handled by __main__ block
         return
+    print("[INIT] WSGI server detected — initializing components...", flush=True)
+    initialize_components()
 if __name__ == "__main__":
+    print("[INIT] Starting Ancient Script Recognition System (lazy loading)...", flush=True)
     # Start Flask app
     port = int(os.getenv("PORT", 7860))
     debug = os.getenv("DEBUG", "False").lower() == "true"
+    # Initialize lightweight components (only in child process if debug mode is on)
     if not debug or os.environ.get("WERKZEUG_RUN_MAIN") == "true":
+        initialize_components()
     else:
+        print("[INFO] Reloader active. Component initialization deferred to child process.")
+    print(f"[INFO] Starting server on port {port}", flush=True)
     app.run(host="0.0.0.0", port=port, debug=debug)
 else:
     # Running under gunicorn / WSGI

models/__pycache__/clip_classifier.cpython-312.pyc ADDED Viewed

Binary file (9.5 kB). View file

models/clip_classifier.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import torch
 from transformers import CLIPProcessor, CLIPModel
 from PIL import Image
@@ -5,44 +6,71 @@ import numpy as np
 from config import Config
 from utils.gpu_diagnostics import log_model_device
 class CLIPClassifier:
     def __init__(self):
         self.config = Config()
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.model = None
         self.processor = None
-        # Load CLIP model and processor with fallback
         model_name = getattr(self.config, 'CLIP_MODEL', 'openai/clip-vit-base-patch32')
         try:
-            print(f"[INFO] Loading CLIP model: {model_name}...")
             self.model = CLIPModel.from_pretrained(model_name)
             self.processor = CLIPProcessor.from_pretrained(model_name)
             self.model.to(self.device)
-            self.model.eval()  # Set model to evaluation mode
             log_model_device("CLIP script classifier", self.device)
-            print(f"[INFO] CLIP model loaded on {self.device}")
         except Exception as e:
-            print(f"[WARN] Failed to load CLIP model '{model_name}': {e}")
             fallback_name = "openai/clip-vit-base-patch32"
             try:
-                print(f"[INFO] Loading fallback CLIP model: {fallback_name}...")
                 self.model = CLIPModel.from_pretrained(fallback_name)
                 self.processor = CLIPProcessor.from_pretrained(fallback_name)
                 self.model.to(self.device)
-                self.model.eval()  # Set model to evaluation mode
                 log_model_device("CLIP script classifier (fallback)", self.device)
-                print(f"[INFO] Fallback CLIP model loaded on {self.device}")
             except Exception as fe:
-                print(f"[ERROR] Failed to load fallback CLIP model: {fe}")
     @property
     def pipeline(self):
         """Property checked in app.py/test.py to ensure model is initialized"""
         return self.model if self.model is not None else None
     def classify_script_type(self, image):
         """Classify script type of image into one of the four supported categories"""
         if not self.pipeline:
             return "unknown", 0.0
@@ -84,6 +112,8 @@ class CLIPClassifier:
     def classify_symbols(self, crops, candidate_labels):
         """Classify segmented symbol image crops against candidate labels"""
         if not self.pipeline or not crops or not candidate_labels:
             return [None] * len(crops) if crops else []

+import time
 import torch
 from transformers import CLIPProcessor, CLIPModel
 from PIL import Image
 from config import Config
 from utils.gpu_diagnostics import log_model_device
 class CLIPClassifier:
     def __init__(self):
         self.config = Config()
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.model = None
         self.processor = None
+        self._loaded = False
+        print("[INFO] CLIPClassifier created (lazy — model will load on first use)", flush=True)
+    def _ensure_loaded(self):
+        """Lazily load CLIP model and processor on first use, with fallback."""
+        if self._loaded:
+            return
         model_name = getattr(self.config, 'CLIP_MODEL', 'openai/clip-vit-base-patch32')
         try:
+            _t0 = time.time()
+            print(f"[CLIP LAZY] Step 1/4 — Loading CLIPModel: {model_name}...", flush=True)
             self.model = CLIPModel.from_pretrained(model_name)
+            print(f"[CLIP LAZY] Step 2/4 — CLIPModel loaded in {time.time()-_t0:.1f}s. Loading CLIPProcessor...", flush=True)
+            _t1 = time.time()
             self.processor = CLIPProcessor.from_pretrained(model_name)
+            print(f"[CLIP LAZY] Step 3/4 — CLIPProcessor loaded in {time.time()-_t1:.1f}s. Moving to {self.device}...", flush=True)
+            _t2 = time.time()
             self.model.to(self.device)
+            self.model.eval()
             log_model_device("CLIP script classifier", self.device)
+            print(f"[CLIP LAZY] Step 4/4 — CLIP ready on {self.device} — total {time.time()-_t0:.1f}s", flush=True)
+            self._loaded = True
         except Exception as e:
+            print(f"[WARN] Failed to load CLIP model '{model_name}': {e}", flush=True)
             fallback_name = "openai/clip-vit-base-patch32"
             try:
+                _t0 = time.time()
+                print(f"[CLIP LAZY] Fallback 1/2 — Loading: {fallback_name}...", flush=True)
                 self.model = CLIPModel.from_pretrained(fallback_name)
                 self.processor = CLIPProcessor.from_pretrained(fallback_name)
+                print(f"[CLIP LAZY] Fallback 2/2 — Moving to {self.device}...", flush=True)
                 self.model.to(self.device)
+                self.model.eval()
                 log_model_device("CLIP script classifier (fallback)", self.device)
+                print(f"[CLIP LAZY] Fallback CLIP ready — total {time.time()-_t0:.1f}s", flush=True)
+                self._loaded = True
             except Exception as fe:
+                print(f"[ERROR] Failed to load fallback CLIP model: {fe}", flush=True)
     @property
     def pipeline(self):
         """Property checked in app.py/test.py to ensure model is initialized"""
         return self.model if self.model is not None else None
+    @property
+    def is_loaded(self):
+        """Check if model has been lazily loaded yet."""
+        return self._loaded
     def classify_script_type(self, image):
         """Classify script type of image into one of the four supported categories"""
+        self._ensure_loaded()
         if not self.pipeline:
             return "unknown", 0.0
     def classify_symbols(self, crops, candidate_labels):
         """Classify segmented symbol image crops against candidate labels"""
+        self._ensure_loaded()
         if not self.pipeline or not crops or not candidate_labels:
             return [None] * len(crops) if crops else []