Spaces:

sethmcknight
/

msse-ai-engineering

Sleeping

App Files Files Community

sethmcknight committed on Oct 24, 2025

Commit

6d37c4a

1 Parent(s): b3b90ec

fix: Add lock to prevent ingestion race condition

Browse files

Files changed (1) hide show

src/app_factory.py +40 -12

src/app_factory.py CHANGED Viewed

@@ -6,6 +6,7 @@ This approach allows for easier testing and management of application state.
 import concurrent.futures
 import logging
 import os
 from typing import Any, Dict
 from dotenv import load_dotenv
@@ -27,21 +28,43 @@ def ensure_embeddings_on_startup():
     """
     Ensure embeddings exist and have the correct dimension on app startup.
     This is critical for Render deployments where the vector store is ephemeral.
     """
-    from src.config import (
-        COLLECTION_NAME,
-        CORPUS_DIRECTORY,
-        DEFAULT_CHUNK_SIZE,
-        DEFAULT_OVERLAP,
-        EMBEDDING_DIMENSION,
-        EMBEDDING_MODEL_NAME,
-        RANDOM_SEED,
-        VECTOR_DB_PERSIST_PATH,
-    )
-    from src.ingestion.ingestion_pipeline import IngestionPipeline
-    from src.vector_store.vector_db import VectorDatabase
     try:
         logging.info("Checking vector store on startup...")
         # Initialize vector database to check its state
@@ -85,6 +108,11 @@ def ensure_embeddings_on_startup():
         logging.error(f"Failed to ensure embeddings on startup: {e}")
         # Don't crash the app, but log the error
         # The app will still start but searches may fail
 def create_app(

 import concurrent.futures
 import logging
 import os
+import time
 from typing import Any, Dict
 from dotenv import load_dotenv
     """
     Ensure embeddings exist and have the correct dimension on app startup.
     This is critical for Render deployments where the vector store is ephemeral.
+    Uses a file-based lock to prevent race conditions between workers.
     """
+    lock_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data", "locks")
+    if not os.path.exists(lock_dir):
+        os.makedirs(lock_dir)
+    lock_file = os.path.join(lock_dir, "ingestion.lock")
+    lock_timeout = 180  # 3 minutes
+    start_time = time.time()
+    while os.path.exists(lock_file):
+        if time.time() - start_time > lock_timeout:
+            logging.error(f"Lock file {lock_file} has been present for over {lock_timeout} seconds. Aborting wait.")
+            # In a real-world scenario, you might want to raise an exception
+            # or attempt to delete a stale lock file. For now, we just stop waiting.
+            return
+        logging.info(f"Another process is handling ingestion. Waiting for lock file {lock_file} to be released...")
+        time.sleep(5)
     try:
+        # Acquire lock
+        with open(lock_file, "w") as f:
+            f.write(str(os.getpid()))
+        logging.info(f"Acquired ingestion lock: {lock_file}")
+        from src.config import (
+            COLLECTION_NAME,
+            CORPUS_DIRECTORY,
+            DEFAULT_CHUNK_SIZE,
+            DEFAULT_OVERLAP,
+            EMBEDDING_DIMENSION,
+            EMBEDDING_MODEL_NAME,
+            RANDOM_SEED,
+            VECTOR_DB_PERSIST_PATH,
+        )
+        from src.ingestion.ingestion_pipeline import IngestionPipeline
+        from src.vector_store.vector_db import VectorDatabase
         logging.info("Checking vector store on startup...")
         # Initialize vector database to check its state
         logging.error(f"Failed to ensure embeddings on startup: {e}")
         # Don't crash the app, but log the error
         # The app will still start but searches may fail
+    finally:
+        # Release lock
+        if os.path.exists(lock_file):
+            os.remove(lock_file)
+            logging.info(f"Released ingestion lock: {lock_file}")
 def create_app(