Spaces:

lukhsaankumar
/

DeepFakeDetectorBackend

Sleeping

App Files Files Community

lukhsaankumar committed on Apr 20

Commit

d0d4075

1 Parent(s): 49dc015

Deploy DeepFake Detector API - 2026-04-20 01:37:53

Browse files

Files changed (4) hide show

COLD_START_OPTIMIZATION.md +27 -6
app/services/model_registry.py +33 -10
requirements.txt +1 -1
start.sh +5 -0

COLD_START_OPTIMIZATION.md CHANGED Viewed

@@ -263,16 +263,37 @@ Use the same procedure before and after changes.
 4. Capture per-model load durations from logs.
 5. Save a comparison table in this file.
 ## Comparison Template (Fill After Implementation)
 | Metric | Baseline (2026-04-20) | After Phase 1 | After Phase 2 | Final |
 |---|---:|---:|---:|---:|
-| Queue/build to app startup | 28s |  |  |  |
-| App startup to model-ready | 94s |  |  |  |
-| API model load phase | 21s |  |  |  |
-| vit-base load | 13s |  |  |  |
-| deit-distilled load | 5s |  |  |  |
-| Total visible build timed stages | 20.4s |  |  |  |
 ## Expected Outcome

 4. Capture per-model load durations from logs.
 5. Save a comparison table in this file.
+## Phase 1 Results From Latest Logs
+Source log window:
+- Build queued at 2026-04-20 05:04:31
+- Application startup begins at 2026-04-20 05:05:07
+- Models loaded successfully at 2026-04-20 05:06:46
+### Phase 1 Timing Summary
+| Segment | Start | End | Duration | Notes |
+|---|---:|---:|---:|---|
+| Queue/build to app startup | 05:04:31 | 05:05:07 | 36s | Includes scheduling, build finalization, image start |
+| App startup to model-ready | 05:05:07 | 05:06:46 | 99s | Time from uvicorn start message to models loaded |
+| API model load phase | 05:06:41 | 05:06:46 | 5s | From "Starting DeepFake Detector API..." to "Models loaded successfully!" |
+### Phase 1 Observations
+- All Hugging Face repos were served from cache at runtime, confirming the build-time prefetch is working.
+- The previous runtime download cost was eliminated from startup.
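+- The remaining startup time is no longer spent on repo downloads: about 94s of the 99s elapses between the uvicorn start message (05:05:07) and "Starting DeepFake Detector API..." (05:06:41), which points to import/init overhead, while model wrapper initialization and loading account for the final 5s.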
 ## Comparison Template (Fill After Implementation)
 | Metric | Baseline (2026-04-20) | After Phase 1 | After Phase 2 | Final |
 |---|---:|---:|---:|---:|
+| Queue/build to app startup | 28s | 36s |  |  |
+| App startup to model-ready | 94s | 99s |  |  |
+| API model load phase | 21s | 5s |  |  |
+| vit-base load | 13s | 1s |  |  |
+| deit-distilled load | 5s | 2s |  |  |
+| Total visible build timed stages | 20.4s | 28.0s |  |  |
 ## Expected Outcome

app/services/model_registry.py CHANGED Viewed

@@ -152,24 +152,49 @@ class ModelRegistry:
                     details={"repo_id": fusion_repo_id}
                 )
-            # Download and load each submodel
-            for submodel_repo_id in submodel_repos:
-                await self._load_submodel(submodel_repo_id)
             # Create and load fusion wrapper
             fusion_wrapper_class = get_fusion_wrapper_class(fusion_config)
             logger.info(f"Using fusion wrapper class {fusion_wrapper_class.__name__}")
-            self._fusion = fusion_wrapper_class(
                 repo_id=fusion_repo_id,
                 config=fusion_config,
                 local_path=fusion_path
             )
-            self._fusion.load()
             self._is_loaded = True
             logger.info(f"Successfully loaded {len(self._submodels)} submodels and fusion model")
-    async def _load_submodel(self, repo_id: str) -> None:
         """
         Download and load a single submodel.
@@ -198,11 +223,9 @@ class ModelRegistry:
             config=config,
             local_path=local_path
         )
-        wrapper.load()
-        # Store by short name
-        self._submodels[wrapper.name] = wrapper
         logger.info(f"Loaded submodel: {wrapper.name}")
     def _read_config(self, local_path: str) -> Dict[str, Any]:
         """

                     details={"repo_id": fusion_repo_id}
                 )
+            # Load submodels concurrently with a small bound to avoid
+            # overwhelming the container while still reducing cold-start wall time.
+            max_concurrent_loads = 2
+            semaphore = asyncio.Semaphore(max_concurrent_loads)
+            async def load_with_limit(repo_id: str):
+                async with semaphore:
+                    return await self._load_submodel(repo_id)
+            load_results = await asyncio.gather(
+                *(load_with_limit(submodel_repo_id) for submodel_repo_id in submodel_repos),
+                return_exceptions=True
+            )
+            errors = [result for result in load_results if isinstance(result, Exception)]
+            if errors:
+                error_messages = [str(error) for error in errors]
+                raise RuntimeError(
+                    f"Failed to load one or more submodels: {error_messages}"
+                )
+            loaded_submodels = {
+                wrapper.name: wrapper
+                for wrapper in load_results
+                if not isinstance(wrapper, Exception)
+            }
             # Create and load fusion wrapper
             fusion_wrapper_class = get_fusion_wrapper_class(fusion_config)
             logger.info(f"Using fusion wrapper class {fusion_wrapper_class.__name__}")
+            fusion_wrapper = fusion_wrapper_class(
                 repo_id=fusion_repo_id,
                 config=fusion_config,
                 local_path=fusion_path
             )
+            fusion_wrapper.load()
+            self._fusion = fusion_wrapper
+            self._submodels = loaded_submodels
             self._is_loaded = True
             logger.info(f"Successfully loaded {len(self._submodels)} submodels and fusion model")
+    async def _load_submodel(self, repo_id: str) -> BaseSubmodelWrapper:
         """
         Download and load a single submodel.
             config=config,
             local_path=local_path
         )
+        await asyncio.to_thread(wrapper.load)
         logger.info(f"Loaded submodel: {wrapper.name}")
+        return wrapper
     def _read_config(self, local_path: str) -> Dict[str, Any]:
         """

requirements.txt CHANGED Viewed

@@ -19,7 +19,7 @@ torchvision>=0.15.0,<1.0.0
 timm>=0.9.0
 # Machine Learning (for fusion models)
-scikit-learn>=1.3.0,<2.0.0
 numpy>=1.24.0,<2.0.0
 # Hugging Face Hub

 timm>=0.9.0
 # Machine Learning (for fusion models)
+scikit-learn==1.6.1
 numpy>=1.24.0,<2.0.0
 # Hugging Face Hub

start.sh CHANGED Viewed

@@ -12,5 +12,10 @@ fi
 # Use PORT from env, .env file, or default to 7860
 PORT=${PORT:-7860}
 echo "Starting uvicorn on port $PORT"
 exec uvicorn app.main:app --host 0.0.0.0 --port "$PORT" --log-level info

 # Use PORT from env, .env file, or default to 7860
 PORT=${PORT:-7860}
+# Default OMP_NUM_THREADS to 1 when it is unset or not a plain decimal integer, to avoid libgomp warnings about an invalid value.
+if ! [[ "${OMP_NUM_THREADS:-}" =~ ^[0-9]+$ ]]; then
+    export OMP_NUM_THREADS=1
+fi
 echo "Starting uvicorn on port $PORT"
 exec uvicorn app.main:app --host 0.0.0.0 --port "$PORT" --log-level info