lukhsaankumar committed on
Commit
d0d4075
·
1 Parent(s): 49dc015

Deploy DeepFake Detector API - 2026-04-20 01:37:53

Browse files
COLD_START_OPTIMIZATION.md CHANGED
@@ -263,16 +263,37 @@ Use the same procedure before and after changes.
263
  4. Capture per-model load durations from logs.
264
  5. Save a comparison table in this file.
265
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  ## Comparison Template (Fill After Implementation)
267
 
268
  | Metric | Baseline (2026-04-20) | After Phase 1 | After Phase 2 | Final |
269
  |---|---:|---:|---:|---:|
270
- | Queue/build to app startup | 28s | | | |
271
- | App startup to model-ready | 94s | | | |
272
- | API model load phase | 21s | | | |
273
- | vit-base load | 13s | | | |
274
- | deit-distilled load | 5s | | | |
275
- | Total visible build timed stages | 20.4s | | | |
276
 
277
  ## Expected Outcome
278
 
 
263
  4. Capture per-model load durations from logs.
264
  5. Save a comparison table in this file.
265
 
266
+ ## Phase 1 Results From Latest Logs
267
+
268
+ Source log window:
269
+ - Build queued at 2026-04-20 05:04:31
270
+ - Application startup begins at 2026-04-20 05:05:07
271
+ - Models loaded successfully at 2026-04-20 05:06:46
272
+
273
+ ### Phase 1 Timing Summary
274
+
275
+ | Segment | Start | End | Duration | Notes |
276
+ |---|---:|---:|---:|---|
277
+ | Queue/build to app startup | 05:04:31 | 05:05:07 | 36s | Includes scheduling, build finalization, image start |
278
+ | App startup to model-ready | 05:05:07 | 05:06:46 | 99s | Time from uvicorn start message to models loaded |
279
+ | API model load phase | 05:06:41 | 05:06:46 | 5s | From "Starting DeepFake Detector API..." to "Models loaded successfully!" |
280
+
281
+ ### Phase 1 Observations
282
+
283
+ - All Hugging Face repos were served from cache at runtime, confirming the build-time prefetch is working.
284
+ - The previous runtime download cost was eliminated from startup.
285
+ - The remaining startup time is now dominated by model wrapper initialization and import/init overhead rather than repo downloads.
286
+
287
  ## Comparison Template (Fill After Implementation)
288
 
289
  | Metric | Baseline (2026-04-20) | After Phase 1 | After Phase 2 | Final |
290
  |---|---:|---:|---:|---:|
291
+ | Queue/build to app startup | 28s | 36s | | |
292
+ | App startup to model-ready | 94s | 99s | | |
293
+ | API model load phase | 21s | 5s | | |
294
+ | vit-base load | 13s | 1s | | |
295
+ | deit-distilled load | 5s | 2s | | |
296
+ | Total visible build timed stages | 20.4s | 28.0s | | |
297
 
298
  ## Expected Outcome
299
 
app/services/model_registry.py CHANGED
@@ -152,24 +152,49 @@ class ModelRegistry:
152
  details={"repo_id": fusion_repo_id}
153
  )
154
 
155
- # Download and load each submodel
156
- for submodel_repo_id in submodel_repos:
157
- await self._load_submodel(submodel_repo_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
  # Create and load fusion wrapper
160
  fusion_wrapper_class = get_fusion_wrapper_class(fusion_config)
161
  logger.info(f"Using fusion wrapper class {fusion_wrapper_class.__name__}")
162
- self._fusion = fusion_wrapper_class(
163
  repo_id=fusion_repo_id,
164
  config=fusion_config,
165
  local_path=fusion_path
166
  )
167
- self._fusion.load()
 
 
168
 
169
  self._is_loaded = True
170
  logger.info(f"Successfully loaded {len(self._submodels)} submodels and fusion model")
171
 
172
- async def _load_submodel(self, repo_id: str) -> None:
173
  """
174
  Download and load a single submodel.
175
 
@@ -198,11 +223,9 @@ class ModelRegistry:
198
  config=config,
199
  local_path=local_path
200
  )
201
- wrapper.load()
202
-
203
- # Store by short name
204
- self._submodels[wrapper.name] = wrapper
205
  logger.info(f"Loaded submodel: {wrapper.name}")
 
206
 
207
  def _read_config(self, local_path: str) -> Dict[str, Any]:
208
  """
 
152
  details={"repo_id": fusion_repo_id}
153
  )
154
 
155
+ # Load submodels concurrently with a small bound to avoid
156
+ # overwhelming the container while still reducing cold-start wall time.
157
+ max_concurrent_loads = 2
158
+ semaphore = asyncio.Semaphore(max_concurrent_loads)
159
+
160
+ async def load_with_limit(repo_id: str):
161
+ async with semaphore:
162
+ return await self._load_submodel(repo_id)
163
+
164
+ load_results = await asyncio.gather(
165
+ *(load_with_limit(submodel_repo_id) for submodel_repo_id in submodel_repos),
166
+ return_exceptions=True
167
+ )
168
+
169
+ errors = [result for result in load_results if isinstance(result, Exception)]
170
+ if errors:
171
+ error_messages = [str(error) for error in errors]
172
+ raise RuntimeError(
173
+ f"Failed to load one or more submodels: {error_messages}"
174
+ )
175
+
176
+ loaded_submodels = {
177
+ wrapper.name: wrapper
178
+ for wrapper in load_results
179
+ if not isinstance(wrapper, Exception)
180
+ }
181
 
182
  # Create and load fusion wrapper
183
  fusion_wrapper_class = get_fusion_wrapper_class(fusion_config)
184
  logger.info(f"Using fusion wrapper class {fusion_wrapper_class.__name__}")
185
+ fusion_wrapper = fusion_wrapper_class(
186
  repo_id=fusion_repo_id,
187
  config=fusion_config,
188
  local_path=fusion_path
189
  )
190
+ fusion_wrapper.load()
191
+ self._fusion = fusion_wrapper
192
+ self._submodels = loaded_submodels
193
 
194
  self._is_loaded = True
195
  logger.info(f"Successfully loaded {len(self._submodels)} submodels and fusion model")
196
 
197
+ async def _load_submodel(self, repo_id: str) -> BaseSubmodelWrapper:
198
  """
199
  Download and load a single submodel.
200
 
 
223
  config=config,
224
  local_path=local_path
225
  )
226
+ await asyncio.to_thread(wrapper.load)
 
 
 
227
  logger.info(f"Loaded submodel: {wrapper.name}")
228
+ return wrapper
229
 
230
  def _read_config(self, local_path: str) -> Dict[str, Any]:
231
  """
requirements.txt CHANGED
@@ -19,7 +19,7 @@ torchvision>=0.15.0,<1.0.0
19
  timm>=0.9.0
20
 
21
  # Machine Learning (for fusion models)
22
- scikit-learn>=1.3.0,<2.0.0
23
  numpy>=1.24.0,<2.0.0
24
 
25
  # Hugging Face Hub
 
19
  timm>=0.9.0
20
 
21
  # Machine Learning (for fusion models)
22
+ scikit-learn==1.6.1
23
  numpy>=1.24.0,<2.0.0
24
 
25
  # Hugging Face Hub
start.sh CHANGED
@@ -12,5 +12,10 @@ fi
12
  # Use PORT from env, .env file, or default to 7860
13
  PORT=${PORT:-7860}
14
 
 
 
 
 
 
15
  echo "Starting uvicorn on port $PORT"
16
  exec uvicorn app.main:app --host 0.0.0.0 --port "$PORT" --log-level info
 
12
  # Use PORT from env, .env file, or default to 7860
13
  PORT=${PORT:-7860}
14
 
15
+ # Ensure OpenMP thread count is a valid integer to avoid libgomp warnings.
16
+ if ! [[ "${OMP_NUM_THREADS:-}" =~ ^[0-9]+$ ]]; then
17
+ export OMP_NUM_THREADS=1
18
+ fi
19
+
20
  echo "Starting uvicorn on port $PORT"
21
  exec uvicorn app.main:app --host 0.0.0.0 --port "$PORT" --log-level info