lukhsaankumar commited on
Commit
9fd7a87
·
1 Parent(s): 3c57c7f

Deploy DeepFake Detector API - 2026-04-20 02:01:56

Browse files
COLD_START_OPTIMIZATION.md CHANGED
@@ -284,16 +284,38 @@ Source log window:
284
  - The previous runtime download cost was eliminated from startup.
285
  - The remaining startup time is now dominated by model wrapper initialization and import/init overhead rather than repo downloads.
286
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  ## Comparison Template (Fill After Implementation)
288
 
289
  | Metric | Baseline (2026-04-20) | After Phase 1 | After Phase 2 | Final |
290
  |---|---:|---:|---:|---:|
291
- | Queue/build to app startup | 28s | 36s | | |
292
- | App startup to model-ready | 94s | 99s | | |
293
- | API model load phase | 21s | 5s | | |
294
- | vit-base load | 13s | 1s | | |
295
- | deit-distilled load | 5s | 2s | | |
296
- | Total visible build timed stages | 20.4s | 28.0s | | |
297
 
298
  ## Expected Outcome
299
 
 
284
  - The previous runtime download cost was eliminated from startup.
285
  - The remaining startup time is now dominated by model wrapper initialization and import/init overhead rather than repo downloads.
286
 
287
+ ## Phase 2 Results From Latest Logs
288
+
289
+ Source log window:
290
+ - Build queued at 2026-04-20 05:46:19
291
+ - Application startup begins at 2026-04-20 05:48:18
292
+ - Models loaded successfully at 2026-04-20 05:49:56
293
+
294
+ ### Phase 2 Timing Summary
295
+
296
+ | Segment | Start | End | Duration | Notes |
297
+ |---|---:|---:|---:|---|
298
+ | Queue/build to app startup | 05:46:19 | 05:48:18 | 119s | Includes scheduling, build finalization, image start |
299
+ | App startup to model-ready | 05:48:18 | 05:49:56 | 98s | Time from uvicorn start message to models loaded |
300
+ | API model load phase | 05:49:52 | 05:49:56 | 4s | From "Starting DeepFake Detector API..." to "Models loaded successfully!" |
301
+
302
+ ### Phase 2 Observations
303
+
304
+ - Submodel loading now overlaps in runtime logs (bounded parallel local initialization is active).
305
+ - Runtime API model load phase improved slightly (5s -> 4s).
306
+ - End-to-end startup remained dominated by pre-lifespan/init time (98s still much larger than model load slice).
307
+ - Runtime hygiene warnings no longer appeared in this run (no OMP warning and no sklearn pickle version warning).
308
+
309
  ## Comparison Template (Fill After Implementation)
310
 
311
  | Metric | Baseline (2026-04-20) | After Phase 1 | After Phase 2 | Final |
312
  |---|---:|---:|---:|---:|
313
+ | Queue/build to app startup | 28s | 36s | 119s | |
314
+ | App startup to model-ready | 94s | 99s | 98s | |
315
+ | API model load phase | 21s | 5s | 4s | |
316
+ | vit-base load | 13s | 1s | 2s | |
317
+ | deit-distilled load | 5s | 2s | 2s | |
318
+ | Total visible build timed stages | 20.4s | 28.0s | 112.7s | |
319
 
320
  ## Expected Outcome
321
 
app/main.py CHANGED
@@ -4,9 +4,17 @@ FastAPI application entry point.
4
  DeepFake Detector API - Milestone 1: Hugging Face hosted dummy models.
5
  """
6
 
 
 
7
  from contextlib import asynccontextmanager
8
  from typing import AsyncGenerator
9
 
 
 
 
 
 
 
10
  from fastapi import FastAPI, Request
11
  from fastapi.middleware.cors import CORSMiddleware
12
  from fastapi.responses import JSONResponse
@@ -20,6 +28,10 @@ from app.services.model_registry import get_model_registry
20
  # Set up logging
21
  setup_logging()
22
  logger = get_logger(__name__)
 
 
 
 
23
 
24
 
25
  @asynccontextmanager
@@ -31,6 +43,9 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
31
  - Startup: Load models from Hugging Face
32
  - Shutdown: Cleanup resources
33
  """
 
 
 
34
  # Startup
35
  logger.info("Starting DeepFake Detector API...")
36
  logger.info(f"Configuration: HF_FUSION_REPO_ID={settings.HF_FUSION_REPO_ID}")
@@ -38,12 +53,22 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
38
 
39
  # Load models from Hugging Face
40
  try:
 
41
  registry = get_model_registry()
42
  await registry.load_from_fusion_repo(settings.HF_FUSION_REPO_ID)
 
 
 
 
43
  logger.info("Models loaded successfully!")
44
  except Exception as e:
45
  logger.error(f"Failed to load models on startup: {e}")
46
  logger.warning("API will start but /ready will report not_ready until models are loaded")
 
 
 
 
 
47
 
48
  yield # Application runs here
49
 
 
4
  DeepFake Detector API - Milestone 1: Hugging Face hosted dummy models.
5
  """
6
 
7
+ import time
8
+ from datetime import datetime, timezone
9
  from contextlib import asynccontextmanager
10
  from typing import AsyncGenerator
11
 
12
+ MAIN_IMPORT_T0 = time.perf_counter()
13
+ print(
14
+ f"{datetime.now(timezone.utc).isoformat()} | INFO | app.main | phase3 module_import_start",
15
+ flush=True,
16
+ )
17
+
18
  from fastapi import FastAPI, Request
19
  from fastapi.middleware.cors import CORSMiddleware
20
  from fastapi.responses import JSONResponse
 
28
  # Set up logging
29
  setup_logging()
30
  logger = get_logger(__name__)
31
+ logger.info(
32
+ "phase3 module_import_complete duration_seconds=%.3f",
33
+ time.perf_counter() - MAIN_IMPORT_T0,
34
+ )
35
 
36
 
37
  @asynccontextmanager
 
43
  - Startup: Load models from Hugging Face
44
  - Shutdown: Cleanup resources
45
  """
46
+ startup_t0 = time.perf_counter()
47
+ logger.info("phase3 startup_lifespan_begin")
48
+
49
  # Startup
50
  logger.info("Starting DeepFake Detector API...")
51
  logger.info(f"Configuration: HF_FUSION_REPO_ID={settings.HF_FUSION_REPO_ID}")
 
53
 
54
  # Load models from Hugging Face
55
  try:
56
+ model_load_t0 = time.perf_counter()
57
  registry = get_model_registry()
58
  await registry.load_from_fusion_repo(settings.HF_FUSION_REPO_ID)
59
+ logger.info(
60
+ "phase3 startup_model_load_duration_seconds=%.3f",
61
+ time.perf_counter() - model_load_t0,
62
+ )
63
  logger.info("Models loaded successfully!")
64
  except Exception as e:
65
  logger.error(f"Failed to load models on startup: {e}")
66
  logger.warning("API will start but /ready will report not_ready until models are loaded")
67
+
68
+ logger.info(
69
+ "phase3 startup_lifespan_total_duration_seconds=%.3f",
70
+ time.perf_counter() - startup_t0,
71
+ )
72
 
73
  yield # Application runs here
74
 
app/services/model_registry.py CHANGED
@@ -4,6 +4,7 @@ Model registry for managing loaded models.
4
 
5
  import asyncio
6
  import json
 
7
  from pathlib import Path
8
  from typing import Any, Dict, List, Optional, Type
9
 
@@ -128,6 +129,8 @@ class ModelRegistry:
128
  fusion_repo_id: Hugging Face repository ID for fusion model
129
  force_reload: If True, reload even if already loaded
130
  """
 
 
131
  async with self._load_lock:
132
  if self._is_loaded and not force_reload:
133
  logger.info("Models already loaded, skipping")
@@ -136,9 +139,15 @@ class ModelRegistry:
136
  logger.info(f"Loading models from fusion repo: {fusion_repo_id}")
137
 
138
  # Download fusion repo
 
139
  fusion_path = await asyncio.to_thread(
140
  self._hf_service.download_repo, fusion_repo_id
141
  )
 
 
 
 
 
142
 
143
  # Read fusion config
144
  fusion_config = self._read_config(fusion_path)
@@ -154,11 +163,17 @@ class ModelRegistry:
154
 
155
  # Prepare submodels sequentially to avoid concurrent Hugging Face
156
  # download contention, then load the already-downloaded artifacts in parallel.
 
157
  prepared_submodels = []
158
  for submodel_repo_id in submodel_repos:
159
  prepared_submodels.append(
160
  await self._prepare_submodel(submodel_repo_id)
161
  )
 
 
 
 
 
162
 
163
  max_concurrent_loads = 2
164
  semaphore = asyncio.Semaphore(max_concurrent_loads)
@@ -167,10 +182,16 @@ class ModelRegistry:
167
  async with semaphore:
168
  return await self._load_prepared_submodel(prepared_submodel)
169
 
 
170
  load_results = await asyncio.gather(
171
  *(load_with_limit(prepared_submodel) for prepared_submodel in prepared_submodels),
172
  return_exceptions=True
173
  )
 
 
 
 
 
174
 
175
  errors = [result for result in load_results if isinstance(result, Exception)]
176
  if errors:
@@ -193,11 +214,20 @@ class ModelRegistry:
193
  config=fusion_config,
194
  local_path=fusion_path
195
  )
 
196
  fusion_wrapper.load()
 
 
 
 
197
  self._fusion = fusion_wrapper
198
  self._submodels = loaded_submodels
199
 
200
  self._is_loaded = True
 
 
 
 
201
  logger.info(f"Successfully loaded {len(self._submodels)} submodels and fusion model")
202
 
203
  async def _prepare_submodel(self, repo_id: str) -> Dict[str, Any]:
@@ -207,6 +237,7 @@ class ModelRegistry:
207
  This stays sequential to avoid concurrent Hugging Face download issues.
208
  """
209
  logger.info(f"Preparing submodel: {repo_id}")
 
210
 
211
  local_path = await asyncio.to_thread(
212
  self._hf_service.download_repo, repo_id
@@ -214,6 +245,12 @@ class ModelRegistry:
214
  config = self._read_config(local_path)
215
  wrapper_class = get_wrapper_class(config)
216
 
 
 
 
 
 
 
217
  return {
218
  "repo_id": repo_id,
219
  "local_path": local_path,
@@ -232,6 +269,7 @@ class ModelRegistry:
232
 
233
  logger.info(f"Loading submodel: {repo_id}")
234
  logger.info(f"Using wrapper class {wrapper_class.__name__} for {repo_id}")
 
235
 
236
  # Create and load wrapper
237
  wrapper = wrapper_class(
@@ -241,6 +279,11 @@ class ModelRegistry:
241
  )
242
  await asyncio.to_thread(wrapper.load)
243
  logger.info(f"Loaded submodel: {wrapper.name}")
 
 
 
 
 
244
  return wrapper
245
 
246
  def _read_config(self, local_path: str) -> Dict[str, Any]:
 
4
 
5
  import asyncio
6
  import json
7
+ import time
8
  from pathlib import Path
9
  from typing import Any, Dict, List, Optional, Type
10
 
 
129
  fusion_repo_id: Hugging Face repository ID for fusion model
130
  force_reload: If True, reload even if already loaded
131
  """
132
+ total_t0 = time.perf_counter()
133
+
134
  async with self._load_lock:
135
  if self._is_loaded and not force_reload:
136
  logger.info("Models already loaded, skipping")
 
139
  logger.info(f"Loading models from fusion repo: {fusion_repo_id}")
140
 
141
  # Download fusion repo
142
+ fusion_download_t0 = time.perf_counter()
143
  fusion_path = await asyncio.to_thread(
144
  self._hf_service.download_repo, fusion_repo_id
145
  )
146
+ logger.info(
147
+ "phase3 fusion_repo_download_duration_seconds=%.3f repo_id=%s",
148
+ time.perf_counter() - fusion_download_t0,
149
+ fusion_repo_id,
150
+ )
151
 
152
  # Read fusion config
153
  fusion_config = self._read_config(fusion_path)
 
163
 
164
  # Prepare submodels sequentially to avoid concurrent Hugging Face
165
  # download contention, then load the already-downloaded artifacts in parallel.
166
+ prepare_t0 = time.perf_counter()
167
  prepared_submodels = []
168
  for submodel_repo_id in submodel_repos:
169
  prepared_submodels.append(
170
  await self._prepare_submodel(submodel_repo_id)
171
  )
172
+ logger.info(
173
+ "phase3 submodel_prepare_total_duration_seconds=%.3f count=%d",
174
+ time.perf_counter() - prepare_t0,
175
+ len(prepared_submodels),
176
+ )
177
 
178
  max_concurrent_loads = 2
179
  semaphore = asyncio.Semaphore(max_concurrent_loads)
 
182
  async with semaphore:
183
  return await self._load_prepared_submodel(prepared_submodel)
184
 
185
+ load_t0 = time.perf_counter()
186
  load_results = await asyncio.gather(
187
  *(load_with_limit(prepared_submodel) for prepared_submodel in prepared_submodels),
188
  return_exceptions=True
189
  )
190
+ logger.info(
191
+ "phase3 submodel_parallel_load_total_duration_seconds=%.3f concurrency=%d",
192
+ time.perf_counter() - load_t0,
193
+ max_concurrent_loads,
194
+ )
195
 
196
  errors = [result for result in load_results if isinstance(result, Exception)]
197
  if errors:
 
214
  config=fusion_config,
215
  local_path=fusion_path
216
  )
217
+ fusion_wrapper_t0 = time.perf_counter()
218
  fusion_wrapper.load()
219
+ logger.info(
220
+ "phase3 fusion_wrapper_load_duration_seconds=%.3f",
221
+ time.perf_counter() - fusion_wrapper_t0,
222
+ )
223
  self._fusion = fusion_wrapper
224
  self._submodels = loaded_submodels
225
 
226
  self._is_loaded = True
227
+ logger.info(
228
+ "phase3 load_from_fusion_repo_total_duration_seconds=%.3f",
229
+ time.perf_counter() - total_t0,
230
+ )
231
  logger.info(f"Successfully loaded {len(self._submodels)} submodels and fusion model")
232
 
233
  async def _prepare_submodel(self, repo_id: str) -> Dict[str, Any]:
 
237
  This stays sequential to avoid concurrent Hugging Face download issues.
238
  """
239
  logger.info(f"Preparing submodel: {repo_id}")
240
+ prepare_t0 = time.perf_counter()
241
 
242
  local_path = await asyncio.to_thread(
243
  self._hf_service.download_repo, repo_id
 
245
  config = self._read_config(local_path)
246
  wrapper_class = get_wrapper_class(config)
247
 
248
+ logger.info(
249
+ "phase3 prepare_submodel_duration_seconds=%.3f repo_id=%s",
250
+ time.perf_counter() - prepare_t0,
251
+ repo_id,
252
+ )
253
+
254
  return {
255
  "repo_id": repo_id,
256
  "local_path": local_path,
 
269
 
270
  logger.info(f"Loading submodel: {repo_id}")
271
  logger.info(f"Using wrapper class {wrapper_class.__name__} for {repo_id}")
272
+ load_t0 = time.perf_counter()
273
 
274
  # Create and load wrapper
275
  wrapper = wrapper_class(
 
279
  )
280
  await asyncio.to_thread(wrapper.load)
281
  logger.info(f"Loaded submodel: {wrapper.name}")
282
+ logger.info(
283
+ "phase3 load_prepared_submodel_duration_seconds=%.3f repo_id=%s",
284
+ time.perf_counter() - load_t0,
285
+ repo_id,
286
+ )
287
  return wrapper
288
 
289
  def _read_config(self, local_path: str) -> Dict[str, Any]: