hetchyy committed on
Commit
7fc82ea
·
1 Parent(s): 2c29aa7

Update walltime estimation

Browse files
Files changed (2) hide show
  1. config.py +22 -10
  2. src/api/session_api.py +30 -13
config.py CHANGED
@@ -63,21 +63,33 @@ NGRAM_INDEX_PATH = DATA_PATH / f"phoneme_ngram_index_{NGRAM_SIZE}.pkl"
63
  # Inference settings
64
  # =============================================================================
65
 
66
- # VAD lease: linear regression from 121 GPU runs (R²=0.992)
 
67
  def get_vad_duration(minutes):
68
  """GPU seconds needed for VAD based on audio minutes."""
69
- VAD_LEASE_BUFFER = 6 # safety margin over regression (seconds)
70
- return max(3, 0.3 * minutes + VAD_LEASE_BUFFER)
71
 
72
  def get_asr_duration(minutes, model_name="Base"):
73
- """GPU seconds needed for ASR.
74
- """
75
  if model_name == "Large":
76
- return 15
77
- return 7
78
-
79
- ESTIMATE_ALIGNMENT_OVERHEAD_S = 4 # DP alignment + result building + gpu queue
80
- ESTIMATE_CPU_MULTIPLIER = 50
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  # Batching strategy
83
  BATCHING_STRATEGY = "dynamic" # "naive" (fixed count) or "dynamic" (seconds + pad waste)
 
63
  # Inference settings
64
  # =============================================================================
65
 
66
# VAD lease: linear regression from 203 GPU runs (R²=0.996)
# vad_gpu = 0.284 * minutes + 1.80; max residual +2.17s
def get_vad_duration(minutes):
    """GPU seconds needed for VAD based on audio minutes."""
    # Safety margin on top of the fitted line; covers the worst observed
    # residual (2.17 s) with room to spare.
    safety_margin = 3
    predicted_seconds = 0.284 * minutes + 1.80 + safety_margin
    # Defensive floor of 3 s (already exceeded for any non-negative input).
    return max(3, predicted_seconds)
72
 
73
def get_asr_duration(minutes, model_name="Base"):
    """GPU seconds needed for ASR (constant, independent of audio duration)."""
    # Ceilings over observed maxima: Large peaked at 8.43 s (n=32),
    # Base at 4.63 s (n=177).
    return 10 if model_name == "Large" else 5
78
+
79
# Wall-time estimation: fitted directly on end-to-end run time rather than
# by summing per-stage lease estimates.
#   GPU / Base : total = 0.43 * minutes + 5.5   (R²=0.89, n=177)
#   GPU / Large: total = 0.50 * minutes + 11.2  (R²=0.20, n=32)
#   CPU / Base : total = 11.2 * minutes + 20.9  (R²=0.46, n=37)
#   CPU / Large: total = 25.2 * minutes + 24.4  (R²=0.67, n=11)
ESTIMATE_GPU_BASE_SLOPE = 0.43
ESTIMATE_GPU_BASE_INTERCEPT = 5.5
ESTIMATE_GPU_LARGE_SLOPE = 0.50
ESTIMATE_GPU_LARGE_INTERCEPT = 11.2
ESTIMATE_CPU_BASE_SLOPE = 11.2
ESTIMATE_CPU_BASE_INTERCEPT = 20.9
ESTIMATE_CPU_LARGE_SLOPE = 25.2
ESTIMATE_CPU_LARGE_INTERCEPT = 24.4
# Multiplier applied to the regression output to absorb run-to-run variance.
ESTIMATE_WALL_BUFFER = 1.5
93
 
94
  # Batching strategy
95
  BATCHING_STRATEGY = "dynamic" # "naive" (fixed count) or "dynamic" (seconds + pad waste)
src/api/session_api.py CHANGED
@@ -216,10 +216,17 @@ def _load_session_metadata(audio_id):
216
 
217
  def estimate_duration(endpoint, audio_duration_s=None, audio_id=None,
218
  model_name="Base", device="GPU"):
219
- """Estimate processing duration for a given endpoint."""
 
 
 
 
220
  from config import (
221
- get_vad_duration, get_asr_duration,
222
- ESTIMATE_ALIGNMENT_OVERHEAD_S, ESTIMATE_CPU_MULTIPLIER,
 
 
 
223
  MFA_PROGRESS_SEGMENT_RATE,
224
  )
225
 
@@ -261,16 +268,26 @@ def estimate_duration(endpoint, audio_duration_s=None, audio_id=None,
261
  num_segments = len(segments)
262
  estimate = MFA_PROGRESS_SEGMENT_RATE * num_segments
263
  else:
264
- # --- Pipeline endpoints: VAD + ASR + alignment overhead ---
265
- estimate = 0.0
266
- if endpoint in _VAD_ENDPOINTS:
267
- estimate += get_vad_duration(minutes)
268
- estimate += get_asr_duration(minutes, model_name)
269
- estimate += ESTIMATE_ALIGNMENT_OVERHEAD_S
270
-
271
- # --- CPU multiplier ---
272
- if device == "CPU":
273
- estimate *= ESTIMATE_CPU_MULTIPLIER
 
 
 
 
 
 
 
 
 
 
274
 
275
  rounded = max(5, math.ceil(estimate / 5) * 5)
276
 
 
216
 
217
  def estimate_duration(endpoint, audio_duration_s=None, audio_id=None,
218
  model_name="Base", device="GPU"):
219
+ """Estimate processing duration for a given endpoint.
220
+
221
+ Uses direct wall-time regression (not sum of lease components) fitted on
222
+ 257 runs from hetchyy/quran-aligner-logs v1 dataset.
223
+ """
224
  from config import (
225
+ ESTIMATE_GPU_BASE_SLOPE, ESTIMATE_GPU_BASE_INTERCEPT,
226
+ ESTIMATE_GPU_LARGE_SLOPE, ESTIMATE_GPU_LARGE_INTERCEPT,
227
+ ESTIMATE_CPU_BASE_SLOPE, ESTIMATE_CPU_BASE_INTERCEPT,
228
+ ESTIMATE_CPU_LARGE_SLOPE, ESTIMATE_CPU_LARGE_INTERCEPT,
229
+ ESTIMATE_WALL_BUFFER,
230
  MFA_PROGRESS_SEGMENT_RATE,
231
  )
232
 
 
268
  num_segments = len(segments)
269
  estimate = MFA_PROGRESS_SEGMENT_RATE * num_segments
270
  else:
271
+ # --- Pipeline endpoints: direct wall-time regression ---
272
+ device_upper = (device or "GPU").upper()
273
+ is_large = model_name == "Large"
274
+
275
+ if device_upper == "CPU":
276
+ if is_large:
277
+ estimate = ESTIMATE_CPU_LARGE_SLOPE * minutes + ESTIMATE_CPU_LARGE_INTERCEPT
278
+ else:
279
+ estimate = ESTIMATE_CPU_BASE_SLOPE * minutes + ESTIMATE_CPU_BASE_INTERCEPT
280
+ else:
281
+ if is_large:
282
+ estimate = ESTIMATE_GPU_LARGE_SLOPE * minutes + ESTIMATE_GPU_LARGE_INTERCEPT
283
+ else:
284
+ estimate = ESTIMATE_GPU_BASE_SLOPE * minutes + ESTIMATE_GPU_BASE_INTERCEPT
285
+
286
+ # Retranscribe/realign skip VAD — scale down by ~50% (ASR+DP only)
287
+ if endpoint not in _VAD_ENDPOINTS:
288
+ estimate *= 0.5
289
+
290
+ estimate *= ESTIMATE_WALL_BUFFER
291
 
292
  rounded = max(5, math.ceil(estimate / 5) * 5)
293