Claude committed on
Commit
575f8ed
·
unverified ·
1 Parent(s): 0171a09

fix(frontend): add cold start retry with waking_up state (BUG-001)

Browse files

- Add 'waking_up' to JobStatus type
- Implement exponential backoff retry logic in useSegmentation hook
- Detect 503 errors and network failures as cold-start signals
- Max 5 retries with delays: 2s, 4s, 8s, 16s, 30s (capped)
- Update ProgressIndicator with yellow styling for waking_up state
- Show user-friendly message during backend wake-up

frontend/src/components/ProgressIndicator.tsx CHANGED
@@ -24,17 +24,22 @@ export function ProgressIndicator({
24
  }: ProgressIndicatorProps) {
25
  const isError = status === 'failed'
26
  const isComplete = status === 'completed'
 
27
 
28
  // Determine bar color based on status
29
  const barColorClass = isError
30
  ? 'bg-red-500'
31
  : isComplete
32
  ? 'bg-green-500'
33
- : 'bg-blue-500'
 
 
34
 
35
- // Animate the bar while running
36
  const animationClass =
37
- status === 'running' || status === 'pending' ? 'animate-pulse' : ''
 
 
38
 
39
  return (
40
  <div className="bg-gray-800 rounded-lg p-4 space-y-3">
@@ -73,10 +78,12 @@ export function ProgressIndicator({
73
  ? 'text-red-400'
74
  : isComplete
75
  ? 'text-green-400'
76
- : 'text-blue-400'
 
 
77
  }`}
78
  >
79
- {status}
80
  </span>
81
  </div>
82
  </div>
 
24
  }: ProgressIndicatorProps) {
25
  const isError = status === 'failed'
26
  const isComplete = status === 'completed'
27
+ const isWakingUp = status === 'waking_up'
28
 
29
  // Determine bar color based on status
30
  const barColorClass = isError
31
  ? 'bg-red-500'
32
  : isComplete
33
  ? 'bg-green-500'
34
+ : isWakingUp
35
+ ? 'bg-yellow-500'
36
+ : 'bg-blue-500'
37
 
38
+ // Animate the bar while running or waking up
39
  const animationClass =
40
+ status === 'running' || status === 'pending' || status === 'waking_up'
41
+ ? 'animate-pulse'
42
+ : ''
43
 
44
  return (
45
  <div className="bg-gray-800 rounded-lg p-4 space-y-3">
 
78
  ? 'text-red-400'
79
  : isComplete
80
  ? 'text-green-400'
81
+ : isWakingUp
82
+ ? 'text-yellow-400'
83
+ : 'text-blue-400'
84
  }`}
85
  >
86
+ {status === 'waking_up' ? 'waking up' : status}
87
  </span>
88
  </div>
89
  </div>
frontend/src/hooks/useSegmentation.ts CHANGED
@@ -1,10 +1,21 @@
1
  import { useState, useCallback, useRef, useEffect } from 'react'
2
- import { apiClient } from '../api/client'
3
  import type { SegmentationResult, JobStatus } from '../types'
4
 
5
  // Polling interval in milliseconds
6
  const POLLING_INTERVAL = 2000
7
 
 
 
 
 
 
 
 
 
 
 
 
8
  /**
9
  * Hook for running segmentation with async job polling.
10
  *
@@ -84,6 +95,7 @@ export function useSegmentation() {
84
  diceScore: response.result.diceScore,
85
  volumeMl: response.result.volumeMl,
86
  elapsedSeconds: response.result.elapsedSeconds,
 
87
  },
88
  })
89
  }
@@ -108,24 +120,34 @@ export function useSegmentation() {
108
 
109
  /**
110
  * Start segmentation job and begin polling
 
 
 
 
111
  */
112
  const runSegmentation = useCallback(
113
- async (caseId: string, fastMode = true) => {
114
- // Cancel any existing job/polling
115
- stopPolling()
116
- abortControllerRef.current?.abort()
 
 
117
 
118
- const abortController = new AbortController()
119
- abortControllerRef.current = abortController
120
 
121
- // Reset state
122
- setError(null)
123
- setResult(null)
124
- setProgress(0)
125
- setProgressMessage('Creating job...')
126
- setJobStatus('pending')
127
- setElapsedSeconds(undefined)
128
- setIsLoading(true)
 
 
 
 
129
 
130
  try {
131
  // Create the job
@@ -153,7 +175,37 @@ export function useSegmentation() {
153
  // Ignore abort errors
154
  if (err instanceof Error && err.name === 'AbortError') return
155
 
156
- const message = err instanceof Error ? err.message : 'Failed to start job'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  setError(message)
158
  setIsLoading(false)
159
  setJobStatus('failed')
 
1
  import { useState, useCallback, useRef, useEffect } from 'react'
2
+ import { apiClient, ApiError } from '../api/client'
3
  import type { SegmentationResult, JobStatus } from '../types'
4
 
5
  // Polling interval in milliseconds
6
  const POLLING_INTERVAL = 2000
7
 
8
+ // Cold start retry configuration
9
+ const MAX_COLD_START_RETRIES = 5
10
+ const INITIAL_RETRY_DELAY = 2000 // 2 seconds
11
+ const MAX_RETRY_DELAY = 30000 // 30 seconds
12
+
13
+ /**
14
+ * Sleep utility for async delays
15
+ */
16
+ const sleep = (ms: number): Promise<void> =>
17
+ new Promise((resolve) => setTimeout(resolve, ms))
18
+
19
  /**
20
  * Hook for running segmentation with async job polling.
21
  *
 
95
  diceScore: response.result.diceScore,
96
  volumeMl: response.result.volumeMl,
97
  elapsedSeconds: response.result.elapsedSeconds,
98
+ warning: response.result.warning,
99
  },
100
  })
101
  }
 
120
 
121
  /**
122
  * Start segmentation job and begin polling
123
+ *
124
+ * @param caseId - The case ID to process
125
+ * @param fastMode - Whether to use fast inference mode
126
+ * @param retryCount - Internal retry counter for cold start handling (do not set manually)
127
  */
128
  const runSegmentation = useCallback(
129
+ async (caseId: string, fastMode = true, retryCount = 0) => {
130
+ // Only reset state on first attempt (not retries)
131
+ if (retryCount === 0) {
132
+ // Cancel any existing job/polling
133
+ stopPolling()
134
+ abortControllerRef.current?.abort()
135
 
136
+ const abortController = new AbortController()
137
+ abortControllerRef.current = abortController
138
 
139
+ // Reset state
140
+ setError(null)
141
+ setResult(null)
142
+ setProgress(0)
143
+ setProgressMessage('Creating job...')
144
+ setJobStatus('pending')
145
+ setElapsedSeconds(undefined)
146
+ setIsLoading(true)
147
+ }
148
+
149
+ const abortController = abortControllerRef.current
150
+ if (!abortController) return
151
 
152
  try {
153
  // Create the job
 
175
  // Ignore abort errors
176
  if (err instanceof Error && err.name === 'AbortError') return
177
 
178
+ // Detect cold start (503 Service Unavailable or network failure)
179
+ const is503 = err instanceof ApiError && err.status === 503
180
+ const isNetworkError =
181
+ err instanceof TypeError && err.message.toLowerCase().includes('fetch')
182
+
183
+ // Retry on cold start errors with exponential backoff
184
+ if ((is503 || isNetworkError) && retryCount < MAX_COLD_START_RETRIES) {
185
+ setJobStatus('waking_up')
186
+ setProgressMessage(
187
+ `Backend is waking up... Please wait (~30-60s). Retry ${retryCount + 1}/${MAX_COLD_START_RETRIES}`
188
+ )
189
+ setProgress(0)
190
+
191
+ // Exponential backoff: 2s, 4s, 8s, 16s, 30s (capped)
192
+ const delay = Math.min(
193
+ INITIAL_RETRY_DELAY * Math.pow(2, retryCount),
194
+ MAX_RETRY_DELAY
195
+ )
196
+ await sleep(delay)
197
+
198
+ // Recursive retry
199
+ return runSegmentation(caseId, fastMode, retryCount + 1)
200
+ }
201
+
202
+ // Max retries exceeded or non-retryable error
203
+ const message =
204
+ is503 || isNetworkError
205
+ ? 'Backend failed to wake up. Please try again later.'
206
+ : err instanceof Error
207
+ ? err.message
208
+ : 'Failed to start job'
209
  setError(message)
210
  setIsLoading(false)
211
  setJobStatus('failed')
frontend/src/types/index.ts CHANGED
@@ -4,6 +4,7 @@ export interface Metrics {
4
  diceScore: number | null
5
  volumeMl: number | null
6
  elapsedSeconds: number
 
7
  }
8
 
9
  // Final segmentation result with URLs and metrics
@@ -26,10 +27,11 @@ export interface SegmentResponse {
26
  elapsedSeconds: number
27
  dwiUrl: string
28
  predictionUrl: string
 
29
  }
30
 
31
  // Job Status Types
32
- export type JobStatus = 'pending' | 'running' | 'completed' | 'failed'
33
 
34
  // Response from POST /api/segment (job creation)
35
  export interface CreateJobResponse {
 
4
  diceScore: number | null
5
  volumeMl: number | null
6
  elapsedSeconds: number
7
+ warning?: string | null
8
  }
9
 
10
  // Final segmentation result with URLs and metrics
 
27
  elapsedSeconds: number
28
  dwiUrl: string
29
  predictionUrl: string
30
+ warning?: string | null
31
  }
32
 
33
  // Job Status Types
34
+ export type JobStatus = 'pending' | 'running' | 'completed' | 'failed' | 'waking_up'
35
 
36
  // Response from POST /api/segment (job creation)
37
  export interface CreateJobResponse {