talha77 committed on
Commit
896ff00
·
verified ·
1 Parent(s): 175ec70

Upload 2 files

Browse files
Files changed (2) hide show
  1. Dockerfile +2 -1
  2. app.py +9 -0
Dockerfile CHANGED
@@ -2,7 +2,8 @@ FROM python:3.10-slim
2
 
3
  ENV PYTHONUNBUFFERED=1 \
4
  HF_HOME=/data/.cache/huggingface \
5
- OMP_NUM_THREADS=1
 
6
 
7
  WORKDIR /app
8
 
 
2
 
3
  ENV PYTHONUNBUFFERED=1 \
4
  HF_HOME=/data/.cache/huggingface \
5
+ OMP_NUM_THREADS=1 \
6
+ PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
7
 
8
  WORKDIR /app
9
 
app.py CHANGED
@@ -226,6 +226,15 @@ async def tts_endpoint(
226
  _num_samples, _sr, duration = output.get_info()
227
 
228
  audio_bytes = output.to_bytes() # WAV bytes
 
 
 
 
 
 
 
 
 
229
  finally:
230
  # Cleanup temp file used for cloning (if any)
231
  if use_voice_cloning and speaker_path and os.path.isfile(speaker_path):
 
226
  _num_samples, _sr, duration = output.get_info()
227
 
228
  audio_bytes = output.to_bytes() # WAV bytes
229
+ except RuntimeError as exc:
230
+ # Gracefully surface CUDA OOM errors instead of crashing the app
231
+ message = str(exc)
232
+ if "CUDA out of memory" in message:
233
+ raise HTTPException(
234
+ status_code=503,
235
+ detail="CUDA out of memory on the Space GPU. Try shorter text, shorter speaker audio, or fewer concurrent requests.",
236
+ )
237
+ raise
238
  finally:
239
  # Cleanup temp file used for cloning (if any)
240
  if use_voice_cloning and speaker_path and os.path.isfile(speaker_path):