hswift commited on
Commit
375db4a
·
verified ·
1 Parent(s): 4c26d1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -19
app.py CHANGED
@@ -1,4 +1,19 @@
1
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import torch
3
  import tempfile
4
  from fastapi import FastAPI, HTTPException
@@ -8,7 +23,6 @@ from pydantic import BaseModel
8
  from diffusers import AudioLDMPipeline
9
  from scipy.io.wavfile import write as write_wav
10
  import numpy as np
11
- import logging
12
 
13
  # Configure logging
14
  logging.basicConfig(level=logging.INFO)
@@ -35,26 +49,22 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
35
  torch_dtype = torch.float16 if "cuda" in device else torch.float32
36
 
37
  logger.info(f"Using device: {device} with dtype: {torch_dtype}")
38
-
39
- # --- FIX FOR PERMISSION ERROR ---
40
- # The environment we're running in doesn't allow writing to the default '/.cache' directory.
41
- # We explicitly define a writable directory within '/tmp' for the model cache.
42
- CACHE_DIR = "/tmp/huggingface_cache"
43
- os.makedirs(CACHE_DIR, exist_ok=True)
44
  logger.info(f"Using model cache directory: {CACHE_DIR}")
45
 
 
46
  try:
47
  # Use the stable, recommended model
48
  repo_id = "cvssp/audioldm-s-full-v2"
49
  pipe = AudioLDMPipeline.from_pretrained(
50
  repo_id,
51
  torch_dtype=torch_dtype,
52
- cache_dir=CACHE_DIR # Pass the writable cache directory to the loader
 
53
  )
54
  pipe = pipe.to(device)
55
  logger.info(f"Successfully loaded model: {repo_id}")
56
  except Exception as e:
57
- logger.error(f"Failed to load the model: {e}")
58
  pipe = None # Ensure pipe is None if loading fails
59
 
60
  # --- API Endpoint ---
@@ -66,10 +76,8 @@ async def generate_audio_endpoint(request: AudioRequest):
66
  prompt = request.prompt
67
  logger.info(f"Generating audio for prompt: '{prompt}'")
68
 
69
- # Use a temporary file to store the generated audio
70
  temp_file_path = ""
71
  try:
72
- # Generate the audio waveform
73
  audio = pipe(
74
  prompt,
75
  num_inference_steps=200,
@@ -79,28 +87,21 @@ async def generate_audio_endpoint(request: AudioRequest):
79
 
80
  sample_rate = 16000
81
 
82
- # Create a temporary file to save the audio
83
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
84
  temp_file_path = temp_file.name
85
-
86
- # Normalize and convert to 16-bit integer format for WAV
87
  audio_int16 = (audio * 32767).astype(np.int16)
88
-
89
- # Write the WAV file
90
  write_wav(temp_file_path, sample_rate, audio_int16)
91
  logger.info(f"Audio saved to temporary file: {temp_file_path}")
92
 
93
- # Return the audio file as a response.
94
  return FileResponse(
95
  path=temp_file_path,
96
  media_type='audio/wav',
97
  filename=f"{prompt[:50].replace(' ', '_')}.wav",
98
- background=os.remove(temp_file_path) # Clean up the file after sending
99
  )
100
 
101
  except Exception as e:
102
  logger.error(f"Error during audio generation for prompt '{prompt}': {e}", exc_info=True)
103
- # Clean up the temp file if it was created before the error
104
  if temp_file_path and os.path.exists(temp_file_path):
105
  os.remove(temp_file_path)
106
  raise HTTPException(status_code=500, detail=str(e))
 
import os
import logging

# --- FIX FOR ALL PERMISSION ERRORS ---
# Set environment variables BEFORE importing torch or diffusers.
# This forces all underlying libraries (huggingface_hub, torch, etc.)
# to use a writable directory inside /tmp, avoiding any permission errors.
CACHE_DIR = "/tmp/huggingface_cache"
_HUB_CACHE = os.path.join(CACHE_DIR, "hub")
_TORCH_CACHE = os.path.join(CACHE_DIR, "torch")

os.environ["HF_HOME"] = CACHE_DIR
os.environ["HF_HUB_CACHE"] = _HUB_CACHE
os.environ["TORCH_HOME"] = _TORCH_CACHE

# Make sure both cache locations exist before any library touches them.
for _cache_path in (_HUB_CACHE, _TORCH_CACHE):
    os.makedirs(_cache_path, exist_ok=True)


# Now it's safe to import the other libraries
import torch
import tempfile
from fastapi import FastAPI, HTTPException
 
# Third-party dependencies (safe to import now that the cache
# environment variables are already in place).
from diffusers import AudioLDMPipeline
import numpy as np
from scipy.io.wavfile import write as write_wav


# Configure logging
logging.basicConfig(level=logging.INFO)
 
# Half precision on CUDA saves memory; CPU kernels require float32.
torch_dtype = torch.float16 if "cuda" in device else torch.float32

logger.info(f"Using device: {device} with dtype: {torch_dtype}")
logger.info(f"Using model cache directory: {CACHE_DIR}")
53
 
54
+ pipe = None
55
  try:
56
  # Use the stable, recommended model
57
  repo_id = "cvssp/audioldm-s-full-v2"
58
  pipe = AudioLDMPipeline.from_pretrained(
59
  repo_id,
60
  torch_dtype=torch_dtype,
61
+ # cache_dir is still good practice but the environment variables are the real fix
62
+ cache_dir=CACHE_DIR
63
  )
64
  pipe = pipe.to(device)
65
  logger.info(f"Successfully loaded model: {repo_id}")
66
  except Exception as e:
67
+ logger.error(f"Failed to load the model: {e}", exc_info=True)
68
  pipe = None # Ensure pipe is None if loading fails
69
 
70
  # --- API Endpoint ---
 
76
  prompt = request.prompt
77
  logger.info(f"Generating audio for prompt: '{prompt}'")
78
 
 
79
  temp_file_path = ""
80
  try:
 
81
  audio = pipe(
82
  prompt,
83
  num_inference_steps=200,
 
87
 
88
  sample_rate = 16000
89
 
 
90
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
91
  temp_file_path = temp_file.name
 
 
92
  audio_int16 = (audio * 32767).astype(np.int16)
 
 
93
  write_wav(temp_file_path, sample_rate, audio_int16)
94
  logger.info(f"Audio saved to temporary file: {temp_file_path}")
95
 
 
96
  return FileResponse(
97
  path=temp_file_path,
98
  media_type='audio/wav',
99
  filename=f"{prompt[:50].replace(' ', '_')}.wav",
100
+ background=os.remove(temp_file_path)
101
  )
102
 
103
  except Exception as e:
104
  logger.error(f"Error during audio generation for prompt '{prompt}': {e}", exc_info=True)
 
105
  if temp_file_path and os.path.exists(temp_file_path):
106
  os.remove(temp_file_path)
107
  raise HTTPException(status_code=500, detail=str(e))