Spaces:

hswift
/

decent-sampler-audio-api

Runtime error

App Files Files Community

hswift committed on Sep 11, 2025

Commit

088b7f2

verified ·

1 Parent(s): 9f76617

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -46

app.py CHANGED Viewed

@@ -2,15 +2,40 @@ from fastapi import FastAPI, HTTPException
 from fastapi.responses import StreamingResponse
 from fastapi.middleware.cors import CORSMiddleware
 import io
-import wave
-import math
-import struct
-# Initialize the FastAPI app
 app = FastAPI()
-# IMPORTANT: Add CORS middleware to allow requests from your frontend
-# This is crucial for connecting the two parts.
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],  # Allows all origins
@@ -19,55 +44,57 @@ app.add_middleware(
     allow_headers=["*"],  # Allows all headers
 )
-def create_mock_wav_in_memory(prompt: str) -> io.BytesIO:
-    """Generates a 1-second, 440Hz sine wave WAV file in memory."""
-    sample_rate = 44100
-    duration_seconds = 1
-    frequency = 440.0  # A4 note
-    num_samples = duration_seconds * sample_rate
-    # Use io.BytesIO to build the WAV file in memory
-    wav_file_in_memory = io.BytesIO()
-    with wave.open(wav_file_in_memory, 'wb') as w:
-        w.setnchannels(1)  # Mono
-        w.setsampwidth(2)  # 16-bit PCM
-        w.setframerate(sample_rate)
-        for i in range(num_samples):
-            # Calculate the sample value for the sine wave
-            value = int(32767.0 * math.sin(2 * math.pi * frequency * i / sample_rate))
-            # Pack the value as a 16-bit signed integer
-            data = struct.pack('<h', value)
-            w.writeframesraw(data)
-    # Go back to the beginning of the in-memory file so it can be read
-    wav_file_in_memory.seek(0)
-    return wav_file_in_memory
 @app.post("/generate-audio")
 async def generate_audio_endpoint(payload: dict):
     """
-    This endpoint receives a text prompt and returns a generated audio file.
     """
     prompt = payload.get("prompt")
     if not prompt:
         raise HTTPException(status_code=400, detail="A 'prompt' is required in the request body.")
-    # Generate the mock audio data
-    audio_data_stream = create_mock_wav_in_memory(prompt)
-    # Create a safe filename from the prompt
-    safe_filename = "".join(c for c in prompt if c.isalnum() or c in (' ', '_')).rstrip()[:50] + ".wav"
-    # Return the in-memory WAV file as a streaming response
-    return StreamingResponse(
-        audio_data_stream,
-        media_type="audio/wav",
-        headers={"Content-Disposition": f"attachment; filename=\"{safe_filename}\""}
-    )
 @app.get("/")
 def read_root():
     """A simple root endpoint to confirm the API is running."""
-    return {"message": "Mock Audio Generation API is running."}

 from fastapi.responses import StreamingResponse
 from fastapi.middleware.cors import CORSMiddleware
 import io
+import torch
+from diffusers import AudioLDM2Pipeline
+from scipy.io.wavfile import write as write_wav
+import numpy as np
+import logging
+# --- Setup Logging ---
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# --- Initialize FastAPI App ---
 app = FastAPI()
+# --- Model Loading ---
+# This section loads the AI model when the application starts.
+# This is crucial for performance, so it only happens once.
+MODEL_REPO = "cvssp/audioldm2"
+pipeline = None
+device = "cuda" if torch.cuda.is_available() else "cpu"
+torch_dtype = torch.float16 if device == "cuda" else torch.float32
+try:
+    logger.info(f"Attempting to load model '{MODEL_REPO}' on device: {device} with dtype: {torch_dtype}")
+    # Load the pre-trained AudioLDM2 pipeline
+    pipeline = AudioLDM2Pipeline.from_pretrained(MODEL_REPO, torch_dtype=torch_dtype)
+    pipeline = pipeline.to(device)
+    logger.info("Model loaded successfully and moved to device.")
+except Exception as e:
+    logger.error(f"Fatal error during model loading: {e}", exc_info=True)
+    # If the model fails to load, the 'pipeline' variable will remain None.
+    # The endpoint will then report an error.
+# --- CORS Middleware ---
+# Allows the frontend website to communicate with this API
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],  # Allows all origins
     allow_headers=["*"],  # Allows all headers
 )
+# --- API Endpoints ---
 def _safe_wav_filename(prompt: str, max_stem_length: int = 50) -> str:
     """Derive a header-safe ``.wav`` filename from a free-text prompt.

     Only ASCII letters, digits, spaces and underscores are kept so the name can
     be placed in a ``Content-Disposition`` header without encoding errors.
     Falls back to ``audio.wav`` when nothing usable is left.
     """
     kept = "".join(c for c in prompt if c in (" ", "_") or (c.isascii() and c.isalnum()))
     # Truncate before stripping so the stem never ends in a space.
     stem = kept[:max_stem_length].strip()
     return f"{stem or 'audio'}.wav"
 def _float_audio_to_wav(audio, sample_rate: int) -> io.BytesIO:
     """Encode float samples (nominally in [-1.0, 1.0]) as an in-memory 16-bit PCM WAV."""
     samples = np.nan_to_num(np.asarray(audio, dtype=np.float32))
     # Clip before scaling: out-of-range samples would otherwise wrap around
     # when cast to int16 and come out as loud clicks.
     pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
     buffer = io.BytesIO()
     write_wav(buffer, sample_rate, pcm)
     buffer.seek(0)  # Rewind so the response streams from the first byte
     return buffer
 @app.post("/generate-audio")
 async def generate_audio_endpoint(payload: dict):
     """
     Receives a text prompt and returns a generated WAV audio file.

     Args:
         payload: Parsed JSON request body; must contain a non-empty string
             under the key "prompt".

     Returns:
         A StreamingResponse carrying the WAV bytes as an attachment whose
         filename is derived from the prompt.

     Raises:
         HTTPException: 503 if the model is not loaded, 400 if the prompt is
             missing, blank or not a string, 500 if generation fails.
     """
     if pipeline is None:
         logger.error("Request received, but model is not loaded.")
         raise HTTPException(status_code=503, detail="Model is not available or failed to load. Please check the server logs.")
     prompt = payload.get("prompt")
     # Reject non-string and whitespace-only prompts up front, before any
     # expensive inference is attempted.
     if not isinstance(prompt, str) or not prompt.strip():
         raise HTTPException(status_code=400, detail="A 'prompt' is required in the request body.")
     try:
         logger.info("Generating audio for prompt: '%s'", prompt)
         # NOTE(review): this synchronous call runs inside an async handler, so
         # other requests are not served while it executes - confirm acceptable.
         audio = pipeline(
             prompt,
             negative_prompt="Low quality, noisy, muffled, mono",  # Helps improve quality
             num_inference_steps=200,  # Higher steps can improve quality
             audio_length_in_s=2.5,  # Generate 2.5-second clips
         ).audios[0]
         # Sample rate assumed to match the model's output (16 kHz) - TODO confirm.
         sample_rate = 16000
         wav_file_in_memory = _float_audio_to_wav(audio, sample_rate)
         safe_filename = _safe_wav_filename(prompt)
         logger.info("Successfully generated audio for prompt: '%s'", prompt)
         return StreamingResponse(
             wav_file_in_memory,
             media_type="audio/wav",
             headers={"Content-Disposition": f"attachment; filename=\"{safe_filename}\""},
         )
     except Exception as e:
         # Boundary handler: log the full traceback, return a generic message.
         logger.exception("Error during audio generation for prompt '%s': %s", prompt, e)
         raise HTTPException(status_code=500, detail="An unexpected error occurred during audio generation.") from e
 @app.get("/")
 def read_root():
     """Liveness check: returns a fixed message confirming the API is running."""
     status = {"message": "Decent Sampler Audio Generation API is running."}
     return status