Spaces:

Carley1234
/

efectos

Runtime error

Carley1234 committed on Apr 28

Commit

a0240fe

verified ·

1 Parent(s): 9fae321

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -89,7 +89,16 @@ async def generate_effect(job_id: str, prompt: str = Form(...), duration: int =
         def run_inference():
             with torch.no_grad():
-                return audio_pipe(prompt, forward_params={"max_new_tokens": max_tokens})
         result = await asyncio.to_thread(run_inference)
@@ -100,15 +109,20 @@ async def generate_effect(job_id: str, prompt: str = Form(...), duration: int =
         if isinstance(audio_data, torch.Tensor):
             audio_data = audio_data.cpu().numpy()
-        # Squeeze if necessary
-        audio_data = np.squeeze(audio_data)
-        # Normalize audio to -1.0 to 1.0 range if it isn't already
         max_val = np.abs(audio_data).max()
         if max_val > 0:
-            audio_data = audio_data / max_val
-        # Convert to 16-bit PCM (standard WAV format) for better quality/compatibility
         audio_data = (audio_data * 32767).astype(np.int16)
         wav_buf = io.BytesIO()

         def run_inference():
             with torch.no_grad():
+                # Enabling sampling for AudioGen-small as well
+                return audio_pipe(
+                    prompt,
+                    forward_params={
+                        "max_new_tokens": max_tokens,
+                        "do_sample": True,
+                        "temperature": 1.0,
+                        "top_k": 250
+                    }
+                )
         result = await asyncio.to_thread(run_inference)
         if isinstance(audio_data, torch.Tensor):
             audio_data = audio_data.cpu().numpy()
+        # Clean data and handle dimensions
+        audio_data = np.nan_to_num(audio_data)
+        if audio_data.ndim > 1:
+            audio_data = audio_data[0]
+        if audio_data.ndim > 1:
+            audio_data = np.mean(audio_data, axis=0)
+        # Normalize audio to -1.0 to 1.0 range
         max_val = np.abs(audio_data).max()
         if max_val > 0:
+            audio_data = audio_data / (max_val + 1e-6)
+        # Convert to 16-bit PCM (standard WAV format)
         audio_data = (audio_data * 32767).astype(np.int16)
         wav_buf = io.BytesIO()