Spaces:
Runtime error
Runtime error
fix audio loading
Browse files
notebooks/test_model.ipynb
CHANGED
|
@@ -46,6 +46,7 @@
|
|
| 46 |
"source": [
|
| 47 |
"import torch\n",
|
| 48 |
"import random\n",
|
|
|
|
| 49 |
"import numpy as np\n",
|
| 50 |
"from datasets import load_dataset\n",
|
| 51 |
"from IPython.display import Audio\n",
|
|
@@ -266,8 +267,8 @@
|
|
| 266 |
"source": [
|
| 267 |
"start_step = 500 #@param {type:\"slider\", min:0, max:1000, step:10}\n",
|
| 268 |
"overlap_secs = 2 #@param {type:\"integer\"}\n",
|
| 269 |
-
"
|
| 270 |
-
"overlap_samples = overlap_secs *
|
| 271 |
"slice_size = mel.x_res * mel.hop_length\n",
|
| 272 |
"stride = slice_size - overlap_samples\n",
|
| 273 |
"generator = torch.Generator(device=device)\n",
|
|
@@ -275,9 +276,9 @@
|
|
| 275 |
"print(f'Seed = {seed}')\n",
|
| 276 |
"track = np.array([])\n",
|
| 277 |
"not_first = 0\n",
|
| 278 |
-
"for sample in range(len(
|
| 279 |
" generator.manual_seed(seed)\n",
|
| 280 |
-
" audio = np.array(
|
| 281 |
" if not_first:\n",
|
| 282 |
" # Normalize and re-insert generated audio\n",
|
| 283 |
" audio[:overlap_samples] = audio2[-overlap_samples:] * np.max(\n",
|
|
@@ -309,7 +310,7 @@
|
|
| 309 |
"outputs": [],
|
| 310 |
"source": [
|
| 311 |
"slice = 3 #@param {type:\"integer\"}\n",
|
| 312 |
-
"raw_audio =
|
| 313 |
"_, (sample_rate,\n",
|
| 314 |
" audio2) = audio_diffusion.generate_spectrogram_and_audio_from_audio(\n",
|
| 315 |
" raw_audio=raw_audio,\n",
|
|
@@ -507,7 +508,7 @@
|
|
| 507 |
"metadata": {},
|
| 508 |
"outputs": [],
|
| 509 |
"source": [
|
| 510 |
-
"model_id = \"teticio/latent-audio-diffusion-ddim-256
|
| 511 |
]
|
| 512 |
},
|
| 513 |
{
|
|
|
|
| 46 |
"source": [
|
| 47 |
"import torch\n",
|
| 48 |
"import random\n",
|
| 49 |
+
"import librosa\n",
|
| 50 |
"import numpy as np\n",
|
| 51 |
"from datasets import load_dataset\n",
|
| 52 |
"from IPython.display import Audio\n",
|
|
|
|
| 267 |
"source": [
|
| 268 |
"start_step = 500 #@param {type:\"slider\", min:0, max:1000, step:10}\n",
|
| 269 |
"overlap_secs = 2 #@param {type:\"integer\"}\n",
|
| 270 |
+
"track_audio, _ = librosa.load(audio_file, mono=True, sr=sample_rate)\n",
|
| 271 |
+
"overlap_samples = overlap_secs * sample_rate\n",
|
| 272 |
"slice_size = mel.x_res * mel.hop_length\n",
|
| 273 |
"stride = slice_size - overlap_samples\n",
|
| 274 |
"generator = torch.Generator(device=device)\n",
|
|
|
|
| 276 |
"print(f'Seed = {seed}')\n",
|
| 277 |
"track = np.array([])\n",
|
| 278 |
"not_first = 0\n",
|
| 279 |
+
"for sample in range(len(track_audio) // stride):\n",
|
| 280 |
" generator.manual_seed(seed)\n",
|
| 281 |
+
" audio = np.array(track_audio[sample * stride:sample * stride + slice_size])\n",
|
| 282 |
" if not_first:\n",
|
| 283 |
" # Normalize and re-insert generated audio\n",
|
| 284 |
" audio[:overlap_samples] = audio2[-overlap_samples:] * np.max(\n",
|
|
|
|
| 310 |
"outputs": [],
|
| 311 |
"source": [
|
| 312 |
"slice = 3 #@param {type:\"integer\"}\n",
|
| 313 |
+
"raw_audio = track_audio[sample * stride:sample * stride + slice_size]\n",
|
| 314 |
"_, (sample_rate,\n",
|
| 315 |
" audio2) = audio_diffusion.generate_spectrogram_and_audio_from_audio(\n",
|
| 316 |
" raw_audio=raw_audio,\n",
|
|
|
|
| 508 |
"metadata": {},
|
| 509 |
"outputs": [],
|
| 510 |
"source": [
|
| 511 |
+
"model_id = \"teticio/latent-audio-diffusion-ddim-256\" #@param [\"teticio/latent-audio-diffusion-256\", \"teticio/latent-audio-diffusion-ddim-256\"]"
|
| 512 |
]
|
| 513 |
},
|
| 514 |
{
|