Spaces:
Runtime error
Runtime error
fix remix
Browse files- notebooks/test_model.ipynb +15 -36
notebooks/test_model.ipynb
CHANGED
|
@@ -90,7 +90,7 @@
|
|
| 90 |
{
|
| 91 |
"cell_type": "code",
|
| 92 |
"execution_count": null,
|
| 93 |
-
"id": "
|
| 94 |
"metadata": {},
|
| 95 |
"outputs": [],
|
| 96 |
"source": [
|
|
@@ -181,7 +181,7 @@
|
|
| 181 |
},
|
| 182 |
{
|
| 183 |
"cell_type": "markdown",
|
| 184 |
-
"id": "
|
| 185 |
"metadata": {},
|
| 186 |
"source": [
|
| 187 |
"### Generate continuations (\"out-painting\")"
|
|
@@ -190,7 +190,7 @@
|
|
| 190 |
{
|
| 191 |
"cell_type": "code",
|
| 192 |
"execution_count": null,
|
| 193 |
-
"id": "
|
| 194 |
"metadata": {},
|
| 195 |
"outputs": [],
|
| 196 |
"source": [
|
|
@@ -245,26 +245,14 @@
|
|
| 245 |
},
|
| 246 |
{
|
| 247 |
"cell_type": "code",
|
| 248 |
-
"execution_count":
|
| 249 |
"id": "5a257e69",
|
| 250 |
"metadata": {
|
| 251 |
-
"scrolled":
|
| 252 |
},
|
| 253 |
-
"outputs": [
|
| 254 |
-
{
|
| 255 |
-
"ename": "NameError",
|
| 256 |
-
"evalue": "name 'sample_rate' is not defined",
|
| 257 |
-
"output_type": "error",
|
| 258 |
-
"traceback": [
|
| 259 |
-
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
| 260 |
-
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
| 261 |
-
"Input \u001b[0;32mIn [9]\u001b[0m, in \u001b[0;36m<cell line: 10>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 11\u001b[0m generator\u001b[38;5;241m.\u001b[39mmanual_seed(seed)\n\u001b[1;32m 12\u001b[0m audio \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marray(mel\u001b[38;5;241m.\u001b[39maudio[sample \u001b[38;5;241m*\u001b[39m stride:sample \u001b[38;5;241m*\u001b[39m stride \u001b[38;5;241m+\u001b[39m slice_size])\n\u001b[0;32m---> 13\u001b[0m display(Audio(audio, rate\u001b[38;5;241m=\u001b[39m\u001b[43msample_rate\u001b[49m))\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(track) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# Normalize and re-insert generated audio\u001b[39;00m\n\u001b[1;32m 16\u001b[0m audio[:overlap_samples] \u001b[38;5;241m=\u001b[39m audio2[\u001b[38;5;241m-\u001b[39moverlap_samples:] \u001b[38;5;241m*\u001b[39m np\u001b[38;5;241m.\u001b[39mmax(\n\u001b[1;32m 17\u001b[0m audio[:overlap_samples]) \u001b[38;5;241m/\u001b[39m np\u001b[38;5;241m.\u001b[39mmax(audio2[\u001b[38;5;241m-\u001b[39moverlap_samples:])\n",
|
| 262 |
-
"\u001b[0;31mNameError\u001b[0m: name 'sample_rate' is not defined"
|
| 263 |
-
]
|
| 264 |
-
}
|
| 265 |
-
],
|
| 266 |
"source": [
|
| 267 |
-
"start_step =
|
| 268 |
"overlap_secs = 2 #@param {type:\"integer\"}\n",
|
| 269 |
"mel.load_audio(audio_file)\n",
|
| 270 |
"overlap_samples = overlap_secs * mel.get_sample_rate()\n",
|
|
@@ -273,11 +261,11 @@
|
|
| 273 |
"generator = torch.Generator()\n",
|
| 274 |
"seed = generator.seed()\n",
|
| 275 |
"track = np.array([])\n",
|
|
|
|
| 276 |
"for sample in range(len(mel.audio) // stride):\n",
|
| 277 |
" generator.manual_seed(seed)\n",
|
| 278 |
" audio = np.array(mel.audio[sample * stride:sample * stride + slice_size])\n",
|
| 279 |
-
"
|
| 280 |
-
" if len(track) > 0:\n",
|
| 281 |
" # Normalize and re-insert generated audio\n",
|
| 282 |
" audio[:overlap_samples] = audio2[-overlap_samples:] * np.max(\n",
|
| 283 |
" audio[:overlap_samples]) / np.max(audio2[-overlap_samples:])\n",
|
|
@@ -286,24 +274,15 @@
|
|
| 286 |
" raw_audio=audio,\n",
|
| 287 |
" start_step=start_step,\n",
|
| 288 |
" generator=generator,\n",
|
| 289 |
-
" mask_start_secs=overlap_secs
|
| 290 |
-
"
|
| 291 |
-
"
|
| 292 |
-
|
| 293 |
-
},
|
| 294 |
-
{
|
| 295 |
-
"cell_type": "code",
|
| 296 |
-
"execution_count": null,
|
| 297 |
-
"id": "8f5ed5d8",
|
| 298 |
-
"metadata": {},
|
| 299 |
-
"outputs": [],
|
| 300 |
-
"source": [
|
| 301 |
-
"display(Audio(track, rate=sample_rate))"
|
| 302 |
]
|
| 303 |
},
|
| 304 |
{
|
| 305 |
"cell_type": "markdown",
|
| 306 |
-
"id": "
|
| 307 |
"metadata": {},
|
| 308 |
"source": [
|
| 309 |
"### Fill the gap (\"in-painting\")"
|
|
@@ -312,7 +291,7 @@
|
|
| 312 |
{
|
| 313 |
"cell_type": "code",
|
| 314 |
"execution_count": null,
|
| 315 |
-
"id": "
|
| 316 |
"metadata": {},
|
| 317 |
"outputs": [],
|
| 318 |
"source": [
|
|
|
|
| 90 |
{
|
| 91 |
"cell_type": "code",
|
| 92 |
"execution_count": null,
|
| 93 |
+
"id": "4dc17ac0",
|
| 94 |
"metadata": {},
|
| 95 |
"outputs": [],
|
| 96 |
"source": [
|
|
|
|
| 181 |
},
|
| 182 |
{
|
| 183 |
"cell_type": "markdown",
|
| 184 |
+
"id": "58a876c1",
|
| 185 |
"metadata": {},
|
| 186 |
"source": [
|
| 187 |
"### Generate continuations (\"out-painting\")"
|
|
|
|
| 190 |
{
|
| 191 |
"cell_type": "code",
|
| 192 |
"execution_count": null,
|
| 193 |
+
"id": "b95d5780",
|
| 194 |
"metadata": {},
|
| 195 |
"outputs": [],
|
| 196 |
"source": [
|
|
|
|
| 245 |
},
|
| 246 |
{
|
| 247 |
"cell_type": "code",
|
| 248 |
+
"execution_count": null,
|
| 249 |
"id": "5a257e69",
|
| 250 |
"metadata": {
|
| 251 |
+
"scrolled": false
|
| 252 |
},
|
| 253 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
"source": [
|
| 255 |
+
"start_step = 600 #@param {type:\"slider\", min:0, max:1000, step:10}\n",
|
| 256 |
"overlap_secs = 2 #@param {type:\"integer\"}\n",
|
| 257 |
"mel.load_audio(audio_file)\n",
|
| 258 |
"overlap_samples = overlap_secs * mel.get_sample_rate()\n",
|
|
|
|
| 261 |
"generator = torch.Generator()\n",
|
| 262 |
"seed = generator.seed()\n",
|
| 263 |
"track = np.array([])\n",
|
| 264 |
+
"not_first = 0\n",
|
| 265 |
"for sample in range(len(mel.audio) // stride):\n",
|
| 266 |
" generator.manual_seed(seed)\n",
|
| 267 |
" audio = np.array(mel.audio[sample * stride:sample * stride + slice_size])\n",
|
| 268 |
+
" if not_first:\n",
|
|
|
|
| 269 |
" # Normalize and re-insert generated audio\n",
|
| 270 |
" audio[:overlap_samples] = audio2[-overlap_samples:] * np.max(\n",
|
| 271 |
" audio[:overlap_samples]) / np.max(audio2[-overlap_samples:])\n",
|
|
|
|
| 274 |
" raw_audio=audio,\n",
|
| 275 |
" start_step=start_step,\n",
|
| 276 |
" generator=generator,\n",
|
| 277 |
+
" mask_start_secs=overlap_secs * not_first)\n",
|
| 278 |
+
" track = np.concatenate([track, audio2[overlap_samples * not_first:]])\n",
|
| 279 |
+
" not_first = 1\n",
|
| 280 |
+
" display(Audio(track, rate=sample_rate))"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
]
|
| 282 |
},
|
| 283 |
{
|
| 284 |
"cell_type": "markdown",
|
| 285 |
+
"id": "924ff9d5",
|
| 286 |
"metadata": {},
|
| 287 |
"source": [
|
| 288 |
"### Fill the gap (\"in-painting\")"
|
|
|
|
| 291 |
{
|
| 292 |
"cell_type": "code",
|
| 293 |
"execution_count": null,
|
| 294 |
+
"id": "0200264c",
|
| 295 |
"metadata": {},
|
| 296 |
"outputs": [],
|
| 297 |
"source": [
|