Spaces:
Runtime error
Runtime error
clarification
Browse files- notebooks/test_model.ipynb +15 -15
notebooks/test_model.ipynb
CHANGED
|
@@ -492,7 +492,7 @@
|
|
| 492 |
},
|
| 493 |
{
|
| 494 |
"cell_type": "markdown",
|
| 495 |
-
"id": "
|
| 496 |
"metadata": {},
|
| 497 |
"source": [
|
| 498 |
"## Latent Audio Diffusion\n",
|
|
@@ -502,7 +502,7 @@
|
|
| 502 |
{
|
| 503 |
"cell_type": "code",
|
| 504 |
"execution_count": null,
|
| 505 |
-
"id": "
|
| 506 |
"metadata": {},
|
| 507 |
"outputs": [],
|
| 508 |
"source": [
|
|
@@ -512,7 +512,7 @@
|
|
| 512 |
{
|
| 513 |
"cell_type": "code",
|
| 514 |
"execution_count": null,
|
| 515 |
-
"id": "
|
| 516 |
"metadata": {},
|
| 517 |
"outputs": [],
|
| 518 |
"source": [
|
|
@@ -522,7 +522,7 @@
|
|
| 522 |
{
|
| 523 |
"cell_type": "code",
|
| 524 |
"execution_count": null,
|
| 525 |
-
"id": "
|
| 526 |
"metadata": {},
|
| 527 |
"outputs": [],
|
| 528 |
"source": [
|
|
@@ -537,7 +537,7 @@
|
|
| 537 |
{
|
| 538 |
"cell_type": "code",
|
| 539 |
"execution_count": null,
|
| 540 |
-
"id": "
|
| 541 |
"metadata": {},
|
| 542 |
"outputs": [],
|
| 543 |
"source": [
|
|
@@ -551,7 +551,7 @@
|
|
| 551 |
},
|
| 552 |
{
|
| 553 |
"cell_type": "markdown",
|
| 554 |
-
"id": "
|
| 555 |
"metadata": {},
|
| 556 |
"source": [
|
| 557 |
"### Interpolation in latent space\n",
|
|
@@ -561,43 +561,43 @@
|
|
| 561 |
{
|
| 562 |
"cell_type": "code",
|
| 563 |
"execution_count": null,
|
| 564 |
-
"id": "
|
| 565 |
"metadata": {},
|
| 566 |
"outputs": [],
|
| 567 |
"source": [
|
| 568 |
"generator.manual_seed(seed)\n",
|
| 569 |
-
"
|
| 570 |
" audio_diffusion.pipe.unet.sample_size[0],\n",
|
| 571 |
" audio_diffusion.pipe.unet.sample_size[1]),\n",
|
| 572 |
" generator=generator)\n",
|
| 573 |
-
"
|
| 574 |
]
|
| 575 |
},
|
| 576 |
{
|
| 577 |
"cell_type": "code",
|
| 578 |
"execution_count": null,
|
| 579 |
-
"id": "
|
| 580 |
"metadata": {},
|
| 581 |
"outputs": [],
|
| 582 |
"source": [
|
| 583 |
"generator.manual_seed(seed2)\n",
|
| 584 |
-
"
|
| 585 |
" audio_diffusion.pipe.unet.sample_size[0],\n",
|
| 586 |
" audio_diffusion.pipe.unet.sample_size[1]),\n",
|
| 587 |
" generator=generator)\n",
|
| 588 |
-
"
|
| 589 |
]
|
| 590 |
},
|
| 591 |
{
|
| 592 |
"cell_type": "code",
|
| 593 |
"execution_count": null,
|
| 594 |
-
"id": "
|
| 595 |
"metadata": {},
|
| 596 |
"outputs": [],
|
| 597 |
"source": [
|
| 598 |
"alpha = 0.5 #@param {type:\"slider\", min:0, max:1, step:0.1}\n",
|
| 599 |
"_, (sample_rate, audio3) = audio_diffusion.generate_spectrogram_and_audio(\n",
|
| 600 |
-
" noise=audio_diffusion.pipe.slerp(
|
| 601 |
" generator=generator)\n",
|
| 602 |
"display(Audio(audio, rate=mel.get_sample_rate()))\n",
|
| 603 |
"display(Audio(audio2, rate=mel.get_sample_rate()))\n",
|
|
@@ -607,7 +607,7 @@
|
|
| 607 |
{
|
| 608 |
"cell_type": "code",
|
| 609 |
"execution_count": null,
|
| 610 |
-
"id": "
|
| 611 |
"metadata": {},
|
| 612 |
"outputs": [],
|
| 613 |
"source": []
|
|
|
|
| 492 |
},
|
| 493 |
{
|
| 494 |
"cell_type": "markdown",
|
| 495 |
+
"id": "9b244547",
|
| 496 |
"metadata": {},
|
| 497 |
"source": [
|
| 498 |
"## Latent Audio Diffusion\n",
|
|
|
|
| 502 |
{
|
| 503 |
"cell_type": "code",
|
| 504 |
"execution_count": null,
|
| 505 |
+
"id": "a88b3fbb",
|
| 506 |
"metadata": {},
|
| 507 |
"outputs": [],
|
| 508 |
"source": [
|
|
|
|
| 512 |
{
|
| 513 |
"cell_type": "code",
|
| 514 |
"execution_count": null,
|
| 515 |
+
"id": "15e353ee",
|
| 516 |
"metadata": {},
|
| 517 |
"outputs": [],
|
| 518 |
"source": [
|
|
|
|
| 522 |
{
|
| 523 |
"cell_type": "code",
|
| 524 |
"execution_count": null,
|
| 525 |
+
"id": "fa0f0c8c",
|
| 526 |
"metadata": {},
|
| 527 |
"outputs": [],
|
| 528 |
"source": [
|
|
|
|
| 537 |
{
|
| 538 |
"cell_type": "code",
|
| 539 |
"execution_count": null,
|
| 540 |
+
"id": "73dc575d",
|
| 541 |
"metadata": {},
|
| 542 |
"outputs": [],
|
| 543 |
"source": [
|
|
|
|
| 551 |
},
|
| 552 |
{
|
| 553 |
"cell_type": "markdown",
|
| 554 |
+
"id": "428d2d67",
|
| 555 |
"metadata": {},
|
| 556 |
"source": [
|
| 557 |
"### Interpolation in latent space\n",
|
|
|
|
| 561 |
{
|
| 562 |
"cell_type": "code",
|
| 563 |
"execution_count": null,
|
| 564 |
+
"id": "72211c2b",
|
| 565 |
"metadata": {},
|
| 566 |
"outputs": [],
|
| 567 |
"source": [
|
| 568 |
"generator.manual_seed(seed)\n",
|
| 569 |
+
"latents = torch.randn((1, audio_diffusion.pipe.unet.in_channels,\n",
|
| 570 |
" audio_diffusion.pipe.unet.sample_size[0],\n",
|
| 571 |
" audio_diffusion.pipe.unet.sample_size[1]),\n",
|
| 572 |
" generator=generator)\n",
|
| 573 |
+
"latents.shape"
|
| 574 |
]
|
| 575 |
},
|
| 576 |
{
|
| 577 |
"cell_type": "code",
|
| 578 |
"execution_count": null,
|
| 579 |
+
"id": "6c732dbe",
|
| 580 |
"metadata": {},
|
| 581 |
"outputs": [],
|
| 582 |
"source": [
|
| 583 |
"generator.manual_seed(seed2)\n",
|
| 584 |
+
"latents2 = torch.randn((1, audio_diffusion.pipe.unet.in_channels,\n",
|
| 585 |
" audio_diffusion.pipe.unet.sample_size[0],\n",
|
| 586 |
" audio_diffusion.pipe.unet.sample_size[1]),\n",
|
| 587 |
" generator=generator)\n",
|
| 588 |
+
"latents2.shape"
|
| 589 |
]
|
| 590 |
},
|
| 591 |
{
|
| 592 |
"cell_type": "code",
|
| 593 |
"execution_count": null,
|
| 594 |
+
"id": "159bcfc4",
|
| 595 |
"metadata": {},
|
| 596 |
"outputs": [],
|
| 597 |
"source": [
|
| 598 |
"alpha = 0.5 #@param {type:\"slider\", min:0, max:1, step:0.1}\n",
|
| 599 |
"_, (sample_rate, audio3) = audio_diffusion.generate_spectrogram_and_audio(\n",
|
| 600 |
+
" noise=audio_diffusion.pipe.slerp(latents, latents2, alpha),\n",
|
| 601 |
" generator=generator)\n",
|
| 602 |
"display(Audio(audio, rate=mel.get_sample_rate()))\n",
|
| 603 |
"display(Audio(audio2, rate=mel.get_sample_rate()))\n",
|
|
|
|
| 607 |
{
|
| 608 |
"cell_type": "code",
|
| 609 |
"execution_count": null,
|
| 610 |
+
"id": "ce6c9cc1",
|
| 611 |
"metadata": {},
|
| 612 |
"outputs": [],
|
| 613 |
"source": []
|