Spaces:
Build error
Build error
update demo
Browse files
README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
---
|
| 2 |
title: Musika
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: purple
|
| 5 |
colorTo: blue
|
| 6 |
sdk: gradio
|
|
|
|
| 1 |
---
|
| 2 |
title: Musika
|
| 3 |
+
emoji: 🎵
|
| 4 |
colorFrom: purple
|
| 5 |
colorTo: blue
|
| 6 |
sdk: gradio
|
utils.py
CHANGED
|
@@ -34,11 +34,7 @@ class Utils_functions:
|
|
| 34 |
)
|
| 35 |
mel_f = tf.convert_to_tensor(librosa.mel_frequencies(n_mels=args.mel_bins + 2, fmin=0.0, fmax=args.sr // 2))
|
| 36 |
enorm = tf.cast(
|
| 37 |
-
tf.expand_dims(
|
| 38 |
-
tf.constant(2.0 / (mel_f[2 : args.mel_bins + 2] - mel_f[: args.mel_bins])),
|
| 39 |
-
0,
|
| 40 |
-
),
|
| 41 |
-
tf.float32,
|
| 42 |
)
|
| 43 |
melmat = tf.multiply(melmat, enorm)
|
| 44 |
melmat = tf.divide(melmat, tf.reduce_sum(melmat, axis=0))
|
|
@@ -165,8 +161,9 @@ class Utils_functions:
|
|
| 165 |
outls.append(model(x[i * bs : i * bs + bs], training=False))
|
| 166 |
|
| 167 |
if dual_out:
|
| 168 |
-
return
|
| 169 |
-
[outls[k][
|
|
|
|
| 170 |
)
|
| 171 |
else:
|
| 172 |
return np.concatenate(outls, 0)
|
|
@@ -199,8 +196,9 @@ class Utils_functions:
|
|
| 199 |
inp = tf.concat(inpls, 0)
|
| 200 |
res = model(inp, training=False)
|
| 201 |
outls.append(res)
|
| 202 |
-
return
|
| 203 |
-
[outls[k][
|
|
|
|
| 204 |
)
|
| 205 |
|
| 206 |
def distribute_dec2(self, x, model, bs=64):
|
|
@@ -228,17 +226,7 @@ class Utils_functions:
|
|
| 228 |
return tf.image.random_crop(noisetot, [1, self.args.latlen, 64 + 64])
|
| 229 |
|
| 230 |
def generate_example_stereo(self, models_ls):
|
| 231 |
-
(
|
| 232 |
-
critic,
|
| 233 |
-
gen,
|
| 234 |
-
enc,
|
| 235 |
-
dec,
|
| 236 |
-
enc2,
|
| 237 |
-
dec2,
|
| 238 |
-
critic_rec,
|
| 239 |
-
gen_ema,
|
| 240 |
-
[opt_dec, opt_disc],
|
| 241 |
-
) = models_ls
|
| 242 |
abb = gen_ema(self.get_noise_interp(), training=False)
|
| 243 |
abbls = tf.split(abb, abb.shape[-2] // 16, -2)
|
| 244 |
abb = tf.concat(abbls, 0)
|
|
@@ -247,13 +235,7 @@ class Utils_functions:
|
|
| 247 |
for channel in range(2):
|
| 248 |
|
| 249 |
ab = self.distribute_dec2(
|
| 250 |
-
abb[
|
| 251 |
-
:,
|
| 252 |
-
:,
|
| 253 |
-
:,
|
| 254 |
-
channel * self.args.latdepth : channel * self.args.latdepth + self.args.latdepth,
|
| 255 |
-
],
|
| 256 |
-
dec2,
|
| 257 |
)
|
| 258 |
abls = tf.split(ab, ab.shape[-2] // self.args.shape, -2)
|
| 259 |
ab = tf.concat(abls, 0)
|
|
@@ -291,28 +273,14 @@ class Utils_functions:
|
|
| 291 |
|
| 292 |
fig, axs = plt.subplots(nrows=4, ncols=1, figsize=(20, 20))
|
| 293 |
axs[0].imshow(
|
| 294 |
-
np.flip(
|
| 295 |
-
np.array(
|
| 296 |
-
tf.transpose(
|
| 297 |
-
self.wv2spec_hop((abwv[:, 0] + abwv[:, 1]) / 2.0, 80.0, 256),
|
| 298 |
-
[1, 0],
|
| 299 |
-
)
|
| 300 |
-
),
|
| 301 |
-
-2,
|
| 302 |
-
),
|
| 303 |
cmap=None,
|
| 304 |
)
|
| 305 |
axs[0].axis("off")
|
| 306 |
axs[0].set_title("Generated1")
|
| 307 |
axs[1].imshow(
|
| 308 |
np.flip(
|
| 309 |
-
np.array(
|
| 310 |
-
tf.transpose(
|
| 311 |
-
self.wv2spec_hop((abwv2[:, 0] + abwv2[:, 1]) / 2.0, 80.0, 256),
|
| 312 |
-
[1, 0],
|
| 313 |
-
)
|
| 314 |
-
),
|
| 315 |
-
-2,
|
| 316 |
),
|
| 317 |
cmap=None,
|
| 318 |
)
|
|
@@ -320,13 +288,7 @@ class Utils_functions:
|
|
| 320 |
axs[1].set_title("Generated2")
|
| 321 |
axs[2].imshow(
|
| 322 |
np.flip(
|
| 323 |
-
np.array(
|
| 324 |
-
tf.transpose(
|
| 325 |
-
self.wv2spec_hop((abwv3[:, 0] + abwv3[:, 1]) / 2.0, 80.0, 256),
|
| 326 |
-
[1, 0],
|
| 327 |
-
)
|
| 328 |
-
),
|
| 329 |
-
-2,
|
| 330 |
),
|
| 331 |
cmap=None,
|
| 332 |
)
|
|
@@ -334,13 +296,7 @@ class Utils_functions:
|
|
| 334 |
axs[2].set_title("Generated3")
|
| 335 |
axs[3].imshow(
|
| 336 |
np.flip(
|
| 337 |
-
np.array(
|
| 338 |
-
tf.transpose(
|
| 339 |
-
self.wv2spec_hop((abwv4[:, 0] + abwv4[:, 1]) / 2.0, 80.0, 256),
|
| 340 |
-
[1, 0],
|
| 341 |
-
)
|
| 342 |
-
),
|
| 343 |
-
-2,
|
| 344 |
),
|
| 345 |
cmap=None,
|
| 346 |
)
|
|
@@ -351,26 +307,9 @@ class Utils_functions:
|
|
| 351 |
|
| 352 |
# Save in training loop
|
| 353 |
def save_end(
|
| 354 |
-
self,
|
| 355 |
-
epoch,
|
| 356 |
-
gloss,
|
| 357 |
-
closs,
|
| 358 |
-
mloss,
|
| 359 |
-
models_ls=None,
|
| 360 |
-
n_save=3,
|
| 361 |
-
save_path="checkpoints",
|
| 362 |
):
|
| 363 |
-
(
|
| 364 |
-
critic,
|
| 365 |
-
gen,
|
| 366 |
-
enc,
|
| 367 |
-
dec,
|
| 368 |
-
enc2,
|
| 369 |
-
dec2,
|
| 370 |
-
critic_rec,
|
| 371 |
-
gen_ema,
|
| 372 |
-
[opt_dec, opt_disc],
|
| 373 |
-
) = models_ls
|
| 374 |
if epoch % n_save == 0:
|
| 375 |
print("Saving...")
|
| 376 |
path = f"{save_path}/MUSIKA!_-{str(gloss)[:9]}-{str(closs)[:9]}-{str(mloss)[:9]}"
|
|
@@ -502,7 +441,7 @@ class Utils_functions:
|
|
| 502 |
)
|
| 503 |
|
| 504 |
def render_gradio(self, models_ls_techno, models_ls_classical, train=True):
|
| 505 |
-
article_text = "Original work by Marco Pasini ([Twitter](https://twitter.com/marco_ppasini)) at Johannes Kepler Universität Linz.
|
| 506 |
|
| 507 |
def gradio_func(x, y, z):
|
| 508 |
return self.stfunc(x, y, z, models_ls_techno, models_ls_classical)
|
|
@@ -514,13 +453,10 @@ class Utils_functions:
|
|
| 514 |
choices=["Techno/Experimental", "Classical"],
|
| 515 |
type="index",
|
| 516 |
default="Classical",
|
| 517 |
-
label="Music Genre to Generate
|
| 518 |
),
|
| 519 |
gr.inputs.Radio(
|
| 520 |
-
choices=["
|
| 521 |
-
type="index",
|
| 522 |
-
default="115 s",
|
| 523 |
-
label="Generated Music Length",
|
| 524 |
),
|
| 525 |
gr.inputs.Slider(
|
| 526 |
minimum=0,
|
|
@@ -536,7 +472,7 @@ class Utils_functions:
|
|
| 536 |
],
|
| 537 |
allow_screenshot=False,
|
| 538 |
title="musika!",
|
| 539 |
-
description="Blazingly Fast Stereo Waveform Music Generation of Arbitrary Length",
|
| 540 |
article=article_text,
|
| 541 |
layout="vertical",
|
| 542 |
theme="huggingface",
|
|
@@ -551,7 +487,7 @@ class Utils_functions:
|
|
| 551 |
if train:
|
| 552 |
iface.launch(prevent_thread_lock=True)
|
| 553 |
else:
|
| 554 |
-
iface.launch()
|
| 555 |
# iface.launch(share=True, enable_queue=True)
|
| 556 |
print("--------------------------------")
|
| 557 |
print("--------------------------------")
|
|
|
|
| 34 |
)
|
| 35 |
mel_f = tf.convert_to_tensor(librosa.mel_frequencies(n_mels=args.mel_bins + 2, fmin=0.0, fmax=args.sr // 2))
|
| 36 |
enorm = tf.cast(
|
| 37 |
+
tf.expand_dims(tf.constant(2.0 / (mel_f[2 : args.mel_bins + 2] - mel_f[: args.mel_bins])), 0,), tf.float32,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
)
|
| 39 |
melmat = tf.multiply(melmat, enorm)
|
| 40 |
melmat = tf.divide(melmat, tf.reduce_sum(melmat, axis=0))
|
|
|
|
| 161 |
outls.append(model(x[i * bs : i * bs + bs], training=False))
|
| 162 |
|
| 163 |
if dual_out:
|
| 164 |
+
return (
|
| 165 |
+
np.concatenate([outls[k][0] for k in range(len(outls))], 0),
|
| 166 |
+
np.concatenate([outls[k][1] for k in range(len(outls))], 0),
|
| 167 |
)
|
| 168 |
else:
|
| 169 |
return np.concatenate(outls, 0)
|
|
|
|
| 196 |
inp = tf.concat(inpls, 0)
|
| 197 |
res = model(inp, training=False)
|
| 198 |
outls.append(res)
|
| 199 |
+
return (
|
| 200 |
+
np.concatenate([outls[k][0] for k in range(len(outls))], 0),
|
| 201 |
+
np.concatenate([outls[k][1] for k in range(len(outls))], 0),
|
| 202 |
)
|
| 203 |
|
| 204 |
def distribute_dec2(self, x, model, bs=64):
|
|
|
|
| 226 |
return tf.image.random_crop(noisetot, [1, self.args.latlen, 64 + 64])
|
| 227 |
|
| 228 |
def generate_example_stereo(self, models_ls):
|
| 229 |
+
(critic, gen, enc, dec, enc2, dec2, critic_rec, gen_ema, [opt_dec, opt_disc],) = models_ls
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
abb = gen_ema(self.get_noise_interp(), training=False)
|
| 231 |
abbls = tf.split(abb, abb.shape[-2] // 16, -2)
|
| 232 |
abb = tf.concat(abbls, 0)
|
|
|
|
| 235 |
for channel in range(2):
|
| 236 |
|
| 237 |
ab = self.distribute_dec2(
|
| 238 |
+
abb[:, :, :, channel * self.args.latdepth : channel * self.args.latdepth + self.args.latdepth,], dec2,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
)
|
| 240 |
abls = tf.split(ab, ab.shape[-2] // self.args.shape, -2)
|
| 241 |
ab = tf.concat(abls, 0)
|
|
|
|
| 273 |
|
| 274 |
fig, axs = plt.subplots(nrows=4, ncols=1, figsize=(20, 20))
|
| 275 |
axs[0].imshow(
|
| 276 |
+
np.flip(np.array(tf.transpose(self.wv2spec_hop((abwv[:, 0] + abwv[:, 1]) / 2.0, 80.0, 256), [1, 0],)), -2,),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
cmap=None,
|
| 278 |
)
|
| 279 |
axs[0].axis("off")
|
| 280 |
axs[0].set_title("Generated1")
|
| 281 |
axs[1].imshow(
|
| 282 |
np.flip(
|
| 283 |
+
np.array(tf.transpose(self.wv2spec_hop((abwv2[:, 0] + abwv2[:, 1]) / 2.0, 80.0, 256), [1, 0],)), -2,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 284 |
),
|
| 285 |
cmap=None,
|
| 286 |
)
|
|
|
|
| 288 |
axs[1].set_title("Generated2")
|
| 289 |
axs[2].imshow(
|
| 290 |
np.flip(
|
| 291 |
+
np.array(tf.transpose(self.wv2spec_hop((abwv3[:, 0] + abwv3[:, 1]) / 2.0, 80.0, 256), [1, 0],)), -2,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
),
|
| 293 |
cmap=None,
|
| 294 |
)
|
|
|
|
| 296 |
axs[2].set_title("Generated3")
|
| 297 |
axs[3].imshow(
|
| 298 |
np.flip(
|
| 299 |
+
np.array(tf.transpose(self.wv2spec_hop((abwv4[:, 0] + abwv4[:, 1]) / 2.0, 80.0, 256), [1, 0],)), -2,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
),
|
| 301 |
cmap=None,
|
| 302 |
)
|
|
|
|
| 307 |
|
| 308 |
# Save in training loop
|
| 309 |
def save_end(
|
| 310 |
+
self, epoch, gloss, closs, mloss, models_ls=None, n_save=3, save_path="checkpoints",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
):
|
| 312 |
+
(critic, gen, enc, dec, enc2, dec2, critic_rec, gen_ema, [opt_dec, opt_disc],) = models_ls
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
if epoch % n_save == 0:
|
| 314 |
print("Saving...")
|
| 315 |
path = f"{save_path}/MUSIKA!_-{str(gloss)[:9]}-{str(closs)[:9]}-{str(mloss)[:9]}"
|
|
|
|
| 441 |
)
|
| 442 |
|
| 443 |
def render_gradio(self, models_ls_techno, models_ls_classical, train=True):
|
| 444 |
+
article_text = "Original work by Marco Pasini ([Twitter](https://twitter.com/marco_ppasini)) and Jan Schlüter at Johannes Kepler Universität Linz."
|
| 445 |
|
| 446 |
def gradio_func(x, y, z):
|
| 447 |
return self.stfunc(x, y, z, models_ls_techno, models_ls_classical)
|
|
|
|
| 453 |
choices=["Techno/Experimental", "Classical"],
|
| 454 |
type="index",
|
| 455 |
default="Classical",
|
| 456 |
+
label="Music Genre to Generate",
|
| 457 |
),
|
| 458 |
gr.inputs.Radio(
|
| 459 |
+
choices=["23s", "1m 58s", "3m 57s"], type="index", default="1m 58s", label="Generated Music Length",
|
|
|
|
|
|
|
|
|
|
| 460 |
),
|
| 461 |
gr.inputs.Slider(
|
| 462 |
minimum=0,
|
|
|
|
| 472 |
],
|
| 473 |
allow_screenshot=False,
|
| 474 |
title="musika!",
|
| 475 |
+
description="Blazingly Fast Stereo Waveform Music Generation of Arbitrary Length. Be patient and enjoy the weirdness!",
|
| 476 |
article=article_text,
|
| 477 |
layout="vertical",
|
| 478 |
theme="huggingface",
|
|
|
|
| 487 |
if train:
|
| 488 |
iface.launch(prevent_thread_lock=True)
|
| 489 |
else:
|
| 490 |
+
iface.launch(enable_queue=True)
|
| 491 |
# iface.launch(share=True, enable_queue=True)
|
| 492 |
print("--------------------------------")
|
| 493 |
print("--------------------------------")
|