hoom4n commited on
Commit
f320de7
·
verified ·
1 Parent(s): e986d81

Upload 17 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model/bach_model.keras filter=lfs diff=lfs merge=lfs -text
artifacts/train_logs.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Preplexity": [
3
+ 2.019901752471924,
4
+ 1.3914695978164673,
5
+ 1.28533136844635,
6
+ 1.2096575498580933,
7
+ 1.1621309518814087
8
+ ],
9
+ "accuracy": [
10
+ 0.7929478287696838,
11
+ 0.9003167748451233,
12
+ 0.9229070544242859,
13
+ 0.9404971599578857,
14
+ 0.9523513913154602
15
+ ],
16
+ "loss": [
17
+ 0.7067986726760864,
18
+ 0.3346928358078003,
19
+ 0.25587165355682373,
20
+ 0.19564126431941986,
21
+ 0.1558060199022293
22
+ ],
23
+ "val_Preplexity": [
24
+ 2.1313233375549316,
25
+ 2.116178512573242,
26
+ 2.2391598224639893,
27
+ 2.3532228469848633,
28
+ 2.4712650775909424
29
+ ],
30
+ "val_accuracy": [
31
+ 0.8125607967376709,
32
+ 0.8240785598754883,
33
+ 0.8254508376121521,
34
+ 0.8284142017364502,
35
+ 0.8301998376846313
36
+ ],
37
+ "val_loss": [
38
+ 0.7608373165130615,
39
+ 0.7542304992675781,
40
+ 0.811232328414917,
41
+ 0.8612667322158813,
42
+ 0.9103369116783142
43
+ ],
44
+ "learning_rate": [
45
+ 0.0010000000474974513,
46
+ 0.0009440609137527645,
47
+ 0.0008413951727561653,
48
+ 0.0007079457864165306,
49
+ 0.000562341301701963
50
+ ]
51
+ }
artifacts/vocab.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82d14189d3504eeef49f6a39d240ffeb9793e1a11c0eb73ea0516ff47e774aff
3
+ size 504
assets/css/theme.css ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .gradio-container {
2
+ background: linear-gradient(to bottom right, #e0f7fa, #b3e5fc) !important;
3
+ }
4
+ .dark .gradio-container {
5
+ background: linear-gradient(to bottom right, #2a0a3a, #1e1a5e) !important;
6
+ }
7
+ button {
8
+ border-radius: 20px !important;
9
+ background: linear-gradient(to right, #4a90e2, #e94e77) !important;
10
+ color: white !important;
11
+ }
12
+ #title {
13
+ font-size: 2.5em !important;
14
+ color: #1e3a8a;
15
+ text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
16
+ text-align: center;
17
+ margin-top: 36px;
18
+ margin-bottom: 10px;
19
+ }
20
+ .dark #title {
21
+ color: #e0f7fa !important;
22
+ text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
23
+ }
24
+ #title.persian {
25
+ text-align: center !important;
26
+ }
27
+ #summary {
28
+ color: #334155;
29
+ background: rgba(255,255,255,0.8);
30
+ padding: 15px;
31
+ border-radius: 15px;
32
+ box-shadow: 0 4px 6px rgba(0,0,0,0.1);
33
+ margin-bottom: 2px;
34
+ text-align: justify !important;
35
+ }
36
+ .dark #summary {
37
+ color: #d1d5db !important;
38
+ background: rgba(30, 30, 46, 0.8) !important;
39
+ }
40
+ #help_text {
41
+ color: #1f2937;
42
+ background: #f0f9ff;
43
+ padding: 15px;
44
+ border-left: 5px solid #3b82f6;
45
+ border-radius: 12px;
46
+ box-shadow: 0 4px 6px rgba(0,0,0,0.05);
47
+ margin-bottom: 20px;
48
+ text-align: justify !important;
49
+ }
50
+ .dark #help_text {
51
+ color: #d1d5db !important;
52
+ background: rgba(30, 30, 46, 0.8) !important;
53
+ border-left: 5px solid #60a5fa !important;
54
+ }
55
+ .persian {
56
+ direction: rtl;
57
+ text-align: right;
58
+ }
59
+ #summary.persian, #help_text.persian {
60
+ text-align: justify !important;
61
+ }
assets/markdown/english_help.md ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ **How to Generate Music**
2
+ 1. **Pick a seed:** Use a few seconds of an existing chorale from the validation set (unseen by the model) as the initial seed.
3
+ 2. **Control randomness:** Adjust the **Temperature slider**—higher values produce more diverse outputs, lower values are more conservative.
4
+ 3. **Adjust lengths:** Set the **seed length** for initial context and the **generated length** for output duration.
assets/markdown/english_summary.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ **BachNet🎵** is a production‑ready deep learning system for generating music in the style of J.S. Bach. Trained on a corpus of 382 chorales with a multi‑layer, sequence‑to‑sequence LSTM network, it learns both melodic patterns and temporal structures from sequences of 256 notes.
2
+
3
+ From a short seed segment of a chorale, BachNet can autoregressively compose entirely new pieces. Notes are sampled from a categorical distribution, with the degree of variation controlled by a temperature parameter.
4
+
5
+ The project also incorporates a fully in‑graph TensorFlow data‑streaming pipeline, enabling efficient, on‑the‑fly creation and batching of training samples. This design keeps the CPU busy preparing data while the GPU remains fully utilized for model training, maximizing both throughput and performance. Project GitHub: [github.com/hoom4n/BachNet](https://github.com/hoom4n/BachNet)
assets/markdown/persian_help.md ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ **راهنمای تولید موسیقی**
2
+ 1. با Pick Random Seed بصورت تصادفی از میان کرال های داده ولیدیشن (که در آموزش استفاده نشده اند) یکی را بعنوان دانه اولیه انتخاب کنید.
3
+ 2. با اسلایدر Generated Length طول دلخواه موسیقی تولیدشده را مشخص کنید. توجه داشته باشید که طولانی‌تر شدن اثر به زمان تولید بیشتری نیاز دارد.
4
+ 3. با اسلایدر Temperature میزان تصادفی بودن نت‌ها را تنظیم کنید: دمای بالاتر انتخاب نت‌ها را تصادفی‌تر می‌کند و دمای پایین‌تر منظم‌تر شدن انتخاب نت‌ها را به همراه دارد.
assets/markdown/persian_summary.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ **باخ‌نت🎵** سامانه‌ای بر پایه‌ی یادگیری ماشین است که برای آفرینش موسیقی در سبک یوهان سباستیان باخ ساخته شده. این مدل با بهره‌گیری از شبکه‌ی LSTM چندلایه و معماری دنباله‌به‌دنباله، روی مجموعه‌ای شامل ۳۸۲ کرال باخ آموزش دیده و از دنباله‌های ۲۵۶ نتی، هم الگوهای ملودیک و هم ساختارهای زمانی موسیقی را می‌آموزد.
2
+
3
+ باخ‌نت قادر است تنها با دریافت بخشی کوتاه از یک کرال به‌عنوان «بذر»، ادامه‌ی قطعه را به‌صورت خودبازگشتی بسازد و هر بار نت‌های تازه‌ای را به توالی بیفزاید. فرایند انتخاب نت‌ها بر اساس توزیع احتمالاتی انجام می‌شود و میزان خلاقیت یا پیش‌بینی‌پذیری خروجی با پارامتر «دما» قابل تنظیم است.
4
+
5
+ این پروژه همچنین از یک خط لوله‌ی داده‌ی درون‌گرافی در TensorFlow بهره می‌برد که امکان تولید و دسته‌بندی نمونه‌های دنباله‌به‌دنباله را به‌صورت برخط و کارآمد فراهم می‌کند. در این طراحی، پردازش داده‌ها بر عهده‌ی CPU است در حالی که GPU به‌طور کامل صرف آموزش می‌شود؛ نتیجه آن است که هم سرعت و هم کارایی سامانه در بالاترین سطح باقی می‌ماند.
6
+ لینک گیت هاب پروژه: [Github.com/hoom4n/BachNet](https://github.com/hoom4n/BachNet)
data/jsb_chorales.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe69a909ee4d54fd7a3054db335c4899f89ab39552edfd1708a3ea6c062c8cb4
3
+ size 215768
model/bach_model.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:837f3a94da07a6dc701066079d8c510e1d0d0757288c56a6e21e14e43c5b40bd
3
+ size 68031959
samples/sample.mid ADDED
Binary file (888 Bytes). View file
 
src/config.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Hyper-parameters and paths for BachNet training.

# Zipped JSB chorales dataset (CSV files, four note columns per row).
URL = "https://github.com/Hoom4n/BachNet/raw/refs/heads/main/dataset/jsb_chorales.zip"
SEED = 42              # global RNG seed for reproducibility
SEQ_LEN = 256          # notes per training sequence (model context length)
WINDOW_SHIFT = 1       # sliding-window step between consecutive training sequences
BATCH_SIZE = 256
LEARNING_RATE = 1e-3   # initial LR; decayed each epoch by the scheduler in trainer.py
WEIGHT_DECAY = 1e-4
EMBEDDING_DIM = 128    # note-embedding width
LSTM_LAYERS = 3
LSTM_UNITS = 512
LSTM_DROPOUT = 0.3
DENSE_UNITS = 256      # pre-logits Dense width; 0 disables that block (see model.get_model)
DROPOUT = 0.3
N_EPOCHS = 10
src/dataset.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ import numpy as np
3
+ import keras
4
+ import glob
5
+ import os
6
+
7
+ AUTOTUNE = tf.data.AUTOTUNE
8
+
9
+
10
def NoteEncoder(vocab_path, samples_path=None):
    """Load (or build and persist) the note vocabulary and return lookup layers.

    Returns a tuple ``(note2id, id2note, vocab)`` where the two
    ``IntegerLookup`` layers encode raw MIDI note values to ids and back.
    If ``vocab.npy`` is absent, the vocabulary is adapted from the CSV
    files under ``samples_path`` and cached to disk for next time.
    """
    vocab_file = os.path.join(vocab_path, "vocab.npy")

    if os.path.exists(vocab_file):
        print("vocab.npy found, loading from disk...")
        vocab = np.load(vocab_file)
    elif samples_path is not None:
        print("vocab.npy not found, adapting from sample files...")
        csv_paths = glob.glob(os.path.join(samples_path, "*.csv"))
        # Flatten every file's note grid, then keep the sorted unique values.
        note_arrays = [np.loadtxt(p, delimiter=",", skiprows=1).flatten() for p in csv_paths]
        vocab = np.unique(np.hstack(note_arrays))
        os.makedirs(vocab_path, exist_ok=True)
        np.save(vocab_file, vocab)
        print(f"vocab adapted and saved to {vocab_file}")
    else:
        raise ValueError("vocab file not found and samples_path not provided.")

    # num_oov_indices=0: every note seen at inference must already be in vocab.
    note2id = keras.layers.IntegerLookup(num_oov_indices=0, vocabulary=vocab)
    id2note = keras.layers.IntegerLookup(num_oov_indices=0, vocabulary=vocab, invert=True)
    return note2id, id2note, vocab
30
+
31
def parse_and_flatten(line):
    """Decode one CSV line (four voice columns) into a dataset of single notes."""
    # [0, 0, 0, 0] -> record defaults: four integer columns.
    voices = tf.io.decode_csv(line, [0,0,0,0])
    return tf.data.Dataset.from_tensor_slices(voices)
35
+
36
def seq2seq_from_chorale(path, seq_len, window_shift):
    """Turn one chorale CSV into overlapping (input, target) note windows.

    The target sequence is the input shifted forward by ``window_shift``
    notes, so the model learns next-note prediction.
    """
    notes = tf.data.TextLineDataset(path).skip(1).flat_map(parse_and_flatten)
    windows = notes.window(seq_len + window_shift, shift=window_shift, drop_remainder=True)
    sequences = windows.flat_map(lambda window: window.batch(seq_len + window_shift))
    return sequences.map(
        lambda seq: (seq[:-window_shift], seq[window_shift:]), AUTOTUNE)
43
+
44
def seq2seq_dataset(files_path, lookup_fn, seq_len=256, window_shift=1,
                    batch_size=64, shuffle_buffer=None, seed=42):
    """Build the full input pipeline over every chorale CSV matching ``files_path``.

    Each file is windowed into (input, target) note sequences, both sides are
    encoded through ``lookup_fn`` (the note->id lookup), the result is cached,
    optionally shuffled, then batched and prefetched.
    """
    per_file = tf.data.Dataset.list_files(files_path, shuffle=False).map(
        lambda file_path: seq2seq_from_chorale(file_path, seq_len, window_shift), AUTOTUNE)
    dataset = per_file.flat_map(lambda chorale_ds: chorale_ds).map(
        lambda inp, tar: (lookup_fn(inp), lookup_fn(tar)), AUTOTUNE).cache()

    if shuffle_buffer:
        dataset = dataset.shuffle(shuffle_buffer, seed=seed)

    return dataset.batch(batch_size).prefetch(AUTOTUNE)
src/inference.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from music21 import stream, chord
2
+ import tensorflow as tf
3
+ import keras
4
+ import numpy as np
5
+ import random
6
+ import glob
7
+
8
def predict_next_token(model, input_sequence, temperature=1, seed=42):
    """Sample one next-token id per batch row from the model's last-step logits.

    Higher ``temperature`` flattens the distribution (more random picks);
    lower values sharpen it toward the argmax.
    """
    assert keras.ops.ndim(input_sequence) == 2, "function expects input_sequence to be (batch_size, sequence_len)"
    last_step_logits = model.predict_on_batch(input_sequence)[:, -1, :]
    tempered_logits = last_step_logits / temperature
    return tf.random.categorical(tempered_logits, num_samples=1, seed=seed)
14
+
15
def generate_sequence(init_context, model, include_init_context=False, max_len=25, temperature=1 ,seed=42):
    """Autoregressively extend a seed sequence by sampling one token at a time.

    ``max_len`` counts chords; the caller reshapes output to (-1, 4) voices,
    so each chord requires 4 sampled notes — hence max_len * 4 steps.
    Returns only the newly generated tokens unless ``include_init_context``.
    """
    assert keras.ops.ndim(init_context) == 2, "function expects init_context to be (batch_size, sequence_len)"
    prompt_len = init_context.shape[1]
    sequence = init_context
    for _ in range(4 * max_len):
        sampled = predict_next_token(model, sequence, temperature=temperature, seed=seed)
        sequence = keras.ops.concatenate([sequence, sampled], axis=1)
    if include_init_context:
        return sequence
    return sequence[:, prompt_len:]
24
+
25
def generate_chorale(model, sample_seed_path,note2id,id2note, file_name= "samples/chorale.mid", max_len=25, temperature=1,
              sample_seed_rows: slice = slice(0,100), include_init_context=False, seed=42):
    """Generate a Bach-style MIDI chorale from a CSV seed excerpt.

    Loads a flattened slice of notes from ``sample_seed_path``, encodes it,
    extends it with the model, decodes back to MIDI note numbers and writes
    a music21 stream of 4-note chords to ``file_name``.
    """
    raw_notes = np.loadtxt(sample_seed_path, skiprows=1, delimiter=",").flatten()
    seed_ids = note2id(raw_notes[sample_seed_rows].reshape(1,-1))
    generated = generate_sequence(seed_ids, model, include_init_context=include_init_context,
                                  max_len=max_len, temperature=temperature ,seed=seed)
    # One row per time step, one column per voice (SATB).
    chord_rows = keras.ops.convert_to_numpy(keras.ops.reshape(id2note(generated), (-1,4)))
    score = stream.Stream([chord.Chord(row.tolist()) for row in chord_rows])
    score.write('midi', fp=file_name)
    print(f"chorale saved as {file_name}")
36
+
37
def draw_random_sample(csv_dir, seed=42):
    """Return a deterministic pseudo-random CSV file path from ``csv_dir``.

    Uses an instance-local ``random.Random`` so the module-level RNG state is
    not touched (the previous version called ``random.seed`` globally, which
    silently reseeded every other consumer of the ``random`` module).
    ``Random(seed).shuffle`` yields the same permutation as the old code.

    Raises:
        FileNotFoundError: if the directory contains no ``*.csv`` files
            (previously this surfaced as a confusing ``IndexError``).
    """
    files = glob.glob(csv_dir + '/*.csv')
    if not files:
        raise FileNotFoundError(f"no CSV files found in {csv_dir}")
    rng = random.Random(seed)
    rng.shuffle(files)
    return files[0]
src/metrics.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import keras
2
+
3
class Preplexity(keras.metrics.Metric):
    """Perplexity metric: exp of the running mean cross-entropy.

    NOTE(review): the class/metric name is a misspelling of "Perplexity",
    but it is kept because saved training logs and history keys
    (e.g. artifacts/train_logs.json) reference this exact string.
    """

    def __init__(self, name="Preplexity", **kwargs):
        super().__init__(name=name, **kwargs)
        # Running mean of per-token cross-entropy across all seen batches.
        self.cross_entropy = keras.metrics.Mean()

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate cross-entropy for one batch; ``y_pred`` must be raw logits."""
        batch_ce = keras.losses.sparse_categorical_crossentropy(y_true, y_pred , from_logits=True)
        # Mean() reduces over both the batch and sequence-length dimensions.
        self.cross_entropy.update_state(batch_ce, sample_weight=sample_weight)

    def result(self):
        # perplexity = exp(mean cross-entropy)
        return keras.ops.exp(self.cross_entropy.result())

    def reset_state(self):
        self.cross_entropy.reset_state()
src/model.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import keras
2
+ from metrics import Preplexity
3
+
4
def get_model(lr, weight_decay, emb_in, emb_out, lstm_layers, lstm_units, lstm_dropout, dense_units, dropout):
    """Build and compile the next-note LSTM language model.

    Architecture: Embedding -> (LSTM + LayerNorm) * lstm_layers ->
    optional Dense+Dropout head -> Dense logits over the vocabulary.
    ``emb_in`` is the vocabulary size (also the logits width);
    ``dense_units <= 0`` skips the intermediate Dense block entirely.
    Returns the compiled model (SparseCategoricalCrossentropy on logits,
    Nadam with weight decay and gradient clipping, Preplexity + accuracy).
    """
    assert lstm_layers >= 1, "expect at least one LSTM layer"

    layers = [keras.layers.Embedding(emb_in ,emb_out, name="Embedding_Layer", input_shape=(None,))]

    for layer_idx in range(lstm_layers):
        # return_sequences=True everywhere: the model predicts at every step.
        layers.append(keras.layers.LSTM(lstm_units, return_sequences=True,
                                        dropout=lstm_dropout, name=f"LSTM_Layer_{layer_idx}"))
        layers.append(keras.layers.LayerNormalization(name=f"Layer_Norm_{layer_idx}"))

    if dense_units > 0:
        layers.append(keras.layers.Dense(dense_units, activation="relu", name="Dense_Layer",
                                         kernel_regularizer=keras.regularizers.L2(1e-5)))
        layers.append(keras.layers.Dropout(dropout, name="Dropout_Layer"))

    # No softmax here: loss and sampling both consume raw logits.
    layers.append(keras.layers.Dense(emb_in, name="Logits"))

    model = keras.Sequential(layers, name="BachModel")
    model.compile(
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        optimizer=keras.optimizers.Nadam(lr, weight_decay = weight_decay, clipnorm=1.0),
        metrics=[Preplexity(), "accuracy"],
        jit_compile=True,
    )
    return model
src/trainer.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import keras
2
+ import json
3
+ import os
4
+
5
def exp_decay(epoch, lr):
    """LearningRateScheduler callback: decay the LR by 10x every 40 epochs."""
    decay_factor = 0.1 ** (epoch / 40)
    return lr * decay_factor
7
+
8
def train_model(bach_model, train, val, n_epochs, ARTIFACTS_PATH, MODEL_PATH):
    """Fit the model, checkpoint per epoch, save the final model, dump history.

    Side effects: writes ``checkpoint.keras`` and ``train_logs.json`` under
    ``ARTIFACTS_PATH`` and ``bach_model.keras`` under ``MODEL_PATH``.
    Note: EarlyStopping uses restore_best_weights=False — the per-epoch
    ModelCheckpoint is the recovery mechanism instead.
    """
    checkpoint_file = os.path.join(ARTIFACTS_PATH , "checkpoint.keras")
    callbacks = [
        keras.callbacks.LearningRateScheduler(exp_decay),
        keras.callbacks.EarlyStopping(patience=3, restore_best_weights=False,
                                      verbose=True, min_delta=5e-5),
        keras.callbacks.ModelCheckpoint(checkpoint_file, verbose=1),
    ]

    history = bach_model.fit(train, validation_data=val, epochs=n_epochs,
                             callbacks=callbacks)

    bach_model.save(os.path.join(MODEL_PATH, "bach_model.keras"))

    log_file = os.path.join(ARTIFACTS_PATH, "train_logs.json")
    with open(log_file, "w") as f:
        json.dump(history.history, f, indent=4)
src/utils.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import keras
2
+ import os
3
+ import subprocess
4
+ from pathlib import Path
5
+
6
def get_dataset_path(root_dir, URL):
    """Download and extract the chorales CSV dataset; prepare output directories.

    Returns the tuple (TRAIN_PATH, VAL_PATH, ARTIFACTS_PATH, MODEL_PATH).
    The zip is cached under ``root_dir/data`` by ``keras.utils.get_file``.
    """
    DATASET_PATH = keras.utils.get_file(
        "jsb_chorales.zip",
        URL,
        extract=True,
        cache_dir=root_dir,
        cache_subdir="data",
    )

    TRAIN_PATH = os.path.join(DATASET_PATH, "jsb_chorales/train")
    VAL_PATH = os.path.join(DATASET_PATH, "jsb_chorales/val")
    ARTIFACTS_PATH = os.path.join(root_dir, "artifacts")
    MODEL_PATH = os.path.join(root_dir, "model")

    for out_dir in (ARTIFACTS_PATH, MODEL_PATH):
        os.makedirs(out_dir, exist_ok=True)
    return TRAIN_PATH, VAL_PATH, ARTIFACTS_PATH, MODEL_PATH
24
+
25
def midi_to_wave(midi_file_path, SF2_PATH, wave_path="samples/sample.wav"):
    """Convert a MIDI file to a WAV audio file using the external FluidSynth CLI.

    Args:
        midi_file_path: path to the source ``.mid`` file.
        SF2_PATH: path to the SoundFont used for synthesis.
        wave_path: output WAV path; parent directories are created as needed.

    Raises:
        FileNotFoundError: if the MIDI or SoundFont file does not exist.
        RuntimeError: if the fluidsynth subprocess exits non-zero.
    """
    if not os.path.exists(midi_file_path):
        raise FileNotFoundError(f"MIDI file not found: {midi_file_path}")
    if not os.path.exists(SF2_PATH):
        raise FileNotFoundError(f"SoundFont file not found: {SF2_PATH}")

    out_dir = os.path.dirname(wave_path)
    # Bug fix: for a bare filename dirname() is "", and os.makedirs("")
    # raises FileNotFoundError — only create the directory when one is given.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    cmd = ["fluidsynth", "-ni", "-F", wave_path, "-r", "44100", SF2_PATH, midi_file_path]

    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        # Chain the cause so the original CalledProcessError is preserved.
        raise RuntimeError(f"FluidSynth failed: {e.stderr}") from e

    print(f"WAV file saved at {wave_path}")
41
+
42
# Repository assets directory (CSS + markdown), resolved relative to this file.
ASSETS_DIR = Path(__file__).parent.parent / "assets"

def load_css():
    """Return the Gradio theme stylesheet (assets/css/theme.css) as text."""
    css_file = ASSETS_DIR / "css/theme.css"
    return css_file.read_text(encoding="utf-8")

def load_markdown(name):
    """Return assets/markdown/<name>.md as text."""
    md_file = ASSETS_DIR / f"markdown/{name}.md"
    return md_file.read_text(encoding="utf-8")