Spaces:
Runtime error
Runtime error
Achillefs Sourlas
committed on
Commit
·
31adff7
1
Parent(s):
2ac38d9
Initial commit
Browse files- .gitignore +2 -0
- app.py +100 -0
- beatgenerator.py +81 -0
- customtokenencoderdecoder.py +203 -0
- model/config.json +39 -0
- model/generation_config.json +6 -0
- model/pytorch_model.bin +3 -0
- model/training_args.bin +3 -0
- requirements.txt +4 -0
- tokenizer/added_tokens.json +66 -0
- tokenizer/merges.txt +0 -0
- tokenizer/special_tokens_map.json +24 -0
- tokenizer/tokenizer_config.json +33 -0
- tokenizer/vocab.json +0 -0
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
venv/
|
| 2 |
+
__pycache__/
|
app.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
from beatgenerator import BeatGenerator
from datetime import datetime
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import json

# Grid dimensions of the step-sequencer prompt: 32 steps (2 bars of
# 16th notes) for each of the 9 drum instruments.
STEP_COUNT = 32
INSTRUMENT_COUNT = 9

# Load the fine-tuned GPT-2 model and its tokenizer from local directories.
# NOTE(review): relative paths — assumes the app is launched from the
# repository root; confirm for the deployment environment.
model = GPT2LMHeadModel.from_pretrained("./model")
tokenizer = GPT2Tokenizer.from_pretrained("./tokenizer")
# GPT-2 ships without a pad token; reuse EOS so padding during generation works.
tokenizer.pad_token = tokenizer.eos_token
beat_generator = BeatGenerator(model=model, tokenizer=tokenizer)
|
| 14 |
+
|
| 15 |
+
# def on_submit(*grid_rows) -> [str]:
|
| 16 |
+
# step_data_container = []
|
| 17 |
+
|
| 18 |
+
# for grid_row_id in range(INSTRUMENT_COUNT):
|
| 19 |
+
# grid_row_as_ints = list(map(lambda x: int(x) - 1, grid_rows[grid_row_id]))
|
| 20 |
+
# step_data_container.append(grid_row_as_ints)
|
| 21 |
+
|
| 22 |
+
# temperature: float = grid_rows[9]
|
| 23 |
+
# tempo: int = grid_rows[10]
|
| 24 |
+
# now = datetime.now()
|
| 25 |
+
# date_string = now.strftime("%Y-%m-%d_%H-%M")
|
| 26 |
+
|
| 27 |
+
# genre, midi_data = beat_generator.generate_beat(user_prompt=step_data_container, temperature=temperature, tempo=tempo)
|
| 28 |
+
|
| 29 |
+
# return ["""<div><h3>Genre: {0}</h3></div><br/><div><a href="data:audio/midi;base64,{1}" download="beat-{0}-{2}.mid">Download beat</a></div>""".format(genre, midi_data, date_string)]
|
| 30 |
+
|
| 31 |
+
# checkbox_rows = [
|
| 32 |
+
# ["{:02d}".format(col + 1) for col in range(STEP_COUNT)] for _ in range(INSTRUMENT_COUNT)
|
| 33 |
+
# ]
|
| 34 |
+
|
| 35 |
+
# inputs = [
|
| 36 |
+
# gr.inputs.CheckboxGroup(checkbox_rows[0], label=f"Kick"),
|
| 37 |
+
# gr.inputs.CheckboxGroup(checkbox_rows[1], label=f"Snare"),
|
| 38 |
+
# gr.inputs.CheckboxGroup(checkbox_rows[2], label=f"Clap"),
|
| 39 |
+
# gr.inputs.CheckboxGroup(checkbox_rows[2], label=f"Hat"),
|
| 40 |
+
# gr.inputs.CheckboxGroup(checkbox_rows[2], label=f"L tom"),
|
| 41 |
+
# gr.inputs.CheckboxGroup(checkbox_rows[2], label=f"Open hat"),
|
| 42 |
+
# gr.inputs.CheckboxGroup(checkbox_rows[2], label=f"M tom"),
|
| 43 |
+
# gr.inputs.CheckboxGroup(checkbox_rows[2], label=f"Crash cymbal"),
|
| 44 |
+
# gr.inputs.CheckboxGroup(checkbox_rows[2], label=f"Ride cymbal"),
|
| 45 |
+
# gr.inputs.Slider(minimum=0.1, maximum=1.0, step=0.1, default=0.7, label="Temperature"),
|
| 46 |
+
# gr.inputs.Slider(minimum=60, maximum=200, step=1, default=120, label="Tempo")
|
| 47 |
+
# ]
|
| 48 |
+
|
| 49 |
+
# iface = gr.Interface(
|
| 50 |
+
# fn=on_submit,
|
| 51 |
+
# inputs=inputs,
|
| 52 |
+
# outputs=["html"],
|
| 53 |
+
# title="Simple (MIDI) Beat Generator",
|
| 54 |
+
# description="A simple beat generator that creates an 8-bar MIDI beats on every run, based on a 32-step (2 bars) prompt in the form of a step sequencer. The generator uses a small fine-tuned GPT-2 model to recognise the genre (currently only Trap and Deep House) and generate the beat."
|
| 55 |
+
# )
|
| 56 |
+
|
| 57 |
+
# iface.launch()
|
| 58 |
+
|
| 59 |
+
# Create a custom block for JSON input
class JSONInput(gr.inputs.Textbox):
    """A Textbox whose raw string value is parsed as JSON before reaching
    the wrapped function.

    NOTE(review): appears unused — the interface below passes inputs="text"
    and parses JSON inside on_did_receive_input instead; confirm before
    removing.
    """

    def preprocess(self, x):
        """Parse the textbox value *x* into a Python object.

        Returns None when *x* is not valid JSON, or is not a string at all
        (Gradio passes None for an empty/absent textbox).
        """
        try:
            return json.loads(x)
        except (TypeError, json.JSONDecodeError):
            # TypeError: x is None / not str-like (previously uncaught and
            # crashed the request); JSONDecodeError: malformed JSON text.
            return None
|
| 68 |
+
|
| 69 |
+
# Define your processing function
def on_did_receive_input(text: str) -> str:
    """Validate and echo back a JSON beat-generation request.

    Expects *text* to be a JSON object with keys "tempo", "temperature"
    and "music_data". Returns the normalised JSON string on success, or a
    human-readable error message on malformed input.

    NOTE(review): this currently only round-trips the payload — the actual
    beat_generator call does not appear to be wired up yet; confirm.
    """
    try:
        input_json_value = json.loads(text)
    except (TypeError, json.JSONDecodeError):
        # TypeError covers a None / non-string payload from Gradio, which
        # previously escaped the handler and crashed the request.
        return "Error! Invalid JSON input"

    try:
        tempo: int = input_json_value["tempo"]
        temperature: float = input_json_value["temperature"]
        data: list = input_json_value["music_data"]
    except (KeyError, TypeError):
        # KeyError: a required key is missing; TypeError: the top-level
        # JSON value was not an object (e.g. a bare list or number), which
        # previously raised uncaught.
        return "Error! Message was not found in JSON input"

    # Rebuild the payload explicitly so only the known keys are echoed back.
    # (Renamed from `dict`, which shadowed the builtin.)
    payload = {
        "tempo": tempo,
        "temperature": temperature,
        "music_data": data,
    }
    return json.dumps(payload)
|
| 92 |
+
|
| 93 |
+
# Create the Gradio interface: one free-text box in, the validated JSON
# payload (or an error message) out.
iface = gr.Interface(
    fn=on_did_receive_input,
    inputs="text",
    outputs="text"
)

# Start the app server (blocking call).
iface.launch()
|
beatgenerator.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from midiutil import MIDIFile
|
| 2 |
+
import base64
|
| 3 |
+
from io import BytesIO
|
| 4 |
+
from transformers import GPT2LMHeadModel, GPT2Tokenizer
|
| 5 |
+
from customtokenencoderdecoder import CustomTokenEncoderDecoder
|
| 6 |
+
|
| 7 |
+
class BeatGenerator:
    """Turns a user step-sequencer prompt into a full generated MIDI beat.

    Delegates token-level generation to CustomTokenEncoderDecoder, then
    renders the resulting (step, pitch) events to an in-memory MIDI file.
    """

    # Duration of one sequencer step in beats (a 16th note in 4/4).
    STEP_SIZE = 0.25
    # Steps per section: 2 bars of 16th notes.
    STEPS_PER_SEQUENCE = 32

    def __init__(self, model: GPT2LMHeadModel, tokenizer: GPT2Tokenizer):
        self.__model = model
        self.__tokenizer = tokenizer
        # Song structure: four 32-step sections, generated one after another.
        self.__sections = ["a", "b", "c", "d"]

    def generate_beat(self, user_prompt: [[int]], temperature: float, tempo: float) -> [str, str]:
        """Generate a beat from *user_prompt*.

        user_prompt: one list of active step indices per instrument, in the
        same order as *pitches* below (9 instruments).
        temperature: sampling temperature for the generation passes.
        tempo: beats per minute written to the MIDI file.
        Returns (genre, base64-encoded MIDI file contents).
        """
        # pitches = [36, 38, 42]
        # General-MIDI drum notes — presumably kick, snare, clap, closed hat,
        # low tom, open hat, mid tom, crash, ride, matching the UI row order;
        # TODO confirm against the frontend.
        pitches = [36, 38, 39, 42, 45, 46, 47, 49, 51]
        assert len(user_prompt) == len(pitches), "User prompt length must be equal to the number of pitches"

        # Flatten the per-instrument step lists into (step, pitch) events.
        user_events: [[int, int]] = []
        for pitch_id, pitch in enumerate(pitches):
            for step in user_prompt[pitch_id]:
                user_events.append((step, pitch))

        custom_token_encoder_decoder = CustomTokenEncoderDecoder(
            events=user_events,
            sections=self.__sections,
            steps_per_section=self.STEPS_PER_SEQUENCE,
            model=self.__model,
            tokenizer=self.__tokenizer,
        )

        result = custom_token_encoder_decoder.generate_events(temperature=temperature)

        genre = result["genre"]
        events = result["events"]

        midi_buffer = self.__make_midi_buffer(
            data_container=events,
            tempo=tempo,
            verbose=False
        )
        midi_base64 = base64.b64encode(midi_buffer.read()).decode("utf-8")

        return genre, midi_base64

    def __make_midi_buffer(self, data_container: [(int, int)], tempo: int, verbose: bool = False) -> BytesIO:
        """Render (step, pitch) events into a single-track MIDI file.

        Events whose step or pitch falls outside [0, 128) are silently
        skipped. Returns a BytesIO positioned at the start of the data.
        """
        out_midi_file = MIDIFile(1)
        out_midi_file.addTempo(0, 0, tempo)

        for data in data_container:
            step = data[0]
            pitch = data[1]
            velocity = 100

            if verbose is True:
                # Fixed: previously referenced the undefined names
                # step_ranges / section_id and raised NameError whenever
                # verbose was enabled.
                print("Processing: {0}".format(data))

            if step >= 0 and step < 128 and pitch >= 0 and pitch < 128:
                start_time = float(step) * self.STEP_SIZE
                volume = int(velocity)

                out_midi_file.addNote(
                    track=0,
                    channel=9,  # channel 10 (0-indexed 9): the GM drum channel
                    pitch=pitch,
                    time=start_time,
                    duration=self.STEP_SIZE,
                    volume=volume
                )

        buffer = BytesIO()
        out_midi_file.writeFile(buffer)
        buffer.seek(0)

        # NOTE(review): debug artifact — also dumps the beat to out.mid in
        # the working directory on every call; consider removing for the
        # deployed Space.
        with open("out.mid", "wb") as output_file:
            out_midi_file.writeFile(output_file)

        return buffer
|
customtokenencoderdecoder.py
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import GPT2Tokenizer, GPT2LMHeadModel
|
| 2 |
+
|
| 3 |
+
class CustomTokenEncoderDecoder:
    """Translates between (step, pitch) drum events and the custom token
    vocabulary the fine-tuned GPT-2 model was trained on, and runs the
    model to (a) classify the genre/section of the user prompt and
    (b) generate events for the remaining sections.

    Token formats (see tokenizer/added_tokens.json): "step:<n>",
    "pitch:<n>", "section:<a|b|c|d>", "genre:<name>".
    """

    # Appended to a prompt to ask the model to emit genre + section tokens.
    CUSTOM_CLASSIFICATION_TOKEN = "which_genre_section"

    def __init__(self, events: [[int, int]], sections: [str], steps_per_section: int, model: GPT2LMHeadModel, tokenizer: GPT2Tokenizer):
        self.__model = model
        self.__tokenizer = tokenizer
        self.__events = events
        self.__steps_per_section = steps_per_section
        self.__sections = sections
        # Pre-encode the user events once; reused for every prompt below.
        self.__events_tokens = self.events_to_tokens(events)

    # --- token predicates and converters ---------------------------------

    def is_step_token(self, token: str) -> bool:
        return token.startswith("step:")

    def is_pitch_token(self, token: str) -> bool:
        return token.startswith("pitch:")

    def is_genre_token(self, token: str) -> bool:
        return token.startswith("genre:")

    def is_section_token(self, token: str) -> bool:
        return token.startswith("section:")

    def token_to_pitch(self, token: str) -> int:
        return int(token.split(":")[1])

    def token_to_step(self, token: str) -> int:
        return int(token.split(":")[1])

    def token_to_section(self, token: str) -> str:
        return token.split(":")[1]

    def token_to_genre(self, token: str) -> str:
        return token.split(":")[1]

    def pitch_to_token(self, pitch: int) -> str:
        return "pitch:{0}".format(pitch)

    def step_to_token(self, step: int) -> [str]:
        return "step:{0}".format(step)

    def section_to_token(self, section: str) -> [str]:
        return "section:{0}".format(section)

    # --- event <-> token encoding ----------------------------------------

    def events_to_tokens(self, events: [[int, int]]) -> [str]:
        """Encode events as a flat token list: each active step emits one
        "step:<n>" token followed by one "pitch:<n>" token per hit."""
        result: [str] = []

        for step_id in range(self.__steps_per_section):
            step_data = list(filter(lambda x: x[0] == step_id, events))

            if len(step_data) > 0:
                result.append(self.step_to_token(step_id))
                step_tokens = list(map(lambda x: self.pitch_to_token(x[1]), step_data))
                if len(step_tokens) > 0:
                    result.append(*step_tokens) if False else result.extend(step_tokens)

        return result

    def tokens_to_classification_prompt(self, tokens: [str]) -> str:
        """Build the prompt that asks the model for genre/section tokens."""
        return " ".join(tokens + [self.CUSTOM_CLASSIFICATION_TOKEN])

    def tokens_to_section_prompt(self, tokens: [str], section: str, prompted_section: str) -> str:
        """Build a prompt labelling *tokens* as *section* and asking the
        model to continue with *prompted_section*."""
        return " ".join([self.section_to_token(section)] + tokens + [self.section_to_token(prompted_section)])

    def tokens_to_genre_section(self, tokens: [str]) -> dict:
        """Extract the last genre and section tokens seen in *tokens*.

        NOTE(review): duplicate of tokens_to_genre_and_section_information —
        consider consolidating.
        """
        genre: str = ""
        section: str = ""

        for token in tokens:
            if self.is_genre_token(token):
                genre = self.token_to_genre(token)
            elif self.is_section_token(token):
                section = self.token_to_section(token)

        return { "genre": genre, "section": section }

    def section_to_step_offset(self, section: str) -> int:
        """Map section name a/b/c/d to its absolute step offset in the song."""
        if section == "a":
            return 0
        elif section == "b":
            return self.__steps_per_section
        elif section == "c":
            return 2 * self.__steps_per_section
        elif section == "d":
            return 3 * self.__steps_per_section
        else:
            raise Exception("Invalid section: {0}".format(section))

    def tokens_to_section_events(self, tokens: [str], section: str, step_offset: int = None) -> [[int, int]]:
        """Decode the events that follow the *section* marker in *tokens*.

        step_offset, when given, overrides the section's natural offset
        (used by the inference fallback below). Raises when the section
        marker is absent.
        """
        for (token_id, token) in enumerate(tokens):
            if self.is_section_token(token):
                if self.token_to_section(token) == section:
                    offset: int = self.section_to_step_offset(section)
                    if step_offset is not None:
                        offset = step_offset
                    return self.tokens_to_events(tokens=tokens[token_id:], step_offset=offset)

        raise Exception("Section {0} not found in tokens".format(section))

    def tokens_to_events(self, tokens: [str], step_offset: int) -> [[int, int]]:
        """Decode a token stream into (step + step_offset, pitch) tuples.

        Pitch tokens only count while they directly follow a step token.
        """
        result: [[int, int]] = []

        for (token_id, token) in enumerate(tokens):
            if self.is_step_token(token):
                step = self.token_to_step(token) + step_offset
                next_token_id = token_id + 1

                while next_token_id < len(tokens) and self.is_pitch_token(tokens[next_token_id]):
                    pitch = self.token_to_pitch(tokens[next_token_id])
                    result.append((step, pitch))
                    next_token_id += 1

        return result

    def convert_events_to_section_events(self, events: [[int, int]], section: str) -> [[int, int]]:
        """Shift *events* by the step offset of *section*."""
        # Fixed: previously called the nonexistent self.step_offset_for_section,
        # raising AttributeError on every call.
        offset = self.section_to_step_offset(section)
        return list(map(lambda x: (x[0] + offset, x[1]), events))

    # --- inference --------------------------------------------------------

    def generate_events(self, temperature: float) -> dict:
        """Classify the user prompt, then generate the three remaining
        sections. Returns {"events": [(step, pitch), ...], "genre": str}."""
        genre_section_data = self.make_classification_inference(temperature=temperature)
        genre = genre_section_data["genre"]
        section = genre_section_data["section"]
        print("Classification results")
        print("======================")
        print("Found genre: {0}".format(genre))
        print("Found section: {0}".format(section))
        print("======================")

        all_events: [[int, int]] = []

        # The user prompt itself becomes the classified section's events.
        all_events += list(map(lambda x: (x[0] + self.section_to_step_offset(section=section), x[1]) ,self.__events))

        if section not in self.__sections:
            raise Exception("Section {0} not found in sections".format(section))

        other_sections = list(filter(lambda x: x != section, self.__sections))
        for other_section in other_sections:
            prompt = self.tokens_to_section_prompt(tokens=self.__events_tokens, section=section, prompted_section=other_section)
            events = self.make_section_events_inference(prompt=prompt, temperature=temperature, section=other_section, known_section=section)
            all_events += events

        return {
            "events": all_events,
            "genre": genre
        }

    def tokens_to_genre_and_section_information(self, tokens: [str]) -> dict:
        """Extract the last genre and section tokens seen in *tokens*."""
        genre: str = ""
        section: str = ""

        for token in tokens:
            if self.is_genre_token(token):
                genre = self.token_to_genre(token)
            elif self.is_section_token(token):
                section = self.token_to_section(token)

        return { "genre": genre, "section": section }

    def make_classification_inference(self, temperature: float) -> dict:
        """Ask the model which genre and section the user prompt belongs to."""
        genre_and_section_prompt = self.tokens_to_classification_prompt(self.__events_tokens)
        prompt = self.__tokenizer.encode(genre_and_section_prompt, add_special_tokens=True, return_tensors="pt")

        generated_section_genre_sequence = self.__model.generate(
            prompt,
            max_length=1024,
            do_sample=True,
            # NOTE(review): the *temperature* parameter is ignored here and a
            # fixed low temperature is used — presumably intentional to keep
            # classification stable; confirm.
            temperature=0.1,
            num_return_sequences=1,
        )

        section_genre_result = self.__tokenizer.decode(generated_section_genre_sequence[0], skip_special_tokens=True)
        assert len(section_genre_result) > 0, "Empty result"

        genre_section_data = self.tokens_to_genre_and_section_information(section_genre_result.split(" "))
        return genre_section_data

    def make_section_events_inference(self, prompt: str, section: str, temperature: float, known_section: str) -> [[int, int]]:
        """Generate the events of *section* by sampling a continuation of
        *prompt*. Falls back to re-reading the *known_section* span (shifted
        into place) when the model fails to emit the requested section."""
        tokenised_prompt = self.__tokenizer.encode(prompt, add_special_tokens=True, return_tensors="pt")
        assert len(tokenised_prompt[0]) <= 1024, "Prompt length exceeds maximum sequence length"

        generated_sequence = self.__model.generate(
            tokenised_prompt,
            max_length=1024,
            do_sample=True,
            temperature=temperature,
            num_return_sequences=1,
        )

        result = self.__tokenizer.decode(
            generated_sequence[0], skip_special_tokens=True
        )

        events = self.tokens_to_section_events(tokens=result.split(" "), section=section)
        # Fallback option when inference fails (sometimes the model generates a sequence that doesn't contain the section)
        if len(events) == 0:
            events = self.tokens_to_section_events(tokens=result.split(" "), section=known_section, step_offset=self.section_to_step_offset(section=section))

        assert len(events) > 0, "Empty result"

        return events
|
model/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "gpt2",
|
| 3 |
+
"activation_function": "gelu_new",
|
| 4 |
+
"architectures": [
|
| 5 |
+
"GPT2LMHeadModel"
|
| 6 |
+
],
|
| 7 |
+
"attn_pdrop": 0.1,
|
| 8 |
+
"bos_token_id": 50256,
|
| 9 |
+
"embd_pdrop": 0.1,
|
| 10 |
+
"eos_token_id": 50256,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"layer_norm_epsilon": 1e-05,
|
| 13 |
+
"model_type": "gpt2",
|
| 14 |
+
"n_ctx": 1024,
|
| 15 |
+
"n_embd": 768,
|
| 16 |
+
"n_head": 12,
|
| 17 |
+
"n_inner": null,
|
| 18 |
+
"n_layer": 12,
|
| 19 |
+
"n_positions": 1024,
|
| 20 |
+
"reorder_and_upcast_attn": false,
|
| 21 |
+
"resid_pdrop": 0.1,
|
| 22 |
+
"scale_attn_by_inverse_layer_idx": false,
|
| 23 |
+
"scale_attn_weights": true,
|
| 24 |
+
"summary_activation": null,
|
| 25 |
+
"summary_first_dropout": 0.1,
|
| 26 |
+
"summary_proj_to_labels": true,
|
| 27 |
+
"summary_type": "cls_index",
|
| 28 |
+
"summary_use_proj": true,
|
| 29 |
+
"task_specific_params": {
|
| 30 |
+
"text-generation": {
|
| 31 |
+
"do_sample": true,
|
| 32 |
+
"max_length": 50
|
| 33 |
+
}
|
| 34 |
+
},
|
| 35 |
+
"torch_dtype": "float32",
|
| 36 |
+
"transformers_version": "4.28.1",
|
| 37 |
+
"use_cache": true,
|
| 38 |
+
"vocab_size": 50321
|
| 39 |
+
}
|
model/generation_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 50256,
|
| 4 |
+
"eos_token_id": 50256,
|
| 5 |
+
"transformers_version": "4.28.1"
|
| 6 |
+
}
|
model/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1e51cee355f39d25c000d17d50c4313dd1787f086ce23191b8a495f9c33a82b
|
| 3 |
+
size 510594621
|
model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94b82dda5c87ea468fb088c62a5c04ce9158aed8f35b34ed6e7ab193f4cb4c8f
|
| 3 |
+
size 3707
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
MIDIUtil
|
| 3 |
+
transformers
|
| 4 |
+
torch
|
tokenizer/added_tokens.json
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"\n\n###\n\n": 50320,
|
| 3 |
+
"genre:DHouse": 50314,
|
| 4 |
+
"genre:Trap": 50313,
|
| 5 |
+
"pitch:36": 50289,
|
| 6 |
+
"pitch:37": 50290,
|
| 7 |
+
"pitch:38": 50291,
|
| 8 |
+
"pitch:39": 50292,
|
| 9 |
+
"pitch:40": 50293,
|
| 10 |
+
"pitch:41": 50294,
|
| 11 |
+
"pitch:42": 50295,
|
| 12 |
+
"pitch:43": 50296,
|
| 13 |
+
"pitch:44": 50297,
|
| 14 |
+
"pitch:45": 50298,
|
| 15 |
+
"pitch:46": 50299,
|
| 16 |
+
"pitch:47": 50300,
|
| 17 |
+
"pitch:48": 50301,
|
| 18 |
+
"pitch:49": 50302,
|
| 19 |
+
"pitch:50": 50303,
|
| 20 |
+
"pitch:51": 50304,
|
| 21 |
+
"pitch:52": 50305,
|
| 22 |
+
"pitch:53": 50306,
|
| 23 |
+
"pitch:54": 50307,
|
| 24 |
+
"pitch:55": 50308,
|
| 25 |
+
"pitch:56": 50309,
|
| 26 |
+
"pitch:57": 50310,
|
| 27 |
+
"pitch:58": 50311,
|
| 28 |
+
"pitch:59": 50312,
|
| 29 |
+
"section:a": 50315,
|
| 30 |
+
"section:b": 50316,
|
| 31 |
+
"section:c": 50317,
|
| 32 |
+
"section:d": 50318,
|
| 33 |
+
"step:0": 50257,
|
| 34 |
+
"step:1": 50258,
|
| 35 |
+
"step:10": 50267,
|
| 36 |
+
"step:11": 50268,
|
| 37 |
+
"step:12": 50269,
|
| 38 |
+
"step:13": 50270,
|
| 39 |
+
"step:14": 50271,
|
| 40 |
+
"step:15": 50272,
|
| 41 |
+
"step:16": 50273,
|
| 42 |
+
"step:17": 50274,
|
| 43 |
+
"step:18": 50275,
|
| 44 |
+
"step:19": 50276,
|
| 45 |
+
"step:2": 50259,
|
| 46 |
+
"step:20": 50277,
|
| 47 |
+
"step:21": 50278,
|
| 48 |
+
"step:22": 50279,
|
| 49 |
+
"step:23": 50280,
|
| 50 |
+
"step:24": 50281,
|
| 51 |
+
"step:25": 50282,
|
| 52 |
+
"step:26": 50283,
|
| 53 |
+
"step:27": 50284,
|
| 54 |
+
"step:28": 50285,
|
| 55 |
+
"step:29": 50286,
|
| 56 |
+
"step:3": 50260,
|
| 57 |
+
"step:30": 50287,
|
| 58 |
+
"step:31": 50288,
|
| 59 |
+
"step:4": 50261,
|
| 60 |
+
"step:5": 50262,
|
| 61 |
+
"step:6": 50263,
|
| 62 |
+
"step:7": 50264,
|
| 63 |
+
"step:8": 50265,
|
| 64 |
+
"step:9": 50266,
|
| 65 |
+
"which_genre_section": 50319
|
| 66 |
+
}
|
tokenizer/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer/special_tokens_map.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<|endoftext|>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": true,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "<|endoftext|>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": true,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": "<|endoftext|>",
|
| 17 |
+
"unk_token": {
|
| 18 |
+
"content": "<|endoftext|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": true,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
}
|
| 24 |
+
}
|
tokenizer/tokenizer_config.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"bos_token": {
|
| 5 |
+
"__type": "AddedToken",
|
| 6 |
+
"content": "<|endoftext|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": true,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false
|
| 11 |
+
},
|
| 12 |
+
"clean_up_tokenization_spaces": true,
|
| 13 |
+
"eos_token": {
|
| 14 |
+
"__type": "AddedToken",
|
| 15 |
+
"content": "<|endoftext|>",
|
| 16 |
+
"lstrip": false,
|
| 17 |
+
"normalized": true,
|
| 18 |
+
"rstrip": false,
|
| 19 |
+
"single_word": false
|
| 20 |
+
},
|
| 21 |
+
"errors": "replace",
|
| 22 |
+
"model_max_length": 1024,
|
| 23 |
+
"pad_token": null,
|
| 24 |
+
"tokenizer_class": "GPT2Tokenizer",
|
| 25 |
+
"unk_token": {
|
| 26 |
+
"__type": "AddedToken",
|
| 27 |
+
"content": "<|endoftext|>",
|
| 28 |
+
"lstrip": false,
|
| 29 |
+
"normalized": true,
|
| 30 |
+
"rstrip": false,
|
| 31 |
+
"single_word": false
|
| 32 |
+
}
|
| 33 |
+
}
|
tokenizer/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|