Instructions to use B-K/song2midi-processor with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use B-K/song2midi-processor with Transformers:
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("B-K/song2midi-processor", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| import os | |
| from pathlib import Path | |
| from typing import Union | |
| from transformers import BatchEncoding, PythonBackend | |
| from transformers.tokenization_utils_base import TruncationStrategy | |
| from transformers.utils.generic import PaddingStrategy, TensorType | |
# MIDI handling depends on third-party packages; fail early with an
# actionable message if they cannot be imported.
try:
    from miditok import PerTok, TokSequence
    from symusic.types import Score
    import symusic
except ImportError as err:
    # Chain the original error so the underlying import failure stays visible.
    raise ImportError(
        "The `miditok` library is required for processing MIDI files. "
        "Please install it with `pip install miditok`."
    ) from err
class Song2MIDIPerTokTokenizer(PythonBackend):
    """Expose a MidiTok ``PerTok`` tokenizer through the ``PythonBackend`` interface.

    Instead of text, the encoding entry point accepts MIDI material: a
    ``Score``, a path to a MIDI file, raw MIDI ``bytes``, or a list of token
    IDs that has already been produced by the tokenizer.
    """

    vocab_files_names = {"vocab_file": "vocab.json"}

    def __init__(
        self,
        vocab_file: str | os.PathLike | Path,
        unk_token: str = "UNK_None",
        bos_token: str = "BOS_None",
        eos_token: str = "EOS_None",
        pad_token: str = "PAD_None",
        **kwargs,
    ):
        """Load the PerTok tokenizer from ``vocab_file`` and initialise the base class.

        Args:
            vocab_file: MidiTok params file handed to ``PerTok(params=...)``.
            unk_token: Unknown token string forwarded to the base class.
            bos_token: Beginning-of-sequence token string forwarded to the base class.
            eos_token: End-of-sequence token string forwarded to the base class.
            pad_token: Padding token string forwarded to the base class.
            **kwargs: Extra keyword arguments forwarded to the base class.
        """
        self._tokenizer = PerTok(params=vocab_file)

        # PerTok as of miditok version 3.0.6.post1 does not load position token
        # locations from the vocab file, so rebuild them when they are missing
        # (use_position_toks workaround).
        if self._tokenizer.use_position_toks and not getattr(
            self._tokenizer, "position_locations", None
        ):
            self._tokenizer.position_locations = (
                self._tokenizer._create_position_tok_locations()
            )

        # Reverse mapping (id -> token string) used by `_decode`.
        self._decoder = {value: key for key, value in self._tokenizer.vocab.items()}

        # The wrapped tokenizer is set up first so it is already available
        # while the base class initialises.
        super().__init__(
            unk_token=unk_token,
            bos_token=bos_token,
            eos_token=eos_token,
            pad_token=pad_token,
            **kwargs,
        )

    # NOTE(review): the transformers base tokenizer classes appear to expose
    # `vocab_size` as a property; this override is a plain method. Kept as-is
    # to preserve the existing call style -- confirm which form callers expect.
    def vocab_size(self):
        """Return ``len()`` of the wrapped MidiTok tokenizer."""
        return len(self._tokenizer)

    def get_vocab(self):
        """Return the wrapped tokenizer's vocabulary mapping (token -> id)."""
        return self._tokenizer.vocab

    def _encode_plus(
        self,
        text: Union[Score, Path, bytes, list[Union[Score, Path, bytes]], list[int]],
        text_pair: Union[Score, Path, list[Union[Score, Path]], list[int], None] = None,
        add_special_tokens: bool = True,
        padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
        truncation_strategy: TruncationStrategy = TruncationStrategy.DO_NOT_TRUNCATE,
        max_length: int | None = None,
        stride: int = 0,
        pad_to_multiple_of: int | None = None,
        padding_side: str | None = None,
        return_tensors: str | TensorType | None = None,
        return_token_type_ids: bool | None = None,
        return_attention_mask: bool | None = None,
        return_overflowing_tokens: bool = False,
        return_special_tokens_mask: bool = False,
        return_length: bool = False,
        verbose: bool = True,
        **kwargs,
    ):  # ty: ignore[invalid-method-override]
        """Encode one MIDI input, or a batch of them, into model inputs.

        ``text`` / ``text_pair`` keep their base-class names but carry MIDI
        data. A list or tuple that is empty, or whose first element is a
        ``str``, ``Path``, ``Score`` or ``bytes``, is treated as a batch: each
        item is encoded separately without padding, then the batch is padded
        once and wrapped in a ``BatchEncoding``. Anything else is encoded as a
        single sequence through ``prepare_for_model``.

        Raises:
            ValueError: If ``text`` is a batch and ``text_pair`` is given but is
                not a list/tuple of the same length, or if an input is neither
                a supported MIDI object nor a list of integer token IDs.
        """
        midi = text
        midi_pair = text_pair

        # From https://github.com/huggingface/transformers/blob/main/src/transformers/tokenization_python.py (v5.3.0)
        is_batched = isinstance(midi, (list, tuple)) and (
            (not midi) or (midi and isinstance(midi[0], (str, Path, Score, bytes)))
        )

        if is_batched:
            if midi_pair is not None:
                if not isinstance(midi_pair, (list, tuple)) or len(midi_pair) != len(
                    midi
                ):
                    raise ValueError(
                        "If `midi` is a batch, `midi_pair` must be a batch of the same length."
                    )
            pairs = midi_pair if midi_pair is not None else [None] * len(midi)

            batch_outputs = {}
            for current_midi, current_pair in zip(midi, pairs):
                current_output = self._encode_plus(
                    text=current_midi,
                    text_pair=current_pair,
                    add_special_tokens=add_special_tokens,
                    padding_strategy=PaddingStrategy.DO_NOT_PAD,  # we pad in batch afterward
                    truncation_strategy=truncation_strategy,
                    max_length=max_length,
                    stride=stride,
                    pad_to_multiple_of=None,  # we pad in batch afterward
                    padding_side=None,  # we pad in batch afterward
                    return_tensors=None,  # we convert the whole batch to tensors at the end
                    return_token_type_ids=return_token_type_ids,
                    return_attention_mask=False,  # we pad in batch afterward
                    return_overflowing_tokens=return_overflowing_tokens,
                    return_special_tokens_mask=return_special_tokens_mask,
                    return_length=return_length,
                    verbose=verbose,
                    **kwargs,
                )
                # Collect every per-item field into a list keyed by field name.
                for key, value in current_output.items():
                    batch_outputs.setdefault(key, []).append(value)

            # Remove overflow-related keys before tensor conversion if return_tensors is set
            # Slow tokenizers don't support returning these as tensors
            if return_tensors and return_overflowing_tokens:
                batch_outputs.pop("overflowing_tokens", None)
                batch_outputs.pop("num_truncated_tokens", None)

            batch_outputs = self.pad(
                batch_outputs,
                padding=padding_strategy.value,
                max_length=max_length,
                pad_to_multiple_of=pad_to_multiple_of,
                padding_side=padding_side,
                return_attention_mask=return_attention_mask,
            )
            return BatchEncoding(batch_outputs, tensor_type=return_tensors)

        # Single sequence handling
        def get_input_ids(midi_input):
            """Turn one MIDI input (or a ready-made ID list) into token IDs."""
            # Falsy input (e.g. None, empty list, empty bytes) yields no tokens.
            if not midi_input:
                return []
            if isinstance(midi_input, (str, Path, Score, bytes)):
                if isinstance(midi_input, bytes):
                    # Raw MIDI bytes are parsed into a Score before tokenizing.
                    midi_input = symusic.Score.from_midi(midi_input)
                return self._tokenizer.encode(midi_input).ids
            if isinstance(midi_input, (list, tuple)) and midi_input:
                # A list starting with an int is taken to be token IDs already.
                if isinstance(midi_input[0], int):
                    return midi_input
            raise ValueError(
                "Input must be a Score, a path to a MIDI file, or a list of token IDs."
            )

        first_ids = get_input_ids(midi)
        second_ids = get_input_ids(midi_pair) if midi_pair is not None else None

        return self.prepare_for_model(
            first_ids,
            pair_ids=second_ids,
            add_special_tokens=add_special_tokens,
            padding=padding_strategy.value,
            truncation=truncation_strategy.value,
            max_length=max_length,
            stride=stride,
            pad_to_multiple_of=pad_to_multiple_of,
            padding_side=padding_side,
            # Forward the requested tensor type so a single input is converted
            # too, just like the batched path above; previously it was dropped
            # and callers silently received plain lists.
            return_tensors=return_tensors,
            prepend_batch_axis=True,
            return_attention_mask=return_attention_mask,
            return_token_type_ids=return_token_type_ids,
            return_overflowing_tokens=return_overflowing_tokens,
            return_special_tokens_mask=return_special_tokens_mask,
            return_length=return_length,
            verbose=verbose,
        )

    def _decode(
        self,
        token_ids: int | list[int],
        skip_special_tokens: bool = False,
        clean_up_tokenization_spaces: bool | None = None,
        **kwargs,
    ) -> str:
        """Decode token IDs into a space-separated string of token names.

        Args:
            token_ids: A single ID or a list of IDs.
            skip_special_tokens: When true, drop tokens listed in the wrapped
                tokenizer's ``special_tokens``.
            clean_up_tokenization_spaces: Accepted for signature compatibility;
                not used here.
            **kwargs: Accepted for signature compatibility; not used here.

        Returns:
            The token strings joined with single spaces.
        """
        if isinstance(token_ids, int):
            token_ids = [token_ids]

        # The IDs are marked as encoded; `decode_token_ids` updates the
        # sequence in place, after which `.ids` is looked up in `_decoder`.
        tok_sequence = TokSequence(ids=token_ids, are_ids_encoded=True)
        self._tokenizer.decode_token_ids(tok_sequence)

        tokens = [self._decoder[token_id] for token_id in tok_sequence.ids]
        if skip_special_tokens:
            tokens = [
                token for token in tokens if token not in self._tokenizer.special_tokens
            ]
        return " ".join(tokens)

    def decode_score(
        self,
        token_ids: int | list[int],
        skip_special_tokens: bool = False,
        clean_up_tokenization_spaces: bool | None = None,
        **kwargs,
    ) -> Score:
        """Decode token IDs back into a ``Score`` using the wrapped tokenizer.

        Args:
            token_ids: A single ID or a list of IDs.
            skip_special_tokens: Accepted to mirror ``_decode``; not used here.
            clean_up_tokenization_spaces: Accepted to mirror ``_decode``; not
                used here.
            **kwargs: Accepted to mirror ``_decode``; not used here.

        Returns:
            The result of the wrapped tokenizer's ``decode`` call.
        """
        if isinstance(token_ids, int):
            token_ids = [token_ids]
        tok_sequence = TokSequence(ids=token_ids, are_ids_encoded=True)
        return self._tokenizer.decode(tok_sequence)

    def save_vocabulary(
        self, save_directory: str, filename_prefix: str | None = None
    ) -> tuple[str, ...]:
        """Save the MidiTok tokenizer params to disk.

        Args:
            save_directory: Existing directory to write into.
            filename_prefix: Optional prefix; when given, the file is named
                ``"<prefix>-vocab.json"``.

        Returns:
            A one-element tuple with the written file path, or an empty tuple
            when ``save_directory`` is not an existing directory.
        """
        if not os.path.isdir(save_directory):
            return ()
        prefix = f"{filename_prefix}-" if filename_prefix else ""
        vocab_file = os.path.join(save_directory, prefix + "vocab.json")
        # Use MidiTok's own serialization
        self._tokenizer.save(vocab_file)
        return (vocab_file,)