Commit
·
b8b9847
1
Parent(s):
30ce9c6
Debugged generate.py
Browse files- .DS_Store +0 -0
- .gitignore +1 -0
- gen_res/First chunk copy.mid +0 -0
- generate.py +33 -7
- runs/.DS_Store +0 -0
.DS_Store
DELETED
|
Binary file (6.15 kB)
|
|
|
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
.DS_Store
|
gen_res/First chunk copy.mid
ADDED
|
Binary file (369 Bytes). View file
|
|
|
generate.py
CHANGED
|
@@ -8,13 +8,40 @@ from torch import Tensor, argmax
|
|
| 8 |
from torch.utils.data import DataLoader
|
| 9 |
from torch.cuda import is_available as cuda_available, is_bf16_supported
|
| 10 |
from torch.backends.mps import is_available as mps_available
|
| 11 |
-
from transformers import AutoModelForCausalLM, MistralConfig, Trainer, TrainingArguments, GenerationConfig, AutoTokenizer,
|
| 12 |
from transformers.trainer_utils import set_seed
|
| 13 |
from evaluate import load as load_metric
|
| 14 |
from miditok import REMI, TokenizerConfig
|
| 15 |
from miditok.pytorch_data import DatasetTok, DataCollator
|
| 16 |
from tqdm import tqdm
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
# Seed
|
| 19 |
set_seed(777)
|
| 20 |
|
|
@@ -39,7 +66,7 @@ collator = DataCollator(
|
|
| 39 |
)
|
| 40 |
|
| 41 |
# Creates model using the correct configuration
|
| 42 |
-
model =
|
| 43 |
|
| 44 |
collator = DataCollator(tokenizer["PAD_None"], tokenizer["BOS_None"], tokenizer["EOS_None"], copy_inputs_as_labels=True)
|
| 45 |
|
|
@@ -53,7 +80,6 @@ generation_config = GenerationConfig(
|
|
| 53 |
top_p=0.95,
|
| 54 |
epsilon_cutoff=3e-4,
|
| 55 |
eta_cutoff=1e-3,
|
| 56 |
-
pad_token_id=tokenizer.padding_token_id,
|
| 57 |
)
|
| 58 |
|
| 59 |
# Here the sequences are padded to the left, so that the last token along the time dimension
|
|
@@ -77,10 +103,10 @@ for batch in tqdm(dataloader_test, desc='Testing model / Generating results'):
|
|
| 77 |
tokens = [seq.tolist() for seq in tokens]
|
| 78 |
for tok_seq in tokens[1:]:
|
| 79 |
_midi = tokenizer.tokens_to_midi([deepcopy(tok_seq)])
|
| 80 |
-
midi.
|
| 81 |
-
midi.
|
| 82 |
-
midi.
|
| 83 |
-
midi.
|
| 84 |
midi.dump_midi(gen_results_path / f'{count}.mid')
|
| 85 |
tokenizer.save_tokens(tokens, gen_results_path / f'{count}.json')
|
| 86 |
|
|
|
|
| 8 |
from torch.utils.data import DataLoader
|
| 9 |
from torch.cuda import is_available as cuda_available, is_bf16_supported
|
| 10 |
from torch.backends.mps import is_available as mps_available
|
| 11 |
+
from transformers import AutoModelForCausalLM, MistralConfig, Trainer, TrainingArguments, GenerationConfig, AutoTokenizer, MistralForCausalLM
|
| 12 |
from transformers.trainer_utils import set_seed
|
| 13 |
from evaluate import load as load_metric
|
| 14 |
from miditok import REMI, TokenizerConfig
|
| 15 |
from miditok.pytorch_data import DatasetTok, DataCollator
|
| 16 |
from tqdm import tqdm
|
| 17 |
|
| 18 |
+
# Our tokenizer's configuration
|
| 19 |
+
PITCH_RANGE = (21, 109)
|
| 20 |
+
BEAT_RES = {(0, 1): 8, (1, 2): 4, (2, 4): 2, (4, 8): 1}
|
| 21 |
+
NUM_VELOCITIES = 24
|
| 22 |
+
SPECIAL_TOKENS = ["PAD", "MASK", "BOS", "EOS"]
|
| 23 |
+
USE_CHORDS = False
|
| 24 |
+
USE_RESTS = False
|
| 25 |
+
USE_TEMPOS = True
|
| 26 |
+
USE_TIME_SIGNATURE = False
|
| 27 |
+
USE_PROGRAMS = False
|
| 28 |
+
NUM_TEMPOS = 32
|
| 29 |
+
TEMPO_RANGE = (50, 200) # (min_tempo, max_tempo)
|
| 30 |
+
TOKENIZER_PARAMS = {
|
| 31 |
+
"pitch_range": PITCH_RANGE,
|
| 32 |
+
"beat_res": BEAT_RES,
|
| 33 |
+
"num_velocities": NUM_VELOCITIES,
|
| 34 |
+
"special_tokens": SPECIAL_TOKENS,
|
| 35 |
+
"use_chords": USE_CHORDS,
|
| 36 |
+
"use_rests": USE_RESTS,
|
| 37 |
+
"use_tempos": USE_TEMPOS,
|
| 38 |
+
"use_time_signatures": USE_TIME_SIGNATURE,
|
| 39 |
+
"use_programs": USE_PROGRAMS,
|
| 40 |
+
"num_tempos": NUM_TEMPOS,
|
| 41 |
+
"tempo_range": TEMPO_RANGE,
|
| 42 |
+
}
|
| 43 |
+
config = TokenizerConfig(**TOKENIZER_PARAMS)
|
| 44 |
+
|
| 45 |
# Seed
|
| 46 |
set_seed(777)
|
| 47 |
|
|
|
|
| 66 |
)
|
| 67 |
|
| 68 |
# Creates model using the correct configuration
|
| 69 |
+
model = MistralForCausalLM.from_pretrained("./runs")
|
| 70 |
|
| 71 |
collator = DataCollator(tokenizer["PAD_None"], tokenizer["BOS_None"], tokenizer["EOS_None"], copy_inputs_as_labels=True)
|
| 72 |
|
|
|
|
| 80 |
top_p=0.95,
|
| 81 |
epsilon_cutoff=3e-4,
|
| 82 |
eta_cutoff=1e-3,
|
|
|
|
| 83 |
)
|
| 84 |
|
| 85 |
# Here the sequences are padded to the left, so that the last token along the time dimension
|
|
|
|
| 103 |
tokens = [seq.tolist() for seq in tokens]
|
| 104 |
for tok_seq in tokens[1:]:
|
| 105 |
_midi = tokenizer.tokens_to_midi([deepcopy(tok_seq)])
|
| 106 |
+
midi.tracks.append(_midi.tracks[0])
|
| 107 |
+
midi.tracks[0].name = f'Continuation of original sample ({len(generated)} tokens)'
|
| 108 |
+
midi.tracks[1].name = f'Original sample ({len(prompt)} tokens)'
|
| 109 |
+
midi.tracks[2].name = f'Original sample and continuation'
|
| 110 |
midi.dump_midi(gen_results_path / f'{count}.mid')
|
| 111 |
tokenizer.save_tokens(tokens, gen_results_path / f'{count}.json')
|
| 112 |
|
runs/.DS_Store
DELETED
|
Binary file (6.15 kB)
|
|
|