Spaces:
Sleeping
Sleeping
Init
Browse files- .gitattributes +1 -0
- MIDI.py +1735 -0
- README.md +47 -5
- TimGM6mb.sf2 +3 -0
- app.py +934 -0
- gitattributes +2 -0
- gitignore +2 -0
- meltysynth.py +0 -0
- model_base_int8.onnx +3 -0
- model_token_int8.onnx +3 -0
- requirements.txt +13 -0
- skytnt_generator.py +433 -0
- skytnt_tokenizer.py +1196 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
TimGM6mb.sf2 filter=lfs diff=lfs merge=lfs -text
|
MIDI.py
ADDED
|
@@ -0,0 +1,1735 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#! /usr/bin/python3
|
| 2 |
+
# unsupported 20091104 ...
|
| 3 |
+
# ['set_sequence_number', dtime, sequence]
|
| 4 |
+
# ['raw_data', dtime, raw]
|
| 5 |
+
|
| 6 |
+
# 20150914 jimbo1qaz MIDI.py str/bytes bug report
|
| 7 |
+
# I found a MIDI file which had Shift-JIS titles. When midi.py decodes it as
|
| 8 |
+
# latin-1, it produces a string which cannot even be accessed without raising
|
| 9 |
+
# a UnicodeDecodeError. Maybe, when converting raw byte strings from MIDI,
|
| 10 |
+
# you should keep them as bytes, not improperly decode them. However, this
|
| 11 |
+
# would change the API. (ie: text = a "string" ? of 0 or more bytes). It
|
| 12 |
+
# could break compatibility, but there's not much else you can do to fix the bug
|
| 13 |
+
# https://en.wikipedia.org/wiki/Shift_JIS
|
| 14 |
+
|
| 15 |
+
r'''
|
| 16 |
+
This module offers functions: concatenate_scores(), grep(),
|
| 17 |
+
merge_scores(), mix_scores(), midi2opus(), midi2score(), opus2midi(),
|
| 18 |
+
opus2score(), play_score(), score2midi(), score2opus(), score2stats(),
|
| 19 |
+
score_type(), segment(), timeshift() and to_millisecs(),
|
| 20 |
+
where "midi" means the MIDI-file bytes (as can be put in a .mid file,
|
| 21 |
+
or piped into aplaymidi), and "opus" and "score" are list-structures
|
| 22 |
+
as inspired by Sean Burke's MIDI-Perl CPAN module.
|
| 23 |
+
|
| 24 |
+
Warning: Version 6.4 is not necessarily backward-compatible with
|
| 25 |
+
previous versions, in that text-data is now bytes, not strings.
|
| 26 |
+
This reflects the fact that many MIDI files have text data in
|
| 27 |
+
encodings other than ISO-8859-1, for example in Shift-JIS.
|
| 28 |
+
|
| 29 |
+
Download MIDI.py from http://www.pjb.com.au/midi/free/MIDI.py
|
| 30 |
+
and put it in your PYTHONPATH. MIDI.py depends on Python3.
|
| 31 |
+
|
| 32 |
+
There is also a call-compatible translation into Lua of this
|
| 33 |
+
module: see http://www.pjb.com.au/comp/lua/MIDI.html
|
| 34 |
+
|
| 35 |
+
The "opus" is a direct translation of the midi-file-events, where
|
| 36 |
+
the times are delta-times, in ticks, since the previous event.
|
| 37 |
+
|
| 38 |
+
The "score" is more human-centric; it uses absolute times, and
|
| 39 |
+
combines the separate note_on and note_off events into one "note"
|
| 40 |
+
event, with a duration:
|
| 41 |
+
['note', start_time, duration, channel, note, velocity] # in a "score"
|
| 42 |
+
|
| 43 |
+
EVENTS (in an "opus" structure)
|
| 44 |
+
['note_off', dtime, channel, note, velocity] # in an "opus"
|
| 45 |
+
['note_on', dtime, channel, note, velocity] # in an "opus"
|
| 46 |
+
['key_after_touch', dtime, channel, note, velocity]
|
| 47 |
+
['control_change', dtime, channel, controller(0-127), value(0-127)]
|
| 48 |
+
['patch_change', dtime, channel, patch]
|
| 49 |
+
['channel_after_touch', dtime, channel, velocity]
|
| 50 |
+
['pitch_wheel_change', dtime, channel, pitch_wheel]
|
| 51 |
+
['text_event', dtime, text]
|
| 52 |
+
['copyright_text_event', dtime, text]
|
| 53 |
+
['track_name', dtime, text]
|
| 54 |
+
['instrument_name', dtime, text]
|
| 55 |
+
['lyric', dtime, text]
|
| 56 |
+
['marker', dtime, text]
|
| 57 |
+
['cue_point', dtime, text]
|
| 58 |
+
['text_event_08', dtime, text]
|
| 59 |
+
['text_event_09', dtime, text]
|
| 60 |
+
['text_event_0a', dtime, text]
|
| 61 |
+
['text_event_0b', dtime, text]
|
| 62 |
+
['text_event_0c', dtime, text]
|
| 63 |
+
['text_event_0d', dtime, text]
|
| 64 |
+
['text_event_0e', dtime, text]
|
| 65 |
+
['text_event_0f', dtime, text]
|
| 66 |
+
['end_track', dtime]
|
| 67 |
+
['set_tempo', dtime, tempo]
|
| 68 |
+
['smpte_offset', dtime, hr, mn, se, fr, ff]
|
| 69 |
+
['time_signature', dtime, nn, dd, cc, bb]
|
| 70 |
+
['key_signature', dtime, sf, mi]
|
| 71 |
+
['sequencer_specific', dtime, raw]
|
| 72 |
+
['raw_meta_event', dtime, command(0-255), raw]
|
| 73 |
+
['sysex_f0', dtime, raw]
|
| 74 |
+
['sysex_f7', dtime, raw]
|
| 75 |
+
['song_position', dtime, song_pos]
|
| 76 |
+
['song_select', dtime, song_number]
|
| 77 |
+
['tune_request', dtime]
|
| 78 |
+
|
| 79 |
+
DATA TYPES
|
| 80 |
+
channel = a value 0 to 15
|
| 81 |
+
controller = 0 to 127 (see http://www.pjb.com.au/muscript/gm.html#cc )
|
| 82 |
+
dtime = time measured in "ticks", 0 to 268435455
|
| 83 |
+
velocity = a value 0 (soft) to 127 (loud)
|
| 84 |
+
note = a value 0 to 127 (middle-C is 60)
|
| 85 |
+
patch = 0 to 127 (see http://www.pjb.com.au/muscript/gm.html )
|
| 86 |
+
pitch_wheel = a value -8192 to 8191 (0x1FFF)
|
| 87 |
+
raw = bytes, of length 0 or more (for sysex events see below)
|
| 88 |
+
sequence_number = a value 0 to 65,535 (0xFFFF)
|
| 89 |
+
song_pos = a value 0 to 16,383 (0x3FFF)
|
| 90 |
+
song_number = a value 0 to 127
|
| 91 |
+
tempo = microseconds per crochet (quarter-note), 0 to 16777215
|
| 92 |
+
text = bytes, of length 0 or more
|
| 93 |
+
ticks = the number of ticks per crochet (quarter-note)
|
| 94 |
+
|
| 95 |
+
In sysex_f0 events, the raw data must not start with a \xF0 byte,
|
| 96 |
+
since this gets added automatically;
|
| 97 |
+
but it must end with an explicit \xF7 byte!
|
| 98 |
+
In the very unlikely case that you ever need to split sysex data
|
| 99 |
+
into one sysex_f0 followed by one or more sysex_f7s, then only the
|
| 100 |
+
last of those sysex_f7 events must end with the explicit \xF7 byte
|
| 101 |
+
(again, the raw data of individual sysex_f7 events must not start
|
| 102 |
+
with any \xF7 byte, since this gets added automatically).
|
| 103 |
+
|
| 104 |
+
Since version 6.4, text data is in bytes, not in a ISO-8859-1 string.
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
GOING THROUGH A SCORE WITHIN A PYTHON PROGRAM
|
| 108 |
+
channels = {2,3,5,8,13}
|
| 109 |
+
itrack = 1 # skip 1st element which is ticks
|
| 110 |
+
while itrack < len(score):
|
| 111 |
+
for event in score[itrack]:
|
| 112 |
+
if event[0] == 'note': # for example,
|
| 113 |
+
pass # do something to all notes
|
| 114 |
+
# or, to work on events in only particular channels...
|
| 115 |
+
channel_index = MIDI.Event2channelindex.get(event[0], False)
|
| 116 |
+
if channel_index and (event[channel_index] in channels):
|
| 117 |
+
pass # do something to channels 2,3,5,8 and 13
|
| 118 |
+
itrack += 1
|
| 119 |
+
|
| 120 |
+
'''
|
| 121 |
+
|
| 122 |
+
import sys, struct, copy
|
| 123 |
+
# sys.stdout = os.fdopen(sys.stdout.fileno(), 'wb')
|
| 124 |
+
Version = '6.7'
|
| 125 |
+
VersionDate = '20201120'
|
| 126 |
+
# 20201120 6.7 call to bytest() removed, and protect _unshift_ber_int
|
| 127 |
+
# 20160702 6.6 to_millisecs() now handles set_tempo across multiple Tracks
|
| 128 |
+
# 20150921 6.5 segment restores controllers as well as patch and tempo
|
| 129 |
+
# 20150914 6.4 text data is bytes or bytearray, not ISO-8859-1 strings
|
| 130 |
+
# 20150628 6.3 absent any set_tempo, default is 120bpm (see MIDI file spec 1.1)
|
| 131 |
+
# 20150101 6.2 all text events can be 8-bit; let user get the right encoding
|
| 132 |
+
# 20141231 6.1 fix _some_text_event; sequencer_specific data can be 8-bit
|
| 133 |
+
# 20141230 6.0 synth_specific data can be 8-bit
|
| 134 |
+
# 20120504 5.9 add the contents of mid_opus_tracks()
|
| 135 |
+
# 20120208 5.8 fix num_notes_by_channel() ; should be a dict
|
| 136 |
+
# 20120129 5.7 _encode handles empty tracks; score2stats num_notes_by_channel
|
| 137 |
+
# 20111111 5.6 fix patch 45 and 46 in Number2patch, should be Harp
|
| 138 |
+
# 20110129 5.5 add mix_opus_tracks() and event2alsaseq()
|
| 139 |
+
# 20110126 5.4 "previous message repeated N times" to save space on stderr
|
| 140 |
+
# 20110125 5.2 opus2score terminates unended notes at the end of the track
|
| 141 |
+
# 20110124 5.1 the warnings in midi2opus display track_num
|
| 142 |
+
# 21110122 5.0 if garbage, midi2opus returns the opus so far
|
| 143 |
+
# 21110119 4.9 non-ascii chars stripped out of the text_events
|
| 144 |
+
# 21110110 4.8 note_on with velocity=0 treated as a note-off
|
| 145 |
+
# 21110108 4.6 unknown F-series event correctly eats just one byte
|
| 146 |
+
# 21011010 4.2 segment() uses start_time, end_time named params
|
| 147 |
+
# 21011005 4.1 timeshift() must not pad the set_tempo command
|
| 148 |
+
# 21011003 4.0 pitch2note_event must be chapitch2note_event
|
| 149 |
+
# 21010918 3.9 set_sequence_number supported, FWIW
|
| 150 |
+
# 20100913 3.7 many small bugfixes; passes all tests
|
| 151 |
+
# 20100910 3.6 concatenate_scores enforce ticks=1000, just like merge_scores
|
| 152 |
+
# 20100908 3.5 minor bugs fixed in score2stats
|
| 153 |
+
# 20091104 3.4 tune_request now supported
|
| 154 |
+
# 20091104 3.3 fixed bug in decoding song_position and song_select
|
| 155 |
+
# 20091104 3.2 unsupported: set_sequence_number tune_request raw_data
|
| 156 |
+
# 20091101 3.1 document how to traverse a score within Python
|
| 157 |
+
# 20091021 3.0 fixed bug in score2stats detecting GM-mode = 0
|
| 158 |
+
# 20091020 2.9 score2stats reports GM-mode and bank msb,lsb events
|
| 159 |
+
# 20091019 2.8 in merge_scores, channel 9 must remain channel 9 (in GM)
|
| 160 |
+
# 20091018 2.7 handles empty tracks gracefully
|
| 161 |
+
# 20091015 2.6 grep() selects channels
|
| 162 |
+
# 20091010 2.5 merge_scores reassigns channels to avoid conflicts
|
| 163 |
+
# 20091010 2.4 fixed bug in to_millisecs which now only does opusses
|
| 164 |
+
# 20091010 2.3 score2stats returns channels & patch_changes, by_track & total
|
| 165 |
+
# 20091010 2.2 score2stats() returns also pitches and percussion dicts
|
| 166 |
+
# 20091010 2.1 bugs: >= not > in segment, to notice patch_change at time 0
|
| 167 |
+
# 20091010 2.0 bugs: spurious pop(0) ( in _decode sysex
|
| 168 |
+
# 20091008 1.9 bugs: ISO decoding in sysex; str( not int( in note-off warning
|
| 169 |
+
# 20091008 1.8 add concatenate_scores()
|
| 170 |
+
# 20091006 1.7 score2stats() measures nticks and ticks_per_quarter
|
| 171 |
+
# 20091004 1.6 first mix_scores() and merge_scores()
|
| 172 |
+
# 20090424 1.5 timeshift() bugfix: earliest only sees events after from_time
|
| 173 |
+
# 20090330 1.4 timeshift() has also a from_time argument
|
| 174 |
+
# 20090322 1.3 timeshift() has also a start_time argument
|
| 175 |
+
# 20090319 1.2 add segment() and timeshift()
|
| 176 |
+
# 20090301 1.1 add to_millisecs()
|
| 177 |
+
|
| 178 |
+
_previous_warning = '' # 5.4
|
| 179 |
+
_previous_times = 0 # 5.4
|
| 180 |
+
_no_warning = True
|
| 181 |
+
#------------------------------- Encoding stuff --------------------------
|
| 182 |
+
|
| 183 |
+
def opus2midi(opus=None):
    r'''Translate an "opus" into MIDI-file bytes.

    The argument is a list: the first item in the list is the "ticks"
    parameter (ticks per quarter-note), the others are the tracks.
    Each track is a list of midi-events, and each event is itself a
    list; see the module docstring.  opus2midi() returns a bytestring
    of the MIDI, which can then be written either to a file opened in
    binary mode (mode='wb'), or to stdout by means of:
    sys.stdout.buffer.write()

        my_opus = [
            96,
            [   # track 0:
                ['patch_change', 0, 1, 8],   # and these are the events...
                ['note_on',   5, 1, 25, 96],
                ['note_off', 96, 1, 25,  0],
                ['note_on',   0, 1, 29, 96],
                ['note_off', 96, 1, 29,  0],
            ],  # end of track 0
        ]
        my_midi = opus2midi(my_opus)
        sys.stdout.buffer.write(my_midi)
    '''
    # Fix: the previous default opus=[] was a shared mutable default
    # argument; None is the safe sentinel and behaves identically
    # (an absent or too-short opus becomes an empty 1000-tick opus).
    if opus is None or len(opus) < 2:
        opus = [1000, [],]
    tracks = copy.deepcopy(opus)
    ticks = int(tracks.pop(0))
    ntracks = len(tracks)
    # MIDI file format 0 = single track, format 1 = parallel tracks.
    # Renamed from 'format' so the builtin isn't shadowed.
    midi_format = 0 if ntracks == 1 else 1

    # "MThd" header: length 6, then format / ntracks / division, big-endian.
    my_midi = b"MThd\x00\x00\x00\x06" + struct.pack(
        '>HHH', midi_format, ntracks, ticks)
    for track in tracks:
        events = _encode(track)
        my_midi += b'MTrk' + struct.pack('>I', len(events)) + events
    _clean_up_warnings()
    return my_midi
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
def score2opus(score=None):
    r'''Translate a "score" into an "opus".

    The argument is a list: the first item in the list is the "ticks"
    parameter, the others are the tracks. Each track is a list
    of score-events, and each event is itself a list. A score-event
    is similar to an opus-event (see the module docstring), except
    that in a score:
       1) the times are expressed as an absolute number of ticks
          from the track's start time
       2) the pairs of 'note_on' and 'note_off' events in an "opus"
          are abstracted into a single 'note' event in a "score":
          ['note', start_time, duration, channel, pitch, velocity]
    score2opus() returns a list specifying the equivalent "opus".

        my_score = [
            96,
            [   # track 0:
                ['patch_change', 0, 1, 8],
                ['note', 5, 96, 1, 25, 96],
                ['note', 101, 96, 1, 29, 96]
            ],  # end of track 0
        ]
        my_opus = score2opus(my_score)
    '''
    # Bug fix: the declared default score=None used to reach len(score)
    # and raise TypeError; treat None the same as a too-short score.
    if score is None or len(score) < 2:
        score = [1000, [],]
    tracks = copy.deepcopy(score)
    ticks = int(tracks.pop(0))
    opus_tracks = []
    for scoretrack in tracks:
        # Bucket every event by its absolute start-time so simultaneous
        # events keep their original relative order within each bucket.
        time2events = {}
        for scoreevent in scoretrack:
            if scoreevent[0] == 'note':
                # Split each 'note' into a matching note_on / note_off pair.
                note_on_event = ['note_on', scoreevent[1],
                                 scoreevent[3], scoreevent[4], scoreevent[5]]
                note_off_event = ['note_off', scoreevent[1] + scoreevent[2],
                                  scoreevent[3], scoreevent[4], scoreevent[5]]
                time2events.setdefault(note_on_event[1], []).append(note_on_event)
                time2events.setdefault(note_off_event[1], []).append(note_off_event)
                continue
            time2events.setdefault(scoreevent[1], []).append(scoreevent)

        # Flatten the buckets in ascending time order.
        sorted_events = []
        for time in sorted(time2events.keys()):
            sorted_events.extend(time2events[time])

        # Convert absolute times to delta times (in place).
        abs_time = 0
        for event in sorted_events:
            delta_time = event[1] - abs_time
            abs_time = event[1]
            event[1] = delta_time
        opus_tracks.append(sorted_events)
    opus_tracks.insert(0, ticks)
    _clean_up_warnings()
    return opus_tracks
|
| 290 |
+
|
| 291 |
+
def score2midi(score=None):
    r'''Translate a "score" into MIDI-file bytes.

    Convenience wrapper: runs score2opus() on the argument, then
    feeds the resulting "opus" to opus2midi().
    '''
    intermediate_opus = score2opus(score)
    return opus2midi(intermediate_opus)
|
| 296 |
+
|
| 297 |
+
#--------------------------- Decoding stuff ------------------------
|
| 298 |
+
|
| 299 |
+
def midi2opus(midi=b''):
    r'''Translate MIDI-file bytes into an "opus".  For a description
    of the "opus" format, see opus2midi().

    On malformed input this returns the default empty opus [1000, []],
    or (for a truncated track) whatever had been decoded so far.
    '''
    my_midi = bytearray(midi)
    if len(my_midi) < 4:
        _clean_up_warnings()
        return [1000,[],]
    # Renamed from 'id': don't shadow the builtin.
    header_id = bytes(my_midi[0:4])
    if header_id != b'MThd':
        _warn("midi2opus: midi starts with "+str(header_id)+" instead of 'MThd'")
        _clean_up_warnings()
        return [1000,[],]
    # Renamed from 'format': don't shadow the builtin.
    [length, midi_format, tracks_expected, ticks] = struct.unpack(
        '>IHHH', bytes(my_midi[4:14]))
    if length != 6:
        _warn("midi2opus: midi header length was "+str(length)+" instead of 6")
        _clean_up_warnings()
        return [1000,[],]
    my_opus = [ticks,]
    my_midi = my_midi[14:]
    track_num = 1   # 5.1
    # Each chunk: 4-byte type + 4-byte length + payload.
    while len(my_midi) >= 8:
        track_type = bytes(my_midi[0:4])
        if track_type != b'MTrk':
            # Warn but keep going: some files carry non-MTrk chunks.
            _warn('midi2opus: Warning: track #'+str(track_num)+' type is '+str(track_type)+" instead of b'MTrk'")
        [track_length] = struct.unpack('>I', my_midi[4:8])
        my_midi = my_midi[8:]
        if track_length > len(my_midi):
            _warn('midi2opus: track #'+str(track_num)+' length '+str(track_length)+' is too large')
            _clean_up_warnings()
            return my_opus   # 5.0: if garbage, return the opus so far
        my_midi_track = my_midi[0:track_length]
        my_track = _decode(my_midi_track)
        my_opus.append(my_track)
        my_midi = my_midi[track_length:]
        track_num += 1   # 5.1
    _clean_up_warnings()
    return my_opus
|
| 338 |
+
|
| 339 |
+
def opus2score(opus=[]):
    r'''For a description of the "opus" and "score" formats,
    see opus2midi() and score2opus().

    Converts delta-times to absolute times and pairs each note_on
    with its matching note_off into a single
    ['note', start_time, duration, channel, pitch, velocity] event.
    '''
    if len(opus) < 2:
        _clean_up_warnings()
        return [1000,[],]
    tracks = copy.deepcopy(opus)  # couple of slices probably quicker...
    ticks = int(tracks.pop(0))
    score = [ticks,]
    for opus_track in tracks:
        ticks_so_far = 0
        score_track = []
        # Maps channel*128+pitch -> FIFO list of pending note_on events,
        # so overlapping same-pitch notes are closed oldest-first.  # 4.0
        chapitch2note_on_events = dict([])   # 4.0
        for opus_event in opus_track:
            ticks_so_far += opus_event[1]
            # A note_on with velocity 0 is treated as a note_off.  # 4.8
            if opus_event[0] == 'note_off' or (opus_event[0] == 'note_on' and opus_event[4] == 0): # 4.8
                cha = opus_event[2]
                pitch = opus_event[3]
                key = cha*128 + pitch
                if chapitch2note_on_events.get(key):
                    # Close the oldest pending note: its slot [2] becomes
                    # the duration (now minus recorded start time).
                    new_event = chapitch2note_on_events[key].pop(0)
                    new_event[2] = ticks_so_far - new_event[1]
                    score_track.append(new_event)
                elif pitch > 127:
                    pass #_warn('opus2score: note_off with no note_on, bad pitch='+str(pitch))
                else:
                    pass #_warn('opus2score: note_off with no note_on cha='+str(cha)+' pitch='+str(pitch))
            elif opus_event[0] == 'note_on':
                cha = opus_event[2]
                pitch = opus_event[3]
                key = cha*128 + pitch
                # Duration slot starts at 0; filled in at the note_off.
                new_event = ['note',ticks_so_far,0,cha,pitch, opus_event[4]]
                if chapitch2note_on_events.get(key):
                    chapitch2note_on_events[key].append(new_event)
                else:
                    chapitch2note_on_events[key] = [new_event,]
            else:
                # Non-note events pass through with absolute time.
                opus_event[1] = ticks_so_far
                score_track.append(opus_event)
        # check for unterminated notes (Oisín) -- 5.2
        for chapitch in chapitch2note_on_events:
            note_on_events = chapitch2note_on_events[chapitch]
            for new_e in note_on_events:
                # Terminate dangling notes at the end of the track.
                new_e[2] = ticks_so_far - new_e[1]
                score_track.append(new_e)
                pass #_warn("opus2score: note_on with no note_off cha="+str(new_e[3])+' pitch='+str(new_e[4])+'; adding note_off at end')
        score.append(score_track)
    _clean_up_warnings()
    return score
|
| 389 |
+
|
| 390 |
+
def midi2score(midi=b''):
    r'''Translate MIDI-file bytes into a "score".

    Convenience wrapper: decodes with midi2opus(), then converts the
    resulting "opus" with opus2score().
    '''
    decoded_opus = midi2opus(midi)
    return opus2score(decoded_opus)
|
| 395 |
+
|
| 396 |
+
def midi2ms_score(midi=b''):
    r'''Translate MIDI-file bytes into a "score" with one beat per
    second and one tick per millisecond.

    Pipeline: midi2opus() -> to_millisecs() -> opus2score().
    '''
    decoded_opus = midi2opus(midi)
    ms_opus = to_millisecs(decoded_opus)
    return opus2score(ms_opus)
|
| 403 |
+
|
| 404 |
+
#------------------------ Other Transformations ---------------------
|
| 405 |
+
|
| 406 |
+
def to_millisecs(old_opus=None):
    r'''Recallibrates all the times in an "opus" to use one beat
    per second and one tick per millisecond.  This makes it
    hard to retrieve any information about beats or barlines,
    but it does make it easy to mix different scores together.

    Returns a new opus whose first element is 1000 (ticks per beat);
    the input opus is not modified.  Raises TypeError if given a
    "score" (i.e. if any 'note' event is found).
    '''
    if old_opus == None:
        return [1000,[],]
    try:
        old_tpq = int(old_opus[0])
    except IndexError: # 5.0
        _warn('to_millisecs: the opus '+str(type(old_opus))+' has no elements')
        return [1000,[],]
    new_opus = [1000,]
    # 6.7 first go through building a table of set_tempos by absolute-tick
    ticks2tempo = {}
    itrack = 1
    while itrack < len(old_opus):
        ticks_so_far = 0
        for old_event in old_opus[itrack]:
            if old_event[0] == 'note':
                raise TypeError('to_millisecs needs an opus, not a score')
            ticks_so_far += old_event[1]
            if old_event[0] == 'set_tempo':
                ticks2tempo[ticks_so_far] = old_event[2]
        itrack += 1
    # then get the sorted-array of their keys
    tempo_ticks = [] # list of keys
    for k in ticks2tempo.keys():
        tempo_ticks.append(k)
    tempo_ticks.sort()
    # then go through converting to millisec, testing if the next
    # set_tempo lies before the next track-event, and using it if so.
    itrack = 1
    while itrack < len(old_opus):
        ms_per_old_tick = 500.0 / old_tpq # float: will round later 6.3
        i_tempo_ticks = 0
        ticks_so_far = 0
        ms_so_far = 0.0
        previous_ms_so_far = 0.0
        # every new track opens with the 1000000 usec/beat tempo,
        # i.e. the new "crochet" is 1 sec
        new_track = [['set_tempo',0,1000000],]
        for old_event in old_opus[itrack]:
            # detect if ticks2tempo has something before this event
            # 20160702 if ticks2tempo is at the same time, leave it
            event_delta_ticks = old_event[1]
            if (i_tempo_ticks < len(tempo_ticks) and
              tempo_ticks[i_tempo_ticks] < (ticks_so_far + old_event[1])):
                # a tempo change falls strictly before this event: advance
                # time up to the tempo change at the old rate, then switch
                # to the new usec-per-tick rate for the remainder
                delta_ticks = tempo_ticks[i_tempo_ticks] - ticks_so_far
                ms_so_far += (ms_per_old_tick * delta_ticks)
                ticks_so_far = tempo_ticks[i_tempo_ticks]
                ms_per_old_tick = ticks2tempo[ticks_so_far] / (1000.0*old_tpq)
                i_tempo_ticks += 1
                event_delta_ticks -= delta_ticks
            new_event = copy.deepcopy(old_event) # now handle the new event
            ms_so_far += (ms_per_old_tick * old_event[1])
            new_event[1] = round(ms_so_far - previous_ms_so_far)
            # old set_tempo events are dropped (the opening 1000000 tempo
            # stands); their elapsed time is carried into the next event
            if old_event[0] != 'set_tempo':
                previous_ms_so_far = ms_so_far
                new_track.append(new_event)
            ticks_so_far += event_delta_ticks
        new_opus.append(new_track)
        itrack += 1
    _clean_up_warnings()
    return new_opus
|
| 470 |
+
|
| 471 |
+
def event2alsaseq(event=None): # 5.5
    r'''Converts an event into the format needed by the alsaseq module,
    http://pp.com.mx/python/alsaseq
    The type of track (opus or score) is autodetected.

    NOTE(review): not yet implemented; always returns None.
    '''
    return None
|
| 477 |
+
|
| 478 |
+
def grep(score=None, channels=None):
    r'''Returns a "score" containing only the channels specified.

    Channel-less events (meta-events such as set_tempo) are always
    kept.  If "channels" is None, a score with no tracks is returned.
    '''
    if score == None:
        return [1000,[],]
    new_score = [score[0],]
    if channels == None:
        return new_score
    wanted = set(channels)
    global Event2channelindex
    for track in score[1:]:
        kept = []
        for event in track:
            cha_index = Event2channelindex.get(event[0], False)
            # keep the event if it carries no channel at all, or if
            # its channel is one of the requested ones
            if (not cha_index) or (event[cha_index] in wanted):
                kept.append(event)
        new_score.append(kept)
    return new_score
|
| 501 |
+
|
| 502 |
+
def play_score(score=None):
    r'''Converts the "score" (or opus) to MIDI, and feeds it into
    'aplaymidi -' for playback.
    '''
    if score == None:
        return
    import subprocess
    pipe = subprocess.Popen(['aplaymidi','-'], stdin=subprocess.PIPE)
    # opus and score data need different encoders
    if score_type(score) == 'opus':
        midi_bytes = opus2midi(score)
    else:
        midi_bytes = score2midi(score)
    pipe.stdin.write(midi_bytes)
    pipe.stdin.close()
|
| 514 |
+
|
| 515 |
+
def timeshift(score=None, shift=None, start_time=None, from_time=0, tracks={0,1,2,3,4,5,6,7,8,10,12,13,14,15}):
    r'''Returns a "score" shifted in time by "shift" ticks, or shifted
    so that the first event starts at "start_time" ticks.

    If "from_time" is specified, only those events in the score
    that begin after it are shifted. If "start_time" is less than
    "from_time" (or "shift" is negative), then the intermediate
    notes are deleted, though patch-change events are preserved.

    If "tracks" are specified, then only those tracks get shifted.
    "tracks" can be a list, tuple or set; it gets converted to set
    internally.

    It is deprecated to specify both "shift" and "start_time".
    If this does happen, timeshift() will print a warning to
    stderr and ignore the "shift" argument.

    If "shift" is negative and sufficiently large that it would
    leave some event with a negative tick-value, then the score
    is shifted so that the first event occurs at time 0. This
    also occurs if "start_time" is negative, and is also the
    default if neither "shift" nor "start_time" are specified.
    '''
    # NOTE(review): the mutable default "tracks" is only ever read
    # (set(tracks) rebinds a local), so the usual shared-default trap
    # does not bite here.  Also note the default omits tracks 9 and 11,
    # whereas segment()'s default omits only 9 -- confirm 11's omission
    # is intentional.
    #_warn('tracks='+str(tracks))
    if score == None or len(score) < 2:
        return [1000, [],]
    new_score = [score[0],]
    my_type = score_type(score)
    if my_type == '':
        return new_score
    if my_type == 'opus':
        _warn("timeshift: opus format is not supported\n")
        # _clean_up_scores() 6.2; doesn't exist! what was it supposed to do?
        return new_score
    if not (shift == None) and not (start_time == None):
        _warn("timeshift: shift and start_time specified: ignoring shift\n")
        shift = None
    if shift == None:
        if (start_time == None) or (start_time < 0):
            start_time = 0
        # shift = start_time - from_time

    i = 1 # ignore first element (ticks)
    tracks = set(tracks) # defend against tuples and lists
    earliest = 1000000000
    if not (start_time == None) or shift < 0: # first find the earliest event
        while i < len(score):
            if len(tracks) and not ((i-1) in tracks):
                i += 1
                continue
            for event in score[i]:
                if event[1] < from_time:
                    continue # just inspect the to_be_shifted events
                if event[1] < earliest:
                    earliest = event[1]
            i += 1
    if earliest > 999999999:
        # nothing found to shift; treat the earliest event as time 0
        earliest = 0
    if shift == None:
        shift = start_time - earliest
    elif (earliest + shift) < 0:
        # clamp a too-large negative shift so nothing goes below tick 0
        start_time = 0
        shift = 0 - earliest

    i = 1 # ignore first element (ticks)
    while i < len(score):
        if len(tracks) == 0 or not ((i-1) in tracks): # 3.8
            # untouched tracks are passed through unchanged
            new_score.append(score[i])
            i += 1
            continue
        new_track = []
        for event in score[i]:
            new_event = list(event)
            #if new_event[1] == 0 and shift > 0 and new_event[0] != 'note':
            #    pass
            #elif new_event[1] >= from_time:
            if new_event[1] >= from_time:
                # 4.1 must not rightshift set_tempo
                if new_event[0] != 'set_tempo' or shift<0:
                    new_event[1] += shift
            elif (shift < 0) and (new_event[1] >= (from_time+shift)):
                # event falls in the deleted gap left by a negative shift
                continue
            new_track.append(new_event)
        if len(new_track) > 0:
            new_score.append(new_track)
        i += 1
    _clean_up_warnings()
    return new_score
|
| 603 |
+
|
| 604 |
+
def segment(score=None, start_time=None, end_time=None, start=0, end=100000000,
 tracks={0,1,2,3,4,5,6,7,8,10,11,12,13,14,15}):
    r'''Returns a "score" which is a segment of the one supplied
    as the argument, beginning at "start_time" ticks and ending
    at "end_time" ticks (or at the end if "end_time" is not supplied).
    If the set "tracks" is specified, only those tracks will
    be returned.

    The most recent set_tempo, patch_change and control_change
    events before "start_time" are re-inserted at the start of
    each kept track, so the segment plays as it did in context.
    '''
    if score == None or len(score) < 2:
        return [1000, [],]
    if start_time == None: # as of 4.2 start_time is recommended
        start_time = start # start is legacy usage
    if end_time == None: # likewise
        end_time = end
    new_score = [score[0],]
    my_type = score_type(score)
    if my_type == '':
        return new_score
    if my_type == 'opus':
        # more difficult (disconnecting note_on's from their note_off's)...
        _warn("segment: opus format is not supported\n")
        _clean_up_warnings()
        return new_score
    i = 1 # ignore first element (ticks); we count in ticks anyway
    tracks = set(tracks) # defend against tuples and lists
    while i < len(score):
        if len(tracks) and not ((i-1) in tracks):
            i += 1
            continue
        new_track = []
        channel2cc_num = {} # most recent controller change before start
        channel2cc_val = {}
        channel2cc_time = {}
        channel2patch_num = {} # keep most recent patch change before start
        channel2patch_time = {}
        set_tempo_num = 500000 # most recent tempo change before start 6.3
        set_tempo_time = 0
        earliest_note_time = end_time
        for event in score[i]:
            # first, remember the state-setting events that precede the
            # segment, per channel, so they can be replayed at start_time
            if event[0] == 'control_change': # 6.5
                cc_time = channel2cc_time.get(event[2]) or 0
                if (event[1] <= start_time) and (event[1] >= cc_time):
                    channel2cc_num[event[2]] = event[3]
                    channel2cc_val[event[2]] = event[4]
                    channel2cc_time[event[2]] = event[1]
            elif event[0] == 'patch_change':
                patch_time = channel2patch_time.get(event[2]) or 0
                if (event[1]<=start_time) and (event[1] >= patch_time): # 2.0
                    channel2patch_num[event[2]] = event[3]
                    channel2patch_time[event[2]] = event[1]
            elif event[0] == 'set_tempo':
                if (event[1]<=start_time) and (event[1]>=set_tempo_time): #6.4
                    set_tempo_num = event[2]
                    set_tempo_time = event[1]
            # then keep any event that lies within the segment window
            if (event[1] >= start_time) and (event[1] <= end_time):
                new_track.append(event)
                if (event[0] == 'note') and (event[1] < earliest_note_time):
                    earliest_note_time = event[1]
        if len(new_track) > 0:
            # replay the remembered state at the segment boundary
            new_track.append(['set_tempo', start_time, set_tempo_num])
            for c in channel2patch_num:
                new_track.append(['patch_change',start_time,c,channel2patch_num[c]],)
            for c in channel2cc_num: # 6.5
                new_track.append(['control_change',start_time,c,channel2cc_num[c],channel2cc_val[c]])
            new_score.append(new_track)
        i += 1
    _clean_up_warnings()
    return new_score
|
| 672 |
+
|
| 673 |
+
def score_type(opus_or_score=None):
    r'''Returns 'opus' or 'score' according to the data's format, or ''.

    Detection scans the tracks for the first 'note' event (score) or
    'note_on' event (opus); anything unrecognisable yields ''.
    '''
    if opus_or_score == None:
        return ''
    if str(type(opus_or_score)).find('list') < 0:
        return ''
    if len(opus_or_score) < 2:
        return ''
    for track in opus_or_score[1:]:   # skip the leading ticks element
        for event in track:
            if event[0] == 'note':
                return 'score'
            if event[0] == 'note_on':
                return 'opus'
    return ''
|
| 687 |
+
|
| 688 |
+
def concatenate_scores(scores):
    r'''Concatenates a list of scores into one score.
    If the scores differ in their "ticks" parameter,
    they will all get converted to millisecond-tick format.
    '''
    # the deepcopys are needed if the input_score's are refs to the same obj
    # e.g. if invoked by midisox's repeat()
    input_scores = _consistentise_ticks(scores) # 3.7
    output_score = copy.deepcopy(input_scores[0])
    for input_score in input_scores[1:]:
        # each appended score starts after everything accumulated so far
        delta_ticks = score2stats(output_score)['nticks']
        itrack = 1
        while itrack < len(input_score):
            if itrack >= len(output_score): # new output track if doesn't exist
                output_score.append([])
            for event in input_score[itrack]:
                shifted = copy.deepcopy(event)
                shifted[1] += delta_ticks
                output_score[itrack].append(shifted)
            itrack += 1
    return output_score
|
| 709 |
+
|
| 710 |
+
def merge_scores(scores):
    r'''Merges a list of scores into one score.  A merged score comprises
    all of the tracks from all of the input scores; un-merging is possible
    by selecting just some of the tracks.  If the scores differ in their
    "ticks" parameter, they will all get converted to millisecond-tick
    format.  merge_scores attempts to resolve channel-conflicts,
    but there are of course only 15 available channels...
    '''
    input_scores = _consistentise_ticks(scores) # 3.6
    output_score = [1000]
    channels_so_far = set()
    # channel 9 is deliberately excluded from the remap pool
    all_channels = {0,1,2,3,4,5,6,7,8,10,11,12,13,14,15}
    global Event2channelindex
    for input_score in input_scores:
        new_channels = set(score2stats(input_score).get('channels_total', []))
        new_channels.discard(9) # 2.8 cha9 must remain cha9 (in GM)
        # every channel this score shares with earlier scores gets
        # remapped (in place) to a channel nobody is using yet
        for channel in channels_so_far & new_channels:
            # consistently choose lowest avaiable, to ease testing
            free_channels = list(all_channels - (channels_so_far|new_channels))
            if len(free_channels) > 0:
                free_channels.sort()
                free_channel = free_channels[0]
            else:
                # no channels left: give up on resolving conflicts
                free_channel = None
                break
            itrack = 1
            while itrack < len(input_score):
                for input_event in input_score[itrack]:
                    channel_index=Event2channelindex.get(input_event[0],False)
                    if channel_index and input_event[channel_index]==channel:
                        input_event[channel_index] = free_channel
                itrack += 1
            channels_so_far.add(free_channel)

        channels_so_far |= new_channels
        output_score.extend(input_score[1:])
    return output_score
|
| 747 |
+
|
| 748 |
+
def _ticks(event):
|
| 749 |
+
return event[1]
|
| 750 |
+
def mix_opus_tracks(input_tracks): # 5.5
    r'''Mixes an array of tracks into one track.  A mixed track
    cannot be un-mixed.  It is assumed that the tracks share the same
    ticks parameter and the same tempo.
    Each opus-track is converted to score form (tying note_on/note_off
    pairs), all events are pooled and time-sorted, and the pool is
    converted back to a single opus-track.
    '''
    mixed_score = [1000, []]
    for one_track in input_tracks: # 5.8
        as_score = opus2score([1000, one_track])
        mixed_score[1].extend(as_score[1])
    mixed_score[1].sort(key=_ticks)
    return score2opus(mixed_score)[1]
|
| 766 |
+
|
| 767 |
+
def mix_scores(scores):
    r'''Mixes a list of scores into one one-track score.
    A mixed score cannot be un-mixed.  Hopefully the scores
    have no undesirable channel-conflicts between them.
    If the scores differ in their "ticks" parameter,
    they will all get converted to millisecond-tick format.
    '''
    input_scores = _consistentise_ticks(scores) # 3.6
    merged_track = []
    for one_score in input_scores:
        for one_track in one_score[1:]:
            merged_track.extend(one_track)
    return [1000, merged_track]
|
| 780 |
+
|
| 781 |
+
def score2stats(opus_or_score=None):
    r'''Returns a dict of some basic stats about the score, like
    bank_select (list of tuples (msb,lsb)),
    channels_by_track (list of lists), channels_total (set),
    general_midi_mode (list),
    ntracks, nticks, patch_changes_by_track (list of dicts),
    num_notes_by_channel (list of numbers),
    patch_changes_total (set),
    percussion (dict histogram of channel 9 events),
    pitches (dict histogram of pitches on channels other than 9),
    pitch_range_by_track (list, by track, of two-member-tuples),
    pitch_range_sum (sum over tracks of the pitch_ranges),

    Works on either an opus or a score; the format is autodetected
    from the first 'note_on' event seen (opus) versus 'note' (score).
    '''
    bank_select_msb = -1
    bank_select_lsb = -1
    bank_select = []
    channels_by_track = []
    channels_total = set([])
    general_midi_mode = []
    num_notes_by_channel = dict([])
    patches_used_by_track = []
    patches_used_total = set([])
    patch_changes_by_track = []
    patch_changes_total = set([])
    percussion = dict([]) # histogram of channel 9 "pitches"
    pitches = dict([]) # histogram of pitch-occurrences channels 0-8,10-15
    pitch_range_sum = 0 # u pitch-ranges of each track
    pitch_range_by_track = []
    is_a_score = True
    if opus_or_score == None:
        return {'bank_select':[], 'channels_by_track':[], 'channels_total':[],
         'general_midi_mode':[], 'ntracks':0, 'nticks':0,
         'num_notes_by_channel':dict([]),
         'patch_changes_by_track':[], 'patch_changes_total':[],
         'percussion':{}, 'pitches':{}, 'pitch_range_by_track':[],
         'ticks_per_quarter':0, 'pitch_range_sum':0}
    ticks_per_quarter = opus_or_score[0]
    i = 1 # ignore first element, which is ticks
    nticks = 0
    while i < len(opus_or_score):
        highest_pitch = 0
        lowest_pitch = 128
        channels_this_track = set([])
        patch_changes_this_track = dict({})
        for event in opus_or_score[i]:
            if event[0] == 'note':
                # score-style note: [..., start_ticks, duration, cha, pitch, vel]
                num_notes_by_channel[event[3]] = num_notes_by_channel.get(event[3],0) + 1
                if event[3] == 9:
                    percussion[event[4]] = percussion.get(event[4],0) + 1
                else:
                    pitches[event[4]] = pitches.get(event[4],0) + 1
                    if event[4] > highest_pitch:
                        highest_pitch = event[4]
                    if event[4] < lowest_pitch:
                        lowest_pitch = event[4]
                channels_this_track.add(event[3])
                channels_total.add(event[3])
                # the note's end may extend the total length of the piece
                finish_time = event[1] + event[2]
                if finish_time > nticks:
                    nticks = finish_time
            elif event[0] == 'note_off' or (event[0] == 'note_on' and event[4] == 0): # 4.8
                finish_time = event[1]
                if finish_time > nticks:
                    nticks = finish_time
            elif event[0] == 'note_on':
                # a true note_on (velocity > 0) marks this as an opus
                is_a_score = False
                num_notes_by_channel[event[2]] = num_notes_by_channel.get(event[2],0) + 1
                if event[2] == 9:
                    percussion[event[3]] = percussion.get(event[3],0) + 1
                else:
                    pitches[event[3]] = pitches.get(event[3],0) + 1
                    if event[3] > highest_pitch:
                        highest_pitch = event[3]
                    if event[3] < lowest_pitch:
                        lowest_pitch = event[3]
                channels_this_track.add(event[2])
                channels_total.add(event[2])
            elif event[0] == 'patch_change':
                patch_changes_this_track[event[2]] = event[3]
                patch_changes_total.add(event[3])
            elif event[0] == 'control_change':
                if event[3] == 0: # bank select MSB
                    bank_select_msb = event[4]
                elif event[3] == 32: # bank select LSB
                    bank_select_lsb = event[4]
                if bank_select_msb >= 0 and bank_select_lsb >= 0:
                    # a complete MSB+LSB pair has been seen: record it
                    bank_select.append((bank_select_msb,bank_select_lsb))
                    bank_select_msb = -1
                    bank_select_lsb = -1
            elif event[0] == 'sysex_f0':
                if _sysex2midimode.get(event[2], -1) >= 0:
                    general_midi_mode.append(_sysex2midimode.get(event[2]))
            # track length: absolute times in a score, delta times in an opus
            if is_a_score:
                if event[1] > nticks:
                    nticks = event[1]
            else:
                nticks += event[1]
        if lowest_pitch == 128:
            # no pitched notes at all in this track
            lowest_pitch = 0
        channels_by_track.append(channels_this_track)
        patch_changes_by_track.append(patch_changes_this_track)
        pitch_range_by_track.append((lowest_pitch,highest_pitch))
        pitch_range_sum += (highest_pitch-lowest_pitch)
        i += 1

    return {'bank_select':bank_select,
            'channels_by_track':channels_by_track,
            'channels_total':channels_total,
            'general_midi_mode':general_midi_mode,
            'ntracks':len(opus_or_score)-1,
            'nticks':nticks,
            'num_notes_by_channel':num_notes_by_channel,
            'patch_changes_by_track':patch_changes_by_track,
            'patch_changes_total':patch_changes_total,
            'percussion':percussion,
            'pitches':pitches,
            'pitch_range_by_track':pitch_range_by_track,
            'pitch_range_sum':pitch_range_sum,
            'ticks_per_quarter':ticks_per_quarter}
|
| 900 |
+
|
| 901 |
+
#----------------------------- Event stuff --------------------------
|
| 902 |
+
|
| 903 |
+
# Maps sysex_f0 event data to a General-MIDI-mode number; consulted by
# score2stats() to populate its 'general_midi_mode' list.
# NOTE(review): the keys here are str, but in Python 3 sysex event data
# may well be bytes, in which case these lookups would never match --
# verify against the decoder before relying on general_midi_mode.
_sysex2midimode = {
    "\x7E\x7F\x09\x01\xF7": 1,
    "\x7E\x7F\x09\x02\xF7": 0,
    "\x7E\x7F\x09\x03\xF7": 2,
}
|
| 908 |
+
|
| 909 |
+
# Some public-access tuples: the event-type names handled by this module,
# grouped by category.
MIDI_events = tuple('''note_off note_on key_after_touch
control_change patch_change channel_after_touch
pitch_wheel_change'''.split())

Text_events = tuple('''text_event copyright_text_event
track_name instrument_name lyric marker cue_point text_event_08
text_event_09 text_event_0a text_event_0b text_event_0c
text_event_0d text_event_0e text_event_0f'''.split())

Nontext_meta_events = tuple('''end_track set_tempo
smpte_offset time_signature key_signature sequencer_specific
raw_meta_event sysex_f0 sysex_f7 song_position song_select
tune_request'''.split())
# unsupported: raw_data

# Actually, 'tune_request' is an F-series event, not strictly a meta-event...
Meta_events = Text_events + Nontext_meta_events
All_events = MIDI_events + Meta_events
|
| 928 |
+
|
| 929 |
+
# And three dictionaries:
# patch number (0-127) -> instrument name
Number2patch = { # General MIDI patch numbers:
0:'Acoustic Grand',
1:'Bright Acoustic',
2:'Electric Grand',
3:'Honky-Tonk',
4:'Electric Piano 1',
5:'Electric Piano 2',
6:'Harpsichord',
7:'Clav',
8:'Celesta',
9:'Glockenspiel',
10:'Music Box',
11:'Vibraphone',
12:'Marimba',
13:'Xylophone',
14:'Tubular Bells',
15:'Dulcimer',
16:'Drawbar Organ',
17:'Percussive Organ',
18:'Rock Organ',
19:'Church Organ',
20:'Reed Organ',
21:'Accordion',
22:'Harmonica',
23:'Tango Accordion',
24:'Acoustic Guitar(nylon)',
25:'Acoustic Guitar(steel)',
26:'Electric Guitar(jazz)',
27:'Electric Guitar(clean)',
28:'Electric Guitar(muted)',
29:'Overdriven Guitar',
30:'Distortion Guitar',
31:'Guitar Harmonics',
32:'Acoustic Bass',
33:'Electric Bass(finger)',
34:'Electric Bass(pick)',
35:'Fretless Bass',
36:'Slap Bass 1',
37:'Slap Bass 2',
38:'Synth Bass 1',
39:'Synth Bass 2',
40:'Violin',
41:'Viola',
42:'Cello',
43:'Contrabass',
44:'Tremolo Strings',
45:'Pizzicato Strings',
46:'Orchestral Harp',
47:'Timpani',
48:'String Ensemble 1',
49:'String Ensemble 2',
50:'SynthStrings 1',
51:'SynthStrings 2',
52:'Choir Aahs',
53:'Voice Oohs',
54:'Synth Voice',
55:'Orchestra Hit',
56:'Trumpet',
57:'Trombone',
58:'Tuba',
59:'Muted Trumpet',
60:'French Horn',
61:'Brass Section',
62:'SynthBrass 1',
63:'SynthBrass 2',
64:'Soprano Sax',
65:'Alto Sax',
66:'Tenor Sax',
67:'Baritone Sax',
68:'Oboe',
69:'English Horn',
70:'Bassoon',
71:'Clarinet',
72:'Piccolo',
73:'Flute',
74:'Recorder',
75:'Pan Flute',
76:'Blown Bottle',
77:'Skakuhachi',
78:'Whistle',
79:'Ocarina',
80:'Lead 1 (square)',
81:'Lead 2 (sawtooth)',
82:'Lead 3 (calliope)',
83:'Lead 4 (chiff)',
84:'Lead 5 (charang)',
85:'Lead 6 (voice)',
86:'Lead 7 (fifths)',
87:'Lead 8 (bass+lead)',
88:'Pad 1 (new age)',
89:'Pad 2 (warm)',
90:'Pad 3 (polysynth)',
91:'Pad 4 (choir)',
92:'Pad 5 (bowed)',
93:'Pad 6 (metallic)',
94:'Pad 7 (halo)',
95:'Pad 8 (sweep)',
96:'FX 1 (rain)',
97:'FX 2 (soundtrack)',
98:'FX 3 (crystal)',
99:'FX 4 (atmosphere)',
100:'FX 5 (brightness)',
101:'FX 6 (goblins)',
102:'FX 7 (echoes)',
103:'FX 8 (sci-fi)',
104:'Sitar',
105:'Banjo',
106:'Shamisen',
107:'Koto',
108:'Kalimba',
109:'Bagpipe',
110:'Fiddle',
111:'Shanai',
112:'Tinkle Bell',
113:'Agogo',
114:'Steel Drums',
115:'Woodblock',
116:'Taiko Drum',
117:'Melodic Tom',
118:'Synth Drum',
119:'Reverse Cymbal',
120:'Guitar Fret Noise',
121:'Breath Noise',
122:'Seashore',
123:'Bird Tweet',
124:'Telephone Ring',
125:'Helicopter',
126:'Applause',
127:'Gunshot',
}
|
| 1060 |
+
# note number -> percussion-instrument name (only 35-81 are defined)
Notenum2percussion = { # General MIDI Percussion (on Channel 9):
35:'Acoustic Bass Drum',
36:'Bass Drum 1',
37:'Side Stick',
38:'Acoustic Snare',
39:'Hand Clap',
40:'Electric Snare',
41:'Low Floor Tom',
42:'Closed Hi-Hat',
43:'High Floor Tom',
44:'Pedal Hi-Hat',
45:'Low Tom',
46:'Open Hi-Hat',
47:'Low-Mid Tom',
48:'Hi-Mid Tom',
49:'Crash Cymbal 1',
50:'High Tom',
51:'Ride Cymbal 1',
52:'Chinese Cymbal',
53:'Ride Bell',
54:'Tambourine',
55:'Splash Cymbal',
56:'Cowbell',
57:'Crash Cymbal 2',
58:'Vibraslap',
59:'Ride Cymbal 2',
60:'Hi Bongo',
61:'Low Bongo',
62:'Mute Hi Conga',
63:'Open Hi Conga',
64:'Low Conga',
65:'High Timbale',
66:'Low Timbale',
67:'High Agogo',
68:'Low Agogo',
69:'Cabasa',
70:'Maracas',
71:'Short Whistle',
72:'Long Whistle',
73:'Short Guiro',
74:'Long Guiro',
75:'Claves',
76:'Hi Wood Block',
77:'Low Wood Block',
78:'Mute Cuica',
79:'Open Cuica',
80:'Mute Triangle',
81:'Open Triangle',
}
|
| 1109 |
+
|
| 1110 |
+
# Maps an event-type name to the index, within the event list, at which
# its channel number is stored (used e.g. by grep() and merge_scores();
# events absent from this dict carry no channel).
Event2channelindex = { 'note':3, 'note_off':2, 'note_on':2,
 'key_after_touch':2, 'control_change':2, 'patch_change':2,
 'channel_after_touch':2, 'pitch_wheel_change':2
}
|
| 1114 |
+
|
| 1115 |
+
################################################################
|
| 1116 |
+
# The code below this line is full of frightening things, all to
|
| 1117 |
+
# do with the actual encoding and decoding of binary MIDI data.
|
| 1118 |
+
|
| 1119 |
+
def _twobytes2int(byte_a):
|
| 1120 |
+
r'''decode a 16 bit quantity from two bytes,'''
|
| 1121 |
+
return (byte_a[1] | (byte_a[0] << 8))
|
| 1122 |
+
|
| 1123 |
+
def _int2twobytes(int_16bit):
|
| 1124 |
+
r'''encode a 16 bit quantity into two bytes,'''
|
| 1125 |
+
return bytes([(int_16bit>>8) & 0xFF, int_16bit & 0xFF])
|
| 1126 |
+
|
| 1127 |
+
def _read_14_bit(byte_a):
|
| 1128 |
+
r'''decode a 14 bit quantity from two bytes,'''
|
| 1129 |
+
return (byte_a[0] | (byte_a[1] << 7))
|
| 1130 |
+
|
| 1131 |
+
def _write_14_bit(int_14bit):
|
| 1132 |
+
r'''encode a 14 bit quantity into two bytes,'''
|
| 1133 |
+
return bytes([int_14bit & 0x7F, (int_14bit>>7) & 0x7F])
|
| 1134 |
+
|
| 1135 |
+
def _ber_compressed_int(integer):
|
| 1136 |
+
r'''BER compressed integer (not an ASN.1 BER, see perlpacktut for
|
| 1137 |
+
details). Its bytes represent an unsigned integer in base 128,
|
| 1138 |
+
most significant digit first, with as few digits as possible.
|
| 1139 |
+
Bit eight (the high bit) is set on each byte except the last.
|
| 1140 |
+
'''
|
| 1141 |
+
ber = bytearray(b'')
|
| 1142 |
+
seven_bits = 0x7F & integer
|
| 1143 |
+
ber.insert(0, seven_bits) # XXX surely should convert to a char ?
|
| 1144 |
+
integer >>= 7
|
| 1145 |
+
while integer > 0:
|
| 1146 |
+
seven_bits = 0x7F & integer
|
| 1147 |
+
ber.insert(0, 0x80|seven_bits) # XXX surely should convert to a char ?
|
| 1148 |
+
integer >>= 7
|
| 1149 |
+
return ber
|
| 1150 |
+
|
| 1151 |
+
def _unshift_ber_int(ba):
|
| 1152 |
+
r'''Given a bytearray, returns a tuple of (the ber-integer at the
|
| 1153 |
+
start, and the remainder of the bytearray).
|
| 1154 |
+
'''
|
| 1155 |
+
if not len(ba): # 6.7
|
| 1156 |
+
_warn('_unshift_ber_int: no integer found')
|
| 1157 |
+
return ((0, b""))
|
| 1158 |
+
byte = ba.pop(0)
|
| 1159 |
+
integer = 0
|
| 1160 |
+
while True:
|
| 1161 |
+
integer += (byte & 0x7F)
|
| 1162 |
+
if not (byte & 0x80):
|
| 1163 |
+
return ((integer, ba))
|
| 1164 |
+
if not len(ba):
|
| 1165 |
+
_warn('_unshift_ber_int: no end-of-integer found')
|
| 1166 |
+
return ((0, ba))
|
| 1167 |
+
byte = ba.pop(0)
|
| 1168 |
+
integer <<= 7
|
| 1169 |
+
|
| 1170 |
+
def _clean_up_warnings():  # 5.4
    # Flush the "previous message repeated N times" summary to stderr.
    # Call this before returning from any publicly callable function
    # whenever there's a possibility that a warning might have been printed
    # by the function, or by any private functions it might have called.
    # Relies on module-level state: _no_warning (global kill switch),
    # _previous_times (duplicate counter) and _previous_warning (last text).
    if _no_warning:
        return
    global _previous_times
    global _previous_warning
    if _previous_times > 1:
        # E:1176, 0: invalid syntax (<string>, line 1176) (syntax-error) ???
        # print(' previous message repeated '+str(_previous_times)+' times', file=sys.stderr)
        # 6.7
        sys.stderr.write(' previous message repeated {0} times\n'.format(_previous_times))
    elif _previous_times > 0:
        sys.stderr.write(' previous message repeated\n')
    # Reset the de-duplication state for the next warning run.
    _previous_times = 0
    _previous_warning = ''
+
|
| 1188 |
+
def _warn(s=''):
    # Write a warning line to stderr, collapsing consecutive duplicates:
    # a repeat of the previous message only bumps _previous_times, and the
    # summary is emitted later by _clean_up_warnings().
    if _no_warning:
        return
    global _previous_times
    global _previous_warning
    if s == _previous_warning:  # 5.4
        _previous_times = _previous_times + 1
    else:
        # New message: flush any pending repeat summary first.
        _clean_up_warnings()
        sys.stderr.write(str(s)+"\n")
        _previous_warning = s
+
|
| 1200 |
+
def _some_text_event(which_kind=0x01, text=b'some_text'):
    r'''Encode a meta text-style event: 0xFF, the meta type byte
    (which_kind), a BER-compressed length, then the payload bytes.
    Accepts bytes/bytearray, or (for back-compatibility) a str which
    is encoded as ISO-8859-1.
    '''
    # Fix: replaced the fragile str(type(text)).find("'str'") check
    # with the idiomatic isinstance() test; behavior is unchanged.
    if isinstance(text, str):  # 6.4 test for back-compatibility
        data = bytes(text, encoding='ISO-8859-1')
    else:
        data = bytes(text)
    return b'\xFF'+bytes((which_kind,))+_ber_compressed_int(len(data))+data
+
|
| 1207 |
+
def _consistentise_ticks(scores): # 3.6
|
| 1208 |
+
# used by mix_scores, merge_scores, concatenate_scores
|
| 1209 |
+
if len(scores) == 1:
|
| 1210 |
+
return copy.deepcopy(scores)
|
| 1211 |
+
are_consistent = True
|
| 1212 |
+
ticks = scores[0][0]
|
| 1213 |
+
iscore = 1
|
| 1214 |
+
while iscore < len(scores):
|
| 1215 |
+
if scores[iscore][0] != ticks:
|
| 1216 |
+
are_consistent = False
|
| 1217 |
+
break
|
| 1218 |
+
iscore += 1
|
| 1219 |
+
if are_consistent:
|
| 1220 |
+
return copy.deepcopy(scores)
|
| 1221 |
+
new_scores = []
|
| 1222 |
+
iscore = 0
|
| 1223 |
+
while iscore < len(scores):
|
| 1224 |
+
score = scores[iscore]
|
| 1225 |
+
new_scores.append(opus2score(to_millisecs(score2opus(score))))
|
| 1226 |
+
iscore += 1
|
| 1227 |
+
return new_scores
|
| 1228 |
+
|
| 1229 |
+
|
| 1230 |
+
###########################################################################
|
| 1231 |
+
|
| 1232 |
+
def _decode(trackdata=b'', exclude=None, include=None,
        event_callback=None, exclusive_event_callback=None, no_eot_magic=False):
    r'''Decodes MIDI track data into an opus-style list of events.
    The options:
      'exclude' is a list of event types which will be ignored SHOULD BE A SET
      'include' (and no exclude), makes exclude a list
        of all possible events, /minus/ what include specifies
      'event_callback' is a coderef
      'exclusive_event_callback' is a coderef
    '''
    # NOTE(review): event_callback / exclusive_event_callback are accepted
    # but never invoked in this port (the Perl callback code is commented
    # out below) -- confirm before relying on them.
    trackdata = bytearray(trackdata)
    if exclude == None:
        exclude = []
    if include == None:
        include = []
    if include and not exclude:
        # include-only mode: exclude everything, then subtract include below.
        exclude = All_events
    include = set(include)
    exclude = set(exclude)

    # Pointer = 0;  not used here; we eat through the bytearray instead.
    event_code = -1;   # used for running status
    event_count = 0;
    events = []

    while(len(trackdata)):
        # loop while there's anything to analyze ...
        eot = False   # When True, the event registrar aborts this loop
        event_count += 1

        E = []
        # E for events - we'll feed it to the event registrar at the end.

        # Slice off the delta time code, and analyze it
        # (remainder is the same mutated bytearray; kept for clarity only)
        [time, remainder] = _unshift_ber_int(trackdata)

        # Now let's see what we can make of the command
        first_byte = trackdata.pop(0) & 0xFF

        if (first_byte < 0xF0):  # It's a MIDI channel event
            if (first_byte & 0x80):
                event_code = first_byte
            else:
                # It wants running status; use last event_code value
                trackdata.insert(0, first_byte)
                if (event_code == -1):
                    _warn("Running status not set; Aborting track.")
                    return []

            command = event_code & 0xF0
            channel = event_code & 0x0F

            if (command == 0xF6):  # 0-byte argument
                # NOTE(review): unreachable -- command is masked from a value
                # < 0xF0 so it can never equal 0xF6; kept from the original.
                pass
            elif (command == 0xC0 or command == 0xD0):  # 1-byte argument
                parameter = trackdata.pop(0)  # could be B
            else:  # 2-byte argument could be BB or 14-bit
                parameter = (trackdata.pop(0), trackdata.pop(0))

            #################################################################
            # MIDI events
            # Excluded events still consume their data bytes (above) so the
            # stream stays in sync; 'continue' just skips registration.

            if (command == 0x80):
                if 'note_off' in exclude:
                    continue
                E = ['note_off', time, channel, parameter[0], parameter[1]]
            elif (command == 0x90):
                if 'note_on' in exclude:
                    continue
                E = ['note_on', time, channel, parameter[0], parameter[1]]
            elif (command == 0xA0):
                if 'key_after_touch' in exclude:
                    continue
                E = ['key_after_touch',time,channel,parameter[0],parameter[1]]
            elif (command == 0xB0):
                if 'control_change' in exclude:
                    continue
                E = ['control_change',time,channel,parameter[0],parameter[1]]
            elif (command == 0xC0):
                if 'patch_change' in exclude:
                    continue
                E = ['patch_change', time, channel, parameter]
            elif (command == 0xD0):
                if 'channel_after_touch' in exclude:
                    continue
                E = ['channel_after_touch', time, channel, parameter]
            elif (command == 0xE0):
                if 'pitch_wheel_change' in exclude:
                    continue
                # Stored as a signed offset around the 0x2000 centre point.
                E = ['pitch_wheel_change', time, channel,
                    _read_14_bit(parameter)-0x2000]
            else:
                _warn("Shouldn't get here; command="+hex(command))

        elif (first_byte == 0xFF):  # It's a Meta-Event! ##################
            #[command, length, remainder] =
            #    unpack("xCwa*", substr(trackdata, $Pointer, 6));
            #Pointer += 6 - len(remainder);
            #    # Move past JUST the length-encoded.
            command = trackdata.pop(0) & 0xFF
            [length, trackdata] = _unshift_ber_int(trackdata)
            if (command == 0x00):
                if (length == 2):
                    E = ['set_sequence_number',time,_twobytes2int(trackdata)]
                else:
                    _warn('set_sequence_number: length must be 2, not '+str(length))
                    E = ['set_sequence_number', time, 0]

            elif command >= 0x01 and command <= 0x0f:  # Text events
                # 6.2 take it in bytes; let the user get the right encoding.
                # text_str = trackdata[0:length].decode('ascii','ignore')
                # text_str = trackdata[0:length].decode('ISO-8859-1')
                # 6.4 take it in bytes; let the user get the right encoding.
                text_data = bytes(trackdata[0:length])  # 6.4
                # Defined text events
                if (command == 0x01):
                    E = ['text_event', time, text_data]
                elif (command == 0x02):
                    E = ['copyright_text_event', time, text_data]
                elif (command == 0x03):
                    E = ['track_name', time, text_data]
                elif (command == 0x04):
                    E = ['instrument_name', time, text_data]
                elif (command == 0x05):
                    E = ['lyric', time, text_data]
                elif (command == 0x06):
                    E = ['marker', time, text_data]
                elif (command == 0x07):
                    E = ['cue_point', time, text_data]
                # Reserved but apparently unassigned text events
                elif (command == 0x08):
                    E = ['text_event_08', time, text_data]
                elif (command == 0x09):
                    E = ['text_event_09', time, text_data]
                elif (command == 0x0a):
                    E = ['text_event_0a', time, text_data]
                elif (command == 0x0b):
                    E = ['text_event_0b', time, text_data]
                elif (command == 0x0c):
                    E = ['text_event_0c', time, text_data]
                elif (command == 0x0d):
                    E = ['text_event_0d', time, text_data]
                elif (command == 0x0e):
                    E = ['text_event_0e', time, text_data]
                elif (command == 0x0f):
                    E = ['text_event_0f', time, text_data]

            # Now the sticky events -------------------------------------
            elif (command == 0x2F):
                E = ['end_track', time]
                # The code for handling this, oddly, comes LATER,
                # in the event registrar.
            elif (command == 0x51):  # DTime, Microseconds/Crochet
                if length != 3:
                    _warn('set_tempo event, but length='+str(length))
                # 3-byte big-endian value, zero-padded to unpack as >I.
                E = ['set_tempo', time,
                    struct.unpack(">I", b'\x00'+trackdata[0:3])[0]]
            elif (command == 0x54):
                if length != 5:  # DTime, HR, MN, SE, FR, FF
                    _warn('smpte_offset event, but length='+str(length))
                E = ['smpte_offset',time] + list(struct.unpack(">BBBBB",trackdata[0:5]))
            elif (command == 0x58):
                if length != 4:  # DTime, NN, DD, CC, BB
                    _warn('time_signature event, but length='+str(length))
                E = ['time_signature', time]+list(trackdata[0:4])
            elif (command == 0x59):
                if length != 2:  # DTime, SF(signed), MI
                    _warn('key_signature event, but length='+str(length))
                E = ['key_signature',time] + list(struct.unpack(">bB",trackdata[0:2]))
            elif (command == 0x7F):  # 6.4
                E = ['sequencer_specific',time, bytes(trackdata[0:length])]
            else:
                E = ['raw_meta_event', time, command,
                    bytes(trackdata[0:length])]  # 6.0
                #"[uninterpretable meta-event command of length length]"
                # DTime, Command, Binary Data
                # It's uninterpretable; record it as raw_data.

            # Pointer += length; #  Now move Pointer
            trackdata = trackdata[length:]

        ######################################################################
        elif (first_byte == 0xF0 or first_byte == 0xF7):
            # Note that sysexes in MIDI /files/ are different than sysexes
            # in MIDI transmissions!! The vast majority of system exclusive
            # messages will just use the F0 format. For instance, the
            # transmitted message F0 43 12 00 07 F7 would be stored in a
            # MIDI file as F0 05 43 12 00 07 F7. As mentioned above, it is
            # required to include the F7 at the end so that the reader of the
            # MIDI file knows that it has read the entire message. (But the F7
            # is omitted if this is a non-final block in a multiblock sysex;
            # but the F7 (if there) is counted in the message's declared
            # length, so we don't have to think about it anyway.)
            #command = trackdata.pop(0)
            [length, trackdata] = _unshift_ber_int(trackdata)
            if first_byte == 0xF0:
                # 20091008 added ISO-8859-1 to get an 8-bit str
                # 6.4 return bytes instead
                E = ['sysex_f0', time, bytes(trackdata[0:length])]
            else:
                E = ['sysex_f7', time, bytes(trackdata[0:length])]
            trackdata = trackdata[length:]

        ######################################################################
        # Now, the MIDI file spec says:
        #  <track data> = <MTrk event>+
        #  <MTrk event> = <delta-time> <event>
        #  <event> = <MIDI event> | <sysex event> | <meta-event>
        # I know that, on the wire, <MIDI event> can include note_on,
        # note_off, and all the other 8x to Ex events, AND Fx events
        # other than F0, F7, and FF -- namely, <song position msg>,
        # <song select msg>, and <tune request>.
        #
        # Whether these can occur in MIDI files is not clearly specified
        # by the MIDI file spec.  So, I'm going to assume that
        # they CAN, in practice, occur.  I don't know whether it's
        # proper for you to actually emit these into a MIDI file.

        elif (first_byte == 0xF2):  # DTime, Beats
            #  <song position msg> ::=     F2 <data pair>
            E = ['song_position', time, _read_14_bit(trackdata[:2])]
            trackdata = trackdata[2:]

        elif (first_byte == 0xF3):  # <song select msg> ::= F3 <data singlet>
            # E = ['song_select', time, struct.unpack('>B',trackdata.pop(0))[0]]
            E = ['song_select', time, trackdata[0]]
            trackdata = trackdata[1:]
            # DTime, Thing (what?! song number?  whatever ...)

        elif (first_byte == 0xF6):  # DTime
            E = ['tune_request', time]
            # What would a tune request be doing in a MIDI /file/?

        #########################################################
        # ADD MORE META-EVENTS HERE.  TODO:
        # f1 -- MTC Quarter Frame Message. One data byte follows
        #     the Status; it's the time code value, from 0 to 127.
        # f8 -- MIDI clock.  no data.
        # fa -- MIDI start.  no data.
        # fb -- MIDI continue.  no data.
        # fc -- MIDI stop.  no data.
        # fe -- Active sense.  no data.
        # f4 f5 f9 fd -- unallocated

        # (Original Perl branch, kept for reference:)
        # elif (first_byte > 0xF0) { # Some unknown kinda F-series event ####
        #     # Here we only produce a one-byte piece of raw data.
        #     # But the encoder for 'raw_data' accepts any length of it.
        #     E = [ 'raw_data',
        #          time, substr(trackdata,Pointer,1) ]
        #     # DTime and the Data (in this case, the one Event-byte)
        #     ++Pointer;  # itself
        elif first_byte > 0xF0:  # Some unknown F-series event
            # Here we only produce a one-byte piece of raw data.
            # E = ['raw_data', time, bytest(trackdata[0])]   # 6.4
            E = ['raw_data', time, trackdata[0]]   # 6.4 6.7
            trackdata = trackdata[1:]
        else:  # Fallthru.
            _warn("Aborting track.  Command-byte first_byte="+hex(first_byte))
            break
        # End of the big if-group


        ######################################################################
        #  THE EVENT REGISTRAR...
        if E and (E[0] == 'end_track'):
            # This is the code for exceptional handling of the EOT event.
            eot = True
            if not no_eot_magic:
                if E[1] > 0:  # a null text-event to carry the delta-time
                    # NOTE(review): payload here is a str '' while other text
                    # events carry bytes -- quirk preserved from the original.
                    E = ['text_event', E[1], '']
                else:
                    E = []  # EOT with a delta-time of 0; ignore it.

        if E and not (E[0] in exclude):
            #if ( $exclusive_event_callback ):
            #    &{ $exclusive_event_callback }( @E );
            #else:
            #    &{ $event_callback }( @E ) if $event_callback;
            events.append(E)
        if eot:
            break

    # End of the big "Event" while-block

    return events
+
|
| 1521 |
+
|
| 1522 |
+
###########################################################################
|
| 1523 |
+
def _encode(events_lol, unknown_callback=None, never_add_eot=False,
        no_eot_magic=False, no_running_status=False):
    # encode an event structure, presumably for writing to a file
    # Calling format:
    # $data_r = MIDI::Event::encode( \@event_lol, { options } );
    # Takes a REFERENCE to an event structure (a LoL)
    # Returns an (unblessed) REFERENCE to track data.

    # If you want to use this to encode a /single/ event,
    # you still have to do it as a reference to an event structure (a LoL)
    # that just happens to have just one event.  I.e.,
    # encode( [ $event ] ) or encode( [ [ 'note_on', 100, 5, 42, 64] ] )
    # If you're doing this, consider the never_add_eot track option, as in
    # print MIDI ${ encode( [ $event], { 'never_add_eot' => 1} ) };

    data = []  # what I'll store the chunks of byte-data in

    # This is so my end_track magic won't corrupt the original
    events = copy.deepcopy(events_lol)

    if not never_add_eot:
        # One way or another, tack on an 'end_track'
        if events:
            last = events[-1]
            if not (last[0] == 'end_track'):  # no end_track already
                if (last[0] == 'text_event' and len(last[2]) == 0):
                    # 0-length text event at track-end.
                    if no_eot_magic:
                        # Exceptional case: don't mess with track-final
                        # 0-length text_events; just peg on an end_track
                        events.append(['end_track', 0])
                    else:
                        # NORMAL CASE: replace with an end_track, leaving DTime
                        last[0] = 'end_track'
                else:
                    # last event was neither 0-length text_event nor end_track
                    events.append(['end_track', 0])
        else:  # an eventless track!
            events = [['end_track', 0],]

    # maybe_running_status = not no_running_status  # unused? 4.7
    last_status = -1  # status byte of the previous channel event (running status)

    for event_r in (events):
        E = copy.deepcopy(event_r)
        # otherwise the shifting'd corrupt the original
        if not E:
            continue

        event = E.pop(0)
        if not len(event):
            continue

        dtime = int(E.pop(0))
        # print('event='+str(event)+' dtime='+str(dtime))

        event_data = ''

        if (  # MIDI events -- eligible for running status
            event == 'note_on'
            or event == 'note_off'
            or event == 'control_change'
            or event == 'key_after_touch'
            or event == 'patch_change'
            or event == 'channel_after_touch'
            or event == 'pitch_wheel_change' ):

            # This block is where we spend most of the time.  Gotta be tight.
            if (event == 'note_off'):
                status = 0x80 | (int(E[0]) & 0x0F)
                parameters = struct.pack('>BB', int(E[1])&0x7F, int(E[2])&0x7F)
            elif (event == 'note_on'):
                status = 0x90 | (int(E[0]) & 0x0F)
                parameters = struct.pack('>BB', int(E[1])&0x7F, int(E[2])&0x7F)
            elif (event == 'key_after_touch'):
                status = 0xA0 | (int(E[0]) & 0x0F)
                parameters = struct.pack('>BB', int(E[1])&0x7F, int(E[2])&0x7F)
            elif (event == 'control_change'):
                # NOTE(review): masks with 0xFF rather than 0x7F like the
                # others -- values >= 128 would not fit '>B' as data bytes;
                # quirk preserved from the original.
                status = 0xB0 | (int(E[0]) & 0x0F)
                parameters = struct.pack('>BB', int(E[1])&0xFF, int(E[2])&0xFF)
            elif (event == 'patch_change'):
                status = 0xC0 | (int(E[0]) & 0x0F)
                parameters = struct.pack('>B', int(E[1]) & 0xFF)
            elif (event == 'channel_after_touch'):
                status = 0xD0 | (int(E[0]) & 0x0F)
                parameters = struct.pack('>B', int(E[1]) & 0xFF)
            elif (event == 'pitch_wheel_change'):
                # Signed offset re-centred around 0x2000 before encoding.
                status = 0xE0 | (int(E[0]) & 0x0F)
                parameters =  _write_14_bit(int(E[1]) + 0x2000)
            else:
                _warn("BADASS FREAKOUT ERROR 31415!")

            # And now the encoding
            # w = BER compressed integer (not ASN.1 BER, see perlpacktut for
            # details).  Its bytes represent an unsigned integer in base 128,
            # most significant digit first, with as few digits as possible.
            # Bit eight (the high bit) is set on each byte except the last.

            data.append(_ber_compressed_int(dtime))
            if (status != last_status) or no_running_status:
                # Only emit the status byte when it changes (running status).
                data.append(struct.pack('>B', status))
            data.append(parameters)

            last_status = status
            continue
        else:
            # Not a MIDI event.
            # All the code in this block could be more efficient,
            # but this is not where the code needs to be tight.
            # print "zaz $event\n";
            last_status = -1

            if event == 'raw_meta_event':
                event_data = _some_text_event(int(E[0]), E[1])
            elif (event == 'set_sequence_number'):  # 3.9
                event_data = b'\xFF\x00\x02'+_int2twobytes(E[0])

            # Text meta-events...
            # a case for a dict, I think (pjb) ...
            elif (event == 'text_event'):
                event_data = _some_text_event(0x01, E[0])
            elif (event == 'copyright_text_event'):
                event_data = _some_text_event(0x02, E[0])
            elif (event == 'track_name'):
                event_data = _some_text_event(0x03, E[0])
            elif (event == 'instrument_name'):
                event_data = _some_text_event(0x04, E[0])
            elif (event == 'lyric'):
                event_data = _some_text_event(0x05, E[0])
            elif (event == 'marker'):
                event_data = _some_text_event(0x06, E[0])
            elif (event == 'cue_point'):
                event_data = _some_text_event(0x07, E[0])
            elif (event == 'text_event_08'):
                event_data = _some_text_event(0x08, E[0])
            elif (event == 'text_event_09'):
                event_data = _some_text_event(0x09, E[0])
            elif (event == 'text_event_0a'):
                event_data = _some_text_event(0x0A, E[0])
            elif (event == 'text_event_0b'):
                event_data = _some_text_event(0x0B, E[0])
            elif (event == 'text_event_0c'):
                event_data = _some_text_event(0x0C, E[0])
            elif (event == 'text_event_0d'):
                event_data = _some_text_event(0x0D, E[0])
            elif (event == 'text_event_0e'):
                event_data = _some_text_event(0x0E, E[0])
            elif (event == 'text_event_0f'):
                event_data = _some_text_event(0x0F, E[0])
            # End of text meta-events

            elif (event == 'end_track'):
                event_data = b"\xFF\x2F\x00"

            elif (event == 'set_tempo'):
                #event_data = struct.pack(">BBwa*", 0xFF, 0x51, 3,
                #    substr( struct.pack('>I', E[0]), 1, 3))
                # 3-byte big-endian tempo: drop the top byte of the >I pack.
                event_data = b'\xFF\x51\x03'+struct.pack('>I',E[0])[1:]
            elif (event == 'smpte_offset'):
                # event_data = struct.pack(">BBwBBBBB", 0xFF, 0x54, 5, E[0:5] )
                event_data = struct.pack(">BBBbBBBB", 0xFF,0x54,0x05,E[0],E[1],E[2],E[3],E[4])
            elif (event == 'time_signature'):
                # event_data = struct.pack(">BBwBBBB",  0xFF, 0x58, 4, E[0:4] )
                event_data = struct.pack(">BBBbBBB", 0xFF, 0x58, 0x04, E[0],E[1],E[2],E[3])
            elif (event == 'key_signature'):
                # SF is packed signed ('b'), MI unsigned ('B').
                event_data = struct.pack(">BBBbB", 0xFF, 0x59, 0x02, E[0],E[1])
            elif (event == 'sequencer_specific'):
                # event_data = struct.pack(">BBwa*", 0xFF,0x7F, len(E[0]), E[0])
                event_data = _some_text_event(0x7F, E[0])
            # End of Meta-events

            # Other Things...
            elif (event == 'sysex_f0'):
                #event_data = struct.pack(">Bwa*", 0xF0, len(E[0]), E[0])
                #B=bitstring w=BER-compressed-integer a=null-padded-ascii-str
                event_data = bytearray(b'\xF0')+_ber_compressed_int(len(E[0]))+bytearray(E[0])
            elif (event == 'sysex_f7'):
                #event_data = struct.pack(">Bwa*", 0xF7, len(E[0]), E[0])
                event_data = bytearray(b'\xF7')+_ber_compressed_int(len(E[0]))+bytearray(E[0])

            elif (event == 'song_position'):
                event_data = b"\xF2" + _write_14_bit( E[0] )
            elif (event == 'song_select'):
                event_data = struct.pack('>BB', 0xF3, E[0] )
            elif (event == 'tune_request'):
                event_data = b"\xF6"
            elif (event == 'raw_data'):
                _warn("_encode: raw_data event not supported")
                # event_data = E[0]
                continue
            # End of Other Stuff

            else:
                # The Big Fallthru
                if unknown_callback:
                    # push(@data, &{ $unknown_callback }( @$event_r ))
                    pass
                else:
                    _warn("Unknown event: "+str(event))
                    # To suppress complaint here, just set
                    # 'unknown_callback' => sub { return () }
                continue

        #print "Event $event encoded part 2\n"
        if str(type(event_data)).find("'str'") >= 0:
            # Back-compat: a str payload is coerced to Latin-1 bytes.
            event_data = bytearray(event_data.encode('Latin1', 'ignore'))
        if len(event_data):  # how could $event_data be empty
            # data.append(struct.pack('>wa*', dtime, event_data))
            # print(' event_data='+str(event_data))
            data.append(_ber_compressed_int(dtime)+event_data)

    return b''.join(data)
+
|
README.md
CHANGED
|
@@ -1,12 +1,54 @@
|
|
| 1 |
---
|
| 2 |
-
title: Multitrack
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 6.3.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Multitrack MIDI Composer
|
| 3 |
+
emoji: 🎹
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: blue
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 6.3.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
+
short_description: AI MIDI generation with two transformer models
|
| 12 |
+
tags:
|
| 13 |
+
- midi
|
| 14 |
+
- music-generation
|
| 15 |
+
- audio
|
| 16 |
+
- cpu
|
| 17 |
---
|
| 18 |
|
| 19 |
+
# Multitrack MIDI Composer
|
| 20 |
+
|
| 21 |
+
AI-powered MIDI music generation with **two transformer models** to choose from.
|
| 22 |
+
|
| 23 |
+
## Models
|
| 24 |
+
|
| 25 |
+
### Multitrack Generator (Genre-based)
|
| 26 |
+
- Genre-based multi-instrument composition
|
| 27 |
+
- Uses `juancopi81/lmd-8bars-2048-epochs40_v4` transformer
|
| 28 |
+
- Builds music instrument by instrument
|
| 29 |
+
- Supports: ROCK, POP, JAZZ, ELECTRONIC, R&B/SOUL
|
| 30 |
+
|
| 31 |
+
### SkyTNT MIDI Model (Instrument-based)
|
| 32 |
+
- Uses `skytnt/midi-model` via ONNX (CPU optimized)
|
| 33 |
+
- Select specific instruments and drum kits
|
| 34 |
+
- Event-based generation with fine-grained control
|
| 35 |
+
- Supports all General MIDI instruments
|
| 36 |
+
|
| 37 |
+
## Features
|
| 38 |
+
|
| 39 |
+
- Pure Python audio synthesis (py-meltysynth)
|
| 40 |
+
- In-browser audio playback
|
| 41 |
+
- MIDI file export
|
| 42 |
+
- Piano roll visualization
|
| 43 |
+
- CPU-only (no GPU needed)
|
| 44 |
+
|
| 45 |
+
## Requirements
|
| 46 |
+
|
| 47 |
+
- CPU-only deployment
|
| 48 |
+
- ~500MB model downloads on first use
|
| 49 |
+
- ~6MB SoundFont file included
|
| 50 |
+
|
| 51 |
+
## Credits
|
| 52 |
+
|
| 53 |
+
- Multitrack Generator: Dr. Tristan Behrens
|
| 54 |
+
- SkyTNT Model: [skytnt/midi-model](https://github.com/SkyTNT/midi-model)
|
TimGM6mb.sf2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5378b62028c920cb11e4803327983fee2f2cdff5dc89c708e39da417e51c854
|
| 3 |
+
size 5969788
|
app.py
ADDED
|
@@ -0,0 +1,934 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Multitrack MIDI Composer - Combined MIDI generation tools
|
| 3 |
+
- Simple MIDI Composer: Demo mode chord progressions
|
| 4 |
+
- Multitrack Generator: AI multi-instrument composition with genre selection
|
| 5 |
+
|
| 6 |
+
CPU-only HuggingFace Space
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import os
|
| 10 |
+
import sys
|
| 11 |
+
import tempfile
|
| 12 |
+
import argparse
|
| 13 |
+
import struct
|
| 14 |
+
import wave
|
| 15 |
+
from typing import List, Tuple, Optional
|
| 16 |
+
|
| 17 |
+
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
|
| 18 |
+
|
| 19 |
+
import gradio as gr
|
| 20 |
+
import numpy as np
|
| 21 |
+
|
| 22 |
+
# Import meltysynth for pure Python MIDI-to-audio synthesis
|
| 23 |
+
try:
|
| 24 |
+
import meltysynth as ms
|
| 25 |
+
MELTYSYNTH_AVAILABLE = True
|
| 26 |
+
except ImportError:
|
| 27 |
+
MELTYSYNTH_AVAILABLE = False
|
| 28 |
+
print("meltysynth not available - Audio playback disabled")
|
| 29 |
+
|
| 30 |
+
# Path to SoundFont file
|
| 31 |
+
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 32 |
+
SOUNDFONT_PATH = os.path.join(SCRIPT_DIR, "TimGM6mb.sf2")
|
| 33 |
+
|
| 34 |
+
# Global synthesizer (loaded once)
|
| 35 |
+
_synthesizer = None
|
| 36 |
+
_synth_settings = None
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def get_synthesizer():
    """Return the cached (synthesizer, settings) pair, loading the SoundFont on first use.

    Yields (None, None) until meltysynth is importable and the SoundFont
    file exists; subsequent calls reuse the module-level singletons.
    """
    global _synthesizer, _synth_settings
    needs_load = _synthesizer is None and MELTYSYNTH_AVAILABLE
    if needs_load:
        if os.path.exists(SOUNDFONT_PATH):
            print(f"Loading SoundFont: {SOUNDFONT_PATH}")
            font = ms.SoundFont.from_file(SOUNDFONT_PATH)
            _synth_settings = ms.SynthesizerSettings(44100)
            _synthesizer = ms.Synthesizer(font, _synth_settings)
            print("SoundFont loaded successfully!")
        else:
            print(f"SoundFont not found: {SOUNDFONT_PATH}")
    return _synthesizer, _synth_settings
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def render_midi_to_audio(midi_path: str) -> Optional[str]:
    """Render a MIDI file to a stereo 16-bit WAV file using meltysynth.

    Args:
        midi_path: Path of the MIDI file to render.

    Returns:
        Path of a temporary ``.wav`` file, or None when meltysynth / the
        SoundFont is unavailable or rendering fails.
    """
    if not MELTYSYNTH_AVAILABLE:
        return None

    synth, settings = get_synthesizer()
    if synth is None:
        return None

    try:
        # Load the MIDI file and queue it on the sequencer (no looping).
        midi_file = ms.MidiFile.from_file(midi_path)
        sequencer = ms.MidiFileSequencer(synth)
        sequencer.play(midi_file, False)

        # Buffer covers the song plus 1 second so release tails are not cut off.
        duration = midi_file.length + 1.0
        buffer_length = int(settings.sample_rate * duration)

        left = ms.create_buffer(buffer_length)
        right = ms.create_buffer(buffer_length)
        sequencer.render(left, right)

        # Vectorized float -> int16 conversion (the old per-sample Python loop
        # iterated millions of times). np.asarray accepts the list/array
        # buffers meltysynth returns — presumably sequences of floats; confirm
        # if the buffer type ever changes.
        stereo = np.empty(2 * buffer_length, dtype=np.float64)
        stereo[0::2] = np.clip(np.asarray(left), -1.0, 1.0)
        stereo[1::2] = np.clip(np.asarray(right), -1.0, 1.0)
        # astype truncates toward zero, matching the previous int() conversion;
        # '<i2' forces the little-endian samples WAV requires.
        pcm = (stereo * 32767).astype('<i2')

        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
            wav_path = f.name

        with wave.open(wav_path, 'w') as wav_file:
            wav_file.setnchannels(2)
            wav_file.setsampwidth(2)  # 16-bit
            wav_file.setframerate(settings.sample_rate)
            wav_file.writeframes(pcm.tobytes())

        return wav_path
    except Exception as e:
        print(f"Audio render error: {e}")
        return None
|
| 101 |
+
|
| 102 |
+
# =============================================================================
|
| 103 |
+
# Tab 1: Simple MIDI Composer (Demo Mode)
|
| 104 |
+
# =============================================================================
|
| 105 |
+
|
| 106 |
+
try:
|
| 107 |
+
from midiutil import MIDIFile
|
| 108 |
+
MIDIUTIL_AVAILABLE = True
|
| 109 |
+
except ImportError:
|
| 110 |
+
MIDIUTIL_AVAILABLE = False
|
| 111 |
+
print("midiutil not available - Demo Composer disabled")
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def create_piano_roll(notes_data, total_time, title="Piano Roll"):
    """Draw (start, end, pitch, track) note tuples as a piano-roll PNG.

    Args:
        notes_data: Iterable of (start_beat, end_beat, midi_pitch, track_index).
        total_time: X-axis extent in beats.
        title: Plot title.

    Returns:
        Path of a temporary PNG file containing the rendered figure.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from matplotlib.patches import Rectangle

    fig, ax = plt.subplots(figsize=(12, 6))

    # One colour per track, cycling when there are more tracks than colours.
    palette = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7', '#DDA0DD', '#98D8C8']

    for start, end, pitch, track in notes_data:
        ax.add_patch(Rectangle(
            (start, pitch - 0.4), end - start, 0.8,
            facecolor=palette[track % len(palette)],
            edgecolor='black', linewidth=0.5, alpha=0.8,
        ))

    ax.set_xlim(0, total_time)
    ax.set_ylim(40, 90)
    ax.set_xlabel('Time (beats)')
    ax.set_ylabel('MIDI Pitch')
    ax.set_title(title)
    ax.grid(True, alpha=0.3)

    # Reserve a temp file name, then write the figure into it.
    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f:
        img_path = f.name
    fig.savefig(img_path, dpi=100, bbox_inches='tight')
    plt.close(fig)

    return img_path
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
def create_demo_midi(tempo: int = 120, length_bars: int = 4):
    """Build a demo MIDI file (C-Am-F-G progression plus melody).

    Args:
        tempo: Playback tempo in BPM.
        length_bars: Number of 4/4 bars to write.

    Returns:
        Tuple of (midi_path, piano_roll_png_path, wav_path_or_None, status);
        the first three are None when midiutil is missing or writing fails.
    """
    if not MIDIUTIL_AVAILABLE:
        return None, None, None, "midiutil not installed"

    try:
        midi = MIDIFile(2)  # Track 0: melody, track 1: chords
        midi.addTempo(0, 0, tempo)
        midi.addProgramChange(0, 0, 0, 0)  # Piano for melody
        midi.addProgramChange(1, 0, 0, 0)  # Piano for chords

        # I - vi - IV - V progression in C major.
        chords = [
            [60, 64, 67],  # C major
            [57, 60, 64],  # A minor
            [53, 57, 60],  # F major
            [55, 59, 62],  # G major
        ]

        melody_notes = [
            72, 74, 76, 77, 76, 74, 72, 71,
            69, 71, 72, 74, 72, 71, 69, 67,
            65, 67, 69, 71, 72, 71, 69, 67,
            67, 69, 71, 72, 74, 76, 77, 79,
        ]

        # (start, end, pitch, track) tuples for the piano-roll visualization.
        notes_data = []

        for bar in range(length_bars):
            bar_time = bar * 4  # position in beats
            chord = chords[bar % len(chords)]

            # Whole-bar chord on track 1.
            for pitch in chord:
                midi.addNote(1, 0, pitch, bar_time, 4, 60)
                notes_data.append((bar_time, bar_time + 4, pitch, 1))

            # Eight eighth-note melody hits per bar on track 0.
            for step in range(8):
                onset = bar_time + (step * 0.5)
                pitch = melody_notes[(bar * 8 + step) % len(melody_notes)]
                midi.addNote(0, 0, pitch, onset, 0.4, 90)
                notes_data.append((onset, onset + 0.4, pitch, 0))

        with tempfile.NamedTemporaryFile(suffix='.mid', delete=False) as f:
            midi.writeFile(f)
            midi_path = f.name

        total_time = length_bars * 4
        fig = create_piano_roll(notes_data, total_time, f"Demo: {length_bars} bars at {tempo} BPM")

        audio_path = render_midi_to_audio(midi_path)

        status = f"Created: {length_bars} bars at {tempo} BPM"
        status += " - Audio rendered!" if audio_path else " - Download MIDI to play"
        return midi_path, fig, audio_path, status

    except Exception as e:
        return None, None, None, f"Error: {str(e)}"
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
# =============================================================================
|
| 216 |
+
# Tab 2: Multitrack Generator (Transformer-based)
|
| 217 |
+
# =============================================================================
|
| 218 |
+
|
| 219 |
+
try:
|
| 220 |
+
import torch
|
| 221 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 222 |
+
import note_seq
|
| 223 |
+
from note_seq.protobuf.music_pb2 import NoteSequence
|
| 224 |
+
from note_seq.constants import STANDARD_PPQ
|
| 225 |
+
from matplotlib.figure import Figure
|
| 226 |
+
TORCH_AVAILABLE = True
|
| 227 |
+
except ImportError:
|
| 228 |
+
TORCH_AVAILABLE = False
|
| 229 |
+
print("PyTorch/Transformers not available - Multitrack Generator disabled")
|
| 230 |
+
|
| 231 |
+
SAMPLE_RATE = 44100
|
| 232 |
+
|
| 233 |
+
GM_INSTRUMENTS = [
|
| 234 |
+
"Acoustic Grand Piano", "Bright Acoustic Piano", "Electric Grand Piano",
|
| 235 |
+
"Honky-tonk Piano", "Electric Piano 1", "Electric Piano 2", "Harpsichord",
|
| 236 |
+
"Clavi", "Celesta", "Glockenspiel", "Music Box", "Vibraphone", "Marimba",
|
| 237 |
+
"Xylophone", "Tubular Bells", "Dulcimer", "Drawbar Organ", "Percussive Organ",
|
| 238 |
+
"Rock Organ", "Church Organ", "Reed Organ", "Accordion", "Harmonica",
|
| 239 |
+
"Tango Accordion", "Acoustic Guitar (nylon)", "Acoustic Guitar (steel)",
|
| 240 |
+
"Electric Guitar (jazz)", "Electric Guitar (clean)", "Electric Guitar (muted)",
|
| 241 |
+
"Overdriven Guitar", "Distortion Guitar", "Guitar Harmonics", "Acoustic Bass",
|
| 242 |
+
"Electric Bass (finger)", "Electric Bass (pick)", "Fretless Bass", "Slap Bass 1",
|
| 243 |
+
"Slap Bass 2", "Synth Bass 1", "Synth Bass 2", "Violin", "Viola", "Cello",
|
| 244 |
+
"Contrabass", "Tremolo Strings", "Pizzicato Strings", "Orchestral Harp",
|
| 245 |
+
"Timpani", "String Ensemble 1", "String Ensemble 2", "Synth Strings 1",
|
| 246 |
+
"Synth Strings 2", "Choir Aahs", "Voice Oohs", "Synth Choir", "Orchestra Hit",
|
| 247 |
+
"Trumpet", "Trombone", "Tuba", "Muted Trumpet", "French Horn", "Brass Section",
|
| 248 |
+
"Synth Brass 1", "Synth Brass 2", "Soprano Sax", "Alto Sax", "Tenor Sax",
|
| 249 |
+
"Baritone Sax", "Oboe", "English Horn", "Bassoon", "Clarinet", "Piccolo",
|
| 250 |
+
"Flute", "Recorder", "Pan Flute", "Blown Bottle", "Shakuhachi", "Whistle",
|
| 251 |
+
"Ocarina", "Lead 1 (square)", "Lead 2 (sawtooth)", "Lead 3 (calliope)",
|
| 252 |
+
"Lead 4 (chiff)", "Lead 5 (charang)", "Lead 6 (voice)", "Lead 7 (fifths)",
|
| 253 |
+
"Lead 8 (bass + lead)", "Pad 1 (new age)", "Pad 2 (warm)", "Pad 3 (polysynth)",
|
| 254 |
+
"Pad 4 (choir)", "Pad 5 (bowed)", "Pad 6 (metallic)", "Pad 7 (halo)",
|
| 255 |
+
"Pad 8 (sweep)", "FX 1 (rain)", "FX 2 (soundtrack)", "FX 3 (crystal)",
|
| 256 |
+
"FX 4 (atmosphere)", "FX 5 (brightness)", "FX 6 (goblins)", "FX 7 (echoes)",
|
| 257 |
+
"FX 8 (sci-fi)", "Sitar", "Banjo", "Shamisen", "Koto", "Kalimba", "Bagpipe",
|
| 258 |
+
"Fiddle", "Shanai", "Tinkle Bell", "Agogo", "Steel Drums", "Woodblock",
|
| 259 |
+
"Taiko Drum", "Melodic Tom", "Synth Drum", "Reverse Cymbal", "Guitar Fret Noise",
|
| 260 |
+
"Breath Noise", "Seashore", "Bird Tweet", "Telephone Ring", "Helicopter",
|
| 261 |
+
"Applause", "Gunshot",
|
| 262 |
+
]
|
| 263 |
+
|
| 264 |
+
# Global model and tokenizer
|
| 265 |
+
device = None
|
| 266 |
+
tokenizer = None
|
| 267 |
+
model = None
|
| 268 |
+
|
| 269 |
+
|
| 270 |
+
def get_model_and_tokenizer():
    """Lazily load the HF tokenizer and causal LM on CPU (module-level cache)."""
    global model, tokenizer, device
    needs_load = model is None or tokenizer is None
    if needs_load:
        device = torch.device("cpu")
        print("Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained("juancopi81/lmd_8bars_tokenizer")
        print("Loading model on CPU...")
        model = AutoModelForCausalLM.from_pretrained("juancopi81/lmd-8bars-2048-epochs40_v4")
        model = model.to(device)
        model.eval()  # inference mode only; no training on the Space
        print("Model loaded successfully!")
    return model, tokenizer
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
def empty_note_sequence(qpm: float = 120.0, total_time: float = 0.0) -> "NoteSequence":
    """Build a NoteSequence with one tempo mark and no notes.

    Args:
        qpm: Tempo in quarter notes per minute.
        total_time: Initial total_time of the sequence, in seconds.
    """
    seq = NoteSequence()
    seq.tempos.add().qpm = qpm
    seq.ticks_per_quarter = STANDARD_PPQ
    seq.total_time = total_time
    return seq
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
def token_sequence_to_note_sequence(
    token_sequence: str,
    qpm: float = 120.0,
    use_program: bool = True,
    use_drums: bool = True,
    instrument_mapper: Optional[dict] = None,
    only_piano: bool = False,
) -> "NoteSequence":
    """Decode an MMM-style token stream ("PIECE_START ... NOTE_ON=60 ...") into notes.

    Args:
        token_sequence: Token string (or an already-split token list).
        qpm: Tempo used to convert 16th-note steps into seconds.
        use_program: Honor ``INST=<program>`` tokens for non-drum tracks.
        use_drums: Route ``INST=DRUMS`` tokens to the drum channel.
        instrument_mapper: Optional remapping applied to INST values.
        only_piano: Force every non-drum note onto piano (program 0).
    """
    if isinstance(token_sequence, str):
        token_sequence = token_sequence.split()

    seq = empty_note_sequence(qpm)
    sixteenth = 0.25 * 60 / qpm    # seconds per 16th note
    bar_seconds = 4.0 * 60 / qpm   # seconds per 4/4 bar

    program = 1
    is_drum = False
    instrument_index = 0
    tracks_seen = 0
    bar_index = 0
    clock = 0
    open_notes = {}                # pitch -> most recent note awaiting NOTE_OFF

    for token in token_sequence:
        if token == "PIECE_END":
            break
        # Structural / metadata tokens that carry no timing or note data.
        if token in ("PIECE_START", "TRACK_END", "KEYS_START", "KEYS_END", "[PAD]"):
            continue
        if token.startswith("KEY=") or token.startswith("DENSITY="):
            continue
        if token == "TRACK_START":
            bar_index = 0
            tracks_seen += 1
        elif token.startswith("INST"):
            value = token.split("=")[-1]
            if value != "DRUMS" and use_program:
                if instrument_mapper is not None and value in instrument_mapper:
                    value = instrument_mapper[value]
                try:
                    program = int(value)
                except ValueError:
                    program = 0
                instrument_index = tracks_seen
                is_drum = False
            # Re-checked after mapping, mirroring the two-step original logic.
            if value == "DRUMS" and use_drums:
                instrument_index = 0
                program = 0
                is_drum = True
        elif token == "BAR_START":
            clock = bar_index * bar_seconds
            open_notes = {}
        elif token == "BAR_END":
            bar_index += 1
        elif token.startswith("NOTE_ON"):
            try:
                pitch = int(token.split("=")[-1])
            except ValueError:
                continue
            note = seq.notes.add()
            note.start_time = clock
            note.end_time = clock + 4 * sixteenth  # default length until NOTE_OFF
            note.pitch = pitch
            note.instrument = instrument_index
            note.program = program
            note.velocity = 80
            note.is_drum = is_drum
            open_notes[pitch] = note
        elif token.startswith("NOTE_OFF"):
            try:
                pitch = int(token.split("=")[-1])
            except ValueError:
                continue
            if pitch in open_notes:
                open_notes[pitch].end_time = clock
        elif token.startswith("TIME_DELTA"):
            try:
                clock += float(token.split("=")[-1]) * sixteenth
            except ValueError:
                continue

    # Renumber instruments so each (program, is_drum) pair gets its own slot.
    seen_pairs = []
    for note in seq.notes:
        pair = [note.program, note.is_drum]
        if pair not in seen_pairs:
            seen_pairs.append(pair)
        note.instrument = seen_pairs.index(pair)

    if only_piano:
        for note in seq.notes:
            if not note.is_drum:
                note.instrument = 0
                note.program = 0

    return seq
|
| 403 |
+
|
| 404 |
+
|
| 405 |
+
def create_seed_string(genre: str = "OTHER") -> str:
    """Return the prompt tokens that start a new piece for *genre*.

    "RANDOM" yields a bare "PIECE_START" so the model picks the genre itself.
    """
    return "PIECE_START" if genre == "RANDOM" else f"PIECE_START GENRE={genre} TRACK_START"
|
| 410 |
+
|
| 411 |
+
|
| 412 |
+
def get_instruments(text_sequence: str) -> List[str]:
    """Map every INST= token in *text_sequence* to a readable instrument name.

    "INST=DRUMS" becomes "Drums"; numeric programs use the GM_INSTRUMENTS
    table, falling back to "Program <n>" for out-of-range numbers.
    Non-numeric values are silently skipped.
    """
    names = []
    for token in text_sequence.split():
        if not token.startswith("INST="):
            continue
        value = token[5:]
        if value == "DRUMS":
            names.append("Drums")
            continue
        try:
            program = int(value)
        except ValueError:
            continue
        if 0 <= program < len(GM_INSTRUMENTS):
            names.append(GM_INSTRUMENTS[program])
        else:
            names.append(f"Program {program}")
    return names
|
| 430 |
+
|
| 431 |
+
|
| 432 |
+
def generate_new_instrument(seed: str, temp: float = 0.75) -> str:
    """Sample one new instrument track continuing *seed*.

    Retries up to 5 times until the newly generated portion contains a
    NOTE_ON token; the last attempt is returned regardless so callers
    always receive a decoded sequence.

    Args:
        seed: Token string used as the generation prompt.
        temp: Sampling temperature.
    """
    # Local name avoids shadowing the module-level `model` global.
    mdl, tok = get_model_and_tokenizer()

    # Loop-invariant work hoisted out of the retry loop: the prompt encoding
    # and the TRACK_END stop token never change between attempts.
    seed_length = len(tok.encode(seed))
    input_ids = tok.encode(seed, return_tensors="pt").to(device)
    eos_token_id = tok.encode("TRACK_END")[0]

    generated_sequence = seed  # safe fallback; overwritten on the first attempt
    max_attempts = 5
    for _attempt in range(max_attempts):
        with torch.no_grad():
            generated_ids = mdl.generate(
                input_ids,
                max_new_tokens=2048,
                do_sample=True,
                temperature=temp,
                eos_token_id=eos_token_id,
            )
        generated_sequence = tok.decode(generated_ids[0])

        # Accept only if the continuation (past the seed) contains real notes.
        if "NOTE_ON" in tok.decode(generated_ids[0][seed_length:]):
            return generated_sequence

    # Return last attempt even if no NOTE_ON found
    return generated_sequence
|
| 460 |
+
|
| 461 |
+
|
| 462 |
+
def create_noteseq_piano_roll(note_sequence, title="Generated Music"):
    """Render a note_seq NoteSequence as a piano-roll PNG and return its path."""
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from matplotlib.patches import Rectangle

    fig, ax = plt.subplots(figsize=(14, 6))

    # One colour per instrument slot, cycling past ten.
    palette = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7', '#DDA0DD', '#98D8C8', '#F39C12', '#9B59B6', '#1ABC9C']

    notes = note_sequence.notes
    if len(notes) == 0:
        ax.text(0.5, 0.5, 'No notes generated', ha='center', va='center', fontsize=14)
        ax.set_xlim(0, 10)
        ax.set_ylim(0, 127)
    else:
        pitch_low = min(n.pitch for n in notes)
        pitch_high = max(n.pitch for n in notes)
        last_end = max(n.end_time for n in notes)

        for n in notes:
            ax.add_patch(Rectangle(
                (n.start_time, n.pitch - 0.4),
                n.end_time - n.start_time,
                0.8,
                facecolor=palette[n.instrument % len(palette)],
                edgecolor='black',
                linewidth=0.3,
                alpha=0.8,
            ))

        ax.set_xlim(0, last_end + 0.5)
        ax.set_ylim(pitch_low - 2, pitch_high + 2)

    ax.set_xlabel('Time (seconds)')
    ax.set_ylabel('MIDI Pitch')
    ax.set_title(title)
    ax.grid(True, alpha=0.3)

    # Reserve a temp file name, then write the figure into it.
    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f:
        img_path = f.name
    fig.savefig(img_path, dpi=100, bbox_inches='tight')
    plt.close(fig)

    return img_path
|
| 511 |
+
|
| 512 |
+
|
| 513 |
+
def get_outputs_from_string(generated_sequence: str, qpm: int = 120):
    """Convert a generated token string into every UI output format.

    Returns:
        (midi_path, piano_roll_png_path, instruments markdown, token count
        as a string, wav path or None).
    """
    num_tokens = str(len(generated_sequence.split()))
    instruments_str = "\n".join(
        f"- {instrument}" for instrument in get_instruments(generated_sequence)
    )

    notes = token_sequence_to_note_sequence(generated_sequence, qpm=qpm)
    img_path = create_noteseq_piano_roll(notes, f"Generated at {qpm} BPM")

    # Reserve a temp .mid name, write the sequence, then synthesize audio.
    with tempfile.NamedTemporaryFile(suffix='.mid', delete=False) as f:
        midi_path = f.name
    note_seq.note_sequence_to_midi_file(notes, midi_path)
    audio_path = render_midi_to_audio(midi_path)

    return midi_path, img_path, instruments_str, num_tokens, audio_path
|
| 533 |
+
|
| 534 |
+
|
| 535 |
+
def generate_song(genre: str = "OTHER", temp: float = 0.75, text_sequence: str = "", qpm: int = 120):
    """Generate (or extend) a song and return all UI outputs.

    An empty *text_sequence* starts a fresh piece seeded from *genre*;
    otherwise generation continues from the given tokens.
    """
    if not TORCH_AVAILABLE:
        return None, None, "PyTorch not available", "", "0", None

    seed_string = create_seed_string(genre) if text_sequence == "" else text_sequence

    generated_sequence = generate_new_instrument(seed=seed_string, temp=temp)
    midi_file, fig, instruments_str, num_tokens, audio_path = get_outputs_from_string(
        generated_sequence, qpm
    )
    return midi_file, fig, instruments_str, generated_sequence, num_tokens, audio_path
|
| 550 |
+
|
| 551 |
+
|
| 552 |
+
def remove_last_instrument(text_sequence: str, qpm: int = 120):
    """Drop the last TRACK_START section from *text_sequence* and rebuild outputs.

    When removing the last track would leave no music (one or zero tracks),
    a fresh track/song is generated instead.

    Args:
        text_sequence: Full token string of the current song.
        qpm: Tempo used when rebuilding the outputs.
    """
    if not TORCH_AVAILABLE:
        return None, None, "PyTorch not available", "", "0", None

    tracks = text_sequence.split("TRACK_START")
    new_song = "TRACK_START".join(tracks[:-1])

    if len(tracks) == 2:
        # Only the piece header would remain: generate a new track from it.
        # Fix: pass qpm so the tempo slider is honored (previously this branch
        # silently reset the tempo to the 120 BPM default).
        midi_file, fig, instruments_str, new_song, num_tokens, audio_path = generate_song(
            text_sequence=new_song, qpm=qpm
        )
    elif len(tracks) == 1:
        # No TRACK_START at all: start over from scratch (same qpm fix).
        midi_file, fig, instruments_str, new_song, num_tokens, audio_path = generate_song(
            text_sequence="", qpm=qpm
        )
    else:
        midi_file, fig, instruments_str, num_tokens, audio_path = get_outputs_from_string(
            new_song, qpm
        )

    return midi_file, fig, instruments_str, new_song, num_tokens, audio_path
|
| 575 |
+
|
| 576 |
+
|
| 577 |
+
def regenerate_last_instrument(text_sequence: str, qpm: int = 120):
    """Re-sample the last instrument: keep tokens up to the final INST= and regenerate."""
    if not TORCH_AVAILABLE:
        return None, None, "PyTorch not available", "", "0", None

    last_inst_index = text_sequence.rfind("INST=")
    if last_inst_index == -1:
        # No instrument present at all: start a brand-new song.
        return generate_song(text_sequence="", qpm=qpm)

    next_space_index = text_sequence.find(" ", last_inst_index)
    # Keep everything up to (and including) the final INST= token; when no
    # space follows it, the sequence already ends with that token.
    new_seed = text_sequence if next_space_index == -1 else text_sequence[:next_space_index]
    return generate_song(text_sequence=new_seed, qpm=qpm)
|
| 598 |
+
|
| 599 |
+
|
| 600 |
+
def change_tempo(text_sequence: str, qpm: int):
    """Re-render an existing token sequence at tempo *qpm* (notes unchanged)."""
    if not TORCH_AVAILABLE:
        return None, None, "PyTorch not available", "", "0", None
    if not text_sequence or not text_sequence.strip():
        return None, None, "No sequence to process", "", "0", None

    midi_file, img, instruments_str, num_tokens, audio_path = get_outputs_from_string(
        text_sequence, qpm=qpm
    )
    return midi_file, img, instruments_str, text_sequence, num_tokens, audio_path
|
| 612 |
+
|
| 613 |
+
|
| 614 |
+
# =============================================================================
|
| 615 |
+
# SkyTNT Model Integration
|
| 616 |
+
# =============================================================================
|
| 617 |
+
|
| 618 |
+
try:
|
| 619 |
+
from skytnt_generator import generate_midi as skytnt_generate, get_available_instruments, get_available_drum_kits, ONNX_AVAILABLE
|
| 620 |
+
SKYTNT_AVAILABLE = ONNX_AVAILABLE
|
| 621 |
+
except ImportError:
|
| 622 |
+
SKYTNT_AVAILABLE = False
|
| 623 |
+
print("SkyTNT generator not available")
|
| 624 |
+
|
| 625 |
+
|
| 626 |
+
def generate_skytnt(instruments, drum_kit, bpm, max_events, temp, top_p, top_k, seed_rand, seed):
    """Generate music with the SkyTNT ONNX model.

    Returns:
        (midi_path, piano_roll_png, status message, wav path) — Nones with an
        explanatory status when the model is unavailable or generation fails.
    """
    if not SKYTNT_AVAILABLE:
        return None, None, "SkyTNT model not available", None

    chosen = instruments if instruments else []
    rng_seed = None if seed_rand else int(seed)  # random seed unless pinned

    try:
        midi_path = skytnt_generate(
            instruments=chosen,
            drum_kit=drum_kit,
            bpm=int(bpm),
            max_events=int(max_events),
            temp=temp,
            top_p=top_p,
            top_k=int(top_k),
            seed=rng_seed,
        )
        if midi_path is None:
            return None, None, "Generation failed", None

        img_path = create_skytnt_piano_roll(midi_path)
        audio_path = render_midi_to_audio(midi_path)

        status = f"Generated with {len(chosen)} instruments at {bpm} BPM"
        if audio_path:
            status += " - Audio rendered!"
        return midi_path, img_path, status, audio_path

    except Exception as e:
        return None, None, f"Error: {str(e)}", None
|
| 664 |
+
|
| 665 |
+
|
| 666 |
+
def create_skytnt_piano_roll(midi_path: str):
    """Render a piano-roll PNG for a MIDI file parsed with the local MIDI module.

    Args:
        midi_path: Path of the MIDI file to visualize.

    Returns:
        Path of a temporary PNG, or None if parsing/plotting fails.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from matplotlib.patches import Rectangle

    try:
        import MIDI
        # Fix: close the file deterministically — the previous
        # open(midi_path, 'rb').read() leaked the handle.
        with open(midi_path, 'rb') as fh:
            midi_data = MIDI.midi2score(fh.read())

        fig, ax = plt.subplots(figsize=(14, 6))
        colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7', '#DDA0DD', '#98D8C8', '#F39C12', '#9B59B6', '#1ABC9C']

        # midi2score yields [ticks_per_beat, track1, track2, ...].
        ticks_per_beat = midi_data[0]
        all_notes = []

        for track in midi_data[1:]:
            for event in track:
                if event[0] == 'note':
                    start_time = event[1] / ticks_per_beat
                    duration = event[2] / ticks_per_beat
                    channel = event[3]
                    pitch = event[4]
                    all_notes.append((start_time, duration, pitch, channel))

        if not all_notes:
            ax.text(0.5, 0.5, 'No notes generated', ha='center', va='center', fontsize=14)
            ax.set_xlim(0, 10)
            ax.set_ylim(0, 127)
        else:
            min_pitch = min(n[2] for n in all_notes)
            max_pitch = max(n[2] for n in all_notes)
            max_time = max(n[0] + n[1] for n in all_notes)

            for start, dur, pitch, channel in all_notes:
                color = colors[channel % len(colors)]
                rect = Rectangle((start, pitch - 0.4), dur, 0.8,
                                 facecolor=color, edgecolor='black', linewidth=0.3, alpha=0.8)
                ax.add_patch(rect)

            ax.set_xlim(0, max_time + 0.5)
            ax.set_ylim(min_pitch - 2, max_pitch + 2)

        ax.set_xlabel('Time (beats)')
        ax.set_ylabel('MIDI Pitch')
        ax.set_title('Generated Music (SkyTNT)')
        ax.grid(True, alpha=0.3)

        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f:
            img_path = f.name
        fig.savefig(img_path, dpi=100, bbox_inches='tight')
        plt.close(fig)

        return img_path
    except Exception as e:
        print(f"Visualization error: {e}")
        return None
|
| 724 |
+
|
| 725 |
+
|
| 726 |
+
# =============================================================================
|
| 727 |
+
# Gradio Interface
|
| 728 |
+
# =============================================================================
|
| 729 |
+
|
| 730 |
+
GENRES = ["ROCK", "POP", "OTHER", "R&B/SOUL", "JAZZ", "ELECTRONIC", "RANDOM"]
|
| 731 |
+
|
| 732 |
+
|
| 733 |
+
def build_ui():
|
| 734 |
+
"""Build Gradio interface for AI Multitrack MIDI Composer."""
|
| 735 |
+
with gr.Blocks(title="Multitrack MIDI Composer") as demo:
|
| 736 |
+
gr.Markdown("""
|
| 737 |
+
# 🎹 Multitrack MIDI Composer
|
| 738 |
+
|
| 739 |
+
AI-powered multi-instrument music generation. Choose a model and generate!
|
| 740 |
+
""")
|
| 741 |
+
|
| 742 |
+
with gr.Tabs():
|
| 743 |
+
# Tab 1: Multitrack Generator (juancopi81) - Default
|
| 744 |
+
with gr.TabItem("Multitrack Generator (Genre-based)"):
|
| 745 |
+
if not TORCH_AVAILABLE:
|
| 746 |
+
gr.Markdown("⚠️ PyTorch/Transformers not installed.")
|
| 747 |
+
else:
|
| 748 |
+
with gr.Row():
|
| 749 |
+
with gr.Column():
|
| 750 |
+
mt_temp = gr.Slider(0, 1, step=0.05, value=0.85, label="Temperature")
|
| 751 |
+
mt_genre = gr.Dropdown(choices=GENRES, value="POP", label="Select Genre")
|
| 752 |
+
|
| 753 |
+
with gr.Row():
|
| 754 |
+
btn_from_scratch = gr.Button("Start from scratch", variant="primary")
|
| 755 |
+
btn_continue = gr.Button("Continue Generation")
|
| 756 |
+
with gr.Row():
|
| 757 |
+
btn_remove_last = gr.Button("Remove last instrument")
|
| 758 |
+
btn_regenerate_last = gr.Button("Regenerate last instrument")
|
| 759 |
+
|
| 760 |
+
with gr.Column():
|
| 761 |
+
mt_audio = gr.Audio(label="Listen")
|
| 762 |
+
with gr.Group():
|
| 763 |
+
mt_midi = gr.File(label="Download MIDI File")
|
| 764 |
+
with gr.Row():
|
| 765 |
+
mt_qpm = gr.Slider(60, 140, step=10, value=120, label="Tempo")
|
| 766 |
+
btn_qpm = gr.Button("Change Tempo")
|
| 767 |
+
|
| 768 |
+
with gr.Row():
|
| 769 |
+
with gr.Column():
|
| 770 |
+
mt_img = gr.Image(label="Music Visualization")
|
| 771 |
+
with gr.Column():
|
| 772 |
+
mt_instruments = gr.Markdown("### Instruments")
|
| 773 |
+
|
| 774 |
+
mt_sequence = gr.Textbox(label="Token Sequence", lines=3)
|
| 775 |
+
mt_empty = gr.Textbox(visible=False, value="")
|
| 776 |
+
mt_tokens = gr.Textbox(visible=False)
|
| 777 |
+
|
| 778 |
+
btn_from_scratch.click(
|
| 779 |
+
fn=generate_song,
|
| 780 |
+
inputs=[mt_genre, mt_temp, mt_empty, mt_qpm],
|
| 781 |
+
outputs=[mt_midi, mt_img, mt_instruments, mt_sequence, mt_tokens, mt_audio]
|
| 782 |
+
)
|
| 783 |
+
btn_continue.click(
|
| 784 |
+
fn=generate_song,
|
| 785 |
+
inputs=[mt_genre, mt_temp, mt_sequence, mt_qpm],
|
| 786 |
+
outputs=[mt_midi, mt_img, mt_instruments, mt_sequence, mt_tokens, mt_audio]
|
| 787 |
+
)
|
| 788 |
+
btn_remove_last.click(
|
| 789 |
+
fn=remove_last_instrument,
|
| 790 |
+
inputs=[mt_sequence, mt_qpm],
|
| 791 |
+
outputs=[mt_midi, mt_img, mt_instruments, mt_sequence, mt_tokens, mt_audio]
|
| 792 |
+
)
|
| 793 |
+
btn_regenerate_last.click(
|
| 794 |
+
fn=regenerate_last_instrument,
|
| 795 |
+
inputs=[mt_sequence, mt_qpm],
|
| 796 |
+
outputs=[mt_midi, mt_img, mt_instruments, mt_sequence, mt_tokens, mt_audio]
|
| 797 |
+
)
|
| 798 |
+
btn_qpm.click(
|
| 799 |
+
fn=change_tempo,
|
| 800 |
+
inputs=[mt_sequence, mt_qpm],
|
| 801 |
+
outputs=[mt_midi, mt_img, mt_instruments, mt_sequence, mt_tokens, mt_audio]
|
| 802 |
+
)
|
| 803 |
+
|
| 804 |
+
gr.Markdown("**Model**: [juancopi81/lmd-8bars-2048-epochs40_v4](https://huggingface.co/juancopi81/lmd-8bars-2048-epochs40_v4)")
|
| 805 |
+
|
| 806 |
+
# Tab 2: SkyTNT MIDI Model
|
| 807 |
+
with gr.TabItem("SkyTNT MIDI Model"):
|
| 808 |
+
if not SKYTNT_AVAILABLE:
|
| 809 |
+
gr.Markdown("⚠️ SkyTNT model not available (onnxruntime required).")
|
| 810 |
+
else:
|
| 811 |
+
gr.Markdown("Select instruments and generate MIDI events. Processing: ~20 seconds for 200 events.")
|
| 812 |
+
|
| 813 |
+
with gr.Row():
|
| 814 |
+
with gr.Column():
|
| 815 |
+
sky_instruments = gr.Dropdown(
|
| 816 |
+
label="Instruments (optional, auto if empty)",
|
| 817 |
+
choices=get_available_instruments(),
|
| 818 |
+
multiselect=True,
|
| 819 |
+
max_choices=10
|
| 820 |
+
)
|
| 821 |
+
sky_drum_kit = gr.Dropdown(
|
| 822 |
+
label="Drum Kit",
|
| 823 |
+
choices=get_available_drum_kits(),
|
| 824 |
+
value="None"
|
| 825 |
+
)
|
| 826 |
+
sky_bpm = gr.Slider(60, 200, step=5, value=120, label="BPM")
|
| 827 |
+
sky_max_events = gr.Slider(50, 500, step=50, value=200, label="Max Events")
|
| 828 |
+
|
| 829 |
+
with gr.Column():
|
| 830 |
+
with gr.Accordion("Advanced Options", open=False):
|
| 831 |
+
sky_temp = gr.Slider(0.1, 1.5, step=0.05, value=1.0, label="Temperature")
|
| 832 |
+
sky_top_p = gr.Slider(0.5, 1.0, step=0.05, value=0.95, label="Top-p")
|
| 833 |
+
sky_top_k = gr.Slider(1, 50, step=1, value=20, label="Top-k")
|
| 834 |
+
sky_seed_rand = gr.Checkbox(label="Random Seed", value=True)
|
| 835 |
+
sky_seed = gr.Number(label="Seed", value=42)
|
| 836 |
+
|
| 837 |
+
btn_sky_generate = gr.Button("Generate", variant="primary")
|
| 838 |
+
|
| 839 |
+
with gr.Row():
|
| 840 |
+
sky_audio = gr.Audio(label="Listen")
|
| 841 |
+
sky_midi = gr.File(label="Download MIDI")
|
| 842 |
+
|
| 843 |
+
sky_img = gr.Image(label="Visualization")
|
| 844 |
+
sky_status = gr.Textbox(label="Status", interactive=False)
|
| 845 |
+
|
| 846 |
+
btn_sky_generate.click(
|
| 847 |
+
fn=generate_skytnt,
|
| 848 |
+
inputs=[sky_instruments, sky_drum_kit, sky_bpm, sky_max_events,
|
| 849 |
+
sky_temp, sky_top_p, sky_top_k, sky_seed_rand, sky_seed],
|
| 850 |
+
outputs=[sky_midi, sky_img, sky_status, sky_audio]
|
| 851 |
+
)
|
| 852 |
+
|
| 853 |
+
gr.Markdown("**Model**: [skytnt/midi-model](https://huggingface.co/skytnt/midi-model) (ONNX)")
|
| 854 |
+
|
| 855 |
+
gr.Markdown("""
|
| 856 |
+
---
|
| 857 |
+
**Credits**: Dr. Tristan Behrens (Multitrack) | SkyTNT (MIDI Model)
|
| 858 |
+
""")
|
| 859 |
+
|
| 860 |
+
return demo
|
| 861 |
+
|
| 862 |
+
|
| 863 |
+
# =============================================================================
|
| 864 |
+
# CLI Interface
|
| 865 |
+
# =============================================================================
|
| 866 |
+
|
| 867 |
+
def cli_main():
|
| 868 |
+
"""CLI entry point."""
|
| 869 |
+
parser = argparse.ArgumentParser(description="Multitrack MIDI Composer")
|
| 870 |
+
subparsers = parser.add_subparsers(dest="command", help="Commands")
|
| 871 |
+
|
| 872 |
+
# Demo command
|
| 873 |
+
demo_parser = subparsers.add_parser("demo", help="Generate demo MIDI")
|
| 874 |
+
demo_parser.add_argument("--tempo", type=int, default=120, help="Tempo in BPM")
|
| 875 |
+
demo_parser.add_argument("--bars", type=int, default=4, help="Number of bars")
|
| 876 |
+
demo_parser.add_argument("--output", "-o", type=str, default="demo.mid", help="Output file")
|
| 877 |
+
|
| 878 |
+
# Generate command
|
| 879 |
+
gen_parser = subparsers.add_parser("generate", help="Generate multitrack music")
|
| 880 |
+
gen_parser.add_argument("--genre", type=str, default="POP", choices=GENRES)
|
| 881 |
+
gen_parser.add_argument("--tempo", type=int, default=120, help="Tempo in BPM")
|
| 882 |
+
gen_parser.add_argument("--temperature", type=float, default=0.85, help="Sampling temperature")
|
| 883 |
+
gen_parser.add_argument("--output", "-o", type=str, default="output.mid", help="Output file")
|
| 884 |
+
|
| 885 |
+
args = parser.parse_args()
|
| 886 |
+
|
| 887 |
+
if args.command == "demo":
|
| 888 |
+
midi_path, fig, audio_path, status = create_demo_midi(args.tempo, args.bars)
|
| 889 |
+
if midi_path:
|
| 890 |
+
import shutil
|
| 891 |
+
shutil.copy(midi_path, args.output)
|
| 892 |
+
print(f"Created: {args.output}")
|
| 893 |
+
if audio_path:
|
| 894 |
+
audio_out = args.output.replace('.mid', '.wav')
|
| 895 |
+
shutil.copy(audio_path, audio_out)
|
| 896 |
+
print(f"Audio: {audio_out}")
|
| 897 |
+
print(status)
|
| 898 |
+
else:
|
| 899 |
+
print(f"Error: {status}")
|
| 900 |
+
|
| 901 |
+
elif args.command == "generate":
|
| 902 |
+
if not TORCH_AVAILABLE:
|
| 903 |
+
print("Error: PyTorch not available. Install: pip install torch transformers note-seq")
|
| 904 |
+
return
|
| 905 |
+
print(f"Generating {args.genre} music at {args.tempo} BPM...")
|
| 906 |
+
midi_path, fig, instruments, sequence, tokens, audio_path = generate_song(
|
| 907 |
+
genre=args.genre, temp=args.temperature, qpm=args.tempo
|
| 908 |
+
)
|
| 909 |
+
if midi_path:
|
| 910 |
+
import shutil
|
| 911 |
+
shutil.copy(midi_path, args.output)
|
| 912 |
+
print(f"Created: {args.output}")
|
| 913 |
+
if audio_path:
|
| 914 |
+
audio_out = args.output.replace('.mid', '.wav')
|
| 915 |
+
shutil.copy(audio_path, audio_out)
|
| 916 |
+
print(f"Audio: {audio_out}")
|
| 917 |
+
print(f"Instruments:\n{instruments}")
|
| 918 |
+
print(f"Tokens: {tokens}")
|
| 919 |
+
|
| 920 |
+
else:
|
| 921 |
+
parser.print_help()
|
| 922 |
+
|
| 923 |
+
|
| 924 |
+
if __name__ == "__main__":
|
| 925 |
+
if len(sys.argv) > 1 and sys.argv[1] in ["demo", "generate", "-h", "--help"]:
|
| 926 |
+
cli_main()
|
| 927 |
+
else:
|
| 928 |
+
# Preload model if available
|
| 929 |
+
if TORCH_AVAILABLE:
|
| 930 |
+
print("Initializing model...")
|
| 931 |
+
get_model_and_tokenizer()
|
| 932 |
+
|
| 933 |
+
demo = build_ui()
|
| 934 |
+
demo.launch()
|
gitattributes
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.sf2 filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.mid
|
meltysynth.py
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
model_base_int8.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:259c89650e8d87cf70682af8d4aaa9827061d2a42fcf52cd6f0678bd6406c541
|
| 3 |
+
size 206967431
|
model_token_int8.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b7a0486ac531d193d51c5535857bea07a77306648e8e11942d8435c4756be83
|
| 3 |
+
size 29298660
|
requirements.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
--extra-index-url https://download.pytorch.org/whl/cpu
|
| 2 |
+
gradio==6.3.0
|
| 3 |
+
torch
|
| 4 |
+
transformers
|
| 5 |
+
note-seq
|
| 6 |
+
matplotlib
|
| 7 |
+
midiutil>=1.2.1
|
| 8 |
+
numpy>=1.24.0
|
| 9 |
+
huggingface_hub>=0.19.0
|
| 10 |
+
onnxruntime
|
| 11 |
+
optimum[onnxruntime]
|
| 12 |
+
tqdm
|
| 13 |
+
Pillow
|
skytnt_generator.py
ADDED
|
@@ -0,0 +1,433 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
SkyTNT MIDI Model Generator - CPU ONNX version
|
| 3 |
+
Based on: https://huggingface.co/spaces/skytnt/midi-composer
|
| 4 |
+
Optimized with: ONNX Runtime + Optimum + OMP_NUM_THREADS=2
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import json
|
| 9 |
+
import tempfile
|
| 10 |
+
from typing import Optional, Tuple, List
|
| 11 |
+
|
| 12 |
+
# Set OpenMP threads before importing numpy/onnxruntime
|
| 13 |
+
os.environ["OMP_NUM_THREADS"] = "2"
|
| 14 |
+
os.environ["OMP_WAIT_POLICY"] = "ACTIVE"
|
| 15 |
+
|
| 16 |
+
import numpy as np
|
| 17 |
+
import tqdm
|
| 18 |
+
|
| 19 |
+
try:
|
| 20 |
+
import onnxruntime as ort
|
| 21 |
+
ONNX_AVAILABLE = True
|
| 22 |
+
except ImportError:
|
| 23 |
+
ONNX_AVAILABLE = False
|
| 24 |
+
print("onnxruntime not available - SkyTNT model disabled")
|
| 25 |
+
|
| 26 |
+
from huggingface_hub import hf_hub_download
|
| 27 |
+
|
| 28 |
+
# Import tokenizer and MIDI library
|
| 29 |
+
from skytnt_tokenizer import MIDITokenizer
|
| 30 |
+
import MIDI
|
| 31 |
+
|
| 32 |
+
# Globals for cached model
|
| 33 |
+
_skytnt_model = None
|
| 34 |
+
_skytnt_tokenizer = None
|
| 35 |
+
|
| 36 |
+
# Path to local INT8 quantized models
|
| 37 |
+
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 38 |
+
INT8_MODEL_BASE = os.path.join(SCRIPT_DIR, "model_base_int8.onnx")
|
| 39 |
+
INT8_MODEL_TOKEN = os.path.join(SCRIPT_DIR, "model_token_int8.onnx")
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def softmax(x, axis):
|
| 43 |
+
"""Numpy softmax implementation."""
|
| 44 |
+
x_max = np.amax(x, axis=axis, keepdims=True)
|
| 45 |
+
exp_x_shifted = np.exp(x - x_max)
|
| 46 |
+
return exp_x_shifted / np.sum(exp_x_shifted, axis=axis, keepdims=True)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def sample_top_p_k(probs, p, k, generator=None):
|
| 50 |
+
"""Top-p and top-k sampling."""
|
| 51 |
+
if generator is None:
|
| 52 |
+
generator = np.random
|
| 53 |
+
probs_idx = np.argsort(-probs, axis=-1)
|
| 54 |
+
probs_sort = np.take_along_axis(probs, probs_idx, -1)
|
| 55 |
+
probs_sum = np.cumsum(probs_sort, axis=-1)
|
| 56 |
+
mask = probs_sum - probs_sort > p
|
| 57 |
+
probs_sort[mask] = 0.0
|
| 58 |
+
mask = np.zeros(probs_sort.shape[-1])
|
| 59 |
+
mask[:k] = 1
|
| 60 |
+
probs_sort = probs_sort * mask
|
| 61 |
+
probs_sort /= np.sum(probs_sort, axis=-1, keepdims=True)
|
| 62 |
+
shape = probs_sort.shape
|
| 63 |
+
probs_sort_flat = probs_sort.reshape(-1, shape[-1])
|
| 64 |
+
probs_idx_flat = probs_idx.reshape(-1, shape[-1])
|
| 65 |
+
next_token = np.stack([generator.choice(idxs, p=pvals) for pvals, idxs in zip(probs_sort_flat, probs_idx_flat)])
|
| 66 |
+
next_token = next_token.reshape(*shape[:-1])
|
| 67 |
+
return next_token
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def load_skytnt_model():
|
| 71 |
+
"""Load SkyTNT ONNX models and tokenizer."""
|
| 72 |
+
global _skytnt_model, _skytnt_tokenizer
|
| 73 |
+
|
| 74 |
+
if not ONNX_AVAILABLE:
|
| 75 |
+
return None, None, None
|
| 76 |
+
|
| 77 |
+
if _skytnt_model is not None:
|
| 78 |
+
return _skytnt_model[0], _skytnt_model[1], _skytnt_tokenizer
|
| 79 |
+
|
| 80 |
+
try:
|
| 81 |
+
print("Loading SkyTNT MIDI model (INT8 quantized)...")
|
| 82 |
+
repo_id = "skytnt/midi-model"
|
| 83 |
+
|
| 84 |
+
# Check for local INT8 models first
|
| 85 |
+
if os.path.exists(INT8_MODEL_BASE) and os.path.exists(INT8_MODEL_TOKEN):
|
| 86 |
+
print(f"Using local INT8 models...")
|
| 87 |
+
model_base_path = INT8_MODEL_BASE
|
| 88 |
+
model_token_path = INT8_MODEL_TOKEN
|
| 89 |
+
else:
|
| 90 |
+
# Fallback to downloading FP32 models
|
| 91 |
+
print("INT8 models not found, downloading FP32 models...")
|
| 92 |
+
print("Downloading model_base.onnx (~821MB)...")
|
| 93 |
+
model_base_path = hf_hub_download(repo_id=repo_id, filename="onnx/model_base.onnx")
|
| 94 |
+
print("Downloading model_token.onnx (~115MB)...")
|
| 95 |
+
model_token_path = hf_hub_download(repo_id=repo_id, filename="onnx/model_token.onnx")
|
| 96 |
+
|
| 97 |
+
# Download config for tokenizer
|
| 98 |
+
print("Downloading tokenizer config...")
|
| 99 |
+
config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
|
| 100 |
+
with open(config_path, "r") as f:
|
| 101 |
+
config = json.load(f)
|
| 102 |
+
|
| 103 |
+
# Initialize tokenizer
|
| 104 |
+
tokenizer = MIDITokenizer(config["tokenizer"]["version"])
|
| 105 |
+
tokenizer.set_optimise_midi(config["tokenizer"]["optimise_midi"])
|
| 106 |
+
|
| 107 |
+
# Load ONNX models with CPU provider and optimizations
|
| 108 |
+
sess_options = ort.SessionOptions()
|
| 109 |
+
sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
|
| 110 |
+
sess_options.intra_op_num_threads = 2
|
| 111 |
+
sess_options.inter_op_num_threads = 2
|
| 112 |
+
sess_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
|
| 113 |
+
|
| 114 |
+
# Enable memory optimizations
|
| 115 |
+
sess_options.enable_cpu_mem_arena = True
|
| 116 |
+
sess_options.enable_mem_pattern = True
|
| 117 |
+
sess_options.enable_mem_reuse = True
|
| 118 |
+
|
| 119 |
+
providers = ['CPUExecutionProvider']
|
| 120 |
+
print(f"Loading ONNX models on CPU (OMP_NUM_THREADS={os.environ.get('OMP_NUM_THREADS', 'default')})...")
|
| 121 |
+
model_base = ort.InferenceSession(model_base_path, sess_options, providers=providers)
|
| 122 |
+
model_token = ort.InferenceSession(model_token_path, sess_options, providers=providers)
|
| 123 |
+
|
| 124 |
+
_skytnt_model = (model_base, model_token)
|
| 125 |
+
_skytnt_tokenizer = tokenizer
|
| 126 |
+
|
| 127 |
+
print("SkyTNT model loaded successfully!")
|
| 128 |
+
return model_base, model_token, tokenizer
|
| 129 |
+
|
| 130 |
+
except Exception as e:
|
| 131 |
+
print(f"Error loading SkyTNT model: {e}")
|
| 132 |
+
import traceback
|
| 133 |
+
traceback.print_exc()
|
| 134 |
+
return None, None, None
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def generate(model_base, model_token, tokenizer, prompt=None, max_len=512,
|
| 138 |
+
temp=1.0, top_p=0.98, top_k=20,
|
| 139 |
+
disable_patch_change=False, disable_channels=None, generator=None):
|
| 140 |
+
"""
|
| 141 |
+
Generate MIDI tokens using the two-stage ONNX model.
|
| 142 |
+
Based on app_onnx.py generate() function.
|
| 143 |
+
"""
|
| 144 |
+
if disable_channels is None:
|
| 145 |
+
disable_channels = []
|
| 146 |
+
else:
|
| 147 |
+
disable_channels = [tokenizer.parameter_ids["channel"][c] for c in disable_channels]
|
| 148 |
+
|
| 149 |
+
if generator is None:
|
| 150 |
+
generator = np.random
|
| 151 |
+
|
| 152 |
+
max_token_seq = tokenizer.max_token_seq
|
| 153 |
+
batch_size = 1
|
| 154 |
+
|
| 155 |
+
# Initialize input
|
| 156 |
+
if prompt is None:
|
| 157 |
+
input_tensor = np.full((1, max_token_seq), tokenizer.pad_id, dtype=np.int64)
|
| 158 |
+
input_tensor[0, 0] = tokenizer.bos_id
|
| 159 |
+
input_tensor = input_tensor[None, :, :] # [1, 1, max_token_seq]
|
| 160 |
+
else:
|
| 161 |
+
if len(prompt.shape) == 2:
|
| 162 |
+
prompt = prompt[None, :]
|
| 163 |
+
prompt = prompt[..., :max_token_seq]
|
| 164 |
+
if prompt.shape[-1] < max_token_seq:
|
| 165 |
+
prompt = np.pad(prompt, ((0, 0), (0, 0), (0, max_token_seq - prompt.shape[-1])),
|
| 166 |
+
mode="constant", constant_values=tokenizer.pad_id)
|
| 167 |
+
input_tensor = prompt
|
| 168 |
+
|
| 169 |
+
cur_len = input_tensor.shape[1]
|
| 170 |
+
|
| 171 |
+
# Get model info
|
| 172 |
+
# model_base: 12 layers, [batch, 16, seq, 64]
|
| 173 |
+
# model_token: 3 layers, [batch, 4, seq, 256]
|
| 174 |
+
emb_size = 1024
|
| 175 |
+
|
| 176 |
+
# KV cache for model_base (12 layers)
|
| 177 |
+
model0_cache = {}
|
| 178 |
+
for i in range(12):
|
| 179 |
+
model0_cache[f"past_key_values.{i}.key"] = np.zeros((batch_size, 16, 0, 64), dtype=np.float32)
|
| 180 |
+
model0_cache[f"past_key_values.{i}.value"] = np.zeros((batch_size, 16, 0, 64), dtype=np.float32)
|
| 181 |
+
|
| 182 |
+
past_len = 0
|
| 183 |
+
bar = tqdm.tqdm(desc="generating", total=max_len - cur_len)
|
| 184 |
+
|
| 185 |
+
with bar:
|
| 186 |
+
while cur_len < max_len:
|
| 187 |
+
end = False
|
| 188 |
+
|
| 189 |
+
# Prepare model_base inputs
|
| 190 |
+
model0_inputs = {"x": input_tensor[:, past_len:]}
|
| 191 |
+
|
| 192 |
+
# Add KV cache
|
| 193 |
+
for name, cache in model0_cache.items():
|
| 194 |
+
model0_inputs[name] = cache
|
| 195 |
+
|
| 196 |
+
# Run model_base
|
| 197 |
+
output_names = [o.name for o in model_base.get_outputs()]
|
| 198 |
+
outputs0 = model_base.run(None, model0_inputs)
|
| 199 |
+
|
| 200 |
+
# Parse outputs
|
| 201 |
+
hidden = None
|
| 202 |
+
for i, name in enumerate(output_names):
|
| 203 |
+
if name == "hidden":
|
| 204 |
+
hidden = outputs0[i][:, -1:] # Take last hidden state
|
| 205 |
+
elif name.startswith("present."):
|
| 206 |
+
# Update cache: present -> past_key_values
|
| 207 |
+
past_name = name.replace("present.", "past_key_values.")
|
| 208 |
+
model0_cache[past_name] = outputs0[i]
|
| 209 |
+
|
| 210 |
+
if hidden is None:
|
| 211 |
+
print("Error: No hidden output")
|
| 212 |
+
break
|
| 213 |
+
|
| 214 |
+
# Generate token sequence using model_token
|
| 215 |
+
next_token_seq = np.zeros((batch_size, 0), dtype=np.int64)
|
| 216 |
+
event_name = ""
|
| 217 |
+
|
| 218 |
+
# KV cache for model_token (3 layers)
|
| 219 |
+
model1_cache = {}
|
| 220 |
+
for i in range(3):
|
| 221 |
+
model1_cache[f"past_key_values.{i}.key"] = np.zeros((batch_size, 4, 0, 256), dtype=np.float32)
|
| 222 |
+
model1_cache[f"past_key_values.{i}.value"] = np.zeros((batch_size, 4, 0, 256), dtype=np.float32)
|
| 223 |
+
|
| 224 |
+
for i in range(max_token_seq):
|
| 225 |
+
# Build mask
|
| 226 |
+
mask = np.zeros((batch_size, tokenizer.vocab_size), dtype=np.int64)
|
| 227 |
+
if end:
|
| 228 |
+
mask[0, tokenizer.pad_id] = 1
|
| 229 |
+
elif i == 0:
|
| 230 |
+
mask_ids = list(tokenizer.event_ids.values()) + [tokenizer.eos_id]
|
| 231 |
+
if disable_patch_change and tokenizer.event_ids["patch_change"] in mask_ids:
|
| 232 |
+
mask_ids.remove(tokenizer.event_ids["patch_change"])
|
| 233 |
+
mask[0, mask_ids] = 1
|
| 234 |
+
else:
|
| 235 |
+
param_names = tokenizer.events[event_name]
|
| 236 |
+
if i > len(param_names):
|
| 237 |
+
mask[0, tokenizer.pad_id] = 1
|
| 238 |
+
else:
|
| 239 |
+
param_name = param_names[i - 1]
|
| 240 |
+
mask_ids = tokenizer.parameter_ids[param_name]
|
| 241 |
+
if param_name == "channel":
|
| 242 |
+
mask_ids = [m for m in mask_ids if m not in disable_channels]
|
| 243 |
+
mask[0, mask_ids] = 1
|
| 244 |
+
|
| 245 |
+
mask = mask[:, None, :] # [batch, 1, vocab]
|
| 246 |
+
|
| 247 |
+
# Prepare inputs
|
| 248 |
+
if i == 0:
|
| 249 |
+
x = next_token_seq
|
| 250 |
+
h = hidden
|
| 251 |
+
else:
|
| 252 |
+
# Use cache - only pass last token
|
| 253 |
+
x = next_token_seq[:, -1:]
|
| 254 |
+
h = np.zeros((batch_size, 0, emb_size), dtype=np.float32)
|
| 255 |
+
|
| 256 |
+
model1_inputs = {"hidden": h, "x": x}
|
| 257 |
+
for name, cache in model1_cache.items():
|
| 258 |
+
model1_inputs[name] = cache
|
| 259 |
+
|
| 260 |
+
# Run model_token
|
| 261 |
+
output_names1 = [o.name for o in model_token.get_outputs()]
|
| 262 |
+
outputs1 = model_token.run(None, model1_inputs)
|
| 263 |
+
|
| 264 |
+
# Parse outputs
|
| 265 |
+
logits = None
|
| 266 |
+
for j, name in enumerate(output_names1):
|
| 267 |
+
if name == "y":
|
| 268 |
+
logits = outputs1[j]
|
| 269 |
+
elif name.startswith("present."):
|
| 270 |
+
past_name = name.replace("present.", "past_key_values.")
|
| 271 |
+
model1_cache[past_name] = outputs1[j]
|
| 272 |
+
|
| 273 |
+
if logits is None:
|
| 274 |
+
print("Error: No logits output")
|
| 275 |
+
break
|
| 276 |
+
|
| 277 |
+
# Sample
|
| 278 |
+
scores = softmax(logits / temp, -1) * mask
|
| 279 |
+
samples = sample_top_p_k(scores, top_p, top_k, generator)
|
| 280 |
+
|
| 281 |
+
if i == 0:
|
| 282 |
+
next_token_seq = samples
|
| 283 |
+
eid = samples[0].item()
|
| 284 |
+
if eid == tokenizer.eos_id:
|
| 285 |
+
end = True
|
| 286 |
+
else:
|
| 287 |
+
event_name = tokenizer.id_events[eid]
|
| 288 |
+
else:
|
| 289 |
+
next_token_seq = np.concatenate([next_token_seq, samples], axis=1)
|
| 290 |
+
if len(tokenizer.events[event_name]) == i:
|
| 291 |
+
break
|
| 292 |
+
|
| 293 |
+
# Pad token sequence
|
| 294 |
+
if next_token_seq.shape[1] < max_token_seq:
|
| 295 |
+
next_token_seq = np.pad(next_token_seq,
|
| 296 |
+
((0, 0), (0, max_token_seq - next_token_seq.shape[-1])),
|
| 297 |
+
mode="constant", constant_values=tokenizer.pad_id)
|
| 298 |
+
|
| 299 |
+
next_token_seq = next_token_seq[:, None, :]
|
| 300 |
+
input_tensor = np.concatenate([input_tensor, next_token_seq], axis=1)
|
| 301 |
+
past_len = cur_len
|
| 302 |
+
cur_len += 1
|
| 303 |
+
bar.update(1)
|
| 304 |
+
|
| 305 |
+
if end:
|
| 306 |
+
break
|
| 307 |
+
|
| 308 |
+
return input_tensor
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
def generate_midi(
|
| 312 |
+
instruments: List[str] = None,
|
| 313 |
+
drum_kit: str = "None",
|
| 314 |
+
bpm: int = 120,
|
| 315 |
+
max_events: int = 256,
|
| 316 |
+
temp: float = 1.0,
|
| 317 |
+
top_p: float = 0.95,
|
| 318 |
+
top_k: int = 20,
|
| 319 |
+
seed: int = None
|
| 320 |
+
) -> Optional[str]:
|
| 321 |
+
"""
|
| 322 |
+
Generate MIDI using SkyTNT model.
|
| 323 |
+
Processing time: ~20 seconds for 200 events on CPU.
|
| 324 |
+
"""
|
| 325 |
+
model_base, model_token, tokenizer = load_skytnt_model()
|
| 326 |
+
if model_base is None:
|
| 327 |
+
return None
|
| 328 |
+
|
| 329 |
+
# Setup random generator
|
| 330 |
+
if seed is None:
|
| 331 |
+
seed = np.random.randint(0, 2**31)
|
| 332 |
+
generator = np.random.RandomState(seed)
|
| 333 |
+
print(f"Using seed: {seed}")
|
| 334 |
+
|
| 335 |
+
# Build initial prompt
|
| 336 |
+
max_token_seq = tokenizer.max_token_seq
|
| 337 |
+
mid = [[tokenizer.bos_id] + [tokenizer.pad_id] * (max_token_seq - 1)]
|
| 338 |
+
|
| 339 |
+
# Add tempo
|
| 340 |
+
if bpm > 0:
|
| 341 |
+
mid.append(tokenizer.event2tokens(["set_tempo", 0, 0, 0, min(bpm, 255)]))
|
| 342 |
+
|
| 343 |
+
# Add instruments
|
| 344 |
+
patch2number = {v: k for k, v in MIDI.Number2patch.items()}
|
| 345 |
+
drum_kits = {"None": -1, "Standard": 0, "Room": 8, "Power": 16, "Electric": 24,
|
| 346 |
+
"TR-808": 25, "Jazz": 32, "Blush": 40, "Orchestra": 48}
|
| 347 |
+
|
| 348 |
+
patches = {}
|
| 349 |
+
if instruments:
|
| 350 |
+
i = 0
|
| 351 |
+
for instr in instruments[:15]:
|
| 352 |
+
if instr in patch2number:
|
| 353 |
+
patches[i] = patch2number[instr]
|
| 354 |
+
i = (i + 1) if i != 8 else 10
|
| 355 |
+
|
| 356 |
+
if drum_kit != "None" and drum_kit in drum_kits:
|
| 357 |
+
patches[9] = drum_kits[drum_kit]
|
| 358 |
+
|
| 359 |
+
for idx, (channel, patch) in enumerate(patches.items()):
|
| 360 |
+
mid.append(tokenizer.event2tokens(["patch_change", 0, 0, idx + 1, channel, patch]))
|
| 361 |
+
|
| 362 |
+
prompt = np.asarray([mid], dtype=np.int64)
|
| 363 |
+
|
| 364 |
+
# Setup generation params
|
| 365 |
+
disable_patch_change = len(instruments) > 0 if instruments else False
|
| 366 |
+
disable_channels = [i for i in range(16) if i not in patches] if disable_patch_change else None
|
| 367 |
+
|
| 368 |
+
max_len = prompt.shape[1] + max_events
|
| 369 |
+
|
| 370 |
+
print(f"Generating up to {max_events} events...")
|
| 371 |
+
|
| 372 |
+
# Generate
|
| 373 |
+
result = generate(
|
| 374 |
+
model_base, model_token, tokenizer,
|
| 375 |
+
prompt=prompt,
|
| 376 |
+
max_len=max_len,
|
| 377 |
+
temp=temp,
|
| 378 |
+
top_p=top_p,
|
| 379 |
+
top_k=top_k,
|
| 380 |
+
disable_patch_change=disable_patch_change,
|
| 381 |
+
disable_channels=disable_channels,
|
| 382 |
+
generator=generator
|
| 383 |
+
)
|
| 384 |
+
|
| 385 |
+
print(f"Generation complete: {result.shape[1]} total events")
|
| 386 |
+
|
| 387 |
+
# Convert to MIDI
|
| 388 |
+
try:
|
| 389 |
+
mid_seq = result[0].tolist()
|
| 390 |
+
midi_data = tokenizer.detokenize(mid_seq)
|
| 391 |
+
|
| 392 |
+
with tempfile.NamedTemporaryFile(suffix='.mid', delete=False) as f:
|
| 393 |
+
midi_path = f.name
|
| 394 |
+
|
| 395 |
+
with open(midi_path, 'wb') as f:
|
| 396 |
+
f.write(MIDI.score2midi(midi_data))
|
| 397 |
+
|
| 398 |
+
return midi_path
|
| 399 |
+
|
| 400 |
+
except Exception as e:
|
| 401 |
+
print(f"Error converting to MIDI: {e}")
|
| 402 |
+
import traceback
|
| 403 |
+
traceback.print_exc()
|
| 404 |
+
return None
|
| 405 |
+
|
| 406 |
+
|
| 407 |
+
def get_available_instruments() -> List[str]:
|
| 408 |
+
"""Get list of available instruments."""
|
| 409 |
+
return list(MIDI.Number2patch.values())
|
| 410 |
+
|
| 411 |
+
|
| 412 |
+
def get_available_drum_kits() -> List[str]:
|
| 413 |
+
"""Get list of available drum kits."""
|
| 414 |
+
return ["None", "Standard", "Room", "Power", "Electric", "TR-808", "Jazz", "Blush", "Orchestra"]
|
| 415 |
+
|
| 416 |
+
|
| 417 |
+
# CLI test
|
| 418 |
+
if __name__ == "__main__":
|
| 419 |
+
import time
|
| 420 |
+
print("Testing SkyTNT generator...")
|
| 421 |
+
start = time.time()
|
| 422 |
+
midi_path = generate_midi(
|
| 423 |
+
instruments=["Acoustic Grand"],
|
| 424 |
+
bpm=120,
|
| 425 |
+
max_events=50,
|
| 426 |
+
seed=42
|
| 427 |
+
)
|
| 428 |
+
elapsed = time.time() - start
|
| 429 |
+
if midi_path:
|
| 430 |
+
print(f"Generated: {midi_path}")
|
| 431 |
+
print(f"Time: {elapsed:.1f}s")
|
| 432 |
+
else:
|
| 433 |
+
print("Generation failed")
|
skytnt_tokenizer.py
ADDED
|
@@ -0,0 +1,1196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
from typing import Dict, Any
|
| 3 |
+
|
| 4 |
+
import PIL.Image
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class MIDITokenizerV1:
|
| 9 |
+
def __init__(self):
|
| 10 |
+
self.version = "v1"
|
| 11 |
+
self.optimise_midi = False
|
| 12 |
+
self.vocab_size = 0
|
| 13 |
+
|
| 14 |
+
def allocate_ids(size):
|
| 15 |
+
ids = [self.vocab_size + i for i in range(size)]
|
| 16 |
+
self.vocab_size += size
|
| 17 |
+
return ids
|
| 18 |
+
|
| 19 |
+
self.pad_id = allocate_ids(1)[0]
|
| 20 |
+
self.bos_id = allocate_ids(1)[0]
|
| 21 |
+
self.eos_id = allocate_ids(1)[0]
|
| 22 |
+
self.events = {
|
| 23 |
+
"note": ["time1", "time2", "track", "duration", "channel", "pitch", "velocity"],
|
| 24 |
+
"patch_change": ["time1", "time2", "track", "channel", "patch"],
|
| 25 |
+
"control_change": ["time1", "time2", "track", "channel", "controller", "value"],
|
| 26 |
+
"set_tempo": ["time1", "time2", "track", "bpm"],
|
| 27 |
+
}
|
| 28 |
+
self.event_parameters = {
|
| 29 |
+
"time1": 128, "time2": 16, "duration": 2048, "track": 128, "channel": 16, "pitch": 128, "velocity": 128,
|
| 30 |
+
"patch": 128, "controller": 128, "value": 128, "bpm": 256
|
| 31 |
+
}
|
| 32 |
+
self.event_ids = {e: allocate_ids(1)[0] for e in self.events.keys()}
|
| 33 |
+
self.id_events = {i: e for e, i in self.event_ids.items()}
|
| 34 |
+
self.parameter_ids = {p: allocate_ids(s) for p, s in self.event_parameters.items()}
|
| 35 |
+
self.max_token_seq = max([len(ps) for ps in self.events.values()]) + 1
|
| 36 |
+
|
| 37 |
+
def to_dict(self) -> Dict[str, Any]:
|
| 38 |
+
d = {
|
| 39 |
+
"version":self.version,
|
| 40 |
+
"optimise_midi":self.optimise_midi,
|
| 41 |
+
"vocab_size": self.vocab_size,
|
| 42 |
+
"events": self.events,
|
| 43 |
+
"event_parameters": self.event_parameters,
|
| 44 |
+
"max_token_seq": self.max_token_seq,
|
| 45 |
+
"pad_id": self.pad_id,
|
| 46 |
+
"bos_id": self.bos_id,
|
| 47 |
+
"eos_id": self.eos_id,
|
| 48 |
+
}
|
| 49 |
+
return d
|
| 50 |
+
|
| 51 |
+
def set_optimise_midi(self, optimise_midi=True):
|
| 52 |
+
self.optimise_midi = optimise_midi
|
| 53 |
+
|
| 54 |
+
@staticmethod
|
| 55 |
+
def tempo2bpm(tempo):
|
| 56 |
+
tempo = tempo / 10 ** 6 # us to s
|
| 57 |
+
bpm = 60 / tempo
|
| 58 |
+
return bpm
|
| 59 |
+
|
| 60 |
+
@staticmethod
|
| 61 |
+
def bpm2tempo(bpm):
|
| 62 |
+
if bpm == 0:
|
| 63 |
+
bpm = 1
|
| 64 |
+
tempo = int((60 / bpm) * 10 ** 6)
|
| 65 |
+
return tempo
|
| 66 |
+
|
| 67 |
+
def tokenize(self, midi_score, add_bos_eos=True, cc_eps=4, tempo_eps=4,
|
| 68 |
+
remap_track_channel=None, add_default_instr=None, remove_empty_channels=None):
|
| 69 |
+
if remap_track_channel is None: # set default value
|
| 70 |
+
remap_track_channel = self.optimise_midi
|
| 71 |
+
if add_default_instr is None:
|
| 72 |
+
add_default_instr = self.optimise_midi
|
| 73 |
+
if remove_empty_channels is None:
|
| 74 |
+
remove_empty_channels = self.optimise_midi
|
| 75 |
+
|
| 76 |
+
ticks_per_beat = midi_score[0]
|
| 77 |
+
event_list = {}
|
| 78 |
+
track_idx_map = {i: dict() for i in range(16)}
|
| 79 |
+
track_idx_dict = {}
|
| 80 |
+
channels = []
|
| 81 |
+
patch_channels = []
|
| 82 |
+
empty_channels = [True] * 16
|
| 83 |
+
channel_note_tracks = {i: list() for i in range(16)}
|
| 84 |
+
for track_idx, track in enumerate(midi_score[1:129]):
|
| 85 |
+
last_notes = {}
|
| 86 |
+
patch_dict = {}
|
| 87 |
+
control_dict = {}
|
| 88 |
+
last_tempo = 0
|
| 89 |
+
for event in track:
|
| 90 |
+
if event[0] not in self.events:
|
| 91 |
+
continue
|
| 92 |
+
c = -1
|
| 93 |
+
t = round(16 * event[1] / ticks_per_beat) # quantization
|
| 94 |
+
new_event = [event[0], t // 16, t % 16, track_idx] + event[2:]
|
| 95 |
+
if event[0] == "note":
|
| 96 |
+
c = event[3]
|
| 97 |
+
if c > 15 or c < 0:
|
| 98 |
+
continue
|
| 99 |
+
empty_channels[c] = False
|
| 100 |
+
track_idx_dict.setdefault(c, track_idx)
|
| 101 |
+
note_tracks = channel_note_tracks[c]
|
| 102 |
+
if track_idx not in note_tracks:
|
| 103 |
+
note_tracks.append(track_idx)
|
| 104 |
+
new_event[4] = max(1, round(16 * new_event[4] / ticks_per_beat))
|
| 105 |
+
elif event[0] == "set_tempo":
|
| 106 |
+
if new_event[4] == 0: # invalid tempo
|
| 107 |
+
continue
|
| 108 |
+
bpm = int(self.tempo2bpm(new_event[4]))
|
| 109 |
+
new_event[4] = min(bpm, 255)
|
| 110 |
+
if event[0] == "note":
|
| 111 |
+
key = tuple(new_event[:4] + new_event[5:-1])
|
| 112 |
+
else:
|
| 113 |
+
key = tuple(new_event[:-1])
|
| 114 |
+
if event[0] == "patch_change":
|
| 115 |
+
c, p = event[2:]
|
| 116 |
+
if c > 15 or c < 0:
|
| 117 |
+
continue
|
| 118 |
+
last_p = patch_dict.setdefault(c, None)
|
| 119 |
+
if last_p == p:
|
| 120 |
+
continue
|
| 121 |
+
patch_dict[c] = p
|
| 122 |
+
if c not in patch_channels:
|
| 123 |
+
patch_channels.append(c)
|
| 124 |
+
elif event[0] == "control_change":
|
| 125 |
+
c, cc, v = event[2:]
|
| 126 |
+
if c > 15 or c < 0:
|
| 127 |
+
continue
|
| 128 |
+
last_v = control_dict.setdefault((c, cc), 0)
|
| 129 |
+
if abs(last_v - v) < cc_eps:
|
| 130 |
+
continue
|
| 131 |
+
control_dict[(c, cc)] = v
|
| 132 |
+
elif event[0] == "set_tempo":
|
| 133 |
+
tempo = new_event[-1]
|
| 134 |
+
if abs(last_tempo - tempo) < tempo_eps:
|
| 135 |
+
continue
|
| 136 |
+
last_tempo = tempo
|
| 137 |
+
|
| 138 |
+
if c != -1:
|
| 139 |
+
if c not in channels:
|
| 140 |
+
channels.append(c)
|
| 141 |
+
tr_map = track_idx_map[c]
|
| 142 |
+
if track_idx not in tr_map:
|
| 143 |
+
tr_map[track_idx] = 0
|
| 144 |
+
|
| 145 |
+
if event[0] == "note": # to eliminate note overlap due to quantization
|
| 146 |
+
cp = tuple(new_event[5:7])
|
| 147 |
+
if cp in last_notes:
|
| 148 |
+
last_note_key, last_note = last_notes[cp]
|
| 149 |
+
last_t = last_note[1] * 16 + last_note[2]
|
| 150 |
+
last_note[4] = max(0, min(last_note[4], t - last_t))
|
| 151 |
+
if last_note[4] == 0:
|
| 152 |
+
event_list.pop(last_note_key)
|
| 153 |
+
last_notes[cp] = (key, new_event)
|
| 154 |
+
event_list[key] = new_event
|
| 155 |
+
event_list = list(event_list.values())
|
| 156 |
+
|
| 157 |
+
empty_channels = [c for c in channels if empty_channels[c]]
|
| 158 |
+
|
| 159 |
+
if remap_track_channel:
|
| 160 |
+
patch_channels = []
|
| 161 |
+
channels_count = 0
|
| 162 |
+
channels_map = {9: 9} if 9 in channels else {}
|
| 163 |
+
if remove_empty_channels:
|
| 164 |
+
channels = sorted(channels, key=lambda x: 1 if x in empty_channels else 0)
|
| 165 |
+
for c in channels:
|
| 166 |
+
if c == 9:
|
| 167 |
+
continue
|
| 168 |
+
channels_map[c] = channels_count
|
| 169 |
+
channels_count += 1
|
| 170 |
+
if channels_count == 9:
|
| 171 |
+
channels_count = 10
|
| 172 |
+
channels = list(channels_map.values())
|
| 173 |
+
|
| 174 |
+
track_count = 0
|
| 175 |
+
track_idx_map_order = [k for k, v in sorted(list(channels_map.items()), key=lambda x: x[1])]
|
| 176 |
+
for c in track_idx_map_order: # tracks not to remove
|
| 177 |
+
if remove_empty_channels and c in empty_channels:
|
| 178 |
+
continue
|
| 179 |
+
tr_map = track_idx_map[c]
|
| 180 |
+
for track_idx in tr_map:
|
| 181 |
+
note_tracks = channel_note_tracks[c]
|
| 182 |
+
if len(note_tracks) != 0 and track_idx not in note_tracks:
|
| 183 |
+
continue
|
| 184 |
+
track_count += 1
|
| 185 |
+
tr_map[track_idx] = track_count
|
| 186 |
+
for c in track_idx_map_order: # tracks to remove
|
| 187 |
+
if not (remove_empty_channels and c in empty_channels):
|
| 188 |
+
continue
|
| 189 |
+
tr_map = track_idx_map[c]
|
| 190 |
+
for track_idx in tr_map:
|
| 191 |
+
note_tracks = channel_note_tracks[c]
|
| 192 |
+
if not (len(note_tracks) != 0 and track_idx not in note_tracks):
|
| 193 |
+
continue
|
| 194 |
+
track_count += 1
|
| 195 |
+
tr_map[track_idx] = track_count
|
| 196 |
+
|
| 197 |
+
empty_channels = [channels_map[c] for c in empty_channels]
|
| 198 |
+
track_idx_dict = {}
|
| 199 |
+
for event in event_list:
|
| 200 |
+
name = event[0]
|
| 201 |
+
track_idx = event[3]
|
| 202 |
+
if name == "note":
|
| 203 |
+
c = event[5]
|
| 204 |
+
event[5] = channels_map[c]
|
| 205 |
+
event[3] = track_idx_map[c][track_idx]
|
| 206 |
+
track_idx_dict.setdefault(event[5], event[3])
|
| 207 |
+
# setdefault, so the track_idx is first of the channel
|
| 208 |
+
elif name == "set_tempo":
|
| 209 |
+
event[3] = 0
|
| 210 |
+
elif name == "control_change" or name == "patch_change":
|
| 211 |
+
c = event[4]
|
| 212 |
+
event[4] = channels_map[c]
|
| 213 |
+
tr_map = track_idx_map[c]
|
| 214 |
+
# move the event to first track of the channel if it's original track is empty
|
| 215 |
+
note_tracks = channel_note_tracks[c]
|
| 216 |
+
if len(note_tracks) != 0 and track_idx not in note_tracks:
|
| 217 |
+
track_idx = channel_note_tracks[c][0]
|
| 218 |
+
new_track_idx = tr_map[track_idx]
|
| 219 |
+
event[3] = new_track_idx
|
| 220 |
+
if name == "patch_change" and event[4] not in patch_channels:
|
| 221 |
+
patch_channels.append(event[4])
|
| 222 |
+
|
| 223 |
+
if add_default_instr:
|
| 224 |
+
for c in channels:
|
| 225 |
+
if c not in patch_channels and c in track_idx_dict:
|
| 226 |
+
event_list.append(["patch_change", 0, 0, track_idx_dict[c], c, 0])
|
| 227 |
+
|
| 228 |
+
events_name_order = {"set_tempo": 0, "patch_change": 1, "control_change": 2, "note": 3}
|
| 229 |
+
events_order = lambda e: e[1:4] + [events_name_order[e[0]]]
|
| 230 |
+
event_list = sorted(event_list, key=events_order)
|
| 231 |
+
|
| 232 |
+
setup_events = {}
|
| 233 |
+
notes_in_setup = False
|
| 234 |
+
for i, event in enumerate(event_list): # optimise setup
|
| 235 |
+
new_event = [*event]
|
| 236 |
+
if event[0] != "note":
|
| 237 |
+
new_event[1] = 0
|
| 238 |
+
new_event[2] = 0
|
| 239 |
+
has_next = False
|
| 240 |
+
has_pre = False
|
| 241 |
+
if i < len(event_list) - 1:
|
| 242 |
+
next_event = event_list[i + 1]
|
| 243 |
+
has_next = event[1] + event[2] == next_event[1] + next_event[2]
|
| 244 |
+
if notes_in_setup and i > 0:
|
| 245 |
+
pre_event = event_list[i - 1]
|
| 246 |
+
has_pre = event[1] + event[2] == pre_event[1] + pre_event[2]
|
| 247 |
+
if (event[0] == "note" and not has_next) or (notes_in_setup and not has_pre):
|
| 248 |
+
event_list = sorted(setup_events.values(), key=events_order) + event_list[i:]
|
| 249 |
+
break
|
| 250 |
+
else:
|
| 251 |
+
if event[0] == "note":
|
| 252 |
+
notes_in_setup = True
|
| 253 |
+
key = tuple([event[0]] + event[3:-2])
|
| 254 |
+
else:
|
| 255 |
+
key = tuple([event[0]] + event[3:-1])
|
| 256 |
+
setup_events[key] = new_event
|
| 257 |
+
|
| 258 |
+
last_t1 = 0
|
| 259 |
+
midi_seq = []
|
| 260 |
+
for event in event_list:
|
| 261 |
+
if remove_empty_channels and event[0] in ["control_change", "patch_change"] and event[4] in empty_channels:
|
| 262 |
+
continue
|
| 263 |
+
cur_t1 = event[1]
|
| 264 |
+
event[1] = event[1] - last_t1
|
| 265 |
+
tokens = self.event2tokens(event)
|
| 266 |
+
if not tokens:
|
| 267 |
+
continue
|
| 268 |
+
midi_seq.append(tokens)
|
| 269 |
+
last_t1 = cur_t1
|
| 270 |
+
|
| 271 |
+
if add_bos_eos:
|
| 272 |
+
bos = [self.bos_id] + [self.pad_id] * (self.max_token_seq - 1)
|
| 273 |
+
eos = [self.eos_id] + [self.pad_id] * (self.max_token_seq - 1)
|
| 274 |
+
midi_seq = [bos] + midi_seq + [eos]
|
| 275 |
+
return midi_seq
|
| 276 |
+
|
| 277 |
+
def event2tokens(self, event):
|
| 278 |
+
name = event[0]
|
| 279 |
+
params = event[1:]
|
| 280 |
+
if not all([0 <= params[i] < self.event_parameters[p] for i, p in enumerate(self.events[name])]):
|
| 281 |
+
return []
|
| 282 |
+
tokens = [self.event_ids[name]] + [self.parameter_ids[p][params[i]]
|
| 283 |
+
for i, p in enumerate(self.events[name])]
|
| 284 |
+
tokens += [self.pad_id] * (self.max_token_seq - len(tokens))
|
| 285 |
+
return tokens
|
| 286 |
+
|
| 287 |
+
def tokens2event(self, tokens):
|
| 288 |
+
if tokens[0] not in self.id_events:
|
| 289 |
+
return []
|
| 290 |
+
name = self.id_events[tokens[0]]
|
| 291 |
+
if len(tokens) <= len(self.events[name]):
|
| 292 |
+
return []
|
| 293 |
+
params = tokens[1:]
|
| 294 |
+
params = [params[i] - self.parameter_ids[p][0] for i, p in enumerate(self.events[name])]
|
| 295 |
+
if not all([0 <= params[i] < self.event_parameters[p] for i, p in enumerate(self.events[name])]):
|
| 296 |
+
return []
|
| 297 |
+
event = [name] + params
|
| 298 |
+
return event
|
| 299 |
+
|
| 300 |
+
def detokenize(self, midi_seq):
|
| 301 |
+
ticks_per_beat = 480
|
| 302 |
+
tracks_dict = {}
|
| 303 |
+
t1 = 0
|
| 304 |
+
for tokens in midi_seq:
|
| 305 |
+
if tokens[0] in self.id_events:
|
| 306 |
+
event = self.tokens2event(tokens)
|
| 307 |
+
if not event:
|
| 308 |
+
continue
|
| 309 |
+
name = event[0]
|
| 310 |
+
if name == "set_tempo":
|
| 311 |
+
event[4] = self.bpm2tempo(event[4])
|
| 312 |
+
if event[0] == "note":
|
| 313 |
+
event[4] = int(event[4] * ticks_per_beat / 16)
|
| 314 |
+
t1 += event[1]
|
| 315 |
+
t = t1 * 16 + event[2]
|
| 316 |
+
t = int(t * ticks_per_beat / 16)
|
| 317 |
+
track_idx = event[3]
|
| 318 |
+
if track_idx not in tracks_dict:
|
| 319 |
+
tracks_dict[track_idx] = []
|
| 320 |
+
tracks_dict[track_idx].append([event[0], t] + event[4:])
|
| 321 |
+
tracks = [tr for idx, tr in sorted(list(tracks_dict.items()), key=lambda it: it[0])]
|
| 322 |
+
|
| 323 |
+
for i in range(len(tracks)): # to eliminate note overlap
|
| 324 |
+
track = tracks[i]
|
| 325 |
+
track = sorted(track, key=lambda e: e[1])
|
| 326 |
+
last_note_t = {}
|
| 327 |
+
zero_len_notes = []
|
| 328 |
+
for e in reversed(track):
|
| 329 |
+
if e[0] == "note":
|
| 330 |
+
t, d, c, p = e[1:5]
|
| 331 |
+
key = (c, p)
|
| 332 |
+
if key in last_note_t:
|
| 333 |
+
d = min(d, max(last_note_t[key] - t, 0))
|
| 334 |
+
last_note_t[key] = t
|
| 335 |
+
e[2] = d
|
| 336 |
+
if d == 0:
|
| 337 |
+
zero_len_notes.append(e)
|
| 338 |
+
for e in zero_len_notes:
|
| 339 |
+
track.remove(e)
|
| 340 |
+
tracks[i] = track
|
| 341 |
+
return [ticks_per_beat, *tracks]
|
| 342 |
+
|
| 343 |
+
def midi2img(self, midi_score):
|
| 344 |
+
ticks_per_beat = midi_score[0]
|
| 345 |
+
notes = []
|
| 346 |
+
max_time = 1
|
| 347 |
+
track_num = len(midi_score[1:])
|
| 348 |
+
for track_idx, track in enumerate(midi_score[1:]):
|
| 349 |
+
for event in track:
|
| 350 |
+
t = round(16 * event[1] / ticks_per_beat)
|
| 351 |
+
if event[0] == "note":
|
| 352 |
+
d = max(1, round(16 * event[2] / ticks_per_beat))
|
| 353 |
+
c, p = event[3:5]
|
| 354 |
+
max_time = max(max_time, t + d + 1)
|
| 355 |
+
notes.append((track_idx, c, p, t, d))
|
| 356 |
+
img = np.zeros((128, max_time, 3), dtype=np.uint8)
|
| 357 |
+
colors = {(i, j): np.random.randint(50, 256, 3) for i in range(track_num) for j in range(16)}
|
| 358 |
+
for note in notes:
|
| 359 |
+
tr, c, p, t, d = note
|
| 360 |
+
img[p, t: t + d] = colors[(tr, c)]
|
| 361 |
+
img = PIL.Image.fromarray(np.flip(img, 0))
|
| 362 |
+
return img
|
| 363 |
+
|
| 364 |
+
def augment(self, midi_seq, max_pitch_shift=4, max_vel_shift=10, max_cc_val_shift=10, max_bpm_shift=10,
|
| 365 |
+
max_track_shift=0, max_channel_shift=16):
|
| 366 |
+
pitch_shift = random.randint(-max_pitch_shift, max_pitch_shift)
|
| 367 |
+
vel_shift = random.randint(-max_vel_shift, max_vel_shift)
|
| 368 |
+
cc_val_shift = random.randint(-max_cc_val_shift, max_cc_val_shift)
|
| 369 |
+
bpm_shift = random.randint(-max_bpm_shift, max_bpm_shift)
|
| 370 |
+
track_shift = random.randint(0, max_track_shift)
|
| 371 |
+
channel_shift = random.randint(0, max_channel_shift)
|
| 372 |
+
midi_seq_new = []
|
| 373 |
+
for tokens in midi_seq:
|
| 374 |
+
tokens_new = [*tokens]
|
| 375 |
+
if tokens[0] in self.id_events:
|
| 376 |
+
name = self.id_events[tokens[0]]
|
| 377 |
+
for i, pn in enumerate(self.events[name]):
|
| 378 |
+
if pn == "track":
|
| 379 |
+
tr = tokens[1 + i] - self.parameter_ids[pn][0]
|
| 380 |
+
tr += track_shift
|
| 381 |
+
tr = tr % self.event_parameters[pn]
|
| 382 |
+
tokens_new[1 + i] = self.parameter_ids[pn][tr]
|
| 383 |
+
elif pn == "channel":
|
| 384 |
+
c = tokens[1 + i] - self.parameter_ids[pn][0]
|
| 385 |
+
c0 = c
|
| 386 |
+
c += channel_shift
|
| 387 |
+
c = c % self.event_parameters[pn]
|
| 388 |
+
if c0 == 9:
|
| 389 |
+
c = 9
|
| 390 |
+
elif c == 9:
|
| 391 |
+
c = (9 + channel_shift) % self.event_parameters[pn]
|
| 392 |
+
tokens_new[1 + i] = self.parameter_ids[pn][c]
|
| 393 |
+
|
| 394 |
+
if name == "note":
|
| 395 |
+
c = tokens[5] - self.parameter_ids["channel"][0]
|
| 396 |
+
p = tokens[6] - self.parameter_ids["pitch"][0]
|
| 397 |
+
v = tokens[7] - self.parameter_ids["velocity"][0]
|
| 398 |
+
if c != 9: # no shift for drums
|
| 399 |
+
p += pitch_shift
|
| 400 |
+
if not 0 <= p < 128:
|
| 401 |
+
return midi_seq
|
| 402 |
+
v += vel_shift
|
| 403 |
+
v = max(1, min(127, v))
|
| 404 |
+
tokens_new[6] = self.parameter_ids["pitch"][p]
|
| 405 |
+
tokens_new[7] = self.parameter_ids["velocity"][v]
|
| 406 |
+
elif name == "control_change":
|
| 407 |
+
cc = tokens[5] - self.parameter_ids["controller"][0]
|
| 408 |
+
val = tokens[6] - self.parameter_ids["value"][0]
|
| 409 |
+
if cc in [1, 2, 7, 11]:
|
| 410 |
+
val += cc_val_shift
|
| 411 |
+
val = max(1, min(127, val))
|
| 412 |
+
tokens_new[6] = self.parameter_ids["value"][val]
|
| 413 |
+
elif name == "set_tempo":
|
| 414 |
+
bpm = tokens[4] - self.parameter_ids["bpm"][0]
|
| 415 |
+
bpm += bpm_shift
|
| 416 |
+
bpm = max(1, min(255, bpm))
|
| 417 |
+
tokens_new[4] = self.parameter_ids["bpm"][bpm]
|
| 418 |
+
midi_seq_new.append(tokens_new)
|
| 419 |
+
return midi_seq_new
|
| 420 |
+
|
| 421 |
+
def check_quality(self, midi_seq, alignment_min=0.3, tonality_min=0.8, piano_max=0.7, notes_bandwidth_min=3,
|
| 422 |
+
notes_density_max=50, notes_density_min=2.5, total_notes_max=20000, total_notes_min=256,
|
| 423 |
+
note_window_size=16):
|
| 424 |
+
total_notes = 0
|
| 425 |
+
channels = []
|
| 426 |
+
time_hist = [0] * 16
|
| 427 |
+
note_windows = {}
|
| 428 |
+
notes_sametime = []
|
| 429 |
+
notes_density_list = []
|
| 430 |
+
tonality_list = []
|
| 431 |
+
notes_bandwidth_list = []
|
| 432 |
+
instruments = {}
|
| 433 |
+
piano_channels = []
|
| 434 |
+
abs_t1 = 0
|
| 435 |
+
last_t = 0
|
| 436 |
+
for tsi, tokens in enumerate(midi_seq):
|
| 437 |
+
event = self.tokens2event(tokens)
|
| 438 |
+
if not event:
|
| 439 |
+
continue
|
| 440 |
+
t1, t2, tr = event[1:4]
|
| 441 |
+
abs_t1 += t1
|
| 442 |
+
t = abs_t1 * 16 + t2
|
| 443 |
+
c = None
|
| 444 |
+
if event[0] == "note":
|
| 445 |
+
d, c, p, v = event[4:]
|
| 446 |
+
total_notes += 1
|
| 447 |
+
time_hist[t2] += 1
|
| 448 |
+
if c != 9: # ignore drum channel
|
| 449 |
+
if c not in instruments:
|
| 450 |
+
instruments[c] = 0
|
| 451 |
+
if c not in piano_channels:
|
| 452 |
+
piano_channels.append(c)
|
| 453 |
+
note_windows.setdefault(abs_t1 // note_window_size, []).append(p)
|
| 454 |
+
if last_t != t:
|
| 455 |
+
notes_sametime = [(et, p_) for et, p_ in notes_sametime if et > last_t]
|
| 456 |
+
notes_sametime_p = [p_ for _, p_ in notes_sametime]
|
| 457 |
+
if len(notes_sametime) > 0:
|
| 458 |
+
notes_bandwidth_list.append(max(notes_sametime_p) - min(notes_sametime_p))
|
| 459 |
+
notes_sametime.append((t + d - 1, p))
|
| 460 |
+
elif event[0] == "patch_change":
|
| 461 |
+
c, p = event[4:]
|
| 462 |
+
instruments[c] = p
|
| 463 |
+
if p == 0 and c not in piano_channels:
|
| 464 |
+
piano_channels.append(c)
|
| 465 |
+
if c is not None and c not in channels:
|
| 466 |
+
channels.append(c)
|
| 467 |
+
last_t = t
|
| 468 |
+
reasons = []
|
| 469 |
+
if total_notes < total_notes_min:
|
| 470 |
+
reasons.append("total_min")
|
| 471 |
+
if total_notes > total_notes_max:
|
| 472 |
+
reasons.append("total_max")
|
| 473 |
+
if len(note_windows) == 0 and total_notes > 0:
|
| 474 |
+
reasons.append("drum_only")
|
| 475 |
+
if reasons:
|
| 476 |
+
return False, reasons
|
| 477 |
+
time_hist = sorted(time_hist, reverse=True)
|
| 478 |
+
alignment = sum(time_hist[:2]) / total_notes
|
| 479 |
+
for notes in note_windows.values():
|
| 480 |
+
key_hist = [0] * 12
|
| 481 |
+
for p in notes:
|
| 482 |
+
key_hist[p % 12] += 1
|
| 483 |
+
key_hist = sorted(key_hist, reverse=True)
|
| 484 |
+
tonality_list.append(sum(key_hist[:7]) / len(notes))
|
| 485 |
+
notes_density_list.append(len(notes) / note_window_size)
|
| 486 |
+
tonality_list = sorted(tonality_list)
|
| 487 |
+
tonality = sum(tonality_list) / len(tonality_list)
|
| 488 |
+
notes_bandwidth = sum(notes_bandwidth_list) / len(notes_bandwidth_list) if notes_bandwidth_list else 0
|
| 489 |
+
notes_density = max(notes_density_list) if notes_density_list else 0
|
| 490 |
+
piano_ratio = len(piano_channels) / len(channels)
|
| 491 |
+
if len(channels) <= 3: # ignore piano threshold if it is a piano solo midi
|
| 492 |
+
piano_max = 1
|
| 493 |
+
if alignment < alignment_min: # check weather the notes align to the bars (because some midi files are recorded)
|
| 494 |
+
reasons.append("alignment")
|
| 495 |
+
if tonality < tonality_min: # check whether the music is tonal
|
| 496 |
+
reasons.append("tonality")
|
| 497 |
+
if notes_bandwidth < notes_bandwidth_min: # check whether music is melodic line only
|
| 498 |
+
reasons.append("bandwidth")
|
| 499 |
+
if not notes_density_min < notes_density < notes_density_max:
|
| 500 |
+
reasons.append("density")
|
| 501 |
+
if piano_ratio > piano_max: # check whether most instruments is piano (because some midi files don't have instruments assigned correctly)
|
| 502 |
+
reasons.append("piano")
|
| 503 |
+
return not reasons, reasons
|
| 504 |
+
|
| 505 |
+
|
| 506 |
+
class MIDITokenizerV2:
|
| 507 |
+
def __init__(self):
|
| 508 |
+
self.version = "v2"
|
| 509 |
+
self.optimise_midi = False
|
| 510 |
+
self.vocab_size = 0
|
| 511 |
+
|
| 512 |
+
def allocate_ids(size):
|
| 513 |
+
ids = [self.vocab_size + i for i in range(size)]
|
| 514 |
+
self.vocab_size += size
|
| 515 |
+
return ids
|
| 516 |
+
|
| 517 |
+
self.pad_id = allocate_ids(1)[0]
|
| 518 |
+
self.bos_id = allocate_ids(1)[0]
|
| 519 |
+
self.eos_id = allocate_ids(1)[0]
|
| 520 |
+
self.events = {
|
| 521 |
+
"note": ["time1", "time2", "track", "channel", "pitch", "velocity", "duration"],
|
| 522 |
+
"patch_change": ["time1", "time2", "track", "channel", "patch"],
|
| 523 |
+
"control_change": ["time1", "time2", "track", "channel", "controller", "value"],
|
| 524 |
+
"set_tempo": ["time1", "time2", "track", "bpm"],
|
| 525 |
+
"time_signature": ["time1", "time2", "track", "nn", "dd"],
|
| 526 |
+
"key_signature": ["time1", "time2", "track", "sf", "mi"],
|
| 527 |
+
}
|
| 528 |
+
self.event_parameters = {
|
| 529 |
+
"time1": 128, "time2": 16, "duration": 2048, "track": 128, "channel": 16, "pitch": 128, "velocity": 128,
|
| 530 |
+
"patch": 128, "controller": 128, "value": 128, "bpm": 384, "nn": 16, "dd": 4, "sf": 15, "mi": 2
|
| 531 |
+
}
|
| 532 |
+
self.event_ids = {e: allocate_ids(1)[0] for e in self.events.keys()}
|
| 533 |
+
self.id_events = {i: e for e, i in self.event_ids.items()}
|
| 534 |
+
self.parameter_ids = {p: allocate_ids(s) for p, s in self.event_parameters.items()}
|
| 535 |
+
self.max_token_seq = max([len(ps) for ps in self.events.values()]) + 1
|
| 536 |
+
|
| 537 |
+
def to_dict(self) -> Dict[str, Any]:
|
| 538 |
+
d = {
|
| 539 |
+
"version":self.version,
|
| 540 |
+
"optimise_midi":self.optimise_midi,
|
| 541 |
+
"vocab_size": self.vocab_size,
|
| 542 |
+
"events": self.events,
|
| 543 |
+
"event_parameters": self.event_parameters,
|
| 544 |
+
"max_token_seq": self.max_token_seq,
|
| 545 |
+
"pad_id": self.pad_id,
|
| 546 |
+
"bos_id": self.bos_id,
|
| 547 |
+
"eos_id": self.eos_id,
|
| 548 |
+
}
|
| 549 |
+
return d
|
| 550 |
+
|
| 551 |
+
def set_optimise_midi(self, optimise_midi=True):
|
| 552 |
+
self.optimise_midi = optimise_midi
|
| 553 |
+
|
| 554 |
+
@staticmethod
|
| 555 |
+
def tempo2bpm(tempo):
|
| 556 |
+
tempo = tempo / 10 ** 6 # us to s
|
| 557 |
+
bpm = 60 / tempo
|
| 558 |
+
return bpm
|
| 559 |
+
|
| 560 |
+
@staticmethod
|
| 561 |
+
def bpm2tempo(bpm):
|
| 562 |
+
if bpm == 0:
|
| 563 |
+
bpm = 1
|
| 564 |
+
tempo = int((60 / bpm) * 10 ** 6)
|
| 565 |
+
return tempo
|
| 566 |
+
|
| 567 |
+
@staticmethod
|
| 568 |
+
def sf2key(sf):
|
| 569 |
+
# sf in key_signature to key.
|
| 570 |
+
# key represents the sequence from C note to B note (12 in total)
|
| 571 |
+
return (sf * 7) % 12
|
| 572 |
+
|
| 573 |
+
@staticmethod
|
| 574 |
+
def key2sf(k, mi):
|
| 575 |
+
# key to sf
|
| 576 |
+
sf = (k * 7) % 12
|
| 577 |
+
if sf > 6 or (mi == 1 and sf >= 5):
|
| 578 |
+
sf -= 12
|
| 579 |
+
return sf
|
| 580 |
+
|
| 581 |
+
@staticmethod
|
| 582 |
+
def detect_key_signature(key_hist, threshold=0.7):
|
| 583 |
+
if len(key_hist) != 12:
|
| 584 |
+
return None
|
| 585 |
+
if sum(key_hist) == 0:
|
| 586 |
+
return None
|
| 587 |
+
p = sum(sorted(key_hist, reverse=True)[:7]) / sum(key_hist)
|
| 588 |
+
if p < threshold:
|
| 589 |
+
return None
|
| 590 |
+
keys = [x[1] for x in sorted(zip(key_hist, range(len(key_hist))), reverse=True, key=lambda x: x[0])[:7]]
|
| 591 |
+
keys = sorted(keys)
|
| 592 |
+
semitones = []
|
| 593 |
+
for i in range(len(keys)):
|
| 594 |
+
dis = keys[i] - keys[i - 1]
|
| 595 |
+
if dis == 1 or dis == -11:
|
| 596 |
+
semitones.append(keys[i])
|
| 597 |
+
if len(semitones) != 2:
|
| 598 |
+
return None
|
| 599 |
+
semitones_dis = semitones[1] - semitones[0]
|
| 600 |
+
if semitones_dis == 5:
|
| 601 |
+
root_key = semitones[0]
|
| 602 |
+
elif semitones_dis == 7:
|
| 603 |
+
root_key = semitones[1]
|
| 604 |
+
else:
|
| 605 |
+
return None
|
| 606 |
+
return root_key
|
| 607 |
+
|
| 608 |
+
    def tokenize(self, midi_score, add_bos_eos=True, cc_eps=4, tempo_eps=4,
                 remap_track_channel=None, add_default_instr=None, remove_empty_channels=None):
        """Convert a MIDI score into a sequence of fixed-length token rows.

        ``midi_score`` is ``[ticks_per_beat, track0, track1, ...]`` where each
        track is a list of events of the form ``[name, tick, *params]``.
        Time is quantized to 16 steps per beat; near-duplicate control/tempo
        changes (within ``cc_eps``/``tempo_eps``) are dropped.  The three
        optional flags default to ``self.optimise_midi`` and respectively:
        compact tracks/channels, add a default piano patch to channels that
        never get one, and drop events on channels that contain no notes.
        Returns a list of token rows, each padded to ``self.max_token_seq``;
        BOS/EOS rows are prepended/appended when ``add_bos_eos`` is True.
        """
        if remap_track_channel is None:  # set default value
            remap_track_channel = self.optimise_midi
        if add_default_instr is None:
            add_default_instr = self.optimise_midi
        if remove_empty_channels is None:
            remove_empty_channels = self.optimise_midi

        ticks_per_beat = midi_score[0]
        # Keyed dict rather than list so later duplicates overwrite earlier ones.
        event_list = {}
        # Per-channel map {original_track_idx: remapped_track_idx}.
        track_idx_map = {i: dict() for i in range(16)}
        # First track seen for each channel (used for default patch placement).
        track_idx_dict = {}
        channels = []
        patch_channels = []
        empty_channels = [True] * 16
        # Tracks that actually carry notes, per channel.
        channel_note_tracks = {i: list() for i in range(16)}
        # Pitch-class histogram (drums excluded) for key detection.
        note_key_hist = [0]*12
        key_sigs = []
        track_to_channels = {}
        for track_idx, track in enumerate(midi_score[1:129]):  # at most 128 tracks
            last_notes = {}
            patch_dict = {}
            control_dict = {}
            last_bpm = 0
            track_channels = []
            track_to_channels.setdefault(track_idx, track_channels)
            for event in track:
                if event[0] not in self.events:
                    continue
                name = event[0]
                c = -1  # -1 marks "no channel" (meta events)
                t = round(16 * event[1] / ticks_per_beat)  # quantization
                new_event = [name, t // 16, t % 16, track_idx]
                if name == "note":
                    d, c, p, v = event[2:]
                    if not (0 <= c <= 15):
                        continue
                    d = max(1, round(16 * d / ticks_per_beat))
                    new_event += [c, p, v, d]
                    empty_channels[c] = False
                    track_idx_dict.setdefault(c, track_idx)
                    note_tracks = channel_note_tracks[c]
                    if track_idx not in note_tracks:
                        note_tracks.append(track_idx)
                    if c != 9:  # channel 9 is drums; exclude from key histogram
                        note_key_hist[p%12] += 1
                    if c not in track_channels:
                        track_channels.append(c)
                elif name == "patch_change":
                    c, p = event[2:]
                    if not (0 <= c <= 15):
                        continue
                    new_event += [c, p]
                    last_p = patch_dict.setdefault(c, None)
                    if last_p == p:  # drop repeated patch assignment
                        continue
                    patch_dict[c] = p
                    if c not in patch_channels:
                        patch_channels.append(c)
                elif name == "control_change":
                    c, cc, v = event[2:]
                    if not (0 <= c <= 15):
                        continue
                    new_event += [c, cc, v]
                    last_v = control_dict.setdefault((c, cc), 0)
                    if abs(last_v - v) < cc_eps:  # drop near-duplicate CC values
                        continue
                    control_dict[(c, cc)] = v
                elif name == "set_tempo":
                    tempo = event[2]
                    if tempo == 0:  # invalid tempo
                        continue
                    bpm = min(int(self.tempo2bpm(tempo)), 383)
                    new_event += [bpm]
                    if abs(last_bpm - bpm) < tempo_eps:  # drop near-duplicate tempi
                        continue
                    last_bpm = bpm
                elif name == "time_signature":
                    nn, dd = event[2:4]
                    if not (1 <= nn <= 16 and 1 <= dd <= 4):  # invalid
                        continue
                    nn -= 1  # make it start from 0
                    dd -= 1
                    new_event += [nn, dd]
                elif name == "key_signature":
                    sf, mi = event[2:]
                    if not (-7 <= sf <= 7 and 0 <= mi <= 1):  # invalid
                        continue
                    sf += 7  # shift to 0..14 for tokenization
                    new_event += [sf, mi]
                    key_sigs.append(new_event)

                # Dedup key: drop velocity+duration (note) / last param (others)
                # so events at the same place replace each other.
                if name in ["note", "time_signature", "key_signature"]:
                    key = tuple(new_event[:-2])
                else:
                    key = tuple(new_event[:-1])

                if c != -1:
                    if c not in channels:
                        channels.append(c)
                    tr_map = track_idx_map[c]
                    if track_idx not in tr_map:
                        tr_map[track_idx] = 0
                if event[0] == "note":  # to eliminate note overlap due to quantization
                    cp = tuple(new_event[4:6])  # channel pitch
                    if cp in last_notes:
                        last_note_key, last_note = last_notes[cp]
                        last_t = last_note[1] * 16 + last_note[2]
                        last_note[-1] = max(0, min(last_note[-1], t - last_t))  # modify duration
                        if last_note[-1] == 0:
                            event_list.pop(last_note_key)
                    last_notes[cp] = (key, new_event)
                event_list[key] = new_event
        event_list = list(event_list.values())

        # Restrict to channels that were seen but carry no notes.
        empty_channels = [c for c in channels if empty_channels[c]]

        if remap_track_channel:
            patch_channels = []
            channels_count = 0
            channels_map = {9: 9} if 9 in channels else {}  # drums stay on 9
            if remove_empty_channels:
                # Push empty channels to the end so they get the highest ids.
                channels = sorted(channels, key=lambda x: 1 if x in empty_channels else 0)
            for c in channels:
                if c == 9:
                    continue
                channels_map[c] = channels_count
                channels_count += 1
                if channels_count == 9:  # skip the reserved drum channel
                    channels_count = 10
            channels = list(channels_map.values())

            track_count = 0
            # Channels ordered by their remapped id.
            track_idx_map_order = [k for k, v in sorted(list(channels_map.items()), key=lambda x: x[1])]
            for c in track_idx_map_order:  # tracks not to remove
                if remove_empty_channels and c in empty_channels:
                    continue
                tr_map = track_idx_map[c]
                for track_idx in tr_map:
                    note_tracks = channel_note_tracks[c]
                    if len(note_tracks) != 0 and track_idx not in note_tracks:
                        continue
                    track_count += 1
                    tr_map[track_idx] = track_count
            for c in track_idx_map_order:  # tracks to remove
                if not (remove_empty_channels and c in empty_channels):
                    continue
                tr_map = track_idx_map[c]
                for track_idx in tr_map:
                    note_tracks = channel_note_tracks[c]
                    if not (len(note_tracks) != 0 and track_idx not in note_tracks):
                        continue
                    track_count += 1
                    tr_map[track_idx] = track_count

            empty_channels = [channels_map[c] for c in empty_channels]
            track_idx_dict = {}
            key_sigs = []
            key_signature_to_add = []
            key_signature_to_remove = []
            for event in event_list:
                name = event[0]
                track_idx = event[3]
                if name == "note":
                    c = event[4]
                    event[4] = channels_map[c]  # channel
                    event[3] = track_idx_map[c][track_idx]  # track
                    track_idx_dict.setdefault(event[4], event[3])
                    # setdefault, so the track_idx is first of the channel
                elif name in ["set_tempo", "time_signature"]:
                    event[3] = 0  # set track 0 for meta events
                elif name == "key_signature":
                    # A key signature on an original track may map to several
                    # remapped (channel, track) pairs; duplicate it onto each.
                    new_channel_track_idxs = []
                    for c, tr_map in track_idx_map.items():
                        if track_idx in tr_map:
                            new_track_idx = tr_map[track_idx]
                            c = channels_map[c]
                            new_channel_track_idx = (c, new_track_idx)
                            if new_track_idx == 0:
                                continue
                            if new_channel_track_idx not in new_channel_track_idxs:
                                new_channel_track_idxs.append(new_channel_track_idx)

                    if len(new_channel_track_idxs) == 0:
                        if event[3] == 0:  # keep key_signature on track 0 (meta)
                            key_sigs.append(event)
                            continue
                        event[3] = -1  # avoid remove same event
                        key_signature_to_remove.append(event)  # empty track
                        continue
                    c, nt = new_channel_track_idxs[0]
                    event[3] = nt
                    key_sigs.append(event)
                    if c == 9:
                        event[4] = 7  # sf=0
                    for c, nt in new_channel_track_idxs[1:]:
                        new_event = [*event]
                        new_event[3] = nt
                        if c == 9:
                            new_event[4] = 7  # sf=0
                        key_sigs.append(new_event)
                        key_signature_to_add.append(new_event)
                elif name == "control_change" or name == "patch_change":
                    c = event[4]
                    event[4] = channels_map[c]  # channel
                    tr_map = track_idx_map[c]
                    # move the event to first track of the channel if its original track is empty
                    note_tracks = channel_note_tracks[c]
                    if len(note_tracks) != 0 and track_idx not in note_tracks:
                        track_idx = channel_note_tracks[c][0]
                    new_track_idx = tr_map[track_idx]
                    event[3] = new_track_idx
                    if name == "patch_change" and event[4] not in patch_channels:
                        patch_channels.append(event[4])
            for key_sig in key_signature_to_remove:
                event_list.remove(key_sig)
            event_list += key_signature_to_add
            # Rebuild track -> channels mapping in remapped coordinates.
            track_to_channels = {}
            for c, tr_map in track_idx_map.items():
                if c not in channels_map:
                    continue
                c = channels_map[c]
                for _, track_idx in tr_map.items():
                    track_to_channels.setdefault(track_idx, [])
                    cs = track_to_channels[track_idx]
                    if c not in cs:
                        cs.append(c)

        if add_default_instr:
            # Give channels without any patch_change a default patch 0 (piano).
            for c in channels:
                if c not in patch_channels and c in track_idx_dict:
                    event_list.append(["patch_change", 0, 0, track_idx_dict[c], c, 0])

        if len(key_sigs) == 0 or all([key_sig[4]==7 for key_sig in key_sigs]):
            # detect key signature or fix the default key signature
            root_key = self.detect_key_signature(note_key_hist)
            if root_key is not None:
                sf = self.key2sf(root_key, 0)
                # print("detect_key_signature",sf)
                if len(key_sigs) == 0:
                    for tr, cs in track_to_channels.items():
                        if remap_track_channel and tr == 0:
                            continue
                        # drum-only tracks keep sf=0
                        new_event = ["key_signature", 0, 0, tr, (0 if (len(cs) == 1 and cs[0] == 9) else sf) + 7, 0]
                        event_list.append(new_event)
                else:
                    for key_sig in key_sigs:
                        tr = key_sig[3]
                        if tr in track_to_channels:
                            cs = track_to_channels[tr]
                            if len(cs) == 1 and cs[0] == 9:
                                continue
                        key_sig[4] = sf + 7
                        key_sig[5] = 0
            else:
                # remove default key signature
                for key_sig in key_sigs:
                    event_list.remove(key_sig)

        # Stable ordering: time first, then track, then a fixed per-type rank.
        events_name_order = ["time_signature", "key_signature", "set_tempo", "patch_change", "control_change", "note"]
        events_name_order = {name: i for i, name in enumerate(events_name_order)}
        events_order = lambda e: e[1:4] + [events_name_order[e[0]]]
        event_list = sorted(event_list, key=events_order)

        # Collapse the leading "setup" region (events sharing the first time
        # slot) so only the latest of each duplicated setup event survives.
        setup_events = {}
        notes_in_setup = False
        for i, event in enumerate(event_list):  # optimise setup
            new_event = [*event]  # make copy of event
            if event[0] not in ["note", "time_signature"]:
                new_event[1] = 0
                new_event[2] = 0
            has_next = False
            has_pre = False
            if i < len(event_list) - 1:
                next_event = event_list[i + 1]
                # NOTE(review): compares t1+t2 sums, not the full quantized
                # time t1*16+t2 — distinct times can collide; preserved as-is.
                has_next = event[1] + event[2] == next_event[1] + next_event[2]
            if notes_in_setup and i > 0:
                pre_event = event_list[i - 1]
                has_pre = event[1] + event[2] == pre_event[1] + pre_event[2]
            if (event[0] == "note" and not has_next) or (notes_in_setup and not has_pre):
                event_list = sorted(setup_events.values(), key=events_order) + event_list[i:]
                break
            else:
                if event[0] == "note":
                    notes_in_setup = True
                if event[0] in ["note", "time_signature", "key_signature"]:
                    key = tuple([event[0]]+event[3:-2])
                else:
                    key = tuple([event[0]]+event[3:-1])
                setup_events[key] = new_event

        # Delta-encode the beat index (time1) and emit token rows.
        last_t1 = 0
        midi_seq = []
        for event in event_list:
            if remove_empty_channels and event[0] in ["control_change", "patch_change"] and event[4] in empty_channels:
                continue
            cur_t1 = event[1]
            event[1] = event[1] - last_t1
            tokens = self.event2tokens(event)
            if not tokens:
                continue
            midi_seq.append(tokens)
            last_t1 = cur_t1

        if add_bos_eos:
            bos = [self.bos_id] + [self.pad_id] * (self.max_token_seq - 1)
            eos = [self.eos_id] + [self.pad_id] * (self.max_token_seq - 1)
            midi_seq = [bos] + midi_seq + [eos]
        return midi_seq
|
| 919 |
+
|
| 920 |
+
def event2tokens(self, event):
|
| 921 |
+
name = event[0]
|
| 922 |
+
params = event[1:]
|
| 923 |
+
if not all([0 <= params[i] < self.event_parameters[p] for i, p in enumerate(self.events[name])]):
|
| 924 |
+
return []
|
| 925 |
+
tokens = [self.event_ids[name]] + [self.parameter_ids[p][params[i]]
|
| 926 |
+
for i, p in enumerate(self.events[name])]
|
| 927 |
+
tokens += [self.pad_id] * (self.max_token_seq - len(tokens))
|
| 928 |
+
return tokens
|
| 929 |
+
|
| 930 |
+
def tokens2event(self, tokens):
|
| 931 |
+
if tokens[0] not in self.id_events:
|
| 932 |
+
return []
|
| 933 |
+
name = self.id_events[tokens[0]]
|
| 934 |
+
if len(tokens) <= len(self.events[name]):
|
| 935 |
+
return []
|
| 936 |
+
params = tokens[1:]
|
| 937 |
+
params = [params[i] - self.parameter_ids[p][0] for i, p in enumerate(self.events[name])]
|
| 938 |
+
if not all([0 <= params[i] < self.event_parameters[p] for i, p in enumerate(self.events[name])]):
|
| 939 |
+
return []
|
| 940 |
+
event = [name] + params
|
| 941 |
+
return event
|
| 942 |
+
|
| 943 |
+
    def detokenize(self, midi_seq):
        """Inverse of ``tokenize``: rebuild a midi_score from token rows.

        Times are delta-decoded (time1 is cumulative) and rescaled to a fixed
        480 ticks per beat.  Returns ``[ticks_per_beat, track0, track1, ...]``
        with events of the form ``[name, tick, *params]``; overlapping notes
        on the same (channel, pitch) are truncated and zero-length notes
        removed.
        """
        ticks_per_beat = 480
        tracks_dict = {}
        t1 = 0  # cumulative beat index (time1 tokens are deltas)
        for tokens in midi_seq:
            if tokens[0] in self.id_events:
                event = self.tokens2event(tokens)
                if not event:
                    continue
                name = event[0]
                t1 += event[1]
                t = t1 * 16 + event[2]  # position in 1/16-beat steps
                t = int(t * ticks_per_beat / 16)
                track_idx = event[3]
                event_new = [name, t]
                if name == "note":
                    c, p, v, d = event[4:]
                    d = int(d * ticks_per_beat / 16)
                    event_new += [d, c, p, v]
                elif name == "control_change" or name == "patch_change":
                    event_new += event[4:]
                elif name == "set_tempo":
                    event_new += [self.bpm2tempo(event[4])]
                elif name == "time_signature":
                    nn, dd = event[4:]
                    nn += 1  # undo the 0-based shift applied in tokenize
                    dd += 1
                    event_new += [nn, dd, 24, 8]  # usually cc, bb = 24, 8
                elif name == "key_signature":
                    sf, mi = event[4:]
                    sf -= 7  # undo the +7 shift applied in tokenize
                    event_new += [sf, mi]
                else:  # should not go here
                    continue
                if track_idx not in tracks_dict:
                    tracks_dict[track_idx] = []
                tracks_dict[track_idx].append(event_new)
        tracks = [tr for idx, tr in sorted(list(tracks_dict.items()), key=lambda it: it[0])]

        for i in range(len(tracks)):  # to eliminate note overlap
            track = tracks[i]
            track = sorted(track, key=lambda e: e[1])
            # Walk backwards so each note sees the start time of the NEXT
            # note with the same (channel, pitch) and clips its duration.
            last_note_t = {}
            zero_len_notes = []
            for e in reversed(track):
                if e[0] == "note":
                    t, d, c, p = e[1:5]
                    key = (c, p)
                    if key in last_note_t:
                        d = min(d, max(last_note_t[key] - t, 0))
                    last_note_t[key] = t
                    e[2] = d
                    if d == 0:
                        zero_len_notes.append(e)
            # NOTE(review): list.remove matches by value, so an equal
            # duplicate event could be removed instead of this exact one.
            for e in zero_len_notes:
                track.remove(e)
            tracks[i] = track
        return [ticks_per_beat, *tracks]
|
| 1001 |
+
|
| 1002 |
+
def midi2img(self, midi_score):
|
| 1003 |
+
ticks_per_beat = midi_score[0]
|
| 1004 |
+
notes = []
|
| 1005 |
+
max_time = 1
|
| 1006 |
+
track_num = len(midi_score[1:])
|
| 1007 |
+
for track_idx, track in enumerate(midi_score[1:]):
|
| 1008 |
+
for event in track:
|
| 1009 |
+
t = round(16 * event[1] / ticks_per_beat)
|
| 1010 |
+
if event[0] == "note":
|
| 1011 |
+
d = max(1, round(16 * event[2] / ticks_per_beat))
|
| 1012 |
+
c, p = event[3:5]
|
| 1013 |
+
max_time = max(max_time, t + d + 1)
|
| 1014 |
+
notes.append((track_idx, c, p, t, d))
|
| 1015 |
+
img = np.zeros((128, max_time, 3), dtype=np.uint8)
|
| 1016 |
+
colors = {(i, j): np.random.randint(50, 256, 3) for i in range(track_num) for j in range(16)}
|
| 1017 |
+
for note in notes:
|
| 1018 |
+
tr, c, p, t, d = note
|
| 1019 |
+
img[p, t: t + d] = colors[(tr, c)]
|
| 1020 |
+
img = PIL.Image.fromarray(np.flip(img, 0))
|
| 1021 |
+
return img
|
| 1022 |
+
|
| 1023 |
+
    def augment(self, midi_seq, max_pitch_shift=4, max_vel_shift=10, max_cc_val_shift=10, max_bpm_shift=10,
                max_track_shift=0, max_channel_shift=16):
        """Randomly augment a token sequence (transpose, velocity/CC/BPM jitter,
        track/channel rotation).

        One shift per category is drawn for the whole sequence.  Drum events
        (channel 9) keep their pitches and their channel; key signatures are
        transposed consistently with the pitch shift.  If the pitch shift
        would push any note outside 0-127 the ORIGINAL ``midi_seq`` is
        returned unchanged.
        """
        pitch_shift = random.randint(-max_pitch_shift, max_pitch_shift)
        vel_shift = random.randint(-max_vel_shift, max_vel_shift)
        cc_val_shift = random.randint(-max_cc_val_shift, max_cc_val_shift)
        bpm_shift = random.randint(-max_bpm_shift, max_bpm_shift)
        track_shift = random.randint(0, max_track_shift)
        channel_shift = random.randint(0, max_channel_shift)
        midi_seq_new = []
        key_signature_tokens = []
        track_to_channels = {}
        for tokens in midi_seq:
            tokens_new = [*tokens]  # copy; special rows (bos/eos/pad) pass through
            if tokens[0] in self.id_events:
                name = self.id_events[tokens[0]]
                # Generic track/channel rotation for every event type.
                for i, pn in enumerate(self.events[name]):
                    if pn == "track":
                        tr = tokens[1 + i] - self.parameter_ids[pn][0]
                        tr += track_shift
                        tr = tr % self.event_parameters[pn]
                        tokens_new[1 + i] = self.parameter_ids[pn][tr]
                    elif pn == "channel":
                        c = tokens[1 + i] - self.parameter_ids[pn][0]
                        c0 = c
                        c += channel_shift
                        c = c % self.event_parameters[pn]
                        if c0 == 9:
                            c = 9  # drums always stay on channel 9
                        elif c == 9:
                            # another channel landed on 9; bump it past drums
                            c = (9 + channel_shift) % self.event_parameters[pn]
                        tokens_new[1 + i] = self.parameter_ids[pn][c]

                if name == "note":
                    tr = tokens[3] - self.parameter_ids["track"][0]
                    c = tokens[4] - self.parameter_ids["channel"][0]
                    p = tokens[5] - self.parameter_ids["pitch"][0]
                    v = tokens[6] - self.parameter_ids["velocity"][0]
                    if c != 9:  # no shift for drums
                        p += pitch_shift
                        if not 0 <= p < 128:
                            return midi_seq  # transposition out of range: abort
                    v += vel_shift
                    v = max(1, min(127, v))
                    tokens_new[5] = self.parameter_ids["pitch"][p]
                    tokens_new[6] = self.parameter_ids["velocity"][v]
                    track_to_channels.setdefault(tr, [])
                    cs = track_to_channels[tr]
                    if c not in cs:
                        cs.append(c)
                elif name == "control_change":
                    cc = tokens[5] - self.parameter_ids["controller"][0]
                    val = tokens[6] - self.parameter_ids["value"][0]
                    # Only jitter continuous controllers (mod, breath, volume, expression).
                    if cc in [1, 2, 7, 11]:
                        val += cc_val_shift
                        val = max(1, min(127, val))
                    tokens_new[6] = self.parameter_ids["value"][val]
                elif name == "set_tempo":
                    bpm = tokens[4] - self.parameter_ids["bpm"][0]
                    bpm += bpm_shift
                    bpm = max(1, min(383, bpm))
                    tokens_new[4] = self.parameter_ids["bpm"][bpm]
                elif name == "key_signature":
                    sf = tokens[4] - self.parameter_ids["sf"][0]
                    mi = tokens[5] - self.parameter_ids["mi"][0]
                    sf -= 7  # token value is sf+7
                    k = self.sf2key(sf)
                    k = (k + pitch_shift) % 12  # transpose the key with the notes
                    sf = self.key2sf(k, mi)
                    sf += 7
                    tokens_new[4] = self.parameter_ids["sf"][sf]
                    tokens_new[5] = self.parameter_ids["mi"][mi]
                    key_signature_tokens.append(tokens_new)
            midi_seq_new.append(tokens_new)
        # Second pass: tracks that turned out to be drum-only get sf=0.
        for tokens in key_signature_tokens:
            tr = tokens[3] - self.parameter_ids["track"][0]
            if tr in track_to_channels:
                cs = track_to_channels[tr]
                if len(cs) == 1 and cs[0] == 9:
                    tokens[4] = self.parameter_ids["sf"][7]  # sf=0
        return midi_seq_new
|
| 1103 |
+
|
| 1104 |
+
    def check_quality(self, midi_seq, alignment_min=0.3, tonality_min=0.8, piano_max=0.7, notes_bandwidth_min=3,
                      notes_density_max=50, notes_density_min=2.5, total_notes_max=20000, total_notes_min=256,
                      note_window_size=16):
        """Heuristic quality filter for a token sequence.

        Returns ``(ok, reasons)`` where ``reasons`` lists every failed check:
        note-count bounds, drum-only content, beat alignment, tonality,
        pitch bandwidth of simultaneous notes, note density, and the share of
        piano channels.  Thresholds are tunable via the keyword arguments.
        """
        total_notes = 0
        channels = []
        time_hist = [0] * 16  # histogram over within-beat positions (time2)
        note_windows = {}     # window index -> pitches of non-drum notes
        notes_sametime = []   # (end_time, pitch) of currently sounding notes
        notes_density_list = []
        tonality_list = []
        notes_bandwidth_list = []
        instruments = {}      # channel -> last patch number (0 assumed if none)
        piano_channels = []
        abs_t1 = 0            # cumulative beat index (time1 tokens are deltas)
        last_t = 0
        for tsi, tokens in enumerate(midi_seq):
            event = self.tokens2event(tokens)
            if not event:
                continue
            t1, t2, tr = event[1:4]
            abs_t1 += t1
            t = abs_t1 * 16 + t2
            c = None
            if event[0] == "note":
                c, p, v, d = event[4:]
                total_notes += 1
                time_hist[t2] += 1
                if c != 9:  # ignore drum channel
                    if c not in instruments:
                        # No patch seen yet: default patch 0 counts as piano.
                        instruments[c] = 0
                        if c not in piano_channels:
                            piano_channels.append(c)
                    note_windows.setdefault(abs_t1 // note_window_size, []).append(p)
                if last_t != t:
                    # Drop notes that ended; measure spread of those still sounding.
                    notes_sametime = [(et, p_) for et, p_ in notes_sametime if et > last_t]
                    notes_sametime_p = [p_ for _, p_ in notes_sametime]
                    if len(notes_sametime) > 0:
                        notes_bandwidth_list.append(max(notes_sametime_p) - min(notes_sametime_p))
                notes_sametime.append((t + d - 1, p))
            elif event[0] == "patch_change":
                c, p = event[4:]
                instruments[c] = p
                if p == 0 and c not in piano_channels:
                    piano_channels.append(c)
            if c is not None and c not in channels:
                channels.append(c)
            last_t = t
        reasons = []
        if total_notes < total_notes_min:
            reasons.append("total_min")
        if total_notes > total_notes_max:
            reasons.append("total_max")
        if len(note_windows) == 0 and total_notes > 0:
            reasons.append("drum_only")
        if reasons:
            # Early exit also guards the divisions below against empty data.
            return False, reasons
        time_hist = sorted(time_hist, reverse=True)
        # Share of notes on the two most common within-beat positions.
        alignment = sum(time_hist[:2]) / total_notes
        for notes in note_windows.values():
            key_hist = [0] * 12
            for p in notes:
                key_hist[p % 12] += 1
            key_hist = sorted(key_hist, reverse=True)
            # Share of notes inside the 7 most common pitch classes per window.
            tonality_list.append(sum(key_hist[:7]) / len(notes))
            notes_density_list.append(len(notes) / note_window_size)
        tonality_list = sorted(tonality_list)
        tonality = sum(tonality_list) / len(tonality_list)
        notes_bandwidth = sum(notes_bandwidth_list) / len(notes_bandwidth_list) if notes_bandwidth_list else 0
        notes_density = max(notes_density_list) if notes_density_list else 0
        piano_ratio = len(piano_channels) / len(channels)
        if len(channels) <= 3:  # ignore piano threshold if it is a piano solo midi
            piano_max = 1
        if alignment < alignment_min:  # check whether the notes align to the bars (because some midi files are recorded)
            reasons.append("alignment")
        if tonality < tonality_min:  # check whether the music is tonal
            reasons.append("tonality")
        if notes_bandwidth < notes_bandwidth_min:  # check whether music is melodic line only
            reasons.append("bandwidth")
        if not notes_density_min < notes_density < notes_density_max:
            reasons.append("density")
        if piano_ratio > piano_max:  # check whether most instruments is piano (because some midi files don't have instruments assigned correctly)
            reasons.append("piano")
        return not reasons, reasons
|
| 1187 |
+
|
| 1188 |
+
|
| 1189 |
+
class MIDITokenizer:
|
| 1190 |
+
def __new__(cls, version="v2"):
|
| 1191 |
+
if version == "v1":
|
| 1192 |
+
return MIDITokenizerV1()
|
| 1193 |
+
elif version == "v2":
|
| 1194 |
+
return MIDITokenizerV2()
|
| 1195 |
+
else:
|
| 1196 |
+
raise ValueError(f"Unsupported version: {version}")
|