kryptodogg committed on
Commit ·
6b214f0
1
Parent(s): 5629fe6
Add model card and manifest for Synesthesia ONNX models
Browse files
- README.md +92 -0
- manifest.json +76 -0
README.md
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
language:
|
| 4 |
+
- en
|
| 5 |
+
tags:
|
| 6 |
+
- music-generation
|
| 7 |
+
- audio
|
| 8 |
+
- onnx
|
| 9 |
+
- directml
|
| 10 |
+
- synesthesia
|
| 11 |
+
- magenta
|
| 12 |
+
- performance-rnn
|
| 13 |
+
- musicvae
|
| 14 |
+
- ddsp
|
| 15 |
+
library_name: onnxruntime
|
| 16 |
+
pipeline_tag: audio-to-audio
|
| 17 |
+
---
|
| 18 |
+
|
| 19 |
+
# Synesthesia — AI Music Models
|
| 20 |
+
|
| 21 |
+
ONNX model weights for [Synesthesia](https://github.com/kryptodogg/synesthesia), a cyber-physical synthesizer and 3D/4D signal workstation.
|
| 22 |
+
|
| 23 |
+
## Models
|
| 24 |
+
|
| 25 |
+
| Model | Source | Format | Size | Task |
|
| 26 |
+
|-------|--------|--------|------|------|
|
| 27 |
+
| Performance RNN | Magenta | ONNX | ~20MB | Note-level MIDI generation |
|
| 28 |
+
| MusicVAE (Encoder) | Magenta | ONNX | ~80MB | Latent music encoding |
|
| 29 |
+
| MusicVAE (Decoder) | Magenta | ONNX | ~80MB | Latent music decoding |
|
| 30 |
+
| DDSP (Encoder) | Magenta | ONNX | ~30MB | Audio → harmonic params |
|
| 31 |
+
| DDSP (Decoder) | Magenta | ONNX | ~30MB | Harmonic params → audio |
|
| 32 |
+
| SpectroStream (Encoder) | Magenta RT | ONNX | TBD | Audio → spectral tokens |
|
| 33 |
+
| SpectroStream (Decoder) | Magenta RT | ONNX | TBD | Spectral tokens → audio |
|
| 34 |
+
| MusicCoCa (Text) | Google | ONNX | TBD | Text → music embedding |
|
| 35 |
+
| MusicCoCa (Audio) | Google | ONNX | TBD | Audio → music embedding |
|
| 36 |
+
| Gemma-3N | Google | ONNX | TBD | Vision → mood/energy JSON |
|
| 37 |
+
|
| 38 |
+
## Runtime
|
| 39 |
+
|
| 40 |
+
All models run locally via **ONNX Runtime with DirectML** (GPU acceleration on Windows).
|
| 41 |
+
|
| 42 |
+
```toml
|
| 43 |
+
# Cargo.toml
|
| 44 |
+
[dependencies]
|
| 45 |
+
ort = { version = "2", features = ["directml"] }
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
## Download
|
| 49 |
+
|
| 50 |
+
```python
|
| 51 |
+
from huggingface_hub import snapshot_download
|
| 52 |
+
snapshot_download("Ashiedu/Synesthesia", local_dir="./models")
|
| 53 |
+
```
|
| 54 |
+
|
| 55 |
+
```rust
|
| 56 |
+
// Rust (using hf-hub crate)
|
| 57 |
+
use hf_hub::api::sync::Api;
|
| 58 |
+
let api = Api::new().unwrap();
|
| 59 |
+
let repo = api.model("Ashiedu/Synesthesia".to_string());
|
| 60 |
+
let model_path = repo.get("perfrnn/model.onnx").unwrap();
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
## Structure
|
| 64 |
+
|
| 65 |
+
```
|
| 66 |
+
├── perfrnn/
|
| 67 |
+
│ └── model.onnx
|
| 68 |
+
├── musicvae/
|
| 69 |
+
│ ├── encoder.onnx
|
| 70 |
+
│ └── decoder.onnx
|
| 71 |
+
├── ddsp/
|
| 72 |
+
│ ├── encoder.onnx
|
| 73 |
+
│ └── decoder.onnx
|
| 74 |
+
├── spectrostream/
|
| 75 |
+
│ ├── encoder.onnx
|
| 76 |
+
│ └── decoder.onnx
|
| 77 |
+
├── musiccoca/
|
| 78 |
+
│ ├── text.onnx
|
| 79 |
+
│ └── audio.onnx
|
| 80 |
+
├── gemma3n/
|
| 81 |
+
│ └── model.onnx
|
| 82 |
+
└── manifest.json
|
| 83 |
+
```
|
| 84 |
+
|
| 85 |
+
## License
|
| 86 |
+
|
| 87 |
+
This repository is released under Apache 2.0; individual model weights may carry additional upstream licenses (see each model's directory).
|
| 88 |
+
|
| 89 |
+
## Links
|
| 90 |
+
|
| 91 |
+
- **GitHub:** [kryptodogg/synesthesia](https://github.com/kryptodogg/synesthesia)
|
| 92 |
+
- **Roadmap:** See the GitHub Issues tagged with the `lane:ml` label
|
manifest.json
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"version": "0.1.0",
|
| 3 |
+
"runtime": "onnxruntime-directml",
|
| 4 |
+
"models": {
|
| 5 |
+
"perfrnn": {
|
| 6 |
+
"path": "perfrnn/model.onnx",
|
| 7 |
+
"task": "midi-generation",
|
| 8 |
+
"source": "magenta/performance-rnn",
|
| 9 |
+
"status": "pending",
|
| 10 |
+
"sha256": null
|
| 11 |
+
},
|
| 12 |
+
"musicvae_encoder": {
|
| 13 |
+
"path": "musicvae/encoder.onnx",
|
| 14 |
+
"task": "music-encoding",
|
| 15 |
+
"source": "magenta/music-vae",
|
| 16 |
+
"status": "pending",
|
| 17 |
+
"sha256": null
|
| 18 |
+
},
|
| 19 |
+
"musicvae_decoder": {
|
| 20 |
+
"path": "musicvae/decoder.onnx",
|
| 21 |
+
"task": "music-decoding",
|
| 22 |
+
"source": "magenta/music-vae",
|
| 23 |
+
"status": "pending",
|
| 24 |
+
"sha256": null
|
| 25 |
+
},
|
| 26 |
+
"ddsp_encoder": {
|
| 27 |
+
"path": "ddsp/encoder.onnx",
|
| 28 |
+
"task": "audio-analysis",
|
| 29 |
+
"source": "magenta/ddsp",
|
| 30 |
+
"status": "pending",
|
| 31 |
+
"sha256": null
|
| 32 |
+
},
|
| 33 |
+
"ddsp_decoder": {
|
| 34 |
+
"path": "ddsp/decoder.onnx",
|
| 35 |
+
"task": "audio-synthesis",
|
| 36 |
+
"source": "magenta/ddsp",
|
| 37 |
+
"status": "pending",
|
| 38 |
+
"sha256": null
|
| 39 |
+
},
|
| 40 |
+
"spectrostream_encoder": {
|
| 41 |
+
"path": "spectrostream/encoder.onnx",
|
| 42 |
+
"task": "audio-tokenization",
|
| 43 |
+
"source": "magenta-rt/spectrostream",
|
| 44 |
+
"status": "pending",
|
| 45 |
+
"sha256": null
|
| 46 |
+
},
|
| 47 |
+
"spectrostream_decoder": {
|
| 48 |
+
"path": "spectrostream/decoder.onnx",
|
| 49 |
+
"task": "audio-detokenization",
|
| 50 |
+
"source": "magenta-rt/spectrostream",
|
| 51 |
+
"status": "pending",
|
| 52 |
+
"sha256": null
|
| 53 |
+
},
|
| 54 |
+
"musiccoca_text": {
|
| 55 |
+
"path": "musiccoca/text.onnx",
|
| 56 |
+
"task": "text-embedding",
|
| 57 |
+
"source": "google/musiccoca",
|
| 58 |
+
"status": "pending",
|
| 59 |
+
"sha256": null
|
| 60 |
+
},
|
| 61 |
+
"musiccoca_audio": {
|
| 62 |
+
"path": "musiccoca/audio.onnx",
|
| 63 |
+
"task": "audio-embedding",
|
| 64 |
+
"source": "google/musiccoca",
|
| 65 |
+
"status": "pending",
|
| 66 |
+
"sha256": null
|
| 67 |
+
},
|
| 68 |
+
"gemma3n": {
|
| 69 |
+
"path": "gemma3n/model.onnx",
|
| 70 |
+
"task": "vision-mood",
|
| 71 |
+
"source": "google/gemma-3n",
|
| 72 |
+
"status": "pending",
|
| 73 |
+
"sha256": null
|
| 74 |
+
}
|
| 75 |
+
}
|
| 76 |
+
}
|