kryptodogg committed on
Commit
6b214f0
·
1 Parent(s): 5629fe6

Add model card and manifest for Synesthesia ONNX models

Browse files
Files changed (2) hide show
  1. README.md +92 -0
  2. manifest.json +76 -0
README.md ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ tags:
6
+ - music-generation
7
+ - audio
8
+ - onnx
9
+ - directml
10
+ - synesthesia
11
+ - magenta
12
+ - performance-rnn
13
+ - musicvae
14
+ - ddsp
15
+ library_name: onnxruntime
16
+ pipeline_tag: audio-to-audio
17
+ ---
18
+
19
+ # Synesthesia — AI Music Models
20
+
21
+ ONNX model weights for [Synesthesia](https://github.com/kryptodogg/synesthesia), a cyber-physical synthesizer and 3D/4D signal workstation.
22
+
23
+ ## Models
24
+
25
+ | Model | Source | Format | Size | Task |
26
+ |-------|--------|--------|------|------|
27
+ | Performance RNN | Magenta | ONNX | ~20MB | Note-level MIDI generation |
28
+ | MusicVAE (Encoder) | Magenta | ONNX | ~80MB | Latent music encoding |
29
+ | MusicVAE (Decoder) | Magenta | ONNX | ~80MB | Latent music decoding |
30
+ | DDSP (Encoder) | Magenta | ONNX | ~30MB | Audio → harmonic params |
31
+ | DDSP (Decoder) | Magenta | ONNX | ~30MB | Harmonic params → audio |
32
+ | SpectroStream (Encoder) | Magenta RT | ONNX | TBD | Audio → spectral tokens |
33
+ | SpectroStream (Decoder) | Magenta RT | ONNX | TBD | Spectral tokens → audio |
34
+ | MusicCoCa (Text) | Google | ONNX | TBD | Text → music embedding |
35
+ | MusicCoCa (Audio) | Google | ONNX | TBD | Audio → music embedding |
36
+ | Gemma-3N | Google | ONNX | TBD | Vision → mood/energy JSON |
37
+
38
+ ## Runtime
39
+
40
+ All models run locally via **ONNX Runtime with DirectML** (GPU acceleration on Windows).
41
+
42
+ ```toml
43
+ # Cargo.toml
44
+ [dependencies]
45
+ ort = { version = "2", features = ["directml"] }
46
+ ```
47
+
48
+ ## Download
49
+
50
+ ```python
51
+ from huggingface_hub import snapshot_download
52
+ snapshot_download("Ashiedu/Synesthesia", local_dir="./models")
53
+ ```
54
+
55
+ ```rust
56
+ // Rust (using hf-hub crate)
57
+ use hf_hub::api::sync::Api;
58
+ let api = Api::new().unwrap();
59
+ let repo = api.model("Ashiedu/Synesthesia".to_string());
60
+ let model_path = repo.get("perfrnn/model.onnx").unwrap();
61
+ ```
62
+
63
+ ## Structure
64
+
65
+ ```
66
+ ├── perfrnn/
67
+ │ └── model.onnx
68
+ ├── musicvae/
69
+ │ ├── encoder.onnx
70
+ │ └── decoder.onnx
71
+ ├── ddsp/
72
+ │ ├── encoder.onnx
73
+ │ └── decoder.onnx
74
+ ├── spectrostream/
75
+ │ ├── encoder.onnx
76
+ │ └── decoder.onnx
77
+ ├── musiccoca/
78
+ │ ├── text.onnx
79
+ │ └── audio.onnx
80
+ ├── gemma3n/
81
+ │ └── model.onnx
82
+ └── manifest.json
83
+ ```
84
+
85
+ ## License
86
+
87
+ This repository is licensed under Apache 2.0. Individual model weights may carry additional upstream licenses; see each model's directory for details.
88
+
89
+ ## Links
90
+
91
+ - **GitHub:** [kryptodogg/synesthesia](https://github.com/kryptodogg/synesthesia)
92
+ - **Roadmap:** See GitHub Issues with the `lane:ml` label
manifest.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "runtime": "onnxruntime-directml",
4
+ "models": {
5
+ "perfrnn": {
6
+ "path": "perfrnn/model.onnx",
7
+ "task": "midi-generation",
8
+ "source": "magenta/performance-rnn",
9
+ "status": "pending",
10
+ "sha256": null
11
+ },
12
+ "musicvae_encoder": {
13
+ "path": "musicvae/encoder.onnx",
14
+ "task": "music-encoding",
15
+ "source": "magenta/music-vae",
16
+ "status": "pending",
17
+ "sha256": null
18
+ },
19
+ "musicvae_decoder": {
20
+ "path": "musicvae/decoder.onnx",
21
+ "task": "music-decoding",
22
+ "source": "magenta/music-vae",
23
+ "status": "pending",
24
+ "sha256": null
25
+ },
26
+ "ddsp_encoder": {
27
+ "path": "ddsp/encoder.onnx",
28
+ "task": "audio-analysis",
29
+ "source": "magenta/ddsp",
30
+ "status": "pending",
31
+ "sha256": null
32
+ },
33
+ "ddsp_decoder": {
34
+ "path": "ddsp/decoder.onnx",
35
+ "task": "audio-synthesis",
36
+ "source": "magenta/ddsp",
37
+ "status": "pending",
38
+ "sha256": null
39
+ },
40
+ "spectrostream_encoder": {
41
+ "path": "spectrostream/encoder.onnx",
42
+ "task": "audio-tokenization",
43
+ "source": "magenta-rt/spectrostream",
44
+ "status": "pending",
45
+ "sha256": null
46
+ },
47
+ "spectrostream_decoder": {
48
+ "path": "spectrostream/decoder.onnx",
49
+ "task": "audio-detokenization",
50
+ "source": "magenta-rt/spectrostream",
51
+ "status": "pending",
52
+ "sha256": null
53
+ },
54
+ "musiccoca_text": {
55
+ "path": "musiccoca/text.onnx",
56
+ "task": "text-embedding",
57
+ "source": "google/musiccoca",
58
+ "status": "pending",
59
+ "sha256": null
60
+ },
61
+ "musiccoca_audio": {
62
+ "path": "musiccoca/audio.onnx",
63
+ "task": "audio-embedding",
64
+ "source": "google/musiccoca",
65
+ "status": "pending",
66
+ "sha256": null
67
+ },
68
+ "gemma3n": {
69
+ "path": "gemma3n/model.onnx",
70
+ "task": "vision-mood",
71
+ "source": "google/gemma-3n",
72
+ "status": "pending",
73
+ "sha256": null
74
+ }
75
+ }
76
+ }