Upload folder using huggingface_hub

Browse files

Files changed (15) hide show

.gitattributes +4 -0
README.md +52 -0
config.json +49 -0
generation_config.json +4 -0
onnx/conditional_decoder_q4f16.onnx +3 -0
onnx/conditional_decoder_q4f16.onnx_data +3 -0
onnx/embed_tokens_q4f16.onnx +3 -0
onnx/embed_tokens_q4f16.onnx_data +3 -0
onnx/language_model_q4f16.onnx +3 -0
onnx/language_model_q4f16.onnx_data +3 -0
onnx/speech_encoder_q4f16.onnx +3 -0
onnx/speech_encoder_q4f16.onnx_data +3 -0
preprocessor_config.json +5 -0
tokenizer.json +0 -0
tokenizer_config.json +15 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+onnx/conditional_decoder_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text
+onnx/embed_tokens_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text
+onnx/language_model_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text
+onnx/speech_encoder_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,52 @@

+---
+license: mit
+base_model: ResembleAI/chatterbox-turbo-ONNX
+tags:
+  - text-to-speech
+  - tts
+  - onnx
+  - webgpu
+  - transformers.js
+---
+# Chatterbox Turbo - WebGPU Compatible
+This is a WebGPU-compatible version of [ResembleAI/chatterbox-turbo-ONNX](https://huggingface.co/ResembleAI/chatterbox-turbo-ONNX).
+## Changes from Original
+The original model contains `int64` Cast operations, initializers, and graph input/output tensors, which WebGPU cannot execute.
+This version converts those `int64` Cast operations, initializers, and input/output types to `int32`, enabling direct WebGPU inference.
+### Modifications Made:
+- **speech_encoder**: 26 Cast ops, 36 initializers, 1 I/O type converted
+- **conditional_decoder**: 3 Cast ops, 37 initializers, 1 I/O type converted
+- **language_model**: 2 I/O types converted
+- **embed_tokens**: 5 initializers, 1 I/O type converted
+## Usage with Transformers.js
+```javascript
+import { AutoModel, AutoProcessor } from '@huggingface/transformers';
+const model = await AutoModel.from_pretrained('spacekaren/chatterbox-turbo-webgpu', {
+  device: 'webgpu',
+  dtype: 'q4f16',
+});
+const processor = await AutoProcessor.from_pretrained('spacekaren/chatterbox-turbo-webgpu');
+```
+## Model Size
+- **Total**: ~562 MB (about 536 MiB) across the eight q4f16 ONNX graph and external-data files
+- Same architecture as the original; the only change is the `int64` → `int32` conversion
+## License
+MIT (same as original)
+## Credits
+- Original model: [ResembleAI/chatterbox-turbo-ONNX](https://huggingface.co/ResembleAI/chatterbox-turbo-ONNX)
+- Conversion script: [local.core/scripts/convert_int64_to_int32.py](https://github.com/anthropics/lama)

config.json ADDED Viewed

	@@ -0,0 +1,49 @@

+{
+  "model_type": "chatterbox",
+  "text_config": {
+    "activation_function": "gelu_new",
+    "architectures": [
+      "GPT2LMHeadModel"
+    ],
+    "attn_pdrop": 0.1,
+    "bos_token_id": 50256,
+    "embd_pdrop": 0.1,
+    "eos_token_id": 50256,
+    "initializer_range": 0.02,
+    "layer_norm_epsilon": 1e-05,
+    "model_type": "gpt2",
+    "n_ctx": 8196,
+    "n_embd": 1024,
+    "n_head": 16,
+    "n_inner": null,
+    "n_layer": 24,
+    "n_positions": 8196,
+    "n_special": 0,
+    "predict_special_tokens": true,
+    "reorder_and_upcast_attn": false,
+    "resid_pdrop": 0.1,
+    "scale_attn_by_inverse_layer_idx": false,
+    "scale_attn_weights": true,
+    "summary_activation": null,
+    "summary_first_dropout": 0.1,
+    "summary_proj_to_labels": true,
+    "summary_type": "cls_index",
+    "summary_use_proj": true,
+    "task_specific_params": {
+      "text-generation": {
+        "do_sample": true,
+        "max_length": 50
+      }
+    },
+    "transformers_version": "5.0.0.dev0",
+    "use_cache": true,
+    "vocab_size": 6563
+  },
+  "transformers.js_config": {
+    "use_external_data_format": true,
+    "kv_cache_dtype": {
+      "fp16": "float16",
+      "q4f16": "float16"
+    }
+  }
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "eos_token_id": 6562,
+  "repetition_penalty": 1.2
+}

onnx/conditional_decoder_q4f16.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:22572a6001425d5d067a78902472a26f5f585b39ec5ce4d9743f0bebcc6503e0
+size 2393626

onnx/conditional_decoder_q4f16.onnx_data ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3e53cc7cc05e72957797c620e1bf0c6d6b909df937de4756c6e534d50854ca3e
+size 162996136

onnx/embed_tokens_q4f16.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4135269ccfadaa5e084cec0ccbbd8d16a22121e0b3da771420df0644455f24ef
+size 2528

onnx/embed_tokens_q4f16.onnx_data ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37e96447b5f3ef46f46fb35cc599cfcb95b277e5528cc0c89bab78169da620ef
+size 33648688

onnx/language_model_q4f16.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37573106c17cee1fd1af841b7cf39dc6ecfe65c7331380a90cb97e3a76002972
+size 276803

onnx/language_model_q4f16.onnx_data ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f084f21f977e365e76e622d2958c309ff894537f7b7133ac7fc5ce8fc055d6f9
+size 183981430

onnx/speech_encoder_q4f16.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:52d6d01762a013c4e2e17c31717e638c45ab869b2be7d7aac1b4be54b2f9cbef
+size 1217363

onnx/speech_encoder_q4f16.onnx_data ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c017ab57887418e0609afdef52b0cbc17f43cb304e78af44300c0292cd2b2fa9
+size 177289008

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "feature_extractor_type": "ChatterboxFeatureExtractor",
+  "processor_class": "ChatterboxProcessor",
+  "sampling_rate": 24000
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "add_prefix_space": false,
+  "additional_special_tokens": null,
+  "backend": "tokenizers",
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "is_local": false,
+  "model_max_length": 1024,
+  "model_specific_special_tokens": {},
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}