Text-to-Speech
Transformers
Safetensors
Arabic
moss_tts_local
feature-extraction
voice-cloning
custom_code
sglang-omni
moss-tts
moss-tts-local
lora
saudi-arabic
Instructions to use Rabe3/Moss-Saudi-2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Rabe3/Moss-Saudi-2 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-to-speech", model="Rabe3/Moss-Saudi-2", trust_remote_code=True)

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("Rabe3/Moss-Saudi-2", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "architectures": [ | |
| "MossTTSLocalModel" | |
| ], | |
| "attn_implementation": "flash_attention_2", | |
| "audio_assistant_gen_slot_token_id": 151656, | |
| "audio_assistant_slot_token_id": 151656, | |
| "audio_codebook_sizes": [ | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024 | |
| ], | |
| "audio_end_token_id": 151670, | |
| "audio_pad_code": 1024, | |
| "audio_pad_token_id": 1024, | |
| "audio_start_token_id": 151669, | |
| "audio_tokenizer_name_or_path": "OpenMOSS-Team/MOSS-Audio-Tokenizer-v2", | |
| "audio_user_slot_token_id": 151654, | |
| "audio_vocab_size": 1024, | |
| "auto_map": { | |
| "AutoConfig": "configuration_moss_tts.MossTTSLocalConfig", | |
| "AutoModel": "modeling_moss_tts.MossTTSLocalModel", | |
| "AutoProcessor": "processing_moss_tts.MossTTSLocalProcessor" | |
| }, | |
| "dtype": "bfloat16", | |
| "gpt2_config": { | |
| "activation_function": "silu", | |
| "add_cross_attention": false, | |
| "attn_pdrop": 0.0, | |
| "bos_token_id": null, | |
| "cross_attention_hidden_size": null, | |
| "decoder_start_token_id": null, | |
| "embd_pdrop": 0.0, | |
| "eos_token_id": 151645, | |
| "finetuning_task": null, | |
| "initializer_range": 0.02, | |
| "is_decoder": false, | |
| "layer_norm_epsilon": 1e-06, | |
| "model_type": "gpt2", | |
| "n_ctx": 10240, | |
| "n_embd": 2560, | |
| "n_head": 32, | |
| "n_inner": 9728, | |
| "n_layer": 1, | |
| "n_positions": 10240, | |
| "pad_token_id": null, | |
| "position_embedding_type": "rope", | |
| "prefix": null, | |
| "pruned_heads": {}, | |
| "reorder_and_upcast_attn": false, | |
| "resid_pdrop": 0.0, | |
| "rope_base": 1000000.0, | |
| "scale_attn_by_inverse_layer_idx": false, | |
| "scale_attn_weights": true, | |
| "sep_token_id": null, | |
| "summary_activation": null, | |
| "summary_first_dropout": 0.1, | |
| "summary_proj_to_labels": true, | |
| "summary_type": "cls_index", | |
| "summary_use_proj": true, | |
| "task_specific_params": null, | |
| "tf_legacy_loss": false, | |
| "tie_encoder_decoder": false, | |
| "tie_word_embeddings": true, | |
| "tokenizer_class": null, | |
| "torchscript": false, | |
| "use_bfloat16": false, | |
| "use_cache": true, | |
| "vocab_size": 151936 | |
| }, | |
| "hidden_size": 2560, | |
| "im_end_token_id": 151645, | |
| "im_start_token_id": 151644, | |
| "initializer_range": 0.02, | |
| "language_config": { | |
| "add_cross_attention": false, | |
| "architectures": [ | |
| "Qwen3ForCausalLM" | |
| ], | |
| "attention_bias": false, | |
| "attention_dropout": 0.0, | |
| "bos_token_id": 151643, | |
| "cross_attention_hidden_size": null, | |
| "decoder_start_token_id": null, | |
| "dtype": "bfloat16", | |
| "eos_token_id": 151643, | |
| "finetuning_task": null, | |
| "gradient_checkpointing_use_reentrant": false, | |
| "head_dim": 128, | |
| "hidden_act": "silu", | |
| "hidden_size": 2560, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 9728, | |
| "is_decoder": false, | |
| "layer_types": [ | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention" | |
| ], | |
| "max_position_embeddings": 32768, | |
| "max_window_layers": 36, | |
| "model_type": "qwen3", | |
| "num_attention_heads": 32, | |
| "num_hidden_layers": 36, | |
| "num_key_value_heads": 8, | |
| "pad_token_id": 151643, | |
| "prefix": null, | |
| "pruned_heads": {}, | |
| "rms_norm_eps": 1e-06, | |
| "rope_parameters": { | |
| "rope_theta": 1000000, | |
| "rope_type": "default" | |
| }, | |
| "sep_token_id": null, | |
| "sliding_window": null, | |
| "task_specific_params": null, | |
| "tf_legacy_loss": false, | |
| "tie_encoder_decoder": false, | |
| "tie_word_embeddings": true, | |
| "tokenizer_class": null, | |
| "torchscript": false, | |
| "use_bfloat16": false, | |
| "use_cache": false, | |
| "use_sliding_window": false, | |
| "vocab_size": 151936 | |
| }, | |
| "local_hidden_size": 2560, | |
| "local_text_head_mode": "binary", | |
| "local_transformer_attn_implementation": "flash_attention_2", | |
| "local_transformer_layers": 1, | |
| "model_type": "moss_tts_local", | |
| "n_vq": 12, | |
| "pad_token_id": 151643, | |
| "processor_class": "MossTTSLocalProcessor", | |
| "qwen3_config": { | |
| "add_cross_attention": false, | |
| "architectures": [ | |
| "Qwen3ForCausalLM" | |
| ], | |
| "attention_bias": false, | |
| "attention_dropout": 0.0, | |
| "bos_token_id": 151643, | |
| "cross_attention_hidden_size": null, | |
| "decoder_start_token_id": null, | |
| "dtype": "bfloat16", | |
| "eos_token_id": 151643, | |
| "finetuning_task": null, | |
| "gradient_checkpointing_use_reentrant": false, | |
| "head_dim": 128, | |
| "hidden_act": "silu", | |
| "hidden_size": 2560, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 9728, | |
| "is_decoder": false, | |
| "layer_types": [ | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention", | |
| "full_attention" | |
| ], | |
| "max_position_embeddings": 32768, | |
| "max_window_layers": 36, | |
| "model_type": "qwen3", | |
| "num_attention_heads": 32, | |
| "num_hidden_layers": 36, | |
| "num_key_value_heads": 8, | |
| "pad_token_id": 151643, | |
| "prefix": null, | |
| "pruned_heads": {}, | |
| "rms_norm_eps": 1e-06, | |
| "rope_parameters": { | |
| "rope_theta": 1000000, | |
| "rope_type": "default" | |
| }, | |
| "sep_token_id": null, | |
| "sliding_window": null, | |
| "task_specific_params": null, | |
| "tf_legacy_loss": false, | |
| "tie_encoder_decoder": false, | |
| "tie_word_embeddings": true, | |
| "tokenizer_class": null, | |
| "torchscript": false, | |
| "use_bfloat16": false, | |
| "use_cache": false, | |
| "use_sliding_window": false, | |
| "vocab_size": 151936 | |
| }, | |
| "sampling_rate": 48000, | |
| "tie_word_embeddings": true, | |
| "transformers_version": "5.0.0", | |
| "use_static_local_kv_cache": true, | |
| "vocab_size": 151936 | |
| } | |