Update README.md
Browse files
README.md
CHANGED
|
@@ -1,12 +1,50 @@
|
|
| 1 |
---
|
| 2 |
base_model: unsloth/csm-1b
|
| 3 |
tags:
|
| 4 |
-
- text-generation-inference
|
| 5 |
- transformers
|
| 6 |
-
- unsloth
|
| 7 |
- csm
|
| 8 |
license: apache-2.0
|
| 9 |
language:
|
| 10 |
- en
|
|
|
|
|
|
|
| 11 |
---
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
base_model: unsloth/csm-1b
|
| 3 |
tags:
|
|
|
|
| 4 |
- transformers
|
|
|
|
| 5 |
- csm
|
| 6 |
license: apache-2.0
|
| 7 |
language:
|
| 8 |
- en
|
| 9 |
+
datasets:
|
| 10 |
+
- beyoru/kafka-voice
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# Usage
|
| 14 |
+
```python
|
| 15 |
+
import torch
|
| 16 |
+
from transformers import CsmForConditionalGeneration, AutoProcessor
|
| 17 |
+
|
| 18 |
+
model_id = "beyoru/kafka-sesame"
|
| 19 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 20 |
+
|
| 21 |
+
# load the model and the processor
|
| 22 |
+
processor = AutoProcessor.from_pretrained(model_id)
|
| 23 |
+
model = CsmForConditionalGeneration.from_pretrained(model_id, device_map=device)
|
| 24 |
+
model.eval()
|
| 25 |
+
|
| 26 |
+
model.generation_config.max_length = 250 # big enough to avoid recompilation
|
| 27 |
+
model.generation_config.max_new_tokens = None # would take precedence over max_length
|
| 28 |
+
model.generation_config.cache_implementation = "static"
|
| 29 |
+
model.depth_decoder.generation_config.cache_implementation = "static"
|
| 30 |
+
|
| 31 |
+
# prepare the inputs
|
| 32 |
+
text = "[0]Hello from Sesame." # `[0]` for speaker id 0
|
| 33 |
+
inputs = processor(text, add_special_tokens=True).to(device)
|
| 34 |
+
|
| 35 |
+
# another equivalent way to prepare the inputs
|
| 36 |
+
conversation = [
|
| 37 |
+
{"role": "0", "content": [{"type": "text", "text": "Hello from Sesame."}]},
|
| 38 |
+
]
|
| 39 |
+
inputs = processor.apply_chat_template(
|
| 40 |
+
conversation,
|
| 41 |
+
tokenize=True,
|
| 42 |
+
return_dict=True,
|
| 43 |
+
).to(device)
|
| 44 |
+
|
| 45 |
+
# infer the model
|
| 46 |
+
with torch.inference_mode():
|
| 47 |
+
    audio = model.generate(**inputs, output_audio=True)
|
| 48 |
+
processor.save_audio(audio, "example_without_context.wav")
|
| 49 |
+
|
| 50 |
+
```
|