niobures committed (verified)
Commit da66937 · 1 parent: f316adb

Orpheus (tr)
.gitattributes CHANGED
@@ -192,3 +192,5 @@ tr/Orpheus-KhanAcademy-TR-Math/tokenizer.json filter=lfs diff=lfs merge=lfs -tex
 tr/Orpheus-KhanAcademy-TR/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 tr/Orpheus-TTS-Turkish-PT-2000-GGUF/orpheus-tts-turkish-pt-2000.gguf filter=lfs diff=lfs merge=lfs -text
 tr/Orpheus-TTS-Turkish-PT-2000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+tr/Orpheus-TTS-Turkish-PT-5000-Q5_K_M-GGUF/orpheus-tts-turkish-pt-5000-q5_k_m.gguf filter=lfs diff=lfs merge=lfs -text
+tr/Orpheus-TTS-Turkish-PT-5000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
tr/Orpheus-TTS-Turkish-PT-5000-Q5_K_M-GGUF/.gitattributes ADDED
@@ -0,0 +1,36 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
orpheus-tts-turkish-pt-5000-q5_k_m.gguf filter=lfs diff=lfs merge=lfs -text
tr/Orpheus-TTS-Turkish-PT-5000-Q5_K_M-GGUF/README.md ADDED
@@ -0,0 +1,63 @@
---
base_model:
- Karayakar/Orpheus-TTS-Turkish-PT-5000
- canopylabs/orpheus-3b-0.1-pretrained
- canopylabs/orpheus-3b-0.1-ft
language:
- tr
license: mit
pipeline_tag: text-to-speech
tags:
- karayakar
- Turkish
- Turkce
- TTS
- Orpheus
- Text-to-Speech
- llama-cpp
- gguf-my-repo
---

# Karayakar/Orpheus-TTS-Turkish-PT-5000-Q5_K_M-GGUF
This model was converted to GGUF format from [`Karayakar/Orpheus-TTS-Turkish-PT-5000`](https://huggingface.co/Karayakar/Orpheus-TTS-Turkish-PT-5000) using llama.cpp via ggml.ai's [GGUF-my-repo](https://huggingface.co/spaces/ggml-org/gguf-my-repo) space.
Refer to the [original model card](https://huggingface.co/Karayakar/Orpheus-TTS-Turkish-PT-5000) for more details on the model.

## Use with llama.cpp
Install llama.cpp through brew (works on Mac and Linux):

```bash
brew install llama.cpp
```
Invoke the llama.cpp server or the CLI.

### CLI:
```bash
llama-cli --hf-repo Karayakar/Orpheus-TTS-Turkish-PT-5000-Q5_K_M-GGUF --hf-file orpheus-tts-turkish-pt-5000-q5_k_m.gguf -p "The meaning to life and the universe is"
```

### Server:
```bash
llama-server --hf-repo Karayakar/Orpheus-TTS-Turkish-PT-5000-Q5_K_M-GGUF --hf-file orpheus-tts-turkish-pt-5000-q5_k_m.gguf -c 2048
```

Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the llama.cpp repo.

Step 1: Clone llama.cpp from GitHub.
```
git clone https://github.com/ggerganov/llama.cpp
```

Step 2: Move into the llama.cpp folder and build it with the `LLAMA_CURL=1` flag, along with any other hardware-specific flags (for example, `LLAMA_CUDA=1` for Nvidia GPUs on Linux).
```
cd llama.cpp && LLAMA_CURL=1 make
```

Step 3: Run inference through the main binary.
```
./llama-cli --hf-repo Karayakar/Orpheus-TTS-Turkish-PT-5000-Q5_K_M-GGUF --hf-file orpheus-tts-turkish-pt-5000-q5_k_m.gguf -p "The meaning to life and the universe is"
```
or
```
./llama-server --hf-repo Karayakar/Orpheus-TTS-Turkish-PT-5000-Q5_K_M-GGUF --hf-file orpheus-tts-turkish-pt-5000-q5_k_m.gguf -c 2048
```
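Once `llama-server` is running (it defaults to port 8080), you can sanity-check it from Python. A minimal sketch, assuming the default port and the server's `/completion` endpoint; note that for this TTS checkpoint the "completion" is a stream of audio tokens that still needs SNAC decoding (see the original model card), not readable text:

```python
import json
import urllib.request

# Smoke test against a locally running llama-server (default port assumed).
payload = json.dumps({"prompt": "Merhaba", "n_predict": 32}).encode("utf-8")
req = urllib.request.Request(
    "http://127.0.0.1:8080/completion",
    data=payload,
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    result = json.loads(resp.read())

# The response JSON carries the generated continuation in "content".
print(result["content"])
```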
tr/Orpheus-TTS-Turkish-PT-5000-Q5_K_M-GGUF/orpheus-tts-turkish-pt-5000-q5_k_m.gguf ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b71140f8f181ae53be00fed4a3161b77576ee1d8116f9d4cf889f65220cc6c6c
size 2395344704
tr/Orpheus-TTS-Turkish-PT-5000-Q5_K_M-GGUF/source.txt ADDED
@@ -0,0 +1 @@
https://huggingface.co/Karayakar/Orpheus-TTS-Turkish-PT-5000-Q5_K_M-GGUF
tr/Orpheus-TTS-Turkish-PT-5000/.gitattributes ADDED
@@ -0,0 +1,36 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
tokenizer.json filter=lfs diff=lfs merge=lfs -text
tr/Orpheus-TTS-Turkish-PT-5000/README.md ADDED
@@ -0,0 +1,386 @@
---
license: mit
language:
- tr
base_model:
- canopylabs/orpheus-3b-0.1-pretrained
pipeline_tag: text-to-speech
tags:
- karayakar
- Turkish
- Turkce
- TTS
- Orpheus
- Text-to-Speech
---

# Orpheus TTS Turkish Model

The Orpheus TTS Turkish Pretrain (step 5000) model was trained from "canopylabs/orpheus-3b-0.1-pretrained".

Over 60 hours of synthetic voice data were used for the initial training, and an additional 160 hours of synthetic voice data were mixed in during training. 400 emoji (real-voice) recordings were used for emoji support.

You can interact with the model through a Flask API (see below).


# Emotion Support

The model supports the following emotion tags in the text:
```
<laugh> – gülme (laughing)
<chuckle> – kıkırdama (chuckling)
<sigh> – iç çekme (sighing)
<cough> – öksürme (coughing)
<sniffle> – burnunu çekme (sniffling)
<groan> – inleme (groaning)
<yawn> – esneme (yawning)
<gasp> – nefesi kesilme / şaşkınlıkla soluma (gasping / catching one's breath in surprise)
```
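For example, a request text such as `Merhaba <laugh> bugün çok mutluyum.` should produce a laugh at that point in the synthesized speech.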


# API

Flask is configured to run on port 5400 (you can change this in the script below).

```
POST http://127.0.0.1:5400/generate HTTP/1.1
User-Agent: Fiddler
content-type: application/json
Host: 127.0.0.1:5400
Content-Length: 110

{
    "text": "Merhaba, orpheusTTS Turkce deneme"
}

```
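Equivalently, you can call the endpoint from Python. A minimal sketch (it assumes the `requests` package is installed; note that the server writes the synthesized `.wav` files to disk next to the reference audio and only returns a status JSON):

```python
import requests

resp = requests.post(
    "http://127.0.0.1:5400/generate",
    json={"text": "Merhaba, orpheusTTS Turkce deneme"},
    timeout=600,  # generation is synchronous and can take a while
)
print(resp.json())  # {'received': {...}, 'status': 'success'}
```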
68
+
69
+ # Create Environment
70
+
71
+ windows:
72
+ ```
73
+ #create virtual environment
74
+ python -m venv venv
75
+ venv\Scripts\activate
76
+
77
+ python inference.py
78
+
79
+
80
+
81
+ ```
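On Linux/macOS, the activation step is `source venv/bin/activate` instead of `venv\Scripts\activate`.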

# Training

For training with your own data, see `train.py` and `config.yaml`.

# inference.py

Install the necessary libraries first:

```
# install the matching torch build from https://pytorch.org/
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
# pathlib is part of the Python standard library and jsonify ships with
# Flask, so neither needs to be installed separately
pip install snac transformers huggingface_hub librosa numpy scipy Flask
```

```
from pathlib import Path
from datetime import datetime

import torch
import librosa
import numpy as np
from scipy.io.wavfile import write
from flask import Flask, jsonify, request
from snac import SNAC
from transformers import AutoModelForCausalLM, AutoTokenizer

# Local path to the downloaded checkpoint (path elided).
modelLocalPath = "D:\\...\\Karayakar\\Orpheus-TTS-Turkish-PT-5000"


def load_orpheus_tokenizer(model_id: str = modelLocalPath) -> AutoTokenizer:
    # Tokenizers run on the CPU; no device placement is needed here.
    tokenizer = AutoTokenizer.from_pretrained(model_id, local_files_only=True)
    return tokenizer


def load_snac():
    snac_model = SNAC.from_pretrained("hubertsiuzdak/snac_24khz")
    return snac_model


def load_orpheus_auto_model(model_id: str = modelLocalPath):
    # device_map="cuda" already places the weights on the GPU.
    model = AutoModelForCausalLM.from_pretrained(
        model_id, torch_dtype=torch.bfloat16, local_files_only=True, device_map="cuda"
    )
    return model


def tokenize_audio(audio_file_path, snac_model):
    # Encode a reference wav into the flat audio-token layout the model uses.
    # (Currently unused by the Flask path; kept for voice-cloning experiments.)
    audio_array, sample_rate = librosa.load(audio_file_path, sr=24000)
    waveform = torch.from_numpy(audio_array).unsqueeze(0)
    waveform = waveform.to(dtype=torch.float32)
    waveform = waveform.unsqueeze(0)  # (batch, channels, samples)

    with torch.inference_mode():
        codes = snac_model.encode(waveform)

    # SNAC yields three codebook layers at increasing temporal resolution.
    # Each frame is flattened to 7 tokens, and each of the 7 slots gets its
    # own 4096-wide id range above the 128266 audio-token offset.
    all_codes = []
    for i in range(codes[0].shape[1]):
        all_codes.append(codes[0][0][i].item() + 128266)
        all_codes.append(codes[1][0][2 * i].item() + 128266 + 4096)
        all_codes.append(codes[2][0][4 * i].item() + 128266 + (2 * 4096))
        all_codes.append(codes[2][0][(4 * i) + 1].item() + 128266 + (3 * 4096))
        all_codes.append(codes[1][0][(2 * i) + 1].item() + 128266 + (4 * 4096))
        all_codes.append(codes[2][0][(4 * i) + 2].item() + 128266 + (5 * 4096))
        all_codes.append(codes[2][0][(4 * i) + 3].item() + 128266 + (6 * 4096))

    return all_codes


def prepare_inputs(
    fpath_audio_ref,
    audio_ref_transcript: str,
    text_prompts: list[str],
    snac_model,
    tokenizer,
):
    # The reference audio, transcript and snac_model are currently unused:
    # the voice-cloning prefix is commented out below.
    start_tokens = torch.tensor([[128259]], dtype=torch.int64)  # start of human turn
    end_tokens = torch.tensor([[128009, 128260, 128261, 128257]], dtype=torch.int64)  # end of text/human, start of AI, start of speech
    final_tokens = torch.tensor([[128258, 128262]], dtype=torch.int64)  # end of speech / end of AI (unused)

    all_modified_input_ids = []
    for prompt in text_prompts:
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids
        # second_input_ids = torch.cat([zeroprompt_input_ids, start_tokens, input_ids, end_tokens], dim=1)
        second_input_ids = torch.cat([start_tokens, input_ids, end_tokens], dim=1)
        all_modified_input_ids.append(second_input_ids)

    # Left-pad every prompt to the same length (pad id 128263) and build the masks.
    all_padded_tensors = []
    all_attention_masks = []
    max_length = max([modified_input_ids.shape[1] for modified_input_ids in all_modified_input_ids])

    for modified_input_ids in all_modified_input_ids:
        padding = max_length - modified_input_ids.shape[1]
        padded_tensor = torch.cat([torch.full((1, padding), 128263, dtype=torch.int64), modified_input_ids], dim=1)
        attention_mask = torch.cat([torch.zeros((1, padding), dtype=torch.int64),
                                    torch.ones((1, modified_input_ids.shape[1]), dtype=torch.int64)], dim=1)
        all_padded_tensors.append(padded_tensor)
        all_attention_masks.append(attention_mask)

    all_padded_tensors = torch.cat(all_padded_tensors, dim=0)
    all_attention_masks = torch.cat(all_attention_masks, dim=0)

    input_ids = all_padded_tensors.to("cuda")
    attention_mask = all_attention_masks.to("cuda")
    return input_ids, attention_mask


def inference(model, input_ids, attention_mask):
    with torch.no_grad():
        generated_ids = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=2048,
            do_sample=True,
            temperature=0.2,
            top_k=10,
            top_p=0.9,
            repetition_penalty=1.9,
            num_return_sequences=1,
            eos_token_id=128258,  # end-of-speech token
        )

    # Append the end-of-AI marker (128262) so the turn is terminated (EOAI).
    generated_ids = torch.cat([generated_ids, torch.tensor([[128262]]).to("cuda")], dim=1)

    return generated_ids


def convert_tokens_to_speech(generated_ids, snac_model):
    token_to_find = 128257    # start-of-speech marker
    token_to_remove = 128258  # end-of-speech marker
    token_indices = (generated_ids == token_to_find).nonzero(as_tuple=True)

    # Keep only the tokens after the last start-of-speech marker.
    if len(token_indices[1]) > 0:
        last_occurrence_idx = token_indices[1][-1].item()
        cropped_tensor = generated_ids[:, last_occurrence_idx + 1:]
    else:
        cropped_tensor = generated_ids

    processed_rows = []
    for row in cropped_tensor:
        masked_row = row[row != token_to_remove]
        processed_rows.append(masked_row)

    code_lists = []
    for row in processed_rows:
        # Trim to a multiple of 7 (one SNAC frame = 7 tokens) and strip the audio-token offset.
        row_length = row.size(0)
        new_length = (row_length // 7) * 7
        trimmed_row = row[:new_length]
        trimmed_row = [t - 128266 for t in trimmed_row]
        code_lists.append(trimmed_row)

    my_samples = []
    for code_list in code_lists:
        samples = redistribute_codes(code_list, snac_model)
        my_samples.append(samples)

    return my_samples


def redistribute_codes(code_list, snac_model):
    # Inverse of tokenize_audio's flattening: route each of the 7 slots per
    # frame back to its SNAC layer and remove the per-slot 4096 offsets.
    layer_1 = []
    layer_2 = []
    layer_3 = []

    for i in range((len(code_list) + 1) // 7):
        layer_1.append(code_list[7 * i])
        layer_2.append(code_list[7 * i + 1] - 4096)
        layer_3.append(code_list[7 * i + 2] - (2 * 4096))
        layer_3.append(code_list[7 * i + 3] - (3 * 4096))
        layer_2.append(code_list[7 * i + 4] - (4 * 4096))
        layer_3.append(code_list[7 * i + 5] - (5 * 4096))
        layer_3.append(code_list[7 * i + 6] - (6 * 4096))

    codes = [
        torch.tensor(layer_1).unsqueeze(0),
        torch.tensor(layer_2).unsqueeze(0),
        torch.tensor(layer_3).unsqueeze(0)
    ]
    audio_hat = snac_model.decode(codes)
    return audio_hat


def to_wav_from(samples: list) -> list[np.ndarray]:
    """Converts a list of PyTorch tensors (or NumPy arrays) to NumPy arrays."""
    processed_samples = []

    for s in samples:
        if isinstance(s, torch.Tensor):
            s = s.detach().squeeze().to('cpu').numpy()
        else:
            s = np.squeeze(s)

        processed_samples.append(s)

    return processed_samples


def zero_shot_tts(fpath_audio_ref, audio_ref_transcript, texts: list[str], model, snac_model, tokenizer):
    print(f"fpath_audio_ref {fpath_audio_ref}")
    print(f"audio_ref_transcript {audio_ref_transcript}")
    print(f"texts {texts}")
    inp_ids, attn_mask = prepare_inputs(fpath_audio_ref, audio_ref_transcript, texts, snac_model, tokenizer)
    print(f"input_id_len:{len(inp_ids)}")
    gen_ids = inference(model, inp_ids, attn_mask)
    samples = convert_tokens_to_speech(gen_ids, snac_model)
    wav_forms = to_wav_from(samples)
    return wav_forms


def save_wav(samples: list[np.ndarray], sample_rate: int, filenames: list[str]):
    """Saves a list of audio samples as .wav files.

    Args:
        samples (list): List of audio tensors or arrays.
        sample_rate (int): Sample rate in Hz.
        filenames (list[str]): List of filenames to save.
    """
    wav_data = to_wav_from(samples)

    for data, filename in zip(wav_data, filenames):
        write(filename, sample_rate, data.astype(np.float32))
        print(f"saved to {filename}")


def get_ref_audio_and_transcript(root_folder: str):
    root_path = Path(root_folder)
    print(f"root_path {root_path}")
    out = []
    for speaker_folder in root_path.iterdir():
        if speaker_folder.is_dir():  # ensure it's a directory
            wav_files = list(speaker_folder.glob("*.wav"))
            txt_files = list(speaker_folder.glob("*.txt"))

            if wav_files and txt_files:
                ref_audio = wav_files[0]  # assume one .wav file per folder
                transcript = txt_files[0].read_text(encoding="utf-8").strip()
                out.append((ref_audio, transcript))

    return out


app = Flask(__name__)


@app.route('/generate', methods=['POST'])
def generate():
    content = request.json
    # Generation runs synchronously; the wav files are written to disk by
    # process_data, and the response only confirms receipt.
    process_data(content)
    response_body = {
        'received': content,
        'status': 'success'
    }
    response = jsonify(response_body)
    response.headers['Content-Type'] = 'application/json; charset=utf-8'
    return response


def process_data(jsonText):
    texts = [f"{jsonText['text']}"]
    for fpath_audio, audio_transcript in prompt_pairs:
        print(f"zero shot: {fpath_audio} {audio_transcript}")
        wav_forms = zero_shot_tts(fpath_audio, audio_transcript, texts, model, snac_model, tokenizer)

        # Write the outputs next to the reference audio, under ./inference.
        out_dir = Path(fpath_audio).parent / "inference"
        out_dir.mkdir(parents=True, exist_ok=True)
        timestamp_str = str(int(datetime.now().timestamp()))
        file_names = [f"{out_dir.as_posix()}/{Path(fpath_audio).stem}_{i}_{timestamp_str}.wav" for i, t in enumerate(texts)]
        save_wav(wav_forms, 24000, file_names)


if __name__ == "__main__":
    tokenizer = load_orpheus_tokenizer()
    model = load_orpheus_auto_model()
    snac_model = load_snac()
    prompt_pairs = get_ref_audio_and_transcript("D:\\AI_APPS\\Orpheus-TTS\\data")
    print("snac_model loaded")
    app.run(debug=True, port=5400)
```
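As a quick sanity check on the token layout used above: each SNAC frame is flattened to 7 tokens, and each of the 7 slots occupies its own 4096-wide id range above the 128266 audio-token offset. A self-contained sketch with dummy code values (it mirrors the offset arithmetic of `tokenize_audio` and `redistribute_codes`, not the layer routing):

```python
AUDIO_TOKEN_OFFSET = 128266  # first audio-token id used by the model
CODEBOOK_SIZE = 4096         # codes per SNAC codebook

frame = [7, 11, 13, 17, 19, 23, 29]  # arbitrary raw codes for one frame

# Flatten: slot k of the frame is shifted into its own id range.
tokens = [c + AUDIO_TOKEN_OFFSET + k * CODEBOOK_SIZE for k, c in enumerate(frame)]

# Invert: subtract the offset and the per-slot shift to recover the raw codes.
recovered = [(t - AUDIO_TOKEN_OFFSET) - k * CODEBOOK_SIZE for k, t in enumerate(tokens)]
assert recovered == frame
```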
tr/Orpheus-TTS-Turkish-PT-5000/config.json ADDED
@@ -0,0 +1,36 @@
{
  "_name_or_path": "G:\\OPENAI\\Orpheus-TTS\\pretrain\\checkpoints_Orpheus_TTS_KA_60HRS_24000Khz\\checkpoint-500",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 24,
  "num_hidden_layers": 28,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 32.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "tie_word_embeddings": true,
  "torch_dtype": "float32",
  "transformers_version": "4.49.0",
  "use_cache": true,
  "vocab_size": 156940
}
tr/Orpheus-TTS-Turkish-PT-5000/generation_config.json ADDED
@@ -0,0 +1,9 @@
{
  "_from_model_config": true,
  "bos_token_id": 128000,
  "do_sample": true,
  "eos_token_id": 128001,
  "temperature": 0.6,
  "top_p": 0.9,
  "transformers_version": "4.49.0"
}
tr/Orpheus-TTS-Turkish-PT-5000/model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0f1d6ee9afa77c637e452a1bd3d38d9b714a9abec9b07960be9c4b975daff4ab
size 4948557560
tr/Orpheus-TTS-Turkish-PT-5000/model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:db049d00b53eda9b319fa776954a3cafb00b15d95d7e9a8d72dcb61c5f01aff4
size 4932808960
tr/Orpheus-TTS-Turkish-PT-5000/model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:612a8ac5891b30f3947dc32d8f2c5af91f5326e0249c5ec7661638631597aa8a
size 3322130968
tr/Orpheus-TTS-Turkish-PT-5000/model.safetensors.index.json ADDED
@@ -0,0 +1,261 @@
{
  "metadata": {
    "total_size": 13203468288
  },
  "weight_map": {
    "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
    "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.10.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.19.input_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.19.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.20.input_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.20.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.20.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.21.input_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.21.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.21.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.7.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.7.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.7.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.8.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.8.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.8.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.9.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.9.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.9.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.norm.weight": "model-00003-of-00003.safetensors"
  }
}
tr/Orpheus-TTS-Turkish-PT-5000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2f4ba6dc08766c35a3d04aa5ecd0667195d53b3d30d6463846a30170a376d129
size 26407159030
tr/Orpheus-TTS-Turkish-PT-5000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ea005b38dffb3ddb68fe35f651e4ee5e119bf913293dda58efdd7f56f4335656
size 14244
tr/Orpheus-TTS-Turkish-PT-5000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:62c726a35741ada52fd0a38ce49a122a7744613053eb44b9b58c2b237b7b1d9a
size 1064
tr/Orpheus-TTS-Turkish-PT-5000/source.txt ADDED
@@ -0,0 +1 @@
https://huggingface.co/Karayakar/Orpheus-TTS-Turkish-PT-5000
tr/Orpheus-TTS-Turkish-PT-5000/special_tokens_map.json ADDED
@@ -0,0 +1,26 @@
{
  "additional_special_tokens": [
    "<|audio|>"
  ],
  "bos_token": {
    "content": "<|begin_of_text|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "<|eot_id|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<|finetune_right_pad_id|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tr/Orpheus-TTS-Turkish-PT-5000/tokenizer.json ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fc3fecb199b4170636dbfab986d25f628157268d37b861f9cadaca60b1353bce
size 22849547
tr/Orpheus-TTS-Turkish-PT-5000/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
tr/Orpheus-TTS-Turkish-PT-5000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
tr/Orpheus-TTS-Turkish-PT-5000/training_args.bin ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d2e17615c0117656ff03a59eab05516ad636e4ce6df4182952bb10a0fe6b8314
size 5368