File size: 2,382 Bytes
1e9b1a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8c30054
 
 
2e4e32c
8c30054
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39e3a57
 
 
 
8c30054
 
 
 
39e3a57
 
 
 
 
 
 
 
8c30054
 
466043a
db9167e
1e9b1a1
8c30054
 
 
 
 
 
 
 
483395b
 
466043a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import instld
from instld.errors import InstallingPackageError


# Probe install: ask instld to install f5-tts without its dependencies while
# capturing the installer output instead of echoing it.
# NOTE(review): instld installs are presumably only active inside the `with`
# body — confirm against the instld docs before relying on this flag later.
is_f5_installed = False
try:
    with instld('f5-tts', no_deps=True, catch_output=True):
        is_f5_installed = True
        print("f5-tts installed successfully")
except InstallingPackageError as install_error:
    # Show the captured installer output so a failed install can be debugged.
    for stream_name in ('stdout', 'stderr'):
        print(getattr(install_error, stream_name))

import pip  # retained file-level import; installs below go through `python -m pip`
import importlib
import subprocess
import sys


def _pip_install_no_deps(requirement):
    """Install ``requirement`` with ``--no-deps`` using this interpreter's pip.

    Runs ``<sys.executable> -m pip install <requirement> --no-deps`` so the
    package lands in the environment this script is running in, and reports
    a failed install instead of silently ignoring the exit status.

    Args:
        requirement: Requirement string handed to ``pip install``.

    Returns:
        True if pip exited with status 0, False otherwise.
    """
    try:
        completed = subprocess.run(
            [sys.executable, '-m', 'pip', 'install', requirement, '--no-deps'],
            check=False,
        )
    except OSError as run_error:
        # Best effort: a missing/unlaunchable interpreter is reported, not fatal.
        print(run_error)
        return False
    if completed.returncode != 0:
        print(f'pip install {requirement} failed with exit code {completed.returncode}')
        return False
    return True


# First attempt: if importing f5_tts fails because a module is missing,
# install exactly that module (without dependencies).
try:
    import f5_tts
except Exception as import_error:
    print(import_error)
    # Only a ModuleNotFoundError carries a trustworthy module name; any other
    # failure is just reported, rather than guessing a package name from the
    # last word of the error message.
    missing_module = (
        import_error.name
        if isinstance(import_error, ModuleNotFoundError)
        else None
    )
    if missing_module:
        _pip_install_no_deps(missing_module)
        print(f'retrying install {missing_module}')

# Unconditional safety net: make sure f5-tts itself is installed. pip treats
# this as a no-op when the requirement is already satisfied.
_pip_install_no_deps('f5-tts')

# Packages installed after interpreter start-up may be hidden by the import
# system's directory caches; clear them before the imports that follow.
importlib.invalidate_caches()


from huggingface_hub import hf_hub_download
import os
from f5_tts.api import F5TTS


# Location where the checkpoint is expected on disk; replaced below with the
# path reported by hf_hub_download when a download takes place.
model_path = 'models/checkpoints/model_220000.pt'
os.makedirs("models", exist_ok=True)

# Fetch the checkpoint from the Hugging Face Hub only when it is not on disk yet.
if not os.path.exists(model_path):
    print("Downloading model from HuggingFace...")
    try:
        # NOTE(review): local_dir_use_symlinks is reportedly deprecated in
        # newer huggingface_hub releases — confirm before upgrading.
        downloaded_model = hf_hub_download(
            repo_id="mesolitica/Malaysian-F5-TTS-v3",
            filename="checkpoints/model_220000.pt",
            local_dir="models",
            local_dir_use_symlinks=False,
        )
    except Exception as download_error:
        # Log the failure, then let it propagate: the script cannot continue
        # without a checkpoint.
        print(f"Error downloading model: {download_error}")
        raise
    else:
        print(f"Model downloaded successfully to {downloaded_model}!")
        model_path = downloaded_model

# Seed passed to every f5tts.infer call made by generate_tts.
seed = 1
# Not read by the visible code: generate_tts returns the rate reported by
# f5tts.infer instead.
# NOTE(review): presumably the model's output sample rate in Hz — confirm.
sampling_rate = 24000

# Reference voices, keyed by the `voice` argument of generate_tts. Each entry
# holds the clip file name (looked up under ref/ by generate_tts) and the
# transcript passed along as ref_text.
CLONE_VOICES = dict(
    # Disabled voice:
    # Prabowo=dict(
    #     path='prab.wav',
    #     transcript='pada saat sekarang ini dimana bangsa indonesia ditengah tantangan global',
    # ),
    Ono=dict(
        path='ono.wav',
        transcript='kalau saya percaya pekerjaan manusia itu harus lebih banyak lagi melakukan',
    ),
    # Disabled voices:
    # Najwa=dict(
    #     path='najwa.wav',
    #     transcript='ada kekhawatiran dari masyarakat sipil proses pembentukan undang-undang kita',
    # ),
    # Zilong=dict(
    #     path='zilong.wav',
    #     transcript='tidak ada yang menakutiku, bahkan kematian sekalipun',
    # ),
)

# Build the shared F5TTS engine once at import time, from the checkpoint at
# model_path and the vocab.txt file, targeting the CUDA device. generate_tts
# reuses this instance for every call.
print("pre model loaded")
f5tts = F5TTS(
    ckpt_file=model_path,
    vocab_file="vocab.txt",
    device="cuda",
)

def generate_tts(gen_text, voice, speed):
    """Synthesize ``gen_text`` using the reference voice named ``voice``.

    Args:
        gen_text: Text to synthesize; forwarded to ``f5tts.infer`` as ``gen_text``.
        voice: Key into ``CLONE_VOICES`` selecting the reference clip
            (resolved as ``ref/<path>``) and its transcript.
        speed: Forwarded unchanged to ``f5tts.infer`` as ``speed``.

    Returns:
        A ``(wav, sampling_rate)`` tuple: the first two of the three values
        returned by ``f5tts.infer``; the third value is discarded.

    Raises:
        KeyError: If ``voice`` is not a key of ``CLONE_VOICES``.
    """
    voice_entry = CLONE_VOICES[voice]
    reference_clip = f'ref/{voice_entry["path"]}'
    # The module-level seed is passed on every call.
    audio, rate, _ = f5tts.infer(
        ref_file=reference_clip,
        ref_text=voice_entry["transcript"],
        gen_text=gen_text,
        seed=seed,
        speed=speed,
    )
    return audio, rate

# Progress marker: reached once the F5TTS instance has been constructed and
# generate_tts has been defined.
print("post model loaded!")