mr-don88 committed on
Commit
cf2b3c2
·
verified ·
1 Parent(s): 8d63a70

Update app.py

Browse files

# -*- coding: utf-8 -*-
import os, re, time, random, zipfile
import requests
import gradio as gr
from pydub import AudioSegment
import natsort

# ================= CONFIG =================
# Directory that holds the per-block audio clips. run_tts empties it at the
# start of every run and merge_audio_files reads the clips back from it.
# The path is relative, so it resolves against the current working directory.
VOICE_DIR = "voices"
# Create the directory at import time so it can be listed before any clip
# has been written.
os.makedirs(VOICE_DIR, exist_ok=True)

# ================= CORE FUNCTIONS =================
def check_api_key(api_key):
    """Check an ElevenLabs API key and report its remaining character quota.

    Sends a GET request to ``https://api.elevenlabs.io/v1/user`` with the key
    in the ``xi-api-key`` header (10 second timeout).

    Args:
        api_key: The API key string to check.

    Returns:
        ``{"valid": True, "remaining": n}`` when the endpoint answers with
        HTTP 200, where ``n`` is ``character_limit - character_count`` taken
        from the ``subscription`` object of the JSON body (missing fields
        count as 0).
        ``{"valid": False}`` for any other status code, for a network
        failure, or when the response body cannot be interpreted.
    """
    try:
        r = requests.get(
            "https://api.elevenlabs.io/v1/user",
            headers={"xi-api-key": api_key},
            timeout=10,
        )
        if r.status_code != 200:
            return {"valid": False}
        sub = r.json().get("subscription", {})
        return {
            "valid": True,
            "remaining": sub.get("character_limit", 0) - sub.get("character_count", 0),
        }
    except (requests.RequestException, ValueError, TypeError, AttributeError):
        # Best-effort check: transport errors (RequestException), undecodable
        # JSON (ValueError) and an unexpectedly shaped body (TypeError /
        # AttributeError) all mean "this key cannot be used".  Unlike a bare
        # ``except:``, this lets KeyboardInterrupt and SystemExit propagate.
        return {"valid": False}


def parse_text_blocks(text, max_len=200):
    """Split *text* into blocks of whole sentences of at most *max_len* chars.

    The text is cut after ``.``, ``!`` or ``?`` followed by whitespace, and
    consecutive sentences are joined with a single space for as long as the
    joined block (including the joining spaces) fits in *max_len*.

    Args:
        text: The input text. ``None``, empty and whitespace-only input
            yield an empty list.
        max_len: Maximum length of a block, in characters.

    Returns:
        A list of non-empty, stripped blocks in their original order. A
        single sentence longer than *max_len* is never split; it is returned
        as a block of its own.
    """
    if not text:
        return []

    blocks, cur = [], ""
    for sentence in re.split(r"(?<=[.!?])\s+", text.strip()):
        if not sentence:
            continue
        # Measure the block exactly as it would be emitted, so the joining
        # space is always counted against max_len.
        candidate = f"{cur} {sentence}" if cur else sentence
        if len(candidate) <= max_len:
            cur = candidate
            continue
        # The sentence does not fit: flush what has been collected (nothing
        # yet if the very first sentence is oversized) and start a new block.
        if cur:
            blocks.append(cur)
        cur = sentence
    if cur:
        blocks.append(cur)
    return blocks


def estimate_credit(text):
    """Return the credit estimate for *text*: its length plus a flat 50."""
    overhead = 50
    return overhead + len(text)


def generate_voice(text, api_key, voice_id, model_id,
                   stability, similarity, style, speed, boost):
    """Request speech audio for *text* from the ElevenLabs text-to-speech API.

    Args:
        text: The text to synthesize.
        api_key: API key sent in the ``xi-api-key`` header.
        voice_id: Voice identifier, interpolated into the request URL.
        model_id: Value sent as ``model_id`` in the JSON payload.
        stability: Sent as ``voice_settings.stability``.
        similarity: Sent as ``voice_settings.similarity_boost``.
        style: Sent as ``voice_settings.style``.
        speed: Sent as ``voice_settings.speed``.
        boost: Sent as ``voice_settings.use_speaker_boost``.

    Returns:
        The raw response body (bytes) when the API answers with HTTP 200,
        otherwise ``None``. A network-level failure (timeout, connection
        error, ...) also yields ``None`` rather than raising, so callers see
        a single failure signal.
    """
    # Random 1-2 second pause before every request; presumably there to
    # space out consecutive calls and avoid rate limiting.
    time.sleep(random.uniform(1, 2))
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    headers = {"xi-api-key": api_key, "Content-Type": "application/json"}

    payload = {
        "text": text,
        "model_id": model_id,
        "voice_settings": {
            "stability": stability,
            "similarity_boost": similarity,
            "style": style,
            "speed": speed,
            "use_speaker_boost": boost,
        },
    }

    try:
        r = requests.post(url, headers=headers, json=payload, timeout=30)
    except requests.RequestException:
        # Treat transport errors like any other failed request instead of
        # letting them abort the caller.
        return None
    if r.status_code == 200:
        return r.content
    return None


def merge_audio_files(fmt):
    """Concatenate the clips in VOICE_DIR into one ``output_full.<fmt>`` file.

    Files whose name ends with the lower-cased *fmt* are taken in natural
    sort order and joined with 500 ms of silence between consecutive clips.

    Args:
        fmt: Audio format name (e.g. ``"MP3"``); it is lower-cased and used
            as the file-name suffix filter, the output extension and the
            export format.

    Returns:
        The path of the exported file, or ``None`` when VOICE_DIR contains
        no matching file.
    """
    ext = fmt.lower()
    matching = [name for name in os.listdir(VOICE_DIR) if name.endswith(ext)]
    ordered = natsort.natsorted(matching)
    if len(ordered) == 0:
        return None

    first, *rest = ordered
    merged = AudioSegment.from_file(os.path.join(VOICE_DIR, first))
    for name in rest:
        gap = AudioSegment.silent(500)
        clip = AudioSegment.from_file(os.path.join(VOICE_DIR, name))
        merged = merged + gap
        merged = merged + clip

    out_path = f"output_full.{ext}"
    merged.export(out_path, format=ext)
    return out_path


# ================= MAIN RUN =================
def run_tts(
    api_keys_text,
    voice_id,
    text,
    model_id,
    fmt,
    stability,
    similarity,
    style,
    speed,
    boost
):
    """Synthesize *text* block by block and merge the clips into one file.

    Steps: empty VOICE_DIR, split the text into blocks, keep the API keys
    that check_api_key reports as valid with more than 500 remaining
    characters, generate one clip per block, then merge the clips.

    Keys are used in the order given. The first key is used until its
    tracked remaining credit is lower than the estimate for the current
    block or a request with it fails; it is then dropped for the rest of
    the run and the next key takes over.

    Args:
        api_keys_text: API keys, one per line; blank lines are ignored.
        voice_id: Voice identifier passed to generate_voice.
        text: The text to synthesize.
        model_id: Model identifier passed to generate_voice.
        fmt: Output format name; lower-cased for the clip file extension
            and passed on to merge_audio_files.
        stability, similarity, style, speed, boost: Voice settings passed
            through to generate_voice unchanged.

    Returns:
        A ``(status_message, merged_path)`` tuple. ``merged_path`` is
        ``None`` whenever the run did not produce a merged file.
    """
    # Remove clips left over from a previous run so they cannot be merged
    # into this one.  Only plain files are deleted; anything else is skipped
    # instead of making os.remove raise.
    for name in os.listdir(VOICE_DIR):
        stale = os.path.join(VOICE_DIR, name)
        if os.path.isfile(stale):
            os.remove(stale)

    # Drop empty blocks: sending one to the API would fail, and every failed
    # request discards the key that was used for it.
    texts = [block for block in parse_text_blocks(text or "") if block]
    if not texts:
        return "❌ Văn bản trống", None

    api_keys = [k.strip() for k in (api_keys_text or "").splitlines() if k.strip()]
    valid_keys = []  # [key, tracked remaining credit] pairs, in input order

    for k in api_keys:
        info = check_api_key(k)
        if info.get("valid") and info["remaining"] > 500:
            valid_keys.append([k, info["remaining"]])

    if not valid_keys:
        return "❌ Không có API key hợp lệ", None

    ext = fmt.lower()

    for i, block in enumerate(texts):
        need = estimate_credit(block)
        success = False
        # The active key is always the head of the list; exhausted or
        # failing keys are popped so the next one becomes the head.
        while valid_keys:
            key, remain = valid_keys[0]

            if remain < need:
                valid_keys.pop(0)
                continue

            audio = generate_voice(
                block, key, voice_id, model_id,
                stability, similarity, style, speed, boost
            )

            if not audio:
                valid_keys.pop(0)
                continue

            path = f"{VOICE_DIR}/voice_{i+1:03d}.{ext}"
            with open(path, "wb") as f:
                f.write(audio)
            # Track spending locally rather than re-querying the API.
            valid_keys[0][1] -= need
            success = True
            break

        if not success:
            return "❌ Hết API key khi đang chạy", None

    merged = merge_audio_files(fmt)
    return "✅ Hoàn tất", merged


# ================= GRADIO UI =================
# Gradio front end: components are declared in display order, and the button
# is wired to run_tts with the widgets below as its inputs.
with gr.Blocks(title="ElevenLabs TTS PRO") as demo:
    gr.Markdown(value="## 🔊 ElevenLabs TTS – Hugging Face Edition")

    # Free-text inputs.
    api_keys = gr.Textbox(lines=4, label="API Keys (mỗi dòng 1 key)")
    voice_id = gr.Textbox(label="Voice ID")
    text = gr.Textbox(lines=6, label="Text")

    # Model choices are (display label, model id) pairs; the default value
    # is the id of the multilingual model.
    model = gr.Dropdown(
        label="Model",
        value="eleven_multilingual_v2",
        choices=[
            ("Turbo v2.5", "eleven_turbo_v2_5"),
            ("Flash v2.5", "eleven_flash_v2_5"),
            ("Multilingual v2", "eleven_multilingual_v2"),
        ],
    )

    fmt = gr.Dropdown(choices=["MP3", "WAV"], value="MP3", label="Format")

    # Voice settings forwarded to the text-to-speech request.
    stability = gr.Slider(minimum=0, maximum=1, value=0.9, label="Stability")
    similarity = gr.Slider(minimum=0, maximum=1, value=0.5, label="Similarity")
    style = gr.Slider(minimum=0, maximum=1, value=0.4, label="Style")
    speed = gr.Slider(minimum=0.7, maximum=1.2, value=0.81, label="Speed")
    boost = gr.Checkbox(value=True, label="Speaker Boost")

    # Trigger and outputs.
    run_btn = gr.Button(value="🎧 Tạo giọng nói")
    status = gr.Textbox(label="Status")
    output_audio = gr.Audio(label="Output", type="filepath")

    # The input order must match the parameter order of run_tts.
    run_btn.click(
        fn=run_tts,
        inputs=[
            api_keys,
            voice_id,
            text,
            model,
            fmt,
            stability,
            similarity,
            style,
            speed,
            boost,
        ],
        outputs=[status, output_audio],
    )

demo.launch()

Files changed (1) hide show
  1. app.py +0 -161
app.py CHANGED
@@ -1,161 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- import os, re, time, random, zipfile, requests, natsort
3
- import gradio as gr
4
- from pydub import AudioSegment
5
-
6
- # ================= API CHECK =================
7
- def check_api_key(api_key):
8
- try:
9
- r = requests.get(
10
- "https://api.elevenlabs.io/v1/user",
11
- headers={"xi-api-key": api_key},
12
- timeout=10
13
- )
14
- if r.status_code != 200:
15
- return None
16
- sub = r.json().get("subscription", {})
17
- return sub.get("character_limit", 0) - sub.get("character_count", 0)
18
- except:
19
- return None
20
-
21
-
22
- # ================= TEXT =================
23
- def split_text(text, max_len=200):
24
- out, cur = [], ""
25
- for s in re.split(r'(?<=[.!?])\s+', text):
26
- if len(cur) + len(s) <= max_len:
27
- cur += " " + s
28
- else:
29
- out.append(cur.strip())
30
- cur = s
31
- if cur:
32
- out.append(cur.strip())
33
- return out
34
-
35
-
36
- # ================= TTS (FIX QUAN TRỌNG) =================
37
- def tts(text, api_key, voice_id, model):
38
- time.sleep(random.uniform(0.8, 1.5))
39
- r = requests.post(
40
- f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}",
41
- headers={
42
- "xi-api-key": api_key,
43
- "Content-Type": "application/json",
44
- "Accept": "audio/mpeg" # 🔥 BẮT BUỘC
45
- },
46
- json={
47
- "text": text,
48
- "model_id": model,
49
- "voice_settings": {
50
- "stability": 0.9,
51
- "similarity_boost": 0.5,
52
- "style": 0.4,
53
- "speed": 0.81,
54
- "use_speaker_boost": True
55
- }
56
- },
57
- timeout=60
58
- )
59
- if r.status_code == 200 and len(r.content) > 1000:
60
- return r.content
61
- return None
62
-
63
-
64
- # ================= AUDIO + SRT =================
65
- def merge_audio(folder, fmt):
66
- files = natsort.natsorted(f for f in os.listdir(folder) if f.endswith(fmt))
67
- audio = AudioSegment.from_file(os.path.join(folder, files[0]))
68
- for f in files[1:]:
69
- audio += AudioSegment.silent(500)
70
- audio += AudioSegment.from_file(os.path.join(folder, f))
71
- out = f"output_full.{fmt}"
72
- audio.export(out, format=fmt)
73
- return out
74
-
75
-
76
- def create_srt(folder, texts):
77
- t, lines = 0, []
78
- files = natsort.natsorted(f for f in os.listdir(folder) if f.startswith("voice_"))
79
- for i, (f, txt) in enumerate(zip(files, texts), 1):
80
- a = AudioSegment.from_file(os.path.join(folder, f))
81
- lines += [
82
- str(i),
83
- f"00:00:{t//1000:02},{t%1000:03} --> 00:00:{(t+len(a))//1000:02},{(t+len(a))%1000:03}",
84
- txt, ""
85
- ]
86
- t += len(a) + 500
87
- with open(os.path.join(folder, "output_full.srt"), "w", encoding="utf-8") as f:
88
- f.write("\n".join(lines))
89
-
90
-
91
- # ================= RUN =================
92
- def run(api_text, api_file, voice_id, text, model, fmt):
93
- keys = api_file.decode().splitlines() if api_file else api_text.splitlines()
94
- keys = [k.strip() for k in keys if k.strip()]
95
-
96
- table = "| API KEY | CREDIT |\n|---|---|\n"
97
- valid = []
98
-
99
- for k in keys:
100
- rem = check_api_key(k)
101
- show = f"{k[:6]}...{k[-4:]}"
102
- if rem and rem > 600:
103
- valid.append(k)
104
- table += f"| {show} | {rem} |\n"
105
- else:
106
- table += f"| {show} | ❌ |\n"
107
-
108
- if not valid:
109
- return "❌ Không có API key >600", None, None, table
110
-
111
- texts = split_text(text)
112
- os.makedirs("voices", exist_ok=True)
113
- for f in os.listdir("voices"):
114
- os.remove(os.path.join("voices", f))
115
-
116
- for i, t in enumerate(texts):
117
- success = False
118
- for key in valid:
119
- audio = tts(t, key, voice_id, model)
120
- if audio:
121
- with open(f"voices/voice_{i+1:03d}.{fmt}", "wb") as f:
122
- f.write(audio)
123
- success = True
124
- break
125
- if not success:
126
- return "❌ Không API key nào tạo được audio", None, None, table
127
-
128
- merged = merge_audio("voices", fmt)
129
- create_srt("voices", texts)
130
-
131
- zipf = "output.zip"
132
- with zipfile.ZipFile(zipf, "w") as z:
133
- for f in os.listdir("voices"):
134
- z.write(os.path.join("voices", f), f)
135
- z.write(merged)
136
-
137
- return "✅ HOÀN TẤT", merged, zipf, table
138
-
139
-
140
- # ================= UI =================
141
- with gr.Blocks() as app:
142
- gr.Markdown("## 🔊 ElevenLabs TTS – FIX CHUẨN HF")
143
-
144
- api_text = gr.Textbox(lines=4, label="API key")
145
- api_file = gr.File(type="binary", label="Upload API file")
146
- voice_id = gr.Textbox(label="Voice ID")
147
- text = gr.Textbox(lines=6, label="Text")
148
-
149
- model = gr.Dropdown(["eleven_multilingual_v2"], value="eleven_multilingual_v2")
150
- fmt = gr.Dropdown(["mp3", "wav"], value="mp3")
151
-
152
- btn = gr.Button("🎧 TẠO GIỌNG")
153
- status = gr.Textbox()
154
- audio = gr.Audio(type="filepath")
155
- zipf = gr.File()
156
- table = gr.Markdown()
157
-
158
- btn.click(run, [api_text, api_file, voice_id, text, model, fmt],
159
- [status, audio, zipf, table])
160
-
161
- app.launch()