Update app.py
Browse files
app.py
CHANGED
|
@@ -1,33 +1,20 @@
|
|
| 1 |
-
import torch
|
| 2 |
-
import gradio as gr
|
| 3 |
-
from transformers import pipeline
|
| 4 |
-
from huggingface_hub import login
|
| 5 |
import os
|
|
|
|
|
|
|
| 6 |
import numpy as np
|
|
|
|
|
|
|
| 7 |
|
| 8 |
# -----------------------------
|
| 9 |
-
# HUGGING FACE
|
| 10 |
# -----------------------------
|
| 11 |
-
HF_TOKEN = os.getenv(HF_TOKEN)
|
| 12 |
if HF_TOKEN is None:
|
| 13 |
-
raise ValueError("Environment variable HF_TOKEN not found.
|
| 14 |
-
|
| 15 |
-
login(HF_TOKEN)
|
| 16 |
-
|
| 17 |
-
# -----------------------------
|
| 18 |
-
# CONFIG
|
| 19 |
-
# -----------------------------
|
| 20 |
-
MODEL_NAME = "canopylabs/orpheus-3b-0.1-ft"
|
| 21 |
-
DEVICE = 0 if torch.cuda.is_available() else -1
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
tts_pipe = pipeline(
|
| 27 |
-
task="text-to-speech",
|
| 28 |
-
model=MODEL_NAME,
|
| 29 |
-
device=DEVICE,
|
| 30 |
-
use_auth_token=HF_TOKEN # دسترسی به مدل gated
|
| 31 |
)
|
| 32 |
|
| 33 |
# -----------------------------
|
|
@@ -37,10 +24,15 @@ def tts_generate(text):
|
|
| 37 |
if not text.strip():
|
| 38 |
return None
|
| 39 |
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
-
|
| 43 |
-
|
|
|
|
| 44 |
|
| 45 |
return (sr, audio)
|
| 46 |
|
|
@@ -66,9 +58,9 @@ demo = gr.Interface(
|
|
| 66 |
lines=4,
|
| 67 |
),
|
| 68 |
outputs=gr.Audio(label="Generated Audio"),
|
| 69 |
-
title="Orpheus 3B Text-to-Speech",
|
| 70 |
description=(
|
| 71 |
-
"English TTS using **canopylabs/orpheus-3b-0.1-ft**\n\n"
|
| 72 |
"Supported style tags examples:\n"
|
| 73 |
"- `[neutral]`\n"
|
| 74 |
"- `[expressive]`\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
+
import gradio as gr
|
| 3 |
+
from huggingface_hub import InferenceClient
|
| 4 |
import numpy as np
|
| 5 |
+
import io
|
| 6 |
+
import soundfile as sf
|
| 7 |
|
| 8 |
# -----------------------------
|
| 9 |
+
# HUGGING FACE INFERENCE CLIENT
|
| 10 |
# -----------------------------
|
| 11 |
+
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 12 |
if HF_TOKEN is None:
|
| 13 |
+
raise ValueError("Environment variable HF_TOKEN not found.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
+
client = InferenceClient(
|
| 16 |
+
provider="fal-ai",
|
| 17 |
+
api_key=HF_TOKEN,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
)
|
| 19 |
|
| 20 |
# -----------------------------
|
|
|
|
| 24 |
if not text.strip():
|
| 25 |
return None
|
| 26 |
|
| 27 |
+
# دریافت صوت به صورت bytes
|
| 28 |
+
audio_bytes = client.text_to_speech(
|
| 29 |
+
text,
|
| 30 |
+
model="canopylabs/orpheus-3b-0.1-ft",
|
| 31 |
+
)
|
| 32 |
|
| 33 |
+
# تبدیل bytes به numpy array و نمونه‌برداری
|
| 34 |
+
buffer = io.BytesIO(audio_bytes)
|
| 35 |
+
audio, sr = sf.read(buffer, dtype="float32")
|
| 36 |
|
| 37 |
return (sr, audio)
|
| 38 |
|
|
|
|
| 58 |
lines=4,
|
| 59 |
),
|
| 60 |
outputs=gr.Audio(label="Generated Audio"),
|
| 61 |
+
title="Orpheus 3B Text-to-Speech (Inference API)",
|
| 62 |
description=(
|
| 63 |
+
"English TTS using **canopylabs/orpheus-3b-0.1-ft** via Hugging Face Inference API.\n\n"
|
| 64 |
"Supported style tags examples:\n"
|
| 65 |
"- `[neutral]`\n"
|
| 66 |
"- `[expressive]`\n"
|