farid678 committed on
Commit
4655264
·
verified ·
1 Parent(s): a766e2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -15
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import torch
2
  import numpy as np
3
  import gradio as gr
4
- from transformers import pipeline
5
  import logging
6
  from scipy.io.wavfile import write
7
  import uuid
@@ -13,15 +13,24 @@ import os
13
  logging.getLogger("transformers").setLevel(logging.ERROR)
14
 
15
  # -----------------------------
16
- # LOAD PIPELINE
17
  # -----------------------------
18
  device = 0 if torch.cuda.is_available() else -1
19
- tts_pipe = pipeline(
20
- task="text-to-speech",
21
- model="canopylabs/orpheus-3b-0.1-ft",
22
- device=device
 
 
 
 
 
 
23
  )
24
 
 
 
 
25
  # -----------------------------
26
  # INFERENCE FUNCTION
27
  # -----------------------------
@@ -29,17 +38,17 @@ def tts_generate(text):
29
  if not text.strip():
30
  return None
31
 
32
- # اجرای مدل TTS
33
- output = tts_pipe(text)
34
 
35
- # بررسی خروجی
36
- if "audio" not in output:
37
- raise ValueError("TTS pipeline did not return audio")
38
 
39
- audio = np.array(output["audio"], dtype=np.float32)
40
 
41
- # بررسی sampling_rate و مقدار پیش‌فرض در صورت نبود
42
- sr = output.get("sampling_rate", 22050)
43
 
44
  # تبدیل float32 به int16 برای scipy
45
  audio_int16 = (audio * 32767).astype(np.int16)
@@ -73,7 +82,7 @@ demo = gr.Interface(
73
  placeholder=SAMPLES[0],
74
  ),
75
  outputs=gr.Audio(type="filepath", label="Generated Audio"),
76
- title="Orpheus-3B Expressive TTS",
77
  examples=[[s] for s in SAMPLES],
78
  )
79
 
 
1
  import torch
2
  import numpy as np
3
  import gradio as gr
4
+ from transformers import AutoTokenizer, AutoModelForSpeechSeq2Seq
5
  import logging
6
  from scipy.io.wavfile import write
7
  import uuid
 
13
  logging.getLogger("transformers").setLevel(logging.ERROR)
14
 
15
  # -----------------------------
16
+ # LOAD LOCAL MODEL AND TOKENIZER
17
  # -----------------------------
18
  device = 0 if torch.cuda.is_available() else -1
19
+ model_dir = "./" # مسیر محلی در Space، همان‌جایی که adapter_model.safetensors قرار دارد
20
+
21
+ # بارگذاری tokenizer
22
+ tokenizer = AutoTokenizer.from_pretrained(model_dir)
23
+
24
+ # بارگذاری مدل
25
+ model = AutoModelForSpeechSeq2Seq.from_pretrained(
26
+ model_dir,
27
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
28
+ low_cpu_mem_usage=True,
29
  )
30
 
31
+ if device >= 0:
32
+ model = model.to(f"cuda:{device}")
33
+
34
  # -----------------------------
35
  # INFERENCE FUNCTION
36
  # -----------------------------
 
38
  if not text.strip():
39
  return None
40
 
41
+ # تبدیل متن به توکن
42
+ inputs = tokenizer(text, return_tensors="pt").to(model.device)
43
 
44
+ # تولید صوت
45
+ with torch.no_grad():
46
+ audio_out = model.generate_speech(**inputs)
47
 
48
+ audio = np.array(audio_out.cpu().numpy(), dtype=np.float32)
49
 
50
+ # sampling rate پیش‌فرض
51
+ sr = 22050
52
 
53
  # تبدیل float32 به int16 برای scipy
54
  audio_int16 = (audio * 32767).astype(np.int16)
 
82
  placeholder=SAMPLES[0],
83
  ),
84
  outputs=gr.Audio(type="filepath", label="Generated Audio"),
85
+ title="Custom TTS with Adapter Model",
86
  examples=[[s] for s in SAMPLES],
87
  )
88