farid678 committed on
Commit
4655264
·
verified ·
1 Parent(s): a766e2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -15
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import torch
2
  import numpy as np
3
  import gradio as gr
4
- from transformers import pipeline
5
  import logging
6
  from scipy.io.wavfile import write
7
  import uuid
@@ -13,15 +13,24 @@ import os
13
  logging.getLogger("transformers").setLevel(logging.ERROR)
14
 
15
  # -----------------------------
16
- # LOAD PIPELINE
17
  # -----------------------------
18
  device = 0 if torch.cuda.is_available() else -1
19
- tts_pipe = pipeline(
20
- task="text-to-speech",
21
- model="canopylabs/orpheus-3b-0.1-ft",
22
- device=device
 
 
 
 
 
 
23
  )
24
 
 
 
 
25
  # -----------------------------
26
  # INFERENCE FUNCTION
27
  # -----------------------------
@@ -29,17 +38,17 @@ def tts_generate(text):
29
  if not text.strip():
30
  return None
31
 
32
- # اجرای مدل TTS
33
- output = tts_pipe(text)
34
 
35
- # بررسی خروجی
36
- if "audio" not in output:
37
- raise ValueError("TTS pipeline did not return audio")
38
 
39
- audio = np.array(output["audio"], dtype=np.float32)
40
 
41
- # بررسی sampling_rate و مقدار پیش‌فرض در صورت نبود
42
- sr = output.get("sampling_rate", 22050)
43
 
44
  # تبدیل float32 به int16 برای scipy
45
  audio_int16 = (audio * 32767).astype(np.int16)
@@ -73,7 +82,7 @@ demo = gr.Interface(
73
  placeholder=SAMPLES[0],
74
  ),
75
  outputs=gr.Audio(type="filepath", label="Generated Audio"),
76
- title="Orpheus-3B Expressive TTS",
77
  examples=[[s] for s in SAMPLES],
78
  )
79
 
 
1
  import torch
2
  import numpy as np
3
  import gradio as gr
4
+ from transformers import AutoTokenizer, AutoModelForSpeechSeq2Seq
5
  import logging
6
  from scipy.io.wavfile import write
7
  import uuid
 
13
  logging.getLogger("transformers").setLevel(logging.ERROR)
14
 
15
  # -----------------------------
16
+ # LOAD LOCAL MODEL AND TOKENIZER
17
  # -----------------------------
18
  device = 0 if torch.cuda.is_available() else -1
19
+ model_dir = "./" # مسیر محلی در Space، همان‌جایی که adapter_model.safetensors قرار دارد
20
+
21
+ # بارگذاری tokenizer
22
+ tokenizer = AutoTokenizer.from_pretrained(model_dir)
23
+
24
+ # بارگذاری مدل
25
+ model = AutoModelForSpeechSeq2Seq.from_pretrained(
26
+ model_dir,
27
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
28
+ low_cpu_mem_usage=True,
29
  )
30
 
31
+ if device >= 0:
32
+ model = model.to(f"cuda:{device}")
33
+
34
  # -----------------------------
35
  # INFERENCE FUNCTION
36
  # -----------------------------
 
38
  if not text.strip():
39
  return None
40
 
41
+ # تبدیل متن به توکن
42
+ inputs = tokenizer(text, return_tensors="pt").to(model.device)
43
 
44
+ # تولید صوت
45
+ with torch.no_grad():
46
+ audio_out = model.generate_speech(**inputs)
47
 
48
+ audio = np.array(audio_out.cpu().numpy(), dtype=np.float32)
49
 
50
+ # sampling rate پیش‌فرض
51
+ sr = 22050
52
 
53
  # تبدیل float32 به int16 برای scipy
54
  audio_int16 = (audio * 32767).astype(np.int16)
 
82
  placeholder=SAMPLES[0],
83
  ),
84
  outputs=gr.Audio(type="filepath", label="Generated Audio"),
85
+ title="Custom TTS with Adapter Model",
86
  examples=[[s] for s in SAMPLES],
87
  )
88