KGNINJA committed on
Commit
79bab02
·
verified ·
1 Parent(s): ff5e6bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -52
app.py CHANGED
@@ -2,86 +2,80 @@ import os
2
  import numpy as np
3
  import librosa
4
  import pyworld as pw
5
- import gradio as gr
6
  from openai import OpenAI
7
- import io
 
8
 
9
- # -----------------------------
10
- # OpenAI Client
11
- # -----------------------------
12
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
13
- client = OpenAI(api_key=OPENAI_API_KEY)
14
 
15
 
16
- # ==========================
17
- # 1) 標準TTS(OpenAI)
18
- # ==========================
19
  def tts_standard(text):
20
- """
21
- OpenAIのTTS → WAVバイナリを返す
22
- """
23
  response = client.audio.speech.create(
24
  model="gpt-4o-mini-tts",
25
  voice="alloy",
26
- input=text,
27
- format="wav"
28
  )
29
  audio_bytes = response.read()
30
- return audio_bytes
31
 
 
 
 
 
 
 
 
 
32
 
33
- # ==========================
34
- # 2) F0を関西イントネーションへ変換
35
- # ==========================
36
- def convert_to_kansai_pitch(wav_bytes):
37
- """
38
- WAVのバイナリを読み込み → F0変形 → WAVに再エンコード
39
- """
40
 
41
- # BytesIO で読み込み
42
- audio, sr = librosa.load(io.BytesIO(wav_bytes), sr=None)
 
 
 
43
 
44
- # WORLD分解
45
  f0, sp, ap = pw.wav2world(audio.astype(np.float64), sr)
46
 
47
- nonzero_idx = np.where(f0 > 0)[0]
48
- if len(nonzero_idx) < 2:
49
- return wav_bytes # 再合成せず返す
50
 
51
- high = np.max(f0[nonzero_idx])
52
- low = np.min(f0[nonzero_idx])
53
 
54
- # HL曲線関西イントネーション
55
  f0_new = np.linspace(high, low, len(f0))
56
 
57
- # WORLD再合成
58
- y = pw.synthesize(f0_new, sp, ap, sr).astype(np.float32)
59
 
60
- # WAVに戻す
61
- out_buf = io.BytesIO()
62
- import soundfile as sf
63
- sf.write(out_buf, y, sr, format="WAV")
64
- return out_buf.getvalue()
65
 
66
-
67
- # ==========================
68
- # 3) Kansai TTS(統合)
69
- # ==========================
70
  def kansai_tts(text):
71
- wav = tts_standard(text)
72
- wav_kansai = convert_to_kansai_pitch(wav)
73
- # Gradio は (sr, audio_bytes) or just audio_bytes が使える
74
- return wav_kansai
 
 
75
 
 
76
 
77
- # ==========================
78
- # Gradio UI
79
- # ==========================
 
80
  with gr.Blocks() as demo:
81
  gr.Markdown("## 🔊 Kansign — Kansai Accent TTS(OpenAI版・安定稼働)")
82
 
83
- text_in = gr.Textbox(label="テキストを入力(例:なんでやねん)", value="なんでやねん")
84
-
85
  audio_out = gr.Audio(label="関西イントネーション音声", type="filepath")
86
 
87
  btn = gr.Button("関西イントネーションで喋る")
 
2
  import numpy as np
3
  import librosa
4
  import pyworld as pw
5
+ import soundfile as sf
6
  from openai import OpenAI
7
+ import gradio as gr
8
+ import tempfile
9
 
10
+ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
 
 
 
11
 
12
 
13
+ # ----------------------------------------
14
+ # 1) OpenAI 標準TTS
15
+ # ----------------------------------------
16
  def tts_standard(text):
17
+ """OpenAIの標準TTSをWAVとして取得"""
18
+
 
19
  response = client.audio.speech.create(
20
  model="gpt-4o-mini-tts",
21
  voice="alloy",
22
+ input=text
 
23
  )
24
  audio_bytes = response.read()
 
25
 
26
+ # 一度 temp wav に保存して librosa で読み込む
27
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
28
+ tmp.write(audio_bytes)
29
+ tmp_path = tmp.name
30
+
31
+ audio, sr = librosa.load(tmp_path, sr=22050)
32
+
33
+ return sr, audio
34
 
 
 
 
 
 
 
 
35
 
36
+ # ----------------------------------------
37
+ # 2) F0 を関西イントネーション(HL型)へ変換
38
+ # ----------------------------------------
39
def convert_to_kansai_pitch(audio, sr):
    """Re-synthesize ``audio`` with a falling (high-to-low) pitch contour.

    The signal is decomposed with WORLD (``pw.wav2world``), the F0 track of
    the voiced frames is replaced by a linear ramp from the highest to the
    lowest voiced F0, and the result is re-synthesized with the original
    spectral envelope and aperiodicity.

    Args:
        audio: Waveform samples as a NumPy array (cast to float64 for WORLD).
        sr: Sample rate of ``audio``.

    Returns:
        The re-synthesized waveform as float32, or ``audio`` unchanged when
        fewer than two voiced frames are found.
    """
    f0, sp, ap = pw.wav2world(audio.astype(np.float64), sr)

    voiced = f0 > 0
    if np.count_nonzero(voiced) < 2:
        # Not enough pitch information to build a contour.
        return audio

    high = np.max(f0[voiced])
    low = np.min(f0[voiced])

    # HL pattern: glide smoothly from the highest to the lowest pitch.
    ramp = np.linspace(high, low, len(f0))
    # F0 == 0 marks unvoiced frames; keep them at 0 so silence and
    # consonants are not re-synthesized as voiced sound.
    f0_new = np.where(voiced, ramp, 0.0)

    y = pw.synthesize(f0_new, sp, ap, sr)
    return y.astype(np.float32)
56
 
 
 
 
 
 
57
 
58
+ # ----------------------------------------
59
+ # 3) Kansai Accent TTS コア関数
60
+ # ----------------------------------------
 
61
  def kansai_tts(text):
62
+ sr, audio_std = tts_standard(text)
63
+ audio_kansai = convert_to_kansai_pitch(audio_std, sr)
64
+
65
+ # 出力 wav を一時ファイルに保存
66
+ out_path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
67
+ sf.write(out_path, audio_kansai, sr)
68
 
69
+ return out_path
70
 
71
+
72
+ # ----------------------------------------
73
+ # 4) Gradio UI
74
+ # ----------------------------------------
75
  with gr.Blocks() as demo:
76
  gr.Markdown("## 🔊 Kansign — Kansai Accent TTS(OpenAI版・安定稼働)")
77
 
78
+ text_in = gr.Textbox(label="テキストを入力(例:なんでやねん)")
 
79
  audio_out = gr.Audio(label="関西イントネーション音声", type="filepath")
80
 
81
  btn = gr.Button("関西イントネーションで喋る")