KGNINJA committed on
Commit
ff5e6bf
·
verified ·
1 Parent(s): fc8c147

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -37
app.py CHANGED
@@ -2,9 +2,9 @@ import os
2
  import numpy as np
3
  import librosa
4
  import pyworld as pw
5
- import soundfile as sf
6
  import gradio as gr
7
  from openai import OpenAI
 
8
 
9
  # -----------------------------
10
  # OpenAI Client
@@ -14,66 +14,64 @@ client = OpenAI(api_key=OPENAI_API_KEY)
14
 
15
 
16
  # ==========================
17
- # 1) OpenAI 標準音声 TTS
18
  # ==========================
19
  def tts_standard(text):
20
  """
21
- OpenAI gpt-4o-mini-ttsで基本音声生成
22
  """
23
  response = client.audio.speech.create(
24
  model="gpt-4o-mini-tts",
25
  voice="alloy",
26
- input=text
 
27
  )
28
  audio_bytes = response.read()
29
-
30
- # WAVとして読み込む
31
- sr, audio = sf.read(
32
- sf.SoundFile(
33
- io.BytesIO(audio_bytes),
34
- mode='r'
35
- )
36
- )
37
-
38
- return sr, audio.astype(np.float32)
39
 
40
 
41
  # ==========================
42
- # 2) F0を関西イントネーションHL型に変換
43
  # ==========================
44
- def convert_to_kansai_pitch(audio, sr):
45
  """
46
- 標準TTS音声を関西弁イントネーション(HLパターン)へ変換
47
  """
48
 
49
- # WORLD分解
 
 
 
50
  f0, sp, ap = pw.wav2world(audio.astype(np.float64), sr)
51
 
52
- # 有効フレーム
53
  nonzero_idx = np.where(f0 > 0)[0]
54
  if len(nonzero_idx) < 2:
55
- return audio
56
 
57
- # 基本周波数の範囲
58
  high = np.max(f0[nonzero_idx])
59
  low = np.min(f0[nonzero_idx])
60
 
61
- # HLパターン(前半高 → 後半低)
62
- hl_curve = np.linspace(high, low, len(f0))
63
- f0_new = hl_curve
64
 
65
- # WORLD再合成
66
- y = pw.synthesize(f0_new, sp, ap, sr)
67
- return y.astype(np.float32)
 
 
 
 
 
68
 
69
 
70
  # ==========================
71
- # 3) 統合:Kansai TTS
72
  # ==========================
73
  def kansai_tts(text):
74
- sr, audio_std = tts_standard(text)
75
- audio_kansai = convert_to_kansai_pitch(audio_std, sr)
76
- return (sr, audio_kansai)
 
77
 
78
 
79
  # ==========================
@@ -82,15 +80,11 @@ def kansai_tts(text):
82
  with gr.Blocks() as demo:
83
  gr.Markdown("## 🔊 Kansign — Kansai Accent TTS(OpenAI版・安定稼働)")
84
 
85
- text_in = gr.Textbox(
86
- label="テキストを入力(例:なんでやねん)",
87
- value="なんでやねん"
88
- )
89
 
90
- audio_out = gr.Audio(label="関西イントネーション音声", type="numpy")
91
 
92
  btn = gr.Button("関西イントネーションで喋る")
93
  btn.click(kansai_tts, inputs=text_in, outputs=audio_out)
94
 
95
-
96
  demo.launch()
 
2
  import numpy as np
3
  import librosa
4
  import pyworld as pw
 
5
  import gradio as gr
6
  from openai import OpenAI
7
+ import io
8
 
9
  # -----------------------------
10
  # OpenAI Client
 
14
 
15
 
16
  # ==========================
17
+ # 1) 標準TTS(OpenAI)
18
  # ==========================
19
  def tts_standard(text):
20
  """
21
+ OpenAIのTTS → WAVバイナリ返す
22
  """
23
  response = client.audio.speech.create(
24
  model="gpt-4o-mini-tts",
25
  voice="alloy",
26
+ input=text,
27
+ format="wav"
28
  )
29
  audio_bytes = response.read()
30
+ return audio_bytes
 
 
 
 
 
 
 
 
 
31
 
32
 
33
  # ==========================
34
+ # 2) F0を関西イントネーション変換
35
  # ==========================
36
def convert_to_kansai_pitch(wav_bytes):
    """Re-shape the pitch contour of an audio clip into a falling (HL) pattern.

    The clip is decoded, analysed with WORLD, and its F0 track is replaced by
    a linear ramp running from the highest to the lowest voiced pitch found
    in the clip. The result is re-synthesised and re-encoded as WAV.

    Args:
        wav_bytes: Encoded audio bytes, as returned by ``tts_standard``.

    Returns:
        WAV-encoded bytes of the converted audio, or ``wav_bytes`` unchanged
        when fewer than two voiced frames are detected.
    """
    # Local import: soundfile is only needed for the final WAV encoding.
    import soundfile as sf

    # Decode from memory; sr=None keeps the clip's own sample rate.
    audio, sr = librosa.load(io.BytesIO(wav_bytes), sr=None)

    # WORLD analysis (pyworld works on float64 samples).
    f0, sp, ap = pw.wav2world(audio.astype(np.float64), sr)

    # Frames with F0 > 0 are the voiced ones.
    voiced = f0 > 0
    if np.count_nonzero(voiced) < 2:
        # Nothing to reshape: hand the input back without re-synthesis.
        return wav_bytes

    high = f0[voiced].max()
    low = f0[voiced].min()

    # HL curve: high at the start of the clip, low at the end. It is applied
    # to voiced frames only; unvoiced frames keep F0 == 0 so that pauses and
    # unvoiced consonants are not re-synthesised as pitched sound.
    ramp = np.linspace(high, low, len(f0))
    f0_new = np.where(voiced, ramp, 0.0)

    # WORLD re-synthesis with the original envelope and aperiodicity.
    y = pw.synthesize(f0_new, sp, ap, sr).astype(np.float32)

    # Encode back to WAV in memory.
    out_buf = io.BytesIO()
    sf.write(out_buf, y, sr, format="WAV")
    return out_buf.getvalue()
65
 
66
 
67
  # ==========================
68
+ # 3) Kansai TTS(統合)
69
  # ==========================
70
  def kansai_tts(text):
71
+ wav = tts_standard(text)
72
+ wav_kansai = convert_to_kansai_pitch(wav)
73
+ # Gradio は (sr, audio_bytes) or just audio_bytes が使える
74
+ return wav_kansai
75
 
76
 
77
  # ==========================
 
80
  with gr.Blocks() as demo:
81
  gr.Markdown("## 🔊 Kansign — Kansai Accent TTS(OpenAI版・安定稼働)")
82
 
83
+ text_in = gr.Textbox(label="テキストを入力(例:なんでやねん)", value="なんでやねん")
 
 
 
84
 
85
+ audio_out = gr.Audio(label="関西イントネーション音声", type="filepath")
86
 
87
  btn = gr.Button("関西イントネーションで喋る")
88
  btn.click(kansai_tts, inputs=text_in, outputs=audio_out)
89
 
 
90
  demo.launch()