KGNINJA committed on
Commit
fc8c147
·
verified ·
1 Parent(s): ea83657

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -17
app.py CHANGED
@@ -1,42 +1,64 @@
 
1
  import numpy as np
2
  import librosa
3
  import pyworld as pw
4
  import soundfile as sf
5
- from transformers import AutoProcessor, AutoModel
6
  import gradio as gr
 
7
 
 
 
 
 
 
8
 
9
- from transformers import pipeline
10
-
11
- # 軽量TTS
12
- tts = pipeline("text-to-speech", model="p0p0pp/KoeTTS")
13
 
 
 
 
14
  def tts_standard(text):
15
- result = tts(text)
16
- audio = result["audio"]
17
- return (22050, audio)
18
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
 
21
  # ==========================
22
- # 2) F0 を関西イントネーションへ強制変換
23
  # ==========================
24
  def convert_to_kansai_pitch(audio, sr):
25
- """標準TTSの音声 → 関西イントネーションHL型へ"""
 
 
26
 
27
  # WORLDで分解
28
  f0, sp, ap = pw.wav2world(audio.astype(np.float64), sr)
29
 
30
- # HL型ピッチカブへの変換
31
  nonzero_idx = np.where(f0 > 0)[0]
32
  if len(nonzero_idx) < 2:
33
  return audio
34
 
35
- # 基本ピッチ
36
  high = np.max(f0[nonzero_idx])
37
  low = np.min(f0[nonzero_idx])
38
 
39
- # HLパターン生成:最初 → 後半下がる
40
  hl_curve = np.linspace(high, low, len(f0))
41
  f0_new = hl_curve
42
 
@@ -46,7 +68,7 @@ def convert_to_kansai_pitch(audio, sr):
46
 
47
 
48
  # ==========================
49
- # 3) 統合関数
50
  # ==========================
51
  def kansai_tts(text):
52
  sr, audio_std = tts_standard(text)
@@ -58,8 +80,13 @@ def kansai_tts(text):
58
  # Gradio UI
59
  # ==========================
60
  with gr.Blocks() as demo:
61
- gr.Markdown("## 🔊 Kansign — Kansai Accent TTS(ベータ版)")
62
- text_in = gr.Textbox(label="テキストを入力(例:なんでやねん)")
 
 
 
 
 
63
  audio_out = gr.Audio(label="関西イントネーション音声", type="numpy")
64
 
65
  btn = gr.Button("関西イントネーションで喋る")
 
1
+ import os
2
  import numpy as np
3
  import librosa
4
  import pyworld as pw
5
  import soundfile as sf
 
6
  import gradio as gr
7
+ from openai import OpenAI
8
 
9
+ # -----------------------------
10
+ # OpenAI Client
11
+ # -----------------------------
12
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
13
+ client = OpenAI(api_key=OPENAI_API_KEY)
14
 
 
 
 
 
15
 
16
+ # ==========================
17
+ # 1) OpenAI 標準音声 TTS
18
+ # ==========================
19
  def tts_standard(text):
20
+ """
21
+ OpenAI gpt-4o-mini-ttsで基本音声を生成
22
+ """
23
+ response = client.audio.speech.create(
24
+ model="gpt-4o-mini-tts",
25
+ voice="alloy",
26
+ input=text
27
+ )
28
+ audio_bytes = response.read()
29
+
30
+ # WAVとして読み込む
31
+ sr, audio = sf.read(
32
+ sf.SoundFile(
33
+ io.BytesIO(audio_bytes),
34
+ mode='r'
35
+ )
36
+ )
37
+
38
+ return sr, audio.astype(np.float32)
39
 
40
 
41
  # ==========================
42
+ # 2) F0を関西イントネーションHL型に変換
43
  # ==========================
44
  def convert_to_kansai_pitch(audio, sr):
45
+ """
46
+ 標準TTS音声 → 関西弁イントネーション(HLパターン)へ変換
47
+ """
48
 
49
  # WORLDで分解
50
  f0, sp, ap = pw.wav2world(audio.astype(np.float64), sr)
51
 
52
+ # 有効フレ
53
  nonzero_idx = np.where(f0 > 0)[0]
54
  if len(nonzero_idx) < 2:
55
  return audio
56
 
57
+ # 基本周波数の範囲
58
  high = np.max(f0[nonzero_idx])
59
  low = np.min(f0[nonzero_idx])
60
 
61
+ # HLパターン(前半高 → 後半低)
62
  hl_curve = np.linspace(high, low, len(f0))
63
  f0_new = hl_curve
64
 
 
68
 
69
 
70
  # ==========================
71
+ # 3) 統合:Kansai TTS
72
  # ==========================
73
  def kansai_tts(text):
74
  sr, audio_std = tts_standard(text)
 
80
  # Gradio UI
81
  # ==========================
82
  with gr.Blocks() as demo:
83
+ gr.Markdown("## 🔊 Kansign — Kansai Accent TTS(OpenAI・安定稼働)")
84
+
85
+ text_in = gr.Textbox(
86
+ label="テキストを入力(例:なんでやねん)",
87
+ value="なんでやねん"
88
+ )
89
+
90
  audio_out = gr.Audio(label="関西イントネーション音声", type="numpy")
91
 
92
  btn = gr.Button("関西イントネーションで喋る")