Spaces:

KGNINJA
/

Kansign

Sleeping

App Files Files Community

KGNINJA committed on Nov 15, 2025

Commit

fc8c147

verified ·

1 Parent(s): ea83657

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -17

app.py CHANGED Viewed

@@ -1,42 +1,64 @@
 import numpy as np
 import librosa
 import pyworld as pw
 import soundfile as sf
-from transformers import AutoProcessor, AutoModel
 import gradio as gr
-from transformers import pipeline
-# 軽量TTS
-tts = pipeline("text-to-speech", model="p0p0pp/KoeTTS")
 def tts_standard(text):
-    result = tts(text)
-    audio = result["audio"]
-    return (22050, audio)
 # ==========================
-# 2) F0 を関西イントネーションへ強制変換
 # ==========================
 def convert_to_kansai_pitch(audio, sr):
-    """標準TTSの音声 → 関西イントネーションHL型へ"""
     # WORLDで分解
     f0, sp, ap = pw.wav2world(audio.astype(np.float64), sr)
-    # HL型ピッチカーブへの変換
     nonzero_idx = np.where(f0 > 0)[0]
     if len(nonzero_idx) < 2:
         return audio
-    # 基本ピッチ
     high = np.max(f0[nonzero_idx])
     low = np.min(f0[nonzero_idx])
-    # HLパターン生成：最初高く → 後半下がる
     hl_curve = np.linspace(high, low, len(f0))
     f0_new = hl_curve
@@ -46,7 +68,7 @@ def convert_to_kansai_pitch(audio, sr):
 # ==========================
-# 3) 統合関数
 # ==========================
 def kansai_tts(text):
     sr, audio_std = tts_standard(text)
@@ -58,8 +80,13 @@ def kansai_tts(text):
 # Gradio UI
 # ==========================
 with gr.Blocks() as demo:
-    gr.Markdown("## 🔊 Kansign — Kansai Accent TTS（ベータ版）")
-    text_in = gr.Textbox(label="テキストを入力（例：なんでやねん）")
     audio_out = gr.Audio(label="関西イントネーション音声", type="numpy")
     btn = gr.Button("関西イントネーションで喋る")

+import os
 import numpy as np
 import librosa
 import pyworld as pw
 import soundfile as sf
 import gradio as gr
+from openai import OpenAI
+# -----------------------------
+# OpenAI Client
+# -----------------------------
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+client = OpenAI(api_key=OPENAI_API_KEY)
+# ==========================
+# 1) OpenAI 標準音声 TTS
+# ==========================
 def tts_standard(text):
     """
     Generate the base (standard-accent) speech with OpenAI gpt-4o-mini-tts.

     Args:
         text: Text to synthesize.

     Returns:
         Tuple ``(sr, audio)``: the sample rate reported by the decoder and
         the decoded samples as a float32 NumPy array.
     """
     # Local import: io is needed for the in-memory buffer below and is not
     # among the module-level imports.
     import io

     response = client.audio.speech.create(
         model="gpt-4o-mini-tts",
         voice="alloy",
         input=text,
         # Ask for WAV explicitly so the bytes match the WAV decoding below;
         # without this the API falls back to its default output format.
         response_format="wav",
     )
     audio_bytes = response.read()
     # soundfile.read accepts a file-like object directly and returns
     # (data, samplerate) in that order, so unpack accordingly.
     audio, sr = sf.read(io.BytesIO(audio_bytes), dtype="float32")
     return sr, audio
 # ==========================
+# 2) F0を関西イントネーションHL型に変換
 # ==========================
 def convert_to_kansai_pitch(audio, sr):
+    """
+    標準TTS音声 → 関西弁イントネーション（HLパターン）へ変換
+    """
     # WORLDで分解
     f0, sp, ap = pw.wav2world(audio.astype(np.float64), sr)
+    # 有効フレーム
     nonzero_idx = np.where(f0 > 0)[0]
     if len(nonzero_idx) < 2:
         return audio
+    # 基本周波数の範囲
     high = np.max(f0[nonzero_idx])
     low = np.min(f0[nonzero_idx])
+    # HLパターン（前半高 → 後半低）
     hl_curve = np.linspace(high, low, len(f0))
     f0_new = hl_curve
 # ==========================
+# 3) 統合：Kansai TTS
 # ==========================
 def kansai_tts(text):
     sr, audio_std = tts_standard(text)
 # Gradio UI
 # ==========================
 with gr.Blocks() as demo:
+    gr.Markdown("## 🔊 Kansign — Kansai Accent TTS（OpenAI版・安定稼働）")
+    text_in = gr.Textbox(
+        label="テキストを入力（例：なんでやねん）",
+        value="なんでやねん"
+    )
     audio_out = gr.Audio(label="関西イントネーション音声", type="numpy")
     btn = gr.Button("関西イントネーションで喋る")