Spaces:
Runtime error
Runtime error
| import io | |
| import numpy as np | |
| import librosa | |
| import pyworld as pw | |
| import gradio as gr | |
| from openai import OpenAI | |
| import soundfile as sf | |
# Shared OpenAI client used by the TTS helper below.
# NOTE(review): the SDK is expected to read its credentials from the
# environment (OPENAI_API_KEY); if the key is missing this line may fail at
# import time — confirm the secret is configured for the deployment.
client = OpenAI()


# ==========================
# 1) Standard TTS (OpenAI)
# ==========================
def tts_standard(text):
    """Synthesize *text* with the OpenAI TTS endpoint and decode the result.

    Args:
        text: The sentence to speak.

    Returns:
        A ``(sample_rate, samples)`` tuple, where ``samples`` is the decoded
        audio converted to a float64 NumPy array.
    """
    response = client.audio.speech.create(
        model="gpt-4o-mini-tts",
        voice="alloy",
        input=text,
        # The endpoint's default container is MP3; request WAV explicitly so
        # the payload matches what the decoder below is meant to read.
        response_format="wav",
    )
    audio_bytes = response.read()
    # Decode the WAV payload straight from memory.
    audio, sr = sf.read(io.BytesIO(audio_bytes))
    return sr, audio.astype(np.float64)
# ==========================
# 2) Kansai HL F0 curve generation
# ==========================
def kansai_hl_curve(length, f0_src):
    """Build a rise / plateau / fall F0 contour from a source F0 track.

    The contour climbs from a low pitch to a high pitch over the first 25%
    of the frames, holds the high pitch until 70%, then falls back to the
    low pitch. The low and high pitches are the 20th and 85th percentiles
    of the voiced (``> 0``) values in ``f0_src``.

    Args:
        length: Number of frames in the generated contour.
        f0_src: 1-D F0 track; values ``<= 0`` are treated as unvoiced.

    Returns:
        ``f0_src`` unchanged when it has fewer than two voiced frames.
        Otherwise a new float array with ``length`` entries. When ``length``
        equals ``len(f0_src)``, frames that were unvoiced in the source stay
        at 0 so the voiced/unvoiced pattern of the source is preserved.

    Raises:
        ValueError: If ``length`` is negative (raised by NumPy).
    """
    f0_src = np.asarray(f0_src)
    voiced = f0_src > 0
    if np.count_nonzero(voiced) < 2:
        # Not enough pitch information to derive a range from.
        return f0_src

    low, high = np.percentile(f0_src[voiced], [20, 85])

    rise_end = int(length * 0.25)
    fall_start = int(length * 0.70)

    # The three segment lengths always add up to ``length``, so the result
    # needs no padding or trimming afterwards.
    out = np.concatenate([
        np.linspace(low, high, rise_end),
        np.full(fall_start - rise_end, high),
        np.linspace(high, low, length - fall_start),
    ])

    # An F0 of 0 marks an unvoiced frame; do not turn those frames into
    # voiced ones. Only possible when the contour lines up with the source.
    if out.shape == f0_src.shape:
        out[~voiced] = 0.0
    return out
# ==========================
# 3) Standard -> Kansai intonation conversion
# ==========================
def convert_to_kansai_pitch(audio, sr):
    """Resynthesize *audio* with its F0 track replaced by the HL contour.

    The signal is analysed with WORLD (``pw.wav2world``), the F0 track is
    swapped for the output of ``kansai_hl_curve``, and the signal is
    synthesized again with the original spectral envelope and aperiodicity.

    Args:
        audio: Audio samples; any real dtype. A 2-D array is assumed to be
            laid out as ``(frames, channels)`` and is averaged to mono.
        sr: Sample rate of ``audio`` in Hz.

    Returns:
        The resynthesized signal as a float32 NumPy array.
    """
    samples = np.asarray(audio, dtype=np.float64)
    if samples.ndim > 1:
        # WORLD analyses a single channel; downmix multi-channel input.
        samples = samples.mean(axis=1)
    # pyworld only accepts a C-contiguous 1-D float64 buffer.
    samples = np.ascontiguousarray(samples)

    f0, sp, ap = pw.wav2world(samples, sr)
    f0_new = np.ascontiguousarray(kansai_hl_curve(len(f0), f0), dtype=np.float64)
    y = pw.synthesize(f0_new, sp, ap, sr)
    return y.astype(np.float32)
# ==========================
# 4) Integration
# ==========================
def kansai_tts(text):
    """Speak *text* with the standard TTS voice, then apply the Kansai pitch.

    Args:
        text: The sentence to speak.

    Returns:
        A ``(sample_rate, samples)`` tuple holding the converted audio.
    """
    sample_rate, standard_audio = tts_standard(text)
    return sample_rate, convert_to_kansai_pitch(standard_audio, sample_rate)
# ==========================
# 5) Gradio UI
# ==========================
with gr.Blocks() as demo:
    # Components are created in display order: title, text input,
    # audio output, then the trigger button.
    gr.Markdown("## 🎙 Kansign — Kansai Accent TTS(本物HLイントネーション搭載)")
    text_in = gr.Textbox(label="テキストを入力(例:なんでやねん)")
    audio_out = gr.Audio(label="関西イントネーション音声", type="numpy")
    btn = gr.Button("関西イントネーションで喋る")

    # Clicking the button sends the textbox content through the TTS pipeline
    # and puts the returned (sample_rate, samples) pair into the audio widget.
    btn.click(fn=kansai_tts, inputs=text_in, outputs=audio_out)

demo.launch()