Spaces:
Runtime error
Runtime error
File size: 2,257 Bytes
1b9afe7 848980e 1b9afe7 848980e 1b9afe7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import io
import numpy as np
import librosa
import pyworld as pw
import gradio as gr
from openai import OpenAI
import soundfile as sf
# Shared OpenAI API client used by tts_standard(). It is constructed with no
# arguments, so credentials come from the SDK's defaults (presumably the
# OPENAI_API_KEY environment variable — confirm for the deployment).
client = OpenAI()
# ==========================
# 1) 標準の TTS(OpenAI)
# ==========================
def tts_standard(text):
    """Synthesize *text* with the OpenAI TTS endpoint and decode the audio.

    Args:
        text: The text to speak.

    Returns:
        A ``(sample_rate, samples)`` tuple, where ``samples`` is a 1-D
        ``float64`` NumPy array.
    """
    response = client.audio.speech.create(
        model="gpt-4o-mini-tts",
        voice="alloy",
        input=text,
        # The endpoint returns MP3 unless told otherwise; request WAV so the
        # payload is what the decoding step below is meant to read.
        response_format="wav",
    )
    audio_bytes = response.read()

    # Decode the WAV payload straight from memory.
    audio, sr = sf.read(io.BytesIO(audio_bytes))

    # soundfile returns (frames, channels) for multi-channel files; downmix
    # to mono because the WORLD analysis downstream takes a 1-D signal.
    if audio.ndim > 1:
        audio = audio.mean(axis=1)

    return sr, audio.astype(np.float64)
# ==========================
# 2) Kansai HL F0 カーブ生成
# ==========================
def kansai_hl_curve(length, f0_src):
    """Build a rise / hold / fall ("HL") F0 contour from a source F0 track.

    The contour climbs from a low pitch to a high pitch over the first 25%
    of the frames, holds the high pitch until 70%, then falls back to the
    low pitch. "Low" and "high" are the 20th and 85th percentiles of the
    voiced (non-zero) values in ``f0_src``.

    Args:
        length: Number of frames in the returned contour.
        f0_src: 1-D NumPy array of F0 values; ``0`` marks an unvoiced frame.

    Returns:
        A ``float64`` array of ``length`` frames. When ``f0_src`` has the
        same length, frames that were unvoiced in ``f0_src`` stay at ``0``.
        If fewer than two frames are voiced, ``f0_src`` itself is returned
        unchanged.
    """
    voiced = f0_src > 0
    if np.count_nonzero(voiced) < 2:
        # Not enough pitch information to derive a range from.
        return f0_src

    voiced_f0 = f0_src[voiced]
    low = np.percentile(voiced_f0, 20)
    high = np.percentile(voiced_f0, 85)

    rise_end = int(length * 0.25)
    hold_end = int(length * 0.70)

    # The three segment lengths sum to exactly ``length``, so no padding or
    # trimming is needed afterwards.
    contour = np.concatenate([
        np.linspace(low, high, rise_end),
        np.full(hold_end - rise_end, high),
        np.linspace(high, low, length - hold_end),
    ])

    # Keep the voiced/unvoiced decision of the source: giving unvoiced
    # frames a pitch would make the vocoder treat them as voiced. The mask
    # only lines up frame-for-frame when both tracks have the same length.
    if voiced.shape == contour.shape:
        contour[~voiced] = 0.0

    return contour
# ==========================
# 3) 標準 → 関西イントネーション化
# ==========================
def convert_to_kansai_pitch(audio, sr):
    """Re-synthesize *audio* with its pitch track replaced by the HL contour.

    Args:
        audio: Mono waveform to analyse (assumed 1-D float64, as produced by
            tts_standard — confirm if called from elsewhere).
        sr: Sample rate of ``audio`` in Hz.

    Returns:
        The re-synthesized waveform as a ``float32`` NumPy array.
    """
    # WORLD analysis yields the F0 track plus the two feature sets that are
    # passed through to synthesis untouched.
    pitch_track, envelope, aperiodicity = pw.wav2world(audio, sr)

    # Only the pitch is swapped out.
    contour = kansai_hl_curve(len(pitch_track), pitch_track)

    resynthesized = pw.synthesize(contour, envelope, aperiodicity, sr)
    return resynthesized.astype(np.float32)
# ==========================
# 4) 統合
# ==========================
def kansai_tts(text):
    """Run the full pipeline: standard TTS, then the Kansai pitch rewrite.

    Args:
        text: The text to speak.

    Returns:
        A ``(sample_rate, samples)`` tuple holding the converted audio.
    """
    sample_rate, standard_audio = tts_standard(text)
    converted_audio = convert_to_kansai_pitch(standard_audio, sample_rate)
    return sample_rate, converted_audio
# ==========================
# 5) Gradio UI
# ==========================
with gr.Blocks() as demo:
    # Components are declared in the order they appear on the page.
    gr.Markdown("## 🎙 Kansign — Kansai Accent TTS(本物HLイントネーション搭載)")
    text_in = gr.Textbox(label="テキストを入力(例:なんでやねん)")
    audio_out = gr.Audio(label="関西イントネーション音声", type="numpy")
    btn = gr.Button("関西イントネーションで喋る")

    # Clicking the button feeds the textbox content through the pipeline and
    # shows the resulting audio in the audio component.
    btn.click(fn=kansai_tts, inputs=text_in, outputs=audio_out)

# Start the web app.
demo.launch()
|