File size: 2,257 Bytes
019cdd5 fe864e6 d8c5e4b cc6936d 79bab02 f67ac33 019cdd5 7ce8f71 f67ac33 d8c5e4b f67ac33 cc6936d fc8c147 f67ac33 fc8c147 d8c5e4b f67ac33 79bab02 d8c5e4b f67ac33 79bab02 f67ac33 019cdd5 de052c2 f67ac33 019cdd5 de052c2 019cdd5 b8cd115 f67ac33 b8cd115 f67ac33 b8cd115 f67ac33 de052c2 f67ac33 019cdd5 f67ac33 79bab02 ff5e6bf d8c5e4b f67ac33 019cdd5 f67ac33 cc6936d 019cdd5 f67ac33 fe864e6 7ce8f71 f67ac33 7ce8f71 f67ac33 fc8c147 79bab02 f67ac33 cc6936d f67ac33 cc6936d 7ce8f71 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import io
import numpy as np
import librosa
import pyworld as pw
import gradio as gr
from openai import OpenAI
import soundfile as sf
# Module-level OpenAI client used by the TTS helper in this file; it is
# constructed with no explicit arguments.
client = OpenAI()
# ==========================
# 1) Standard TTS (OpenAI)
# ==========================
def tts_standard(text):
    """Synthesize *text* with the OpenAI speech endpoint and decode the audio.

    Args:
        text: The text to speak.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``samples`` is a float64
        NumPy array as decoded by ``soundfile``.
    """
    response = client.audio.speech.create(
        model="gpt-4o-mini-tts",
        voice="alloy",
        input=text,
        # The endpoint returns MP3 unless told otherwise. Ask for WAV
        # explicitly so the payload matches what the decoder below expects,
        # independent of whether the local libsndfile build can read MP3.
        response_format="wav",
    )
    audio_bytes = response.read()

    # Decode the in-memory WAV payload straight to float64 samples.
    audio, sr = sf.read(io.BytesIO(audio_bytes), dtype="float64")
    return sr, audio
# ==========================
# 2) Kansai HL F0 curve generation
# ==========================
def kansai_hl_curve(length, f0_src):
    """Build a rise / plateau / fall F0 contour from the source pitch range.

    The contour rises from a low to a high pitch over the first 25% of the
    frames, holds the high pitch until 70%, then falls back to the low pitch.
    The low and high pitches are the 20th and 85th percentiles of the voiced
    (``> 0``) values in ``f0_src``.

    Args:
        length: Number of frames in the returned contour.
        f0_src: Source F0 track; values ``<= 0`` are treated as unvoiced.

    Returns:
        A float64 array of ``length`` frames. When ``f0_src`` has fewer than
        two voiced frames it is returned unchanged. When ``length`` equals
        ``len(f0_src)``, frames that were unvoiced in the source stay at 0 so
        that silence and unvoiced sounds are not turned into pitched audio.
    """
    f0_src = np.asarray(f0_src, dtype=np.float64)
    voiced = f0_src > 0
    if np.count_nonzero(voiced) < 2:
        # Not enough pitch information to estimate a range.
        return f0_src

    low, high = np.percentile(f0_src[voiced], [20, 85])

    rise_end = int(length * 0.25)
    plateau_end = int(length * 0.70)

    # The three segment lengths always sum to `length`, so no padding or
    # trimming is required afterwards.
    out = np.concatenate([
        np.linspace(low, high, rise_end),
        np.full(plateau_end - rise_end, high),
        np.linspace(high, low, length - plateau_end),
    ])

    # Keep the source voicing decision: F0 == 0 marks an unvoiced frame.
    # Only possible when the contour is aligned frame-for-frame with the source.
    if f0_src.ndim == 1 and len(f0_src) == length:
        out[~voiced] = 0.0
    return out
# ==========================
# 3) Standard -> Kansai intonation conversion
# ==========================
def convert_to_kansai_pitch(audio, sr):
    """Resynthesize *audio* with its F0 track replaced by the Kansai contour.

    The waveform is analysed with WORLD, the F0 track is replaced by the
    output of ``kansai_hl_curve``, and the signal is synthesized again from
    the new F0 plus the original spectral envelope and aperiodicity.

    Args:
        audio: Waveform samples. Coerced to 1-D float64; a 2-D input is
            averaged over its second axis (assumed to be channels -- TODO
            confirm against callers).
        sr: Sample rate in Hz.

    Returns:
        The resynthesized waveform as a float32 array.
    """
    # WORLD needs a 1-D, C-contiguous float64 signal.
    samples = np.asarray(audio, dtype=np.float64)
    if samples.ndim > 1:
        samples = samples.mean(axis=1)
    samples = np.ascontiguousarray(samples)

    f0, sp, ap = pw.wav2world(samples, sr)
    f0_new = np.ascontiguousarray(kansai_hl_curve(len(f0), f0), dtype=np.float64)
    y = pw.synthesize(f0_new, sp, ap, sr)
    return y.astype(np.float32)
# ==========================
# 4) Pipeline
# ==========================
def kansai_tts(text):
    """Speak *text* with standard TTS, then apply the Kansai pitch conversion.

    Returns:
        A ``(sample_rate, samples)`` tuple holding the converted audio.
    """
    sample_rate, standard_audio = tts_standard(text)
    return sample_rate, convert_to_kansai_pitch(standard_audio, sample_rate)
# ==========================
# 5) Gradio UI
# ==========================
# One text box in, one audio player out; the button runs the full pipeline.
with gr.Blocks() as demo:
    gr.Markdown(value="## 🎙 Kansign — Kansai Accent TTS(本物HLイントネーション搭載)")

    text_in = gr.Textbox(label="テキストを入力(例:なんでやねん)")
    # type="numpy" matches the (sample_rate, ndarray) tuple from kansai_tts.
    audio_out = gr.Audio(type="numpy", label="関西イントネーション音声")

    btn = gr.Button(value="関西イントネーションで喋る")
    btn.click(fn=kansai_tts, inputs=text_in, outputs=audio_out)

demo.launch()
|