Spaces:

KGNINJA
/

MLIT_DATA_PLATFORM

Runtime error

File size: 2,257 Bytes

1b9afe7
 
 
 
848980e
1b9afe7
 
848980e
1b9afe7

import io
import numpy as np
import librosa
import pyworld as pw
import gradio as gr
from openai import OpenAI
import soundfile as sf

client = OpenAI()

# ==========================
# 1) 標準の TTS（OpenAI）
# ==========================
def tts_standard(text):
    """Synthesize *text* with the OpenAI TTS endpoint and decode the result.

    Args:
        text: The text to be spoken.

    Returns:
        A ``(sample_rate, samples)`` tuple. ``sample_rate`` is whatever
        ``soundfile.read`` reports for the payload and ``samples`` is the
        decoded audio converted to a float64 NumPy array.
    """
    response = client.audio.speech.create(
        model="gpt-4o-mini-tts",
        voice="alloy",
        input=text,
        # The endpoint returns MP3 unless told otherwise; ask for WAV so the
        # payload really is what the decoder below is meant to read.
        response_format="wav",
    )
    audio_bytes = response.read()

    # Decode the WAV payload directly from memory.
    audio, sr = sf.read(io.BytesIO(audio_bytes))
    return sr, audio.astype(np.float64)


# ==========================
# 2) Kansai HL F0 カーブ生成
# ==========================
def kansai_hl_curve(length, f0_src):
    """Build a rise / plateau / fall F0 contour from the range of *f0_src*.

    The contour rises linearly from the 20th to the 85th percentile of the
    voiced (non-zero) source values over the first 25% of the frames, stays
    at the high value until 70%, then falls linearly back to the low value.

    Args:
        length: Number of frames in the generated contour.
        f0_src: 1-D array of source F0 values; entries ``<= 0`` are treated
            as unvoiced frames.

    Returns:
        A float64 array with ``length`` frames. When ``length`` equals
        ``len(f0_src)``, frames that were unvoiced in the source stay at 0 so
        the voiced/unvoiced pattern of the source is preserved. If the source
        has fewer than two voiced frames it is returned unchanged.
    """
    f0_src = np.asarray(f0_src, dtype=np.float64)
    voiced = f0_src > 0
    if np.count_nonzero(voiced) < 2:
        # Not enough voiced frames to estimate a pitch range.
        return f0_src

    low = np.percentile(f0_src[voiced], 20)
    high = np.percentile(f0_src[voiced], 85)

    rise_end = int(length * 0.25)
    fall_start = int(length * 0.70)

    # The three segment lengths sum to exactly ``length``, so no padding or
    # trimming is needed afterwards.
    contour = np.concatenate([
        np.linspace(low, high, rise_end),
        np.full(fall_start - rise_end, high),
        np.linspace(high, low, length - fall_start),
    ])

    # Keep unvoiced frames unvoiced: giving them a non-zero F0 would mark
    # silence and unvoiced consonants as voiced. This is only possible when
    # the contour lines up frame-for-frame with the source.
    if voiced.shape == contour.shape:
        contour[~voiced] = 0.0

    return contour


# ==========================
# 3) 標準 → 関西イントネーション化
# ==========================
def convert_to_kansai_pitch(audio, sr):
    """Resynthesize *audio* with its F0 track replaced by the HL contour.

    The signal is analysed with ``pw.wav2world``, its F0 track is replaced by
    the output of ``kansai_hl_curve``, and the result is synthesized again
    with the original spectral envelope and aperiodicity.

    Args:
        audio: Audio samples, either 1-D or ``(frames, channels)``.
        sr: Sample rate of ``audio``.

    Returns:
        The resynthesized mono signal as a float32 NumPy array. Empty input
        yields an empty float32 array without running the vocoder.
    """
    samples = np.asarray(audio, dtype=np.float64)
    if samples.ndim == 2:
        # The analysis step takes a single 1-D signal, so average the
        # channels of multi-channel audio down to mono.
        samples = samples.mean(axis=1)
    if samples.size == 0:
        # Nothing to analyse; avoid handing an empty buffer to the vocoder.
        return samples.astype(np.float32)
    # Slices and downmixed views may not be contiguous; the vocoder bindings
    # expect a contiguous float64 buffer.
    samples = np.ascontiguousarray(samples)

    f0, sp, ap = pw.wav2world(samples, sr)
    f0_new = kansai_hl_curve(len(f0), f0)
    y = pw.synthesize(f0_new, sp, ap, sr)
    return y.astype(np.float32)


# ==========================
# 4) 統合
# ==========================
def kansai_tts(text):
    """Run the full pipeline: standard TTS followed by pitch conversion.

    Args:
        text: The text to speak.

    Returns:
        A ``(sample_rate, samples)`` tuple holding the converted audio, or
        ``None`` when ``text`` is ``None``, empty, or only whitespace (so no
        request is made for input that contains nothing to speak).
    """
    if text is None or not text.strip():
        return None

    sr, audio_std = tts_standard(text)
    audio_ks = convert_to_kansai_pitch(audio_std, sr)
    return sr, audio_ks


# ==========================
# 5) Gradio UI
# ==========================
with gr.Blocks() as demo:
    # Page heading.
    gr.Markdown(value="## 🎙 Kansign — Kansai Accent TTS（本物HLイントネーション搭載）")

    # One text box for the input and one audio component for the result.
    text_in = gr.Textbox(
        label="テキストを入力（例：なんでやねん）",
    )
    audio_out = gr.Audio(
        label="関西イントネーション音声",
        type="numpy",
    )
    btn = gr.Button(value="関西イントネーションで喋る")

    # Clicking the button passes the text box value to kansai_tts and sends
    # its return value to the audio component.
    btn.click(
        fn=kansai_tts,
        inputs=text_in,
        outputs=audio_out,
    )

# Start serving the interface.
demo.launch()