Spaces:

KGNINJA
/

MLIT_DATA_PLATFORM

Runtime error

App Files Files Community

MLIT_DATA_PLATFORM / app.py

KGNINJA

Update app.py

1b9afe7 verified about 2 months ago

raw

history blame contribute delete

2.26 kB

	import io
	import numpy as np
	import librosa
	import pyworld as pw
	import gradio as gr
	from openai import OpenAI
	import soundfile as sf

	# Shared OpenAI client, constructed once at import time and used by
	# tts_standard() for every speech request.
	# NOTE(review): no credentials are passed here, so the SDK presumably picks
	# them up from the environment (e.g. OPENAI_API_KEY) -- confirm the Space
	# has that secret configured, otherwise this line fails at startup.
	client = OpenAI()

	# ==========================
	# 1) 標準の TTS（OpenAI）
	# ==========================
	def tts_standard(text):
	    """Synthesize *text* with the OpenAI TTS endpoint and decode it to samples.

	    The request uses the "gpt-4o-mini-tts" model with the "alloy" voice and
	    asks for a WAV payload, which is then decoded in memory.

	    Args:
	        text: The sentence to speak.

	    Returns:
	        A ``(sample_rate, samples)`` tuple where ``samples`` is a 1-D,
	        C-contiguous ``float64`` array. The array is handed directly to the
	        WORLD analysis in ``convert_to_kansai_pitch``, which is why it is
	        normalised to mono/float64 here.
	    """
	    response = client.audio.speech.create(
	        model="gpt-4o-mini-tts",
	        voice="alloy",
	        input=text,
	        # The endpoint returns MP3 unless told otherwise; request WAV
	        # explicitly so the decode step below gets the format it expects.
	        response_format="wav",
	    )
	    audio_bytes = response.read()

	    # Decode the WAV payload from memory (no temporary file).
	    audio, sr = sf.read(io.BytesIO(audio_bytes), dtype="float64")

	    # A multi-channel file decodes to (frames, channels); average the
	    # channels so downstream code always receives a 1-D signal.
	    if audio.ndim > 1:
	        audio = audio.mean(axis=1)

	    return sr, np.ascontiguousarray(audio, dtype=np.float64)


	# ==========================
	# 2) Kansai HL F0 カーブ生成
	# ==========================
	def kansai_hl_curve(length, f0_src):
	    """Build a low -> high -> low F0 contour from the pitch range of *f0_src*.

	    The contour rises from the 20th to the 85th percentile of the voiced
	    (non-zero) F0 values over the first 25% of the frames, stays at the high
	    value until 70%, and falls back to the low value over the remainder.

	    Args:
	        length: Number of frames in the generated contour.
	        f0_src: Source F0 track (array-like); values <= 0 are treated as
	            unvoiced and excluded from the percentile estimate.

	    Returns:
	        A ``float64`` array with ``length`` entries. When ``f0_src`` is 1-D
	        and has exactly ``length`` frames, frames that were unvoiced in the
	        source stay at 0 so the voiced/unvoiced decision is preserved.
	        If fewer than two voiced frames exist, ``f0_src`` is returned
	        unchanged because no pitch range can be estimated.
	    """
	    src = np.asarray(f0_src, dtype=np.float64)
	    voiced = src > 0
	    if np.count_nonzero(voiced) < 2:
	        return f0_src

	    low, high = np.percentile(src[voiced], [20, 85])

	    rise_end = int(length * 0.25)
	    hold_end = int(length * 0.70)

	    # The three segment sizes always add up to `length`, so no padding or
	    # truncation is needed afterwards.
	    out = np.concatenate([
	        np.linspace(low, high, rise_end),
	        np.full(hold_end - rise_end, high),
	        np.linspace(high, low, length - hold_end),
	    ])

	    # Keep unvoiced frames unvoiced instead of painting a pitch over them.
	    # Only possible when the mask lines up frame-for-frame with the output.
	    if src.ndim == 1 and src.shape[0] == length:
	        out[~voiced] = 0.0

	    return out


	# ==========================
	# 3) 標準 → 関西イントネーション化
	# ==========================
	def convert_to_kansai_pitch(audio, sr):
	    """Re-synthesize *audio* with its pitch track replaced by the HL contour.

	    The signal is analysed with ``pw.wav2world``, the resulting F0 track is
	    swapped for the output of ``kansai_hl_curve`` (same number of frames),
	    and the waveform is rebuilt with ``pw.synthesize`` using the other two
	    analysis outputs unchanged.

	    Args:
	        audio: Input samples, passed straight to ``pw.wav2world``.
	        sr: Sampling rate of ``audio``.

	    Returns:
	        The re-synthesized waveform as a ``float32`` array.
	    """
	    pitch, spectral_env, aperiodicity = pw.wav2world(audio, sr)
	    reshaped_pitch = kansai_hl_curve(len(pitch), pitch)
	    return pw.synthesize(
	        reshaped_pitch, spectral_env, aperiodicity, sr
	    ).astype(np.float32)


	# ==========================
	# 4) 統合
	# ==========================
	def kansai_tts(text):
	    """Gradio callback: speak *text* and apply the Kansai pitch contour.

	    Args:
	        text: Contents of the input textbox.

	    Returns:
	        A ``(sample_rate, samples)`` tuple, the shape expected by a
	        ``gr.Audio(type="numpy")`` output.

	    Raises:
	        gr.Error: If *text* is empty or whitespace only, so the user gets a
	            readable message instead of a failed remote TTS request.
	    """
	    if text is None or not text.strip():
	        # Message reads "Please enter some text".
	        raise gr.Error("テキストを入力してください")

	    sr, audio_std = tts_standard(text)
	    return sr, convert_to_kansai_pitch(audio_std, sr)


	# ==========================
	# 5) Gradio UI
	# ==========================
	# Layout: one textbox, one audio player and one button that runs
	# kansai_tts() on the textbox contents and plays the result.
	with gr.Blocks() as demo:
	    gr.Markdown("## 🎙 Kansign — Kansai Accent TTS（本物HLイントネーション搭載）")

	    text_in = gr.Textbox(label="テキストを入力（例：なんでやねん）")
	    audio_out = gr.Audio(label="関西イントネーション音声", type="numpy")
	    btn = gr.Button("関西イントネーションで喋る")

	    btn.click(kansai_tts, inputs=text_in, outputs=audio_out)

	# Start the server only when executed as a script, so importing this module
	# (for tests or reuse of the functions above) does not launch the UI.
	if __name__ == "__main__":
	    demo.launch()