import gradio as gr
import whisper
import os
import tempfile
from pydub import AudioSegment

def transcribe_audio(file):
    """Transcribe an audio file to Japanese text with Whisper.

    The input is converted to WAV with pydub, transcribed with the Whisper
    "base" model (``language='ja'``), and the resulting text is also written
    to a UTF-8 ``transcription_output.txt`` so it can be offered as a
    download.

    Args:
        file: Path to the audio file to transcribe (the interface passes the
            value of a ``gr.Audio(type="filepath")`` input). ``None`` or an
            empty path is treated as "nothing uploaded".

    Returns:
        A ``(output_path, text_output)`` tuple: the path of the written text
        file and the stripped transcription. ``(None, "")`` when no file was
        supplied.
    """
    # Nothing uploaded: return empty outputs instead of failing inside pydub.
    if not file:
        return None, ""

    # Reserve a unique path for the intermediate WAV; the descriptor is closed
    # right away because pydub opens the path itself.
    fd, audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        audio = AudioSegment.from_file(file)
        # export() hands back the open output handle; close it explicitly so
        # the file can be removed afterwards on every platform.
        audio.export(audio_path, format="wav").close()

        # Load the Whisper model ("small", "medium", "large" also work).
        # NOTE(review): the model is reloaded on every call. Caching it would
        # cut latency but shares one model object across concurrent requests;
        # confirm that is safe before changing.
        model = whisper.load_model("base")
        result = model.transcribe(audio_path, language='ja')
    finally:
        # The intermediate WAV is only needed for the transcription itself.
        os.remove(audio_path)

    # Extract the text.
    text_output = result.get("text", "").strip()

    # Save as a .txt file. Each call gets its own directory so simultaneous
    # requests cannot overwrite each other's transcript, while the download
    # keeps the same file name as before.
    output_path = os.path.join(tempfile.mkdtemp(), "transcription_output.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(text_output)

    return output_path, text_output

# Gradio interface definition: one audio input, two outputs (a downloadable
# text file and the same text shown on screen).
_audio_input = gr.Audio(type="filepath", label="音声ファイルをアップロード")
_output_components = [
    gr.File(label="書き起こしテキストファイル (.txt)"),
    gr.Textbox(label="文字起こし内容（画面表示）", lines=20, interactive=False),
]

iface = gr.Interface(
    fn=transcribe_audio,
    inputs=_audio_input,
    outputs=_output_components,
    title="Whisper 書き起こしアプリ（TXT表示付き）",
    description="音声ファイルをアップロードすると、Whisperで文字起こしされた内容を画面表示し、テキストファイルでもダウンロードできます。",
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()