Spaces:

ek15072809
/

Qwen-ASR

Sleeping

App Files Files Community

Qwen-ASR / app.py

ek15072809

Update app.py

6bb0659 verified about 1 month ago

raw

history blame contribute delete

2.45 kB

	import gradio as gr
	import torch
	import numpy as np
	import tempfile
	import os
	from scipy.io import wavfile
	from qwen_asr import Qwen3ASRModel

	# --- Model setup ---
	# The checkpoint is loaded once, when this module is executed, and shared by
	# every transcription request. It is placed on CPU using float32 weights.
	model_id = "Qwen/Qwen3-ASR-0.6B"
	print("モデルを読み込み中 (CPU)...")
	model = Qwen3ASRModel.from_pretrained(model_id, torch_dtype=torch.float32, device_map="cpu")
	print("ロード完了")

	def transcribe_streaming(audio, history):
	    """Transcribe one streamed audio chunk and append it to the transcript.

	    Args:
	        audio: ``None`` or a ``(sampling_rate, numpy_array)`` tuple holding
	            the chunk to transcribe.
	        history: Transcript text accumulated so far.

	    Returns:
	        A ``(history, history)`` pair: the same transcript twice, one value
	        for the state holder and one for the displayed text. The transcript
	        is returned unchanged when there is no audio, the chunk is empty,
	        the model produces only whitespace, or transcription fails.
	    """
	    if audio is None:
	        return history, history

	    # audio is (sampling_rate, numpy_array)
	    sr, y = audio
	    if np.size(y) == 0:
	        # Nothing was captured; skip the file round-trip and the model call.
	        return history, history

	    # Only reserve a unique path here. The handle is closed before the WAV
	    # is written so the file is never open twice at the same time.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	        temp_path = tmp.name

	    try:
	        # The write lives inside the try so a failed write still reaches the
	        # cleanup below instead of leaking the temporary file.
	        wavfile.write(temp_path, sr, y)
	        # The model is handed the path of the WAV file.
	        results = model.transcribe(audio=temp_path)
	        new_text = results[0].text.strip()
	    except Exception as e:
	        # Best effort: a failed chunk must not end the stream or lose the
	        # transcript collected so far.
	        print(f"Error during transcription: {e}")
	        return history, history
	    finally:
	        # Remove the temporary file; it may already be gone, which is fine.
	        try:
	            os.remove(temp_path)
	        except OSError:
	            pass

	    if not new_text:
	        return history, history

	    # Join with a single space, but do not start the transcript with one.
	    updated_history = f"{history} {new_text}" if history else new_text
	    return updated_history, updated_history

	# --- UI construction ---
	with gr.Blocks() as demo:  # the theme is supplied to launch() at the bottom
	    gr.Markdown("# Qwen3-ASR ストリーミング文字起こし")

	    # Holds the transcript accumulated so far; starts out empty.
	    history_state = gr.State("")

	    # NOTE(review): the original nesting could not be recovered; the result
	    # box is assumed to sit next to the microphone input in the same row.
	    with gr.Row():
	        audio_input = gr.Audio(
	            sources=["microphone"],
	            type="numpy",
	            streaming=True,
	            label="話している間、順次文字起こしされます",
	        )
	        output_text = gr.Textbox(label="結果", interactive=False)

	    # Each streamed chunk is passed to transcribe_streaming together with the
	    # current transcript; the result updates both the state and the textbox.
	    audio_input.stream(
	        fn=transcribe_streaming,
	        inputs=[audio_input, history_state],
	        outputs=[history_state, output_text],
	        show_progress="hidden",
	        # Send a chunk every 5 seconds. The interval is set by stream_every,
	        # not by time_limit.
	        stream_every=5,
	        # NOTE(review): time_limit bounds how long one stream may run, so
	        # this ends a recording session after 10 seconds - confirm that
	        # this cap is intended.
	        time_limit=10,
	    )

	    # The reset button clears both the stored transcript and the textbox.
	    clear_btn = gr.Button("リセット")
	    clear_btn.click(lambda: ("", ""), None, [history_state, output_text])

	# Following the Gradio 6.0+ warning, the theme is passed to launch().
	demo.launch(theme=gr.themes.Soft())