# Teera's picture
# 8b1d8cc verified
import os
import sys
import threading

import azure.cognitiveservices.speech as speechsdk
from dotenv import load_dotenv
# Load variables from a local .env file into the environment (no-op if absent).
load_dotenv()

# Azure Speech credentials. SPEECH_KEY must be set in the environment;
# the region falls back to "eastus" when SPEECH_REGION is not set.
SPEECH_KEY = os.getenv("SPEECH_KEY")
SPEECH_REGION = os.getenv("SPEECH_REGION", "eastus")
def create_speech_config(language="th-TH"):
    """Build an Azure SpeechConfig bound to the requested recognition language."""
    speech_config = speechsdk.SpeechConfig(subscription=SPEECH_KEY, region=SPEECH_REGION)
    speech_config.speech_recognition_language = language
    return speech_config
def transcribe_from_mic():
    """Run one-shot speech recognition from the default microphone (CLI mode)."""
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=create_speech_config("th-TH"),
        audio_config=speechsdk.audio.AudioConfig(use_default_microphone=True),
    )
    print("🎤 Listening... Speak into your microphone.")
    result = recognizer.recognize_once()

    reason = result.reason
    if reason == speechsdk.ResultReason.RecognizedSpeech:
        print("✅ Recognized: " + result.text)
        return
    if reason == speechsdk.ResultReason.NoMatch:
        print("❌ No speech could be recognized: " + str(result.no_match_details))
        return
    if reason == speechsdk.ResultReason.Canceled:
        details = result.cancellation_details
        print("⚠️ Speech recognition canceled: " + str(details.reason))
        if details.reason == speechsdk.CancellationReason.Error:
            print("Error details: " + str(details.error_details))
            print("Did you set the speech resource key and region?")
def transcribe_audio_file(audio_path, language="th-TH"):
    """Transcribe an audio file using the Azure Speech SDK.

    Uses continuous recognition so multi-utterance files yield the full
    transcript instead of only the first recognized phrase.

    Args:
        audio_path: Path to the audio file, or None when nothing was recorded.
        language: BCP-47 recognition language code (default Thai).

    Returns:
        The recognized text, one line per utterance, or a Thai-language
        warning/error string when there is no input or nothing was recognized.
    """
    if audio_path is None:
        return "⚠️ กรุณาอัดเสียงก่อน"

    speech_config = create_speech_config(language)
    audio_config = speechsdk.audio.AudioConfig(filename=audio_path)
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config,
        audio_config=audio_config,
    )

    all_results = []
    # Set when the session ends (end of file) or recognition is canceled
    # (e.g. bad credentials) — replaces the previous sleep/poll busy-wait.
    finished = threading.Event()

    def on_recognized(evt):
        # Collect each finalized utterance.
        if evt.result.reason == speechsdk.ResultReason.RecognizedSpeech:
            all_results.append(evt.result.text)

    def on_done(evt):
        finished.set()

    recognizer.recognized.connect(on_recognized)
    recognizer.canceled.connect(on_done)
    recognizer.session_stopped.connect(on_done)

    recognizer.start_continuous_recognition()
    try:
        finished.wait()
    finally:
        # Always stop the recognizer, even if the wait is interrupted.
        recognizer.stop_continuous_recognition()

    if all_results:
        return "\n".join(all_results)
    return "❌ ไม่สามารถถอดเสียงได้ — ลองพูดดังขึ้นหรือตรวจสอบไมค์"
def transcribe_and_analyze(audio_path, language):
    """Transcribe audio, then analyze it with the LLM.

    Returns a (transcript, analysis_json) pair; analysis_json is the empty
    string when transcription produced a warning/error instead of text.
    """
    transcript = transcribe_audio_file(audio_path, language)
    if transcript.startswith(("❌", "⚠️")):
        return transcript, ""
    from llm_client import analyze_football_content, format_analysis_result
    analysis_json = format_analysis_result(analyze_football_content(transcript))
    return transcript, analysis_json
def analyze_text_only(transcript):
    """Analyze an existing transcript string without re-transcribing.

    Returns the formatted analysis JSON, or a Thai warning string when the
    input is missing or whitespace-only.
    """
    if not (transcript and transcript.strip()):
        return "⚠️ กรุณาใส่ข้อความก่อน"
    from llm_client import analyze_football_content, format_analysis_result
    return format_analysis_result(analyze_football_content(transcript))
def run_web():
    """Build and launch the Gradio web UI (record/upload → transcribe → analyze)."""
    import gradio as gr

    with gr.Blocks(
        title="ASR - Football Analysis",
        theme=gr.themes.Soft(
            primary_hue=gr.themes.colors.indigo,
            secondary_hue=gr.themes.colors.purple,
            neutral_hue=gr.themes.colors.slate,
        ),
        # Center the app and cap its width.
        css="""
        .gradio-container {
            max-width: 900px !important;
            margin: auto !important;
        }
        """,
    ) as app:
        gr.Markdown(
            """
            # ⚽ Football Speech Analyzer
            ### ถอดเสียงพูด + วิเคราะห์เนื้อหาฟุตบอลด้วย AI
            ---
            """
        )

        # Recognition language; values are BCP-47 codes passed to Azure.
        with gr.Row():
            language = gr.Dropdown(
                choices=[
                    ("🇹🇭 ไทย", "th-TH"),
                    ("🇺🇸 English", "en-US"),
                    ("🇯🇵 日本語", "ja-JP"),
                    ("🇨🇳 中文", "zh-CN"),
                    ("🇰🇷 한국어", "ko-KR"),
                ],
                value="th-TH",
                label="ภาษา",
                interactive=True,
            )

        gr.Markdown("### 🎤 อัดเสียงจากไมค์")
        # type="filepath" hands the handlers a temp-file path on disk.
        audio_input = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="กดปุ่มอัดเสียง หรืออัปโหลดไฟล์เสียง",
        )

        with gr.Row():
            transcribe_btn = gr.Button(
                "✨ ถอดเสียงอย่างเดียว",
                variant="secondary",
                size="lg",
            )
            full_btn = gr.Button(
                "⚽ ถอดเสียง + วิเคราะห์ฟุตบอล",
                variant="primary",
                size="lg",
            )

        gr.Markdown("### 📝 ข้อความที่ถอดได้")
        output_text = gr.Textbox(
            label="Transcript",
            lines=6,
            show_copy_button=True,
            placeholder="ผลการถอดเสียงจะแสดงที่นี่...",
        )

        gr.Markdown("### 🧠 ผลวิเคราะห์จาก AI")
        with gr.Row():
            analyze_btn = gr.Button(
                "🔄 วิเคราะห์ข้อความข้างบนอีกครั้ง",
                variant="secondary",
                size="sm",
            )
        analysis_output = gr.Code(
            label="Football Analysis (JSON)",
            language="json",
            lines=20,
        )

        # --- Events ---
        # Transcribe only
        transcribe_btn.click(
            fn=transcribe_audio_file,
            inputs=[audio_input, language],
            outputs=output_text,
        )
        # Transcribe + Analyze
        full_btn.click(
            fn=transcribe_and_analyze,
            inputs=[audio_input, language],
            outputs=[output_text, analysis_output],
        )
        # Re-analyze existing transcript
        analyze_btn.click(
            fn=analyze_text_only,
            inputs=output_text,
            outputs=analysis_output,
        )
        # Auto-transcribe + analyze on recording stop
        audio_input.stop_recording(
            fn=transcribe_and_analyze,
            inputs=[audio_input, language],
            outputs=[output_text, analysis_output],
        )

    app.launch()
# Script entry point: launch the Gradio web UI.
if __name__ == "__main__":
    run_web()