"""Speech transcription + football-content analysis app.

Uses the Azure Speech SDK for speech-to-text (microphone or audio file) and a
project-local LLM client (``llm_client``) for football-content analysis.
``run_web`` serves a Gradio UI; the module can also be used as a library.

Environment (via .env / process env):
    SPEECH_KEY    -- Azure Speech resource key (required).
    SPEECH_REGION -- Azure region, defaults to "eastus".
"""

import os
import sys
import threading

import azure.cognitiveservices.speech as speechsdk
from dotenv import load_dotenv

load_dotenv()

SPEECH_KEY = os.getenv("SPEECH_KEY")
SPEECH_REGION = os.getenv("SPEECH_REGION", "eastus")


def create_speech_config(language="th-TH"):
    """Create a SpeechConfig for the given BCP-47 recognition language.

    Args:
        language: Speech-recognition language tag (e.g. "th-TH", "en-US").

    Returns:
        A configured ``speechsdk.SpeechConfig``.

    Raises:
        RuntimeError: if SPEECH_KEY is missing, so the failure is explicit
            instead of an opaque SDK error later in the call chain.
    """
    if not SPEECH_KEY:
        raise RuntimeError(
            "SPEECH_KEY is not set. Add it to your environment or .env file."
        )
    config = speechsdk.SpeechConfig(
        subscription=SPEECH_KEY,
        region=SPEECH_REGION,
    )
    config.speech_recognition_language = language
    return config


def transcribe_from_mic():
    """Transcribe a single utterance from the local microphone (CLI mode).

    Prints the result (or diagnostic details on failure); returns nothing.
    """
    speech_config = create_speech_config("th-TH")
    audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True)
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config,
        audio_config=audio_config,
    )

    print("🎤 Listening... Speak into your microphone.")
    # recognize_once stops at the first pause; good enough for a CLI demo.
    result = recognizer.recognize_once()

    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        print("✅ Recognized: " + result.text)
    elif result.reason == speechsdk.ResultReason.NoMatch:
        print("❌ No speech could be recognized: " + str(result.no_match_details))
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print("⚠️ Speech recognition canceled: " + str(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print("Error details: " + str(cancellation_details.error_details))
            print("Did you set the speech resource key and region?")


def transcribe_audio_file(audio_path, language="th-TH"):
    """Transcribe an audio file with continuous recognition.

    Args:
        audio_path: Path to the audio file, or None (e.g. nothing recorded).
        language: Speech-recognition language tag.

    Returns:
        The full transcript (one line per recognized utterance), or a sentinel
        message starting with "⚠️"/"❌" on failure. Callers (e.g.
        ``transcribe_and_analyze``) dispatch on those prefixes — keep them.
    """
    if audio_path is None:
        return "⚠️ กรุณาอัดเสียงก่อน"

    speech_config = create_speech_config(language)
    audio_config = speechsdk.audio.AudioConfig(filename=audio_path)
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config,
        audio_config=audio_config,
    )

    # Continuous recognition collects every utterance, not just the first.
    all_results = []
    finished = threading.Event()  # replaces the former busy-wait flag

    def on_recognized(evt):
        if evt.result.reason == speechsdk.ResultReason.RecognizedSpeech:
            all_results.append(evt.result.text)

    def on_done(evt):
        # Fires on cancellation (including normal end-of-stream for files)
        # and on session stop — either way recognition is over.
        finished.set()

    recognizer.recognized.connect(on_recognized)
    recognizer.canceled.connect(on_done)
    recognizer.session_stopped.connect(on_done)

    recognizer.start_continuous_recognition()
    finished.wait()  # block until the SDK signals completion; no CPU spin
    recognizer.stop_continuous_recognition()

    if all_results:
        return "\n".join(all_results)
    return "❌ ไม่สามารถถอดเสียงได้ — ลองพูดดังขึ้นหรือตรวจสอบไมค์"


def transcribe_and_analyze(audio_path, language):
    """Transcribe audio, then analyze it with the LLM.

    Returns:
        (transcript, analysis_json) — analysis_json is "" when transcription
        failed (detected via the "❌"/"⚠️" sentinel prefixes).
    """
    transcript = transcribe_audio_file(audio_path, language)
    if transcript.startswith("❌") or transcript.startswith("⚠️"):
        return transcript, ""

    # Lazy import: keeps CLI/transcribe-only usage free of LLM dependencies.
    from llm_client import analyze_football_content, format_analysis_result

    result = analyze_football_content(transcript)
    analysis_json = format_analysis_result(result)
    return transcript, analysis_json


def analyze_text_only(transcript):
    """Analyze existing transcript text without re-transcribing.

    Returns the formatted analysis, or a "⚠️" sentinel for empty input.
    """
    if not transcript or not transcript.strip():
        return "⚠️ กรุณาใส่ข้อความก่อน"

    # Lazy import mirrors transcribe_and_analyze.
    from llm_client import analyze_football_content, format_analysis_result

    result = analyze_football_content(transcript)
    return format_analysis_result(result)


def run_web():
    """Run the Gradio web UI (blocks until the server is stopped)."""
    import gradio as gr

    with gr.Blocks(
        title="ASR - Football Analysis",
        theme=gr.themes.Soft(
            primary_hue=gr.themes.colors.indigo,
            secondary_hue=gr.themes.colors.purple,
            neutral_hue=gr.themes.colors.slate,
        ),
        css="""
        .gradio-container { max-width: 900px !important; margin: auto !important; }
        """,
    ) as app:
        gr.Markdown(
            """
            # ⚽ Football Speech Analyzer
            ### ถอดเสียงพูด + วิเคราะห์เนื้อหาฟุตบอลด้วย AI
            ---
            """
        )

        with gr.Row():
            language = gr.Dropdown(
                choices=[
                    ("🇹🇭 ไทย", "th-TH"),
                    ("🇺🇸 English", "en-US"),
                    ("🇯🇵 日本語", "ja-JP"),
                    ("🇨🇳 中文", "zh-CN"),
                    ("🇰🇷 한국어", "ko-KR"),
                ],
                value="th-TH",
                label="ภาษา",
                interactive=True,
            )

        gr.Markdown("### 🎤 อัดเสียงจากไมค์")
        audio_input = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="กดปุ่มอัดเสียง หรืออัปโหลดไฟล์เสียง",
        )

        with gr.Row():
            transcribe_btn = gr.Button(
                "✨ ถอดเสียงอย่างเดียว",
                variant="secondary",
                size="lg",
            )
            full_btn = gr.Button(
                "⚽ ถอดเสียง + วิเคราะห์ฟุตบอล",
                variant="primary",
                size="lg",
            )

        gr.Markdown("### 📝 ข้อความที่ถอดได้")
        output_text = gr.Textbox(
            label="Transcript",
            lines=6,
            show_copy_button=True,
            placeholder="ผลการถอดเสียงจะแสดงที่นี่...",
        )

        gr.Markdown("### 🧠 ผลวิเคราะห์จาก AI")
        with gr.Row():
            analyze_btn = gr.Button(
                "🔄 วิเคราะห์ข้อความข้างบนอีกครั้ง",
                variant="secondary",
                size="sm",
            )
        analysis_output = gr.Code(
            label="Football Analysis (JSON)",
            language="json",
            lines=20,
        )

        # --- Events ---
        # Transcribe only
        transcribe_btn.click(
            fn=transcribe_audio_file,
            inputs=[audio_input, language],
            outputs=output_text,
        )
        # Transcribe + Analyze
        full_btn.click(
            fn=transcribe_and_analyze,
            inputs=[audio_input, language],
            outputs=[output_text, analysis_output],
        )
        # Re-analyze existing transcript
        analyze_btn.click(
            fn=analyze_text_only,
            inputs=output_text,
            outputs=analysis_output,
        )
        # Auto-transcribe + analyze on recording stop
        audio_input.stop_recording(
            fn=transcribe_and_analyze,
            inputs=[audio_input, language],
            outputs=[output_text, analysis_output],
        )

    app.launch()


if __name__ == "__main__":
    run_web()