Spaces:
Sleeping
Sleeping
| import os | |
| import sys | |
| import azure.cognitiveservices.speech as speechsdk | |
| from dotenv import load_dotenv | |
# Load variables from a .env file (python-dotenv) before reading the settings.
load_dotenv()

# Azure Speech credentials, read from the environment.
# SPEECH_KEY is None when unset; SPEECH_REGION falls back to "eastus".
SPEECH_KEY = os.environ.get("SPEECH_KEY")
SPEECH_REGION = os.environ.get("SPEECH_REGION", "eastus")
def create_speech_config(language="th-TH"):
    """Build a ``speechsdk.SpeechConfig`` for the configured Azure resource.

    Args:
        language: Value assigned to ``speech_recognition_language``
            (defaults to ``"th-TH"``).

    Returns:
        A ``SpeechConfig`` created from the module-level ``SPEECH_KEY`` and
        ``SPEECH_REGION`` with the recognition language set.
    """
    credentials = {"subscription": SPEECH_KEY, "region": SPEECH_REGION}
    speech_config = speechsdk.SpeechConfig(**credentials)
    speech_config.speech_recognition_language = language
    return speech_config
def transcribe_from_mic(language="th-TH"):
    """Recognize a single utterance from the default microphone (CLI mode).

    Prints the outcome to stdout and returns nothing.

    Args:
        language: Recognition language passed to ``create_speech_config``.
            Defaults to ``"th-TH"``, which was previously hard-coded.
    """
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=create_speech_config(language),
        audio_config=speechsdk.audio.AudioConfig(use_default_microphone=True),
    )

    print("🎤 Listening... Speak into your microphone.")
    result = recognizer.recognize_once()
    reason = result.reason

    if reason == speechsdk.ResultReason.RecognizedSpeech:
        print(f"✅ Recognized: {result.text}")
    elif reason == speechsdk.ResultReason.NoMatch:
        print(f"❌ No speech could be recognized: {result.no_match_details!s}")
    elif reason == speechsdk.ResultReason.Canceled:
        details = result.cancellation_details
        print(f"⚠️ Speech recognition canceled: {details.reason!s}")
        # Only an Error cancellation carries error details worth printing.
        if details.reason == speechsdk.CancellationReason.Error:
            print(f"Error details: {details.error_details!s}")
            print("Did you set the speech resource key and region?")
def transcribe_audio_file(audio_path, language="th-TH"):
    """Transcribe an audio file using Azure Speech continuous recognition.

    Args:
        audio_path: Path of the audio file to transcribe. ``None`` or an empty
            string is treated as "nothing recorded".
        language: Recognition language passed to ``create_speech_config``.

    Returns:
        The recognized phrases joined with newlines. When there is nothing to
        return, a user-facing message instead: one starting with "⚠️" if no
        audio was supplied, or one starting with "❌" if nothing was recognized
        or the service reported an error. Callers rely on these prefixes to
        detect failure.
    """
    import threading

    # Treat "" like None so an empty filename is never handed to the SDK.
    if not audio_path:
        return "⚠️ กรุณาอัดเสียงก่อน"

    speech_config = create_speech_config(language)
    audio_config = speechsdk.audio.AudioConfig(filename=audio_path)
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config,
        audio_config=audio_config,
    )

    # Continuous recognition is used to collect the full transcript; the
    # callbacks below signal completion through the event.
    phrases = []
    error_messages = []
    finished = threading.Event()

    def on_recognized(evt):
        if evt.result.reason == speechsdk.ResultReason.RecognizedSpeech:
            phrases.append(evt.result.text)

    def on_canceled(evt):
        try:
            details = evt.result.cancellation_details
            if details.reason == speechsdk.CancellationReason.Error:
                error_messages.append(str(details.error_details))
        finally:
            # Always release the waiter, even if reading the details fails.
            finished.set()

    def on_stopped(evt):
        finished.set()

    recognizer.recognized.connect(on_recognized)
    recognizer.canceled.connect(on_canceled)
    recognizer.session_stopped.connect(on_stopped)

    recognizer.start_continuous_recognition()
    try:
        # Block without polling until the session is canceled or stopped.
        finished.wait()
    finally:
        recognizer.stop_continuous_recognition()

    if phrases:
        return "\n".join(phrases)
    if error_messages:
        # Report the real failure (bad key/region, network, unreadable file)
        # instead of advising the user to speak louder.
        return "❌ ไม่สามารถถอดเสียงได้ — " + error_messages[0]
    return "❌ ไม่สามารถถอดเสียงได้ — ลองพูดดังขึ้นหรือตรวจสอบไมค์"
def transcribe_and_analyze(audio_path, language):
    """Transcribe audio, then run the LLM football analysis on the transcript.

    Args:
        audio_path: Audio file path forwarded to ``transcribe_audio_file``.
        language: Recognition language forwarded to ``transcribe_audio_file``.

    Returns:
        A ``(transcript, analysis)`` tuple. When the transcript is a
        warning/error message (starts with "❌" or "⚠️"), the analysis is an
        empty string and the LLM is not called.
    """
    text = transcribe_audio_file(audio_path, language)

    # Transcription failures are reported as prefixed messages, not exceptions.
    if text.startswith(("❌", "⚠️")):
        return text, ""

    # Imported lazily so the failure path above never needs llm_client.
    from llm_client import analyze_football_content, format_analysis_result

    return text, format_analysis_result(analyze_football_content(text))
def analyze_text_only(transcript):
    """Run the LLM football analysis on an existing transcript.

    Args:
        transcript: Text to analyze; no audio is transcribed here.

    Returns:
        The formatted analysis, or a "⚠️" warning message when the transcript
        is missing, empty, or whitespace-only.
    """
    if transcript and transcript.strip():
        # Imported lazily so the empty-input path never needs llm_client.
        from llm_client import analyze_football_content, format_analysis_result

        analysis = analyze_football_content(transcript)
        return format_analysis_result(analysis)

    return "⚠️ กรุณาใส่ข้อความก่อน"
def run_web():
    """Build the Gradio Blocks UI, wire its events, and launch the app.

    The page has a language dropdown, an audio input (microphone or upload),
    buttons for transcribe-only and transcribe-plus-analysis, a transcript
    textbox, a re-analyze button, and a JSON code view for the analysis.
    """
    import gradio as gr

    palette = gr.themes.colors
    soft_theme = gr.themes.Soft(
        primary_hue=palette.indigo,
        secondary_hue=palette.purple,
        neutral_hue=palette.slate,
    )
    container_css = """
    .gradio-container {
        max-width: 900px !important;
        margin: auto !important;
    }
    """
    # (label, value) pairs; the value is passed on as the recognition language.
    language_choices = [
        ("🇹🇭 ไทย", "th-TH"),
        ("🇺🇸 English", "en-US"),
        ("🇯🇵 日本語", "ja-JP"),
        ("🇨🇳 中文", "zh-CN"),
        ("🇰🇷 한국어", "ko-KR"),
    ]

    with gr.Blocks(
        title="ASR - Football Analysis",
        theme=soft_theme,
        css=container_css,
    ) as app:
        gr.Markdown(
            """
            # ⚽ Football Speech Analyzer
            ### ถอดเสียงพูด + วิเคราะห์เนื้อหาฟุตบอลด้วย AI
            ---
            """
        )

        with gr.Row():
            language = gr.Dropdown(
                choices=language_choices,
                value="th-TH",
                label="ภาษา",
                interactive=True,
            )

        gr.Markdown("### 🎤 อัดเสียงจากไมค์")
        audio_input = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="กดปุ่มอัดเสียง หรืออัปโหลดไฟล์เสียง",
        )

        with gr.Row():
            transcribe_btn = gr.Button(
                "✨ ถอดเสียงอย่างเดียว", variant="secondary", size="lg"
            )
            full_btn = gr.Button(
                "⚽ ถอดเสียง + วิเคราะห์ฟุตบอล", variant="primary", size="lg"
            )

        gr.Markdown("### 📝 ข้อความที่ถอดได้")
        output_text = gr.Textbox(
            label="Transcript",
            lines=6,
            show_copy_button=True,
            placeholder="ผลการถอดเสียงจะแสดงที่นี่...",
        )

        gr.Markdown("### 🧠 ผลวิเคราะห์จาก AI")
        with gr.Row():
            analyze_btn = gr.Button(
                "🔄 วิเคราะห์ข้อความข้างบนอีกครั้ง", variant="secondary", size="sm"
            )
        analysis_output = gr.Code(
            label="Football Analysis (JSON)",
            language="json",
            lines=20,
        )

        # --- Events ---
        audio_and_language = [audio_input, language]
        transcript_and_analysis = [output_text, analysis_output]

        # Transcribe only.
        transcribe_btn.click(
            fn=transcribe_audio_file,
            inputs=audio_and_language,
            outputs=output_text,
        )
        # Transcribe, then analyze.
        full_btn.click(
            fn=transcribe_and_analyze,
            inputs=audio_and_language,
            outputs=transcript_and_analysis,
        )
        # Re-analyze whatever text is currently in the transcript box.
        analyze_btn.click(
            fn=analyze_text_only,
            inputs=output_text,
            outputs=analysis_output,
        )
        # Run the full pipeline automatically when a recording stops.
        audio_input.stop_recording(
            fn=transcribe_and_analyze,
            inputs=audio_and_language,
            outputs=transcript_and_analysis,
        )

    app.launch()
# Script entry point: start the Gradio web UI when this file is run directly.
if __name__ == "__main__":
    run_web()