Spaces:

Teera
/

conversation-extraction

Sleeping

App Files Community

Teera committed on Feb 10

Commit

8b1d8cc

verified ·

1 Parent(s): c5027b2

ก

Browse files

Files changed (1) hide show

app.py +233 -235

app.py CHANGED Viewed

@@ -1,235 +1,233 @@
-import os
-import sys
-import azure.cognitiveservices.speech as speechsdk
-from dotenv import load_dotenv
-load_dotenv()
-SPEECH_KEY = os.getenv("SPEECH_KEY")
-SPEECH_REGION = os.getenv("SPEECH_REGION", "eastus")
-def create_speech_config(language="th-TH"):
-    """Create a SpeechConfig with the given language."""
-    config = speechsdk.SpeechConfig(
-        subscription=SPEECH_KEY,
-        region=SPEECH_REGION,
-    )
-    config.speech_recognition_language = language
-    return config
-def transcribe_from_mic():
-    """Transcribe from the local microphone (CLI mode)."""
-    speech_config = create_speech_config("th-TH")
-    audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True)
-    recognizer = speechsdk.SpeechRecognizer(
-        speech_config=speech_config,
-        audio_config=audio_config,
-    )
-    print("🎤 Listening... Speak into your microphone.")
-    result = recognizer.recognize_once()
-    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
-        print("✅ Recognized: " + result.text)
-    elif result.reason == speechsdk.ResultReason.NoMatch:
-        print("❌ No speech could be recognized: " + str(result.no_match_details))
-    elif result.reason == speechsdk.ResultReason.Canceled:
-        cancellation_details = result.cancellation_details
-        print("⚠️ Speech recognition canceled: " + str(cancellation_details.reason))
-        if cancellation_details.reason == speechsdk.CancellationReason.Error:
-            print("Error details: " + str(cancellation_details.error_details))
-            print("Did you set the speech resource key and region?")
-def transcribe_audio_file(audio_path, language="th-TH"):
-    """Transcribe an audio file using Azure Speech SDK."""
-    if audio_path is None:
-        return "⚠️ กรุณาอัดเสียงก่อน"
-    speech_config = create_speech_config(language)
-    audio_config = speechsdk.audio.AudioConfig(filename=audio_path)
-    recognizer = speechsdk.SpeechRecognizer(
-        speech_config=speech_config,
-        audio_config=audio_config,
-    )
-    # Use continuous recognition to get the full transcript
-    all_results = []
-    done = False
-    def on_recognized(evt):
-        if evt.result.reason == speechsdk.ResultReason.RecognizedSpeech:
-            all_results.append(evt.result.text)
-    def on_canceled(evt):
-        nonlocal done
-        done = True
-    def on_stopped(evt):
-        nonlocal done
-        done = True
-    recognizer.recognized.connect(on_recognized)
-    recognizer.canceled.connect(on_canceled)
-    recognizer.session_stopped.connect(on_stopped)
-    recognizer.start_continuous_recognition()
-    import time
-    while not done:
-        time.sleep(0.1)
-    recognizer.stop_continuous_recognition()
-    if all_results:
-        return "\n".join(all_results)
-    else:
-        return "❌ ไม่สามารถถอดเสียงได้ — ลองพูดดังขึ้นหรือตรวจสอบไมค์"
-def transcribe_and_analyze(audio_path, language):
-    """Transcribe audio, then analyze with LLM. Returns (transcript, analysis_json)."""
-    transcript = transcribe_audio_file(audio_path, language)
-    if transcript.startswith("❌") or transcript.startswith("⚠️"):
-        return transcript, ""
-    from llm_client import analyze_football_content, format_analysis_result
-    result = analyze_football_content(transcript)
-    analysis_json = format_analysis_result(result)
-    return transcript, analysis_json
-def analyze_text_only(transcript):
-    """Analyze existing transcript text without re-transcribing."""
-    if not transcript or not transcript.strip():
-        return "⚠️ กรุณาใส่ข้อความก่อน"
-    from llm_client import analyze_football_content, format_analysis_result
-    result = analyze_football_content(transcript)
-    return format_analysis_result(result)
-def run_web():
-    """Run the Gradio web UI."""
-    import gradio as gr
-    with gr.Blocks(
-        title="ASR - Football Analysis",
-        theme=gr.themes.Soft(
-            primary_hue=gr.themes.colors.indigo,
-            secondary_hue=gr.themes.colors.purple,
-            neutral_hue=gr.themes.colors.slate,
-        ),
-        css="""
-        .gradio-container {
-            max-width: 900px !important;
-            margin: auto !important;
-        }
-        """,
-    ) as app:
-        gr.Markdown(
-            """
-            # ⚽ Football Speech Analyzer
-            ### ถอดเสียงพูด + วิเคราะห์เนื้อหาฟุตบอลด้วย AI
-            ---
-            """
-        )
-        with gr.Row():
-            language = gr.Dropdown(
-                choices=[
-                    ("🇹🇭 ไทย", "th-TH"),
-                    ("🇺🇸 English", "en-US"),
-                    ("🇯🇵 日本語", "ja-JP"),
-                    ("🇨🇳 中文", "zh-CN"),
-                    ("🇰🇷 한국어", "ko-KR"),
-                ],
-                value="th-TH",
-                label="ภาษา",
-                interactive=True,
-            )
-        gr.Markdown("### 🎤 อัดเสียงจากไมค์")
-        audio_input = gr.Audio(
-            sources=["microphone", "upload"],
-            type="filepath",
-            label="กดปุ่มอัดเสียง หรืออัปโหลดไฟล์เสียง",
-        )
-        with gr.Row():
-            transcribe_btn = gr.Button(
-                "✨ ถอดเสียงอย่างเดียว",
-                variant="secondary",
-                size="lg",
-            )
-            full_btn = gr.Button(
-                "⚽ ถอดเสียง + วิเคราะห์ฟุตบอล",
-                variant="primary",
-                size="lg",
-            )
-        gr.Markdown("### 📝 ข้อความที่ถอดได้")
-        output_text = gr.Textbox(
-            label="Transcript",
-            lines=6,
-            show_copy_button=True,
-            placeholder="ผลการถอดเสียงจะแสดงที่นี่...",
-        )
-        gr.Markdown("### 🧠 ผลวิเคราะห์จาก AI")
-        with gr.Row():
-            analyze_btn = gr.Button(
-                "🔄 วิเคราะห์ข้อความข้างบนอีกครั้ง",
-                variant="secondary",
-                size="sm",
-            )
-        analysis_output = gr.Code(
-            label="Football Analysis (JSON)",
-            language="json",
-            lines=20,
-        )
-        # --- Events ---
-        # Transcribe only
-        transcribe_btn.click(
-            fn=transcribe_audio_file,
-            inputs=[audio_input, language],
-            outputs=output_text,
-        )
-        # Transcribe + Analyze
-        full_btn.click(
-            fn=transcribe_and_analyze,
-            inputs=[audio_input, language],
-            outputs=[output_text, analysis_output],
-        )
-        # Re-analyze existing transcript
-        analyze_btn.click(
-            fn=analyze_text_only,
-            inputs=output_text,
-            outputs=analysis_output,
-        )
-        # Auto-transcribe + analyze on recording stop
-        audio_input.stop_recording(
-            fn=transcribe_and_analyze,
-            inputs=[audio_input, language],
-            outputs=[output_text, analysis_output],
-        )
-    app.launch(server_name="127.0.0.1", server_port=7860)
-if __name__ == "__main__":
-    if "--cli" in sys.argv:
-        transcribe_from_mic()
-    else:
-        run_web()

+import os
+import sys
+import azure.cognitiveservices.speech as speechsdk
+from dotenv import load_dotenv
+load_dotenv()
+SPEECH_KEY = os.getenv("SPEECH_KEY")
+SPEECH_REGION = os.getenv("SPEECH_REGION", "eastus")
+def create_speech_config(language="th-TH"):
+    """Create a SpeechConfig with the given language."""
+    config = speechsdk.SpeechConfig(
+        subscription=SPEECH_KEY,
+        region=SPEECH_REGION,
+    )
+    config.speech_recognition_language = language
+    return config
+def transcribe_from_mic():
+    """Transcribe from the local microphone (CLI mode)."""
+    speech_config = create_speech_config("th-TH")
+    audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True)
+    recognizer = speechsdk.SpeechRecognizer(
+        speech_config=speech_config,
+        audio_config=audio_config,
+    )
+    print("🎤 Listening... Speak into your microphone.")
+    result = recognizer.recognize_once()
+    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
+        print("✅ Recognized: " + result.text)
+    elif result.reason == speechsdk.ResultReason.NoMatch:
+        print("❌ No speech could be recognized: " + str(result.no_match_details))
+    elif result.reason == speechsdk.ResultReason.Canceled:
+        cancellation_details = result.cancellation_details
+        print("⚠️ Speech recognition canceled: " + str(cancellation_details.reason))
+        if cancellation_details.reason == speechsdk.CancellationReason.Error:
+            print("Error details: " + str(cancellation_details.error_details))
+            print("Did you set the speech resource key and region?")
+def transcribe_audio_file(audio_path, language="th-TH"):
+    """Transcribe an audio file using Azure Speech SDK."""
+    if audio_path is None:
+        return "⚠️ กรุณาอัดเสียงก่อน"
+    speech_config = create_speech_config(language)
+    audio_config = speechsdk.audio.AudioConfig(filename=audio_path)
+    recognizer = speechsdk.SpeechRecognizer(
+        speech_config=speech_config,
+        audio_config=audio_config,
+    )
+    # Use continuous recognition to get the full transcript
+    all_results = []
+    done = False
+    def on_recognized(evt):
+        if evt.result.reason == speechsdk.ResultReason.RecognizedSpeech:
+            all_results.append(evt.result.text)
+    def on_canceled(evt):
+        nonlocal done
+        done = True
+    def on_stopped(evt):
+        nonlocal done
+        done = True
+    recognizer.recognized.connect(on_recognized)
+    recognizer.canceled.connect(on_canceled)
+    recognizer.session_stopped.connect(on_stopped)
+    recognizer.start_continuous_recognition()
+    import time
+    while not done:
+        time.sleep(0.1)
+    recognizer.stop_continuous_recognition()
+    if all_results:
+        return "\n".join(all_results)
+    else:
+        return "❌ ไม่สามารถถอดเสียงได้ — ลองพูดดังขึ้นหรือตรวจสอบไมค์"
+def transcribe_and_analyze(audio_path, language):
+    """Transcribe audio, then analyze with LLM. Returns (transcript, analysis_json)."""
+    transcript = transcribe_audio_file(audio_path, language)
+    if transcript.startswith("❌") or transcript.startswith("⚠️"):
+        return transcript, ""
+    from llm_client import analyze_football_content, format_analysis_result
+    result = analyze_football_content(transcript)
+    analysis_json = format_analysis_result(result)
+    return transcript, analysis_json
+def analyze_text_only(transcript):
+    """Analyze existing transcript text without re-transcribing."""
+    if not transcript or not transcript.strip():
+        return "⚠️ กรุณาใส่ข้อความก่อน"
+    from llm_client import analyze_football_content, format_analysis_result
+    result = analyze_football_content(transcript)
+    return format_analysis_result(result)
+def run_web():
+    """Run the Gradio web UI."""
+    import gradio as gr
+    with gr.Blocks(
+        title="ASR - Football Analysis",
+        theme=gr.themes.Soft(
+            primary_hue=gr.themes.colors.indigo,
+            secondary_hue=gr.themes.colors.purple,
+            neutral_hue=gr.themes.colors.slate,
+        ),
+        css="""
+        .gradio-container {
+            max-width: 900px !important;
+            margin: auto !important;
+        }
+        """,
+    ) as app:
+        gr.Markdown(
+            """
+            # ⚽ Football Speech Analyzer
+            ### ถอดเสียงพูด + วิเคราะห์เนื้อหาฟุตบอลด้วย AI
+            ---
+            """
+        )
+        with gr.Row():
+            language = gr.Dropdown(
+                choices=[
+                    ("🇹🇭 ไทย", "th-TH"),
+                    ("🇺🇸 English", "en-US"),
+                    ("🇯🇵 日本語", "ja-JP"),
+                    ("🇨🇳 中文", "zh-CN"),
+                    ("🇰🇷 한국어", "ko-KR"),
+                ],
+                value="th-TH",
+                label="ภาษา",
+                interactive=True,
+            )
+        gr.Markdown("### 🎤 อัดเสียงจากไมค์")
+        audio_input = gr.Audio(
+            sources=["microphone", "upload"],
+            type="filepath",
+            label="กดปุ่มอัดเสียง หรืออัปโหลดไฟล์เสียง",
+        )
+        with gr.Row():
+            transcribe_btn = gr.Button(
+                "✨ ถอดเสียงอย่างเดียว",
+                variant="secondary",
+                size="lg",
+            )
+            full_btn = gr.Button(
+                "⚽ ถอดเสียง + วิเคราะห์ฟุตบอล",
+                variant="primary",
+                size="lg",
+            )
+        gr.Markdown("### 📝 ข้อความที่ถอดได้")
+        output_text = gr.Textbox(
+            label="Transcript",
+            lines=6,
+            show_copy_button=True,
+            placeholder="ผลการถอดเสียงจะแสดงที่นี่...",
+        )
+        gr.Markdown("### 🧠 ผลวิเคราะห์จาก AI")
+        with gr.Row():
+            analyze_btn = gr.Button(
+                "🔄 วิเคราะห์ข้อความข้างบนอีกครั้ง",
+                variant="secondary",
+                size="sm",
+            )
+        analysis_output = gr.Code(
+            label="Football Analysis (JSON)",
+            language="json",
+            lines=20,
+        )
+        # --- Events ---
+        # Transcribe only
+        transcribe_btn.click(
+            fn=transcribe_audio_file,
+            inputs=[audio_input, language],
+            outputs=output_text,
+        )
+        # Transcribe + Analyze
+        full_btn.click(
+            fn=transcribe_and_analyze,
+            inputs=[audio_input, language],
+            outputs=[output_text, analysis_output],
+        )
+        # Re-analyze existing transcript
+        analyze_btn.click(
+            fn=analyze_text_only,
+            inputs=output_text,
+            outputs=analysis_output,
+        )
+        # Auto-transcribe + analyze on recording stop
+        audio_input.stop_recording(
+            fn=transcribe_and_analyze,
+            inputs=[audio_input, language],
+            outputs=[output_text, analysis_output],
+        )
+    app.launch()
+if __name__ == "__main__":
+    run_web()