File size: 7,582 Bytes
8b1d8cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
import os
import sys
import azure.cognitiveservices.speech as speechsdk
from dotenv import load_dotenv

# Let python-dotenv populate the environment before the settings are read.
load_dotenv()

# Azure Speech credentials: the key has no fallback (None when unset),
# the region falls back to "eastus".
SPEECH_KEY = os.environ.get("SPEECH_KEY")
SPEECH_REGION = os.environ.get("SPEECH_REGION", "eastus")


def create_speech_config(language="th-TH"):
    """Build a SpeechConfig whose recognition language is ``language``.

    The subscription key and region are taken from the module-level
    ``SPEECH_KEY`` and ``SPEECH_REGION`` values.
    """
    credentials = {"subscription": SPEECH_KEY, "region": SPEECH_REGION}
    speech_config = speechsdk.SpeechConfig(**credentials)
    speech_config.speech_recognition_language = language
    return speech_config


def transcribe_from_mic():
    """Recognize one utterance from the default microphone and print the outcome (CLI mode).

    Recognition language is fixed to Thai ("th-TH"). Nothing is returned;
    the recognized text, the no-match details, or the cancellation details
    are printed to stdout.
    """
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=create_speech_config("th-TH"),
        audio_config=speechsdk.audio.AudioConfig(use_default_microphone=True),
    )

    print("🎤 Listening... Speak into your microphone.")
    outcome = recognizer.recognize_once()
    reason = outcome.reason

    if reason == speechsdk.ResultReason.RecognizedSpeech:
        print("✅ Recognized: " + outcome.text)
        return

    if reason == speechsdk.ResultReason.NoMatch:
        print(f"❌ No speech could be recognized: {outcome.no_match_details!s}")
        return

    if reason != speechsdk.ResultReason.Canceled:
        # Any other result reason is silently ignored.
        return

    details = outcome.cancellation_details
    print(f"⚠️ Speech recognition canceled: {details.reason!s}")
    if details.reason == speechsdk.CancellationReason.Error:
        print(f"Error details: {details.error_details!s}")
        print("Did you set the speech resource key and region?")


def transcribe_audio_file(audio_path, language="th-TH"):
    """Transcribe an audio file using continuous Azure Speech recognition.

    Args:
        audio_path: Path of the audio file to transcribe. ``None`` or an
            empty path is treated as "nothing recorded/uploaded".
        language: Recognition locale handed to ``create_speech_config``.

    Returns:
        The recognized phrases joined with newlines. When nothing could be
        transcribed, a user-facing status string is returned instead: it
        starts with the warning emoji when no audio was supplied, and with
        the cross-mark emoji when the service reported an error or no
        speech was recognized. Callers rely on those prefixes.
    """
    import threading

    # Covers both None (no recording) and "" (empty path), neither of which
    # can be opened by the SDK.
    if not audio_path:
        return "⚠️ กรุณาอัดเสียงก่อน"

    recognizer = speechsdk.SpeechRecognizer(
        speech_config=create_speech_config(language),
        audio_config=speechsdk.audio.AudioConfig(filename=audio_path),
    )

    phrases = []
    errors = []
    # Set by the SDK callbacks once the session ends; replaces a sleep-poll loop.
    finished = threading.Event()

    def on_recognized(evt):
        if evt.result.reason == speechsdk.ResultReason.RecognizedSpeech:
            phrases.append(evt.result.text)

    def on_canceled(evt):
        try:
            details = evt.cancellation_details
            # Only a cancellation with reason Error is a failure; other
            # reasons (e.g. the end of the input stream) are not reported.
            if details.reason == speechsdk.CancellationReason.Error:
                errors.append(str(details.error_details))
        finally:
            # Always release the waiter, even if reading the details failed.
            finished.set()

    def on_stopped(evt):
        finished.set()

    recognizer.recognized.connect(on_recognized)
    recognizer.canceled.connect(on_canceled)
    recognizer.session_stopped.connect(on_stopped)

    recognizer.start_continuous_recognition()
    try:
        finished.wait()
    finally:
        # Stop the session even if the wait is interrupted.
        recognizer.stop_continuous_recognition()

    if phrases:
        return "\n".join(phrases)
    if errors:
        # Surface service/configuration failures instead of blaming the microphone.
        return "❌ เกิดข้อผิดพลาดในการถอดเสียง: " + errors[0]
    return "❌ ไม่สามารถถอดเสียงได้ — ลองพูดดังขึ้นหรือตรวจสอบไมค์"


def transcribe_and_analyze(audio_path, language):
    """Transcribe the audio, then feed the transcript to the LLM analysis.

    Returns a ``(transcript, analysis_json)`` tuple. When the transcription
    step returned a status message (error or warning prefix) instead of a
    transcript, that message is passed through and the analysis is "".
    """
    text = transcribe_audio_file(audio_path, language)

    # Status messages from the transcription step start with one of these.
    status_prefixes = ("❌", "⚠️")
    if text.startswith(status_prefixes):
        return text, ""

    # Imported here, as in the rest of the file, so llm_client loads only when needed.
    from llm_client import analyze_football_content, format_analysis_result

    return text, format_analysis_result(analyze_football_content(text))


def analyze_text_only(transcript):
    """Run the LLM football analysis on text that is already transcribed.

    Returns the formatted analysis, or a warning message when ``transcript``
    is empty, ``None``, or whitespace-only.
    """
    has_text = bool(transcript) and bool(transcript.strip())
    if not has_text:
        return "⚠️ กรุณาใส่ข้อความก่อน"

    # Imported only once there is something to analyze.
    from llm_client import analyze_football_content, format_analysis_result

    analysis = analyze_football_content(transcript)
    return format_analysis_result(analysis)


def run_web():
    """Build the Gradio web UI and launch it.

    The page contains, top to bottom: a header, a language dropdown, an
    audio input (microphone or upload), two action buttons, a transcript
    textbox, a re-analyze button, and a JSON code view for the analysis.
    Event handlers are wired to ``transcribe_audio_file``,
    ``transcribe_and_analyze`` and ``analyze_text_only``.
    """
    # Imported lazily so gradio is only required when the web UI is used.
    import gradio as gr

    with gr.Blocks(
        title="ASR - Football Analysis",
        theme=gr.themes.Soft(
            primary_hue=gr.themes.colors.indigo,
            secondary_hue=gr.themes.colors.purple,
            neutral_hue=gr.themes.colors.slate,
        ),
        css="""
        .gradio-container {
            max-width: 900px !important;
            margin: auto !important;
        }
        """,
    ) as app:

        # Page header.
        gr.Markdown(
            """
            # ⚽ Football Speech Analyzer
            ### ถอดเสียงพูด + วิเคราะห์เนื้อหาฟุตบอลด้วย AI
            ---
            """
        )

        # Recognition language selector: (label, locale code) pairs, Thai by default.
        with gr.Row():
            language = gr.Dropdown(
                choices=[
                    ("🇹🇭 ไทย", "th-TH"),
                    ("🇺🇸 English", "en-US"),
                    ("🇯🇵 日本語", "ja-JP"),
                    ("🇨🇳 中文", "zh-CN"),
                    ("🇰🇷 한국어", "ko-KR"),
                ],
                value="th-TH",
                label="ภาษา",
                interactive=True,
            )

        # Audio source: record from the microphone or upload a file.
        # type="filepath" so the handlers receive a path on disk.
        gr.Markdown("### 🎤 อัดเสียงจากไมค์")
        audio_input = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="กดปุ่มอัดเสียง หรืออัปโหลดไฟล์เสียง",
        )

        # Action buttons: transcription only, or transcription plus analysis.
        with gr.Row():
            transcribe_btn = gr.Button(
                "✨ ถอดเสียงอย่างเดียว",
                variant="secondary",
                size="lg",
            )
            full_btn = gr.Button(
                "⚽ ถอดเสียง + วิเคราะห์ฟุตบอล",
                variant="primary",
                size="lg",
            )

        # Transcript output; also serves as the input for the re-analyze button.
        gr.Markdown("### 📝 ข้อความที่ถอดได้")
        output_text = gr.Textbox(
            label="Transcript",
            lines=6,
            show_copy_button=True,
            placeholder="ผลการถอดเสียงจะแสดงที่นี่...",
        )

        # Analysis section: re-analyze button and the JSON result view.
        gr.Markdown("### 🧠 ผลวิเคราะห์จาก AI")
        with gr.Row():
            analyze_btn = gr.Button(
                "🔄 วิเคราะห์ข้อความข้างบนอีกครั้ง",
                variant="secondary",
                size="sm",
            )

        analysis_output = gr.Code(
            label="Football Analysis (JSON)",
            language="json",
            lines=20,
        )

        # --- Event wiring ---

        # Transcribe only: audio + language -> transcript textbox.
        transcribe_btn.click(
            fn=transcribe_audio_file,
            inputs=[audio_input, language],
            outputs=output_text,
        )

        # Transcribe + analyze: fills both the transcript and the analysis view.
        full_btn.click(
            fn=transcribe_and_analyze,
            inputs=[audio_input, language],
            outputs=[output_text, analysis_output],
        )

        # Re-analyze whatever text is currently in the transcript textbox.
        analyze_btn.click(
            fn=analyze_text_only,
            inputs=output_text,
            outputs=analysis_output,
        )

        # Run transcription + analysis automatically when a recording stops.
        audio_input.stop_recording(
            fn=transcribe_and_analyze,
            inputs=[audio_input, language],
            outputs=[output_text, analysis_output],
        )

    # Start the Gradio app with default launch settings.
    app.launch()



# Script entry point: running this file directly starts the web UI.
if __name__ == "__main__":
    run_web()