Spaces:

Teera
/

conversation-extraction

Sleeping

App Files Files Community

Teera committed on Feb 10

Commit

c5027b2

verified ·

1 Parent(s): be08f8c

d

Browse files

Files changed (6) hide show

.env +9 -0
app.py +235 -70
llm_client.py +77 -0
prompts.py +65 -0
readme.md +0 -0
requirements.txt +4 -0

.env ADDED Viewed

	@@ -0,0 +1,9 @@

+# SECURITY: never commit real credentials. Any key that was pushed to this
+# repository must be treated as compromised and rotated in the Azure portal.
+# Supply the real values through the hosting platform's secret settings or an
+# untracked local .env file; the values below are placeholders only.
+SPEECH_KEY="<your-azure-speech-key>"
+SPEECH_ENDPOINT="https://eastus.api.cognitive.microsoft.com/"
+SPEECH_REGION="eastus"
+# Azure OpenAI
+AZURE_OPENAI_KEY="<your-azure-openai-key>"
+AZURE_OPENAI_ENDPOINT="https://teera-maz475y3-eastus2.cognitiveservices.azure.com/"
+AZURE_OPENAI_API_VERSION="2024-12-01-preview"
+AZURE_OPENAI_DEPLOYMENT="gpt-5.2-chat"

app.py CHANGED Viewed

@@ -1,70 +1,235 @@
-import gradio as gr
-from huggingface_hub import InferenceClient
-def respond(
-    message,
-    history: list[dict[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    hf_token: gr.OAuthToken,
-):
-    """
-    For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-    """
-    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
-    messages = [{"role": "system", "content": system_message}]
-    messages.extend(history)
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        choices = message.choices
-        token = ""
-        if len(choices) and choices[0].delta.content:
-            token = choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-chatbot = gr.ChatInterface(
-    respond,
-    type="messages",
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-with gr.Blocks() as demo:
-    with gr.Sidebar():
-        gr.LoginButton()
-    chatbot.render()
-if __name__ == "__main__":
-    demo.launch()

+import os
+import sys
+import azure.cognitiveservices.speech as speechsdk
+from dotenv import load_dotenv
+# Load variables from a local .env file before the settings below are read.
+load_dotenv()
+# Azure Speech credentials; SPEECH_KEY is None when the variable is unset,
+# SPEECH_REGION falls back to "eastus".
+SPEECH_KEY = os.getenv("SPEECH_KEY")
+SPEECH_REGION = os.getenv("SPEECH_REGION", "eastus")
+def create_speech_config(language="th-TH"):
+    """Create a SpeechConfig with the given language."""
+    config = speechsdk.SpeechConfig(
+        subscription=SPEECH_KEY,
+        region=SPEECH_REGION,
+    )
+    config.speech_recognition_language = language
+    return config
+def transcribe_from_mic():
+    """Transcribe from the local microphone (CLI mode)."""
+    speech_config = create_speech_config("th-TH")
+    audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True)
+    recognizer = speechsdk.SpeechRecognizer(
+        speech_config=speech_config,
+        audio_config=audio_config,
+    )
+    print("🎤 Listening... Speak into your microphone.")
+    result = recognizer.recognize_once()
+    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
+        print("✅ Recognized: " + result.text)
+    elif result.reason == speechsdk.ResultReason.NoMatch:
+        print("❌ No speech could be recognized: " + str(result.no_match_details))
+    elif result.reason == speechsdk.ResultReason.Canceled:
+        cancellation_details = result.cancellation_details
+        print("⚠️ Speech recognition canceled: " + str(cancellation_details.reason))
+        if cancellation_details.reason == speechsdk.CancellationReason.Error:
+            print("Error details: " + str(cancellation_details.error_details))
+            print("Did you set the speech resource key and region?")
+def transcribe_audio_file(audio_path, language="th-TH"):
+    """Transcribe an audio file using Azure Speech SDK."""
+    if audio_path is None:
+        return "⚠️ กรุณาอัดเสียงก่อน"
+    speech_config = create_speech_config(language)
+    audio_config = speechsdk.audio.AudioConfig(filename=audio_path)
+    recognizer = speechsdk.SpeechRecognizer(
+        speech_config=speech_config,
+        audio_config=audio_config,
+    )
+    # Use continuous recognition to get the full transcript
+    all_results = []
+    done = False
+    def on_recognized(evt):
+        if evt.result.reason == speechsdk.ResultReason.RecognizedSpeech:
+            all_results.append(evt.result.text)
+    def on_canceled(evt):
+        nonlocal done
+        done = True
+    def on_stopped(evt):
+        nonlocal done
+        done = True
+    recognizer.recognized.connect(on_recognized)
+    recognizer.canceled.connect(on_canceled)
+    recognizer.session_stopped.connect(on_stopped)
+    recognizer.start_continuous_recognition()
+    import time
+    while not done:
+        time.sleep(0.1)
+    recognizer.stop_continuous_recognition()
+    if all_results:
+        return "\n".join(all_results)
+    else:
+        return "❌ ไม่สามารถถอดเสียงได้ — ลองพูดดังขึ้นหรือตรวจสอบไมค์"
+def transcribe_and_analyze(audio_path, language):
+    """Transcribe audio, then analyze with LLM. Returns (transcript, analysis_json)."""
+    transcript = transcribe_audio_file(audio_path, language)
+    if transcript.startswith("❌") or transcript.startswith("⚠️"):
+        return transcript, ""
+    from llm_client import analyze_football_content, format_analysis_result
+    result = analyze_football_content(transcript)
+    analysis_json = format_analysis_result(result)
+    return transcript, analysis_json
+def analyze_text_only(transcript):
+    """Analyze existing transcript text without re-transcribing."""
+    if not transcript or not transcript.strip():
+        return "⚠️ กรุณาใส่ข้อความก่อน"
+    from llm_client import analyze_football_content, format_analysis_result
+    result = analyze_football_content(transcript)
+    return format_analysis_result(result)
+def run_web():
+    """Run the Gradio web UI."""
+    import gradio as gr
+    with gr.Blocks(
+        title="ASR - Football Analysis",
+        theme=gr.themes.Soft(
+            primary_hue=gr.themes.colors.indigo,
+            secondary_hue=gr.themes.colors.purple,
+            neutral_hue=gr.themes.colors.slate,
+        ),
+        css="""
+        .gradio-container {
+            max-width: 900px !important;
+            margin: auto !important;
+        }
+        """,
+    ) as app:
+        gr.Markdown(
+            """
+            # ⚽ Football Speech Analyzer
+            ### ถอดเสียงพูด + วิเคราะห์เนื้อหาฟุตบอลด้วย AI
+            ---
+            """
+        )
+        with gr.Row():
+            language = gr.Dropdown(
+                choices=[
+                    ("🇹🇭 ไทย", "th-TH"),
+                    ("🇺🇸 English", "en-US"),
+                    ("🇯🇵 日本語", "ja-JP"),
+                    ("🇨🇳 中文", "zh-CN"),
+                    ("🇰🇷 한국어", "ko-KR"),
+                ],
+                value="th-TH",
+                label="ภาษา",
+                interactive=True,
+            )
+        gr.Markdown("### 🎤 อัดเสียงจากไมค์")
+        audio_input = gr.Audio(
+            sources=["microphone", "upload"],
+            type="filepath",
+            label="กดปุ่มอัดเสียง หรืออัปโหลดไฟล์เสียง",
+        )
+        with gr.Row():
+            transcribe_btn = gr.Button(
+                "✨ ถอดเสียงอย่างเดียว",
+                variant="secondary",
+                size="lg",
+            )
+            full_btn = gr.Button(
+                "⚽ ถอดเสียง + วิเคราะห์ฟุตบอล",
+                variant="primary",
+                size="lg",
+            )
+        gr.Markdown("### 📝 ข้อความที่ถอดได้")
+        output_text = gr.Textbox(
+            label="Transcript",
+            lines=6,
+            show_copy_button=True,
+            placeholder="ผลการถอดเสียงจะแสดงที่นี่...",
+        )
+        gr.Markdown("### 🧠 ผลวิเคราะห์จาก AI")
+        with gr.Row():
+            analyze_btn = gr.Button(
+                "🔄 วิเคราะห์ข้อความข้างบนอีกครั้ง",
+                variant="secondary",
+                size="sm",
+            )
+        analysis_output = gr.Code(
+            label="Football Analysis (JSON)",
+            language="json",
+            lines=20,
+        )
+        # --- Events ---
+        # Transcribe only
+        transcribe_btn.click(
+            fn=transcribe_audio_file,
+            inputs=[audio_input, language],
+            outputs=output_text,
+        )
+        # Transcribe + Analyze
+        full_btn.click(
+            fn=transcribe_and_analyze,
+            inputs=[audio_input, language],
+            outputs=[output_text, analysis_output],
+        )
+        # Re-analyze existing transcript
+        analyze_btn.click(
+            fn=analyze_text_only,
+            inputs=output_text,
+            outputs=analysis_output,
+        )
+        # Auto-transcribe + analyze on recording stop
+        audio_input.stop_recording(
+            fn=transcribe_and_analyze,
+            inputs=[audio_input, language],
+            outputs=[output_text, analysis_output],
+        )
+    app.launch(server_name="127.0.0.1", server_port=7860)
+if __name__ == "__main__":
+    if "--cli" in sys.argv:
+        transcribe_from_mic()
+    else:
+        run_web()

llm_client.py ADDED Viewed

	@@ -0,0 +1,77 @@

+"""
+Azure OpenAI client wrapper for LLM-based analysis.
+"""
+import os
+import json
+from openai import AzureOpenAI
+from dotenv import load_dotenv
+from prompts import FOOTBALL_ANALYSIS_SYSTEM_PROMPT, FOOTBALL_ANALYSIS_USER_PROMPT
+# Load variables from a local .env file before the settings below are read.
+load_dotenv()
+# Azure OpenAI connection settings. Key and endpoint are None when unset;
+# API version and deployment name fall back to the defaults given here.
+AZURE_OPENAI_KEY = os.getenv("AZURE_OPENAI_KEY")
+AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
+AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION", "2024-12-01-preview")
+AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT", "gpt-5.2-chat")
+def get_openai_client():
+    """Create and return an Azure OpenAI client."""
+    return AzureOpenAI(
+        api_key=AZURE_OPENAI_KEY,
+        azure_endpoint=AZURE_OPENAI_ENDPOINT,
+        api_version=AZURE_OPENAI_API_VERSION,
+    )
+def analyze_football_content(transcript: str) -> dict:
+    """
+    Send transcribed text to Azure OpenAI for football content analysis.
+    Args:
+        transcript: The transcribed speech text.
+    Returns:
+        A dict with categorized football data (teams, leagues, sentiment, etc.)
+    """
+    if not transcript or not transcript.strip():
+        return {"error": "ไม่มีข้อความให้วิเคราะห์"}
+    client = get_openai_client()
+    user_message = FOOTBALL_ANALYSIS_USER_PROMPT.format(transcript=transcript)
+    try:
+        response = client.chat.completions.create(
+            model=AZURE_OPENAI_DEPLOYMENT,
+            messages=[
+                {"role": "system", "content": FOOTBALL_ANALYSIS_SYSTEM_PROMPT},
+                {"role": "user", "content": user_message},
+            ],
+            max_completion_tokens=4096,
+        )
+        content = response.choices[0].message.content.strip()
+        # Clean up markdown code fences if the model wraps JSON in ```json ... ```
+        if content.startswith("```"):
+            content = content.split("\n", 1)[1]  # Remove first line (```json)
+            content = content.rsplit("```", 1)[0]  # Remove last ```
+            content = content.strip()
+        result = json.loads(content)
+        return result
+    except json.JSONDecodeError:
+        return {
+            "error": "LLM ตอบกลับมาไม่ใช่ JSON ที่ถูกต้อง",
+            "raw_response": content,
+        }
+    except Exception as e:
+        return {"error": f"เกิดข้อผิดพลาด: {str(e)}"}
+def format_analysis_result(result: dict) -> str:
+    """Format the analysis result as a pretty JSON string for display."""
+    return json.dumps(result, ensure_ascii=False, indent=2)

prompts.py ADDED Viewed

	@@ -0,0 +1,65 @@

+"""
+Prompt templates for LLM-based football content analysis.
+"""
+# System prompt (Thai) describing the JSON structure the model must return.
+# It is sent as the system message as-is and is not run through str.format,
+# so the literal JSON braces below need no escaping.
+FOOTBALL_ANALYSIS_SYSTEM_PROMPT = """คุณเป็น AI ผู้เชี่ยวชาญด้านการวิเคราะห์เนื้อหาฟุตบอล
+หน้าที่ของคุณคือวิเคราะห์ข้อความที่ถอดเสียงมา (transcript) แล้วจัดหมวดหมู่ข้อมูลเกี่ยวกับฟุตบอล
+คุณต้องตอบกลับเป็น JSON เท่านั้น ตามโครงสร้างนี้:
+{
+  "teams_mentioned": [
+    {
+      "name": "ชื่อทีม (ภาษาอังกฤษ)",
+      "name_th": "ชื่อทีม (ภาษาไทย ถ้ามี)",
+      "context": "บริบทที่พูดถึงทีมนี้โดยย่อ"
+    }
+  ],
+  "leagues_mentioned": [
+    {
+      "name": "ชื่อลีก (ภาษาอังกฤษ)",
+      "name_th": "ชื่อลีก (ภาษาไทย ถ้ามี)",
+      "country": "ประเทศ"
+    }
+  ],
+  "players_mentioned": [
+    {
+      "name": "ชื่อนักเตะ",
+      "team": "ทีมที่สังกัด (ถ้าระบุได้)",
+      "context": "บริบทที่พูดถึง"
+    }
+  ],
+  "topics": ["หัวข้อที่พูดถึง เช่น ผลการแข่งขัน, ตลาดการย้ายทีม, อาการบาดเจ็บ, ..."],
+  "sentiment": {
+    "overall": "positive | negative | neutral | mixed",
+    "score": 0.0,
+    "details": "อธิบายเหตุผลโดยย่อ"
+  },
+  "match_info": {
+    "is_match_discussed": true,
+    "home_team": "ทีมเหย้า (ถ้ามี)",
+    "away_team": "ทีมเยือน (ถ้ามี)",
+    "score": "ผลสกอร์ (ถ้ามี)",
+    "competition": "รายการแข่งขัน (ถ้ามี)"
+  },
+  "summary": "สรุปเนื้อหาโดยย่อ 1-2 ประโยค",
+  "confidence": 0.0,
+  "is_football_content": true
+}
+กฎ:
+1. ถ้าเนื้อหาไม่เกี่ยวกับฟุตบอลเลย ให้ตั้ง "is_football_content" เป็น false และใส่ข้อมูลที่เกี่ยวข้องน้อยที่สุด
+2. "sentiment.score" อยู่ในช่วง -1.0 (ลบมาก) ถึง 1.0 (บวกมาก), 0.0 คือ neutral
+3. "confidence" อยู่ในช่วง 0.0-1.0 แสดงความมั่นใจในการวิเคราะห์
+4. ตอบเป็น JSON เท่านั้น ห้ามมีข้อความอื่นนอกเหนือจาก JSON
+5. ถ้าไม่มีข้อมูลในฟิลด์ใด ให้ใส่ null หรือ array ว่าง []
+6. พยายามระบุชื่อเป็นภาษาอังกฤษมาตรฐานเสมอ (เช่น "Liverpool", ไม่ใช่ "ลิเวอร์พูล" อย่างเดียว)
+"""
+# User-message template (Thai); it is filled with str.format(transcript=...),
+# and {transcript} is its only placeholder.
+FOOTBALL_ANALYSIS_USER_PROMPT = """วิเคราะห์ข้อความที่ถอดเสียงมานี้:
+---
+{transcript}
+---
+ตอบเป็น JSON ตามโครงสร้างที่กำหนด"""

readme.md ADDED Viewed

File without changes

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+azure-cognitiveservices-speech
+python-dotenv
+gradio
+openai