Spaces:

samir72
/

AudioChatTranscriber

Running

App Files Files Community

samir72 committed on Sep 17, 2025

Commit

7bb3812

1 Parent(s): 9e2fc27

Add app.py and requirements.txt for HF space

Browse files

Files changed (2) hide show

app.py +139 -0
requirements.txt +1 -0

app.py ADDED Viewed

	@@ -0,0 +1,139 @@

+import os
+import base64
+import tempfile
+import requests
+import gradio as gr
+from dotenv import load_dotenv
+from openai import AzureOpenAI  # official OpenAI SDK, works with Azure endpoints
+# --- LLM call (Azure OpenAI with API key) -----------------------------------
+def summarize_audio_b64(audio_b64: str, sys_prompt: str, user_prompt: str) -> str:
+    """
+    Calls Azure OpenAI Chat Completions with audio input (base64 mp3).
+    """
+    load_dotenv()
+    endpoint = os.getenv("AC_OPENAI_ENDPOINT")
+    api_key = os.getenv("AC_OPENAI_API_KEY")
+    deployment = os.getenv("AC_MODEL_DEPLOYMENT")
+    api_version = os.getenv("AC_OPENAI_API_VERSION")
+    if not endpoint or not api_key or not deployment:
+        return "Server misconfiguration: required env vars missing."
+    try:
+        client = AzureOpenAI(
+            api_key=api_key,
+            api_version=api_version,
+            azure_endpoint=endpoint,
+        )
+        system_message = sys_prompt.strip() if sys_prompt else (
+            "You are an AI assistant with a charter to clearly analyze the customer enquiry."
+        )
+        user_text = user_prompt.strip() if user_prompt else "Summarize the audio content."
+        response = client.chat.completions.create(
+            model=deployment,
+            messages=[
+                {"role": "system", "content": system_message},
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": user_text},
+                        {
+                            "type": "input_audio",
+                            "input_audio": {"data": audio_b64, "format": "mp3"},
+                        },
+                    ],
+                },
+            ],
+        )
+        return response.choices[0].message.content
+    except Exception as ex:
+        return f"Error from Azure OpenAI: {ex}"
+# --- I/O helpers ------------------------------------------------------------
+def encode_audio_from_path(path: str) -> str:
+    with open(path, "rb") as f:
+        return base64.b64encode(f.read()).decode("utf-8")
+def download_to_temp_mp3(url: str) -> str:
+    r = requests.get(url, stream=True, timeout=30)
+    r.raise_for_status()
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
+        for chunk in r.iter_content(chunk_size=8192):
+            if chunk:
+                tmp.write(chunk)
+        return tmp.name
+def process_audio(upload_path, record_path, url, sys_prompt, user_prompt):
+    tmp_to_cleanup = []
+    try:
+        audio_path = None
+        if upload_path:
+            audio_path = upload_path
+        elif record_path:
+            audio_path = record_path
+        elif url and url.strip():
+            audio_path = download_to_temp_mp3(url.strip())
+            tmp_to_cleanup.append(audio_path)
+        if not audio_path:
+            return "Please provide an audio file via upload, recording, or URL."
+        audio_b64 = encode_audio_from_path(audio_path)
+        return summarize_audio_b64(audio_b64, sys_prompt, user_prompt)
+    finally:
+        for p in tmp_to_cleanup:
+            try:
+                if os.path.exists(p):
+                    os.remove(p)
+            except Exception:
+                pass
+# --- UI ---------------------------------------------------------------------
+# Build the Gradio page: three audio sources (upload, microphone, URL), two
+# prompt boxes, a submit button and a text box for the result.
+with gr.Blocks(title="Audio Summarizer") as demo:
+    gr.Markdown("# Audio File Summarizer (Azure OpenAI)")
+    gr.Markdown("Upload a mp3, record audio, or paste a URL. The app sends base64 audio to Azure OpenAI.")
+    with gr.Row():
+        with gr.Column():
+            # type="filepath": presumably hands process_audio a path on disk,
+            # which it opens via encode_audio_from_path — confirm in Gradio docs.
+            upload_audio = gr.Audio(sources=["upload"], type="filepath", label="Upload mp3")
+        with gr.Column():
+            record_audio = gr.Audio(sources=["microphone"], type="filepath", label="Record Audio")
+        with gr.Column():
+            url_input = gr.Textbox(label="mp3 URL", placeholder="https://example.com/audio.mp3")
+    with gr.Row():
+        userprompt_input = gr.Textbox(
+            label="User Prompt",
+            value="Summarize the audio content",
+            placeholder="e.g., Extract key points and action items",
+        )
+        sysprompt_input = gr.Textbox(
+            label="System Prompt",
+            value="You are an AI assistant with a listening charter to clearly analyze the customer enquiry.",
+        )
+    submit_btn = gr.Button("Summarize")
+    output = gr.Textbox(label="Summary", lines=12)
+    # The inputs list follows process_audio's positional parameters
+    # (upload_path, record_path, url, sys_prompt, user_prompt). Note that the
+    # system prompt is passed before the user prompt even though the user
+    # prompt box is created first above.
+    submit_btn.click(
+        fn=process_audio,
+        inputs=[upload_audio, record_audio, url_input, sysprompt_input, userprompt_input],
+        outputs=output,
+    )
+# Start the app only when this file is run as a script, not when imported.
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ python-dotenv==1.1.1 gradio==5.45.0 requests==2.32.5 azure-identity==1.25.0 azure-ai-projects==1.0.0 numpy==1.26.4 openai==1.107.3