Spaces:

MichaelChou0806
/

LINE_audio_transcript

Sleeping

App Files Files Community

MichaelChou0806 committed on Oct 8, 2025

Commit

678d6a4

verified ·

1 Parent(s): c8ee0eb

Update app.py

Browse files

Files changed (1) hide show

app.py +163 -54

app.py CHANGED Viewed

@@ -2,8 +2,6 @@ import os, shutil, base64, uuid, mimetypes
 from pydub import AudioSegment
 from openai import OpenAI
 import gradio as gr
-from fastapi import Request
-from fastapi.responses import JSONResponse
 # ====== 基本設定 ======
 PASSWORD = os.getenv("APP_PASSWORD", "chou")
@@ -115,6 +113,7 @@ def transcribe_core(path, model="whisper-1"):
 # ====== Gradio UI 函式 ======
 def transcribe_ui(password, file):
     print(f"\n🎯 Web UI Request | Password: {password[:2] if password else ''}***")
     if not password or password.strip() != PASSWORD:
         return "❌ Password incorrect", "", ""
@@ -128,40 +127,165 @@ def transcribe_ui(password, file):
         print(f"❌ Error: {e}")
         return f"❌ Error: {e}", "", ""
 # ====== Gradio 介面 ======
 with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo:
     gr.Markdown("# 🎧 LINE Audio Transcription & Summary")
-    with gr.Row():
-        with gr.Column(scale=1):
-            pw_ui = gr.Textbox(label="Password", type="password", placeholder="Enter password")
-            file_ui = gr.File(label="Upload Audio File", file_types=["audio"])
-            btn_ui = gr.Button("Start Transcription 🚀", variant="primary", size="lg")
-        with gr.Column(scale=2):
-            status_ui = gr.Textbox(label="Status", interactive=False)
-            transcript_ui = gr.Textbox(label="Transcription Result", lines=10)
-            summary_ui = gr.Textbox(label="AI Summary", lines=6)
-    btn_ui.click(transcribe_ui, [pw_ui, file_ui], [status_ui, transcript_ui, summary_ui])
-    with gr.Accordion("📱 API Documentation (iPhone Shortcut)", open=False):
         gr.Markdown("""
-        ### API Endpoint
-        **POST** `/api/transcribe`
-        ### Request Format (JSON)
         ```json
         {
-          "password": "your_password",
-          "file": {
-            "data": "data:audio/m4a;base64,UklGR...",
-            "orig_name": "recording.m4a"
-          }
         }
         ```
-        ### Response Format
         ```json
         {
           "status": "success",
@@ -170,38 +294,23 @@ with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo
         }
         ```
-        💡 **Tip**: Use this endpoint in iPhone Shortcuts for automated transcription
         """)
-# ====== 自訂 API 路由 ======
-@demo.fastapi_app.post("/api/transcribe")
-async def api_transcribe(request: Request):
-    """iPhone 捷徑專用的 API 端點"""
-    try:
-        body = await request.json()
-        print(f"\n🎯 API Request | Keys: {list(body.keys())}")
-        password = body.get("password", "")
-        if password.strip() != PASSWORD:
-            return JSONResponse(status_code=401, content={"error": "Password incorrect"})
-        file_obj = body.get("file")
-        if not file_obj:
-            return JSONResponse(status_code=400, content={"error": "No file provided"})
-        path = _extract_effective_path(file_obj)
-        text, summary = transcribe_core(path)
-        return JSONResponse(content={
-            "status": "success",
-            "transcription": text,
-            "summary": summary
-        })
-    except Exception as e:
-        import traceback
-        print(f"❌ API Error:\n{traceback.format_exc()}")
-        return JSONResponse(status_code=500, content={"error": str(e)})
 # ====== 啟動 ======
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 from pydub import AudioSegment
 from openai import OpenAI
 import gradio as gr
 # ====== 基本設定 ======
 PASSWORD = os.getenv("APP_PASSWORD", "chou")
 # ====== Gradio UI 函式 ======
 def transcribe_ui(password, file):
+    """網頁版轉錄函式"""
     print(f"\n🎯 Web UI Request | Password: {password[:2] if password else ''}***")
     if not password or password.strip() != PASSWORD:
         return "❌ Password incorrect", "", ""
         print(f"❌ Error: {e}")
         return f"❌ Error: {e}", "", ""
+# ====== API 函式 (通過 Gradio 端點呼叫) ======
+def transcribe_api(password, file_data, file_name):
+    """
+    API variant of the transcription function.
+    Parameters:
+    - password: password string; compared against PASSWORD after stripping whitespace
+    - file_data: string in the form data:audio/...;base64,...
+    - file_name: original file name; "recording.m4a" is used when it is empty
+    Returns a dict whose "status" is "success" or "error". Error results carry an
+    "error" message plus empty "transcription" and "summary" strings; success
+    results carry "transcription" and "summary".
+    """
+    # NOTE(review): this logs the first two characters of the password - confirm that is acceptable.
+    print(f"\n🎯 API Request | Password: {password[:2] if password else ''}***")
+    print(f"📁 File data length: {len(file_data) if file_data else 0}")
+    print(f"📁 File name: {file_name}")
+    # Reject a missing or mismatching password before touching the payload.
+    if not password or password.strip() != PASSWORD:
+        return {
+            "status": "error",
+            "error": "Password incorrect",
+            "transcription": "",
+            "summary": ""
+        }
+    # Only the "data:" prefix is checked here; the rest of the payload is not validated at this point.
+    if not file_data or not file_data.startswith("data:"):
+        return {
+            "status": "error",
+            "error": "Invalid file data format. Must be data:audio/...;base64,...",
+            "transcription": "",
+            "summary": ""
+        }
+    try:
+        # Wrap the inputs in the dict handed to _extract_effective_path (defined outside this view).
+        file_dict = {
+            "data": file_data,
+            "orig_name": file_name or "recording.m4a"
+        }
+        path = _extract_effective_path(file_dict)
+        text, summary = transcribe_core(path)
+        return {
+            "status": "success",
+            "transcription": text,
+            "summary": summary
+        }
+    # Any failure is printed with its traceback and reported as an error dict instead of being raised.
+    except Exception as e:
+        import traceback
+        print(f"❌ Error:\n{traceback.format_exc()}")
+        return {
+            "status": "error",
+            "error": str(e),
+            "transcription": "",
+            "summary": ""
+        }
 # ====== Gradio 介面 ======
 with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo:
     gr.Markdown("# 🎧 LINE Audio Transcription & Summary")
+    with gr.Tab("🌐 Web Upload"):
+        gr.Markdown("### Upload audio file directly from browser")
+        with gr.Row():
+            with gr.Column(scale=1):
+                pw_ui = gr.Textbox(
+                    label="Password",
+                    type="password",
+                    placeholder="Enter password"
+                )
+                file_ui = gr.File(
+                    label="Upload Audio File",
+                    file_types=["audio"]
+                )
+                btn_ui = gr.Button(
+                    "Start Transcription 🚀",
+                    variant="primary",
+                    size="lg"
+                )
+            with gr.Column(scale=2):
+                status_ui = gr.Textbox(label="Status", interactive=False)
+                transcript_ui = gr.Textbox(
+                    label="Transcription Result",
+                    lines=10,
+                    placeholder="Transcription will appear here..."
+                )
+                summary_ui = gr.Textbox(
+                    label="AI Summary",
+                    lines=6,
+                    placeholder="Summary will appear here..."
+                )
+        btn_ui.click(
+            transcribe_ui,
+            inputs=[pw_ui, file_ui],
+            outputs=[status_ui, transcript_ui, summary_ui]
+        )
+    with gr.Tab("📱 API (iPhone Shortcut)"):
+        gr.Markdown("""
+        ### For iPhone Shortcuts & Automation
+        This tab provides a Gradio-based API endpoint that accepts Base64-encoded audio.
+        """)
+        with gr.Row():
+            with gr.Column(scale=1):
+                pw_api = gr.Textbox(
+                    label="Password",
+                    type="password",
+                    value="chou",
+                    placeholder="Enter password"
+                )
+                file_data_api = gr.Textbox(
+                    label="File Data (Base64)",
+                    placeholder="data:audio/m4a;base64,UklGR...",
+                    lines=3,
+                    info="Paste your base64-encoded audio data URL here"
+                )
+                file_name_api = gr.Textbox(
+                    label="Original Filename",
+                    value="recording.m4a",
+                    placeholder="recording.m4a"
+                )
+                btn_api = gr.Button(
+                    "Test API 🧪",
+                    variant="secondary",
+                    size="lg"
+                )
+            with gr.Column(scale=2):
+                result_api = gr.JSON(
+                    label="API Response",
+                    show_label=True
+                )
+        btn_api.click(
+            transcribe_api,
+            inputs=[pw_api, file_data_api, file_name_api],
+            outputs=[result_api],
+            api_name="transcribe"  # 🔴 關鍵:設定 api_name
+        )
         gr.Markdown("""
+        ---
+        ### 📖 How to use with iPhone Shortcuts
+        **Gradio API Endpoint**:
+        ```
+        POST /gradio_api/call/transcribe
+        ```
+        **Request Format (JSON)**:
         ```json
         {
+          "data": [
+            "your_password",
+            "data:audio/m4a;base64,UklGR...",
+            "recording.m4a"
+          ]
         }
         ```
+        **Response Format**:
         ```json
         {
           "status": "success",
         }
         ```
+        💡 **Important**:
+        - The endpoint is `/gradio_api/call/transcribe` (note: `call/transcribe`)
+        - The `data` array must have exactly 3 items: [password, file_data, file_name]
+        - Use `queue=false` parameter or set `api_name="transcribe"` in your request
         """)
+    gr.Markdown("""
+    ---
+    💡 **Supported Formats**: MP4, M4A, MP3, WAV, OGG, WEBM
+    📦 **Max File Size**: 25MB per chunk (larger files auto-split)
+    🔒 **Security**: Password-protected access
+    """)
 # ====== 啟動 ======
 if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_api=True  # 顯示 API 文件
+    )