Spaces:

MichaelChou0806
/

LINE_audio_transcript

Sleeping

App Files Files Community

MichaelChou0806 committed on Oct 8, 2025

Commit

c76e92c

verified ·

1 Parent(s): ec64510

Update app.py

Browse files

Files changed (1) hide show

app.py +162 -216

app.py CHANGED Viewed

@@ -2,6 +2,9 @@ import os, shutil, base64, uuid, mimetypes, json, time
 from pydub import AudioSegment
 from openai import OpenAI
 import gradio as gr
 # ====== 基本設定 ======
 PASSWORD = os.getenv("APP_PASSWORD", "chou")
@@ -24,14 +27,11 @@ def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
     try:
         header, b64 = data_url.split(",", 1)
     except ValueError:
-        print(f"  → [_dataurl_to_file] ❌ 錯誤: data URL 格式錯誤")
         raise ValueError("data URL format error")
     mime = header.split(";")[0].split(":", 1)[-1].strip()
     ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")
     fname = orig_name if (orig_name and "." in orig_name) else f"upload_{uuid.uuid4().hex}.{ext}"
-    print(f"  → [_dataurl_to_file] MIME: {mime}, 副檔名: {ext}")
-    print(f"  → [_dataurl_to_file] 目標檔名: {fname}")
-    print(f"  → [_dataurl_to_file] Base64 長度: {len(b64)}")
     with open(fname, "wb") as f:
         f.write(base64.b64decode(b64))
     file_size = os.path.getsize(fname)
@@ -40,111 +40,90 @@ def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
 def _extract_effective_path(file_obj) -> str:
     """從各種格式中提取有效檔案路徑"""
-    print(f"\n[_extract_effective_path] 開始解析檔案...")
     print(f"[_extract_effective_path] 收到類型: {type(file_obj)}")
-    print(f"[_extract_effective_path] 收到內容前100字: {str(file_obj)[:100]}...")
     # 字串模式
     if isinstance(file_obj, str):
         s = file_obj.strip().strip('"')
-        print(f"  → [模式 A] 字串模式")
         if s.startswith("data:"):
-            print(f"  → [模式 A] 偵測到 data URL, 長度: {len(s)}")
             return _dataurl_to_file(s, None)
         if os.path.isfile(s):
-            print(f"  → [模式 A] 找到檔案路徑: {s}")
             return s
     # 字典模式
     if isinstance(file_obj, dict):
-        print(f"  → [模式 B] 字典模式")
-        print(f"  → [模式 B] Keys: {list(file_obj.keys())}")
         data = file_obj.get("data")
         if isinstance(data, str) and data.startswith("data:"):
-            print(f"  → [模式 B] 找到 data URL! 長度: {len(data)}")
             return _dataurl_to_file(data, file_obj.get("orig_name"))
         p = str(file_obj.get("path") or "").strip().strip('"')
         if p and os.path.isfile(p):
-            print(f"  → [模式 B] 找到 path: {p}")
             return p
     # 物件模式
-    print(f"  → [模式 C] 物件模式")
     for attr in ("name", "path"):
         p = getattr(file_obj, attr, None)
         if isinstance(p, str):
             s = p.strip().strip('"')
             if os.path.isfile(s):
-                print(f"  → [模式 C] 找到屬性 {attr}: {s}")
                 return s
-    print(f"[_extract_effective_path] ❌ 無法解析檔案")
     raise FileNotFoundError("Cannot parse uploaded file")
 # ====== 分段處理 ======
 def split_audio(path):
-    print(f"\n[split_audio] 檢查檔案大小...")
     size = os.path.getsize(path)
     print(f"[split_audio] 檔案大小: {size} bytes ({size/1024/1024:.2f} MB)")
     if size <= MAX_SIZE:
-        print(f"[split_audio] 檔案小於 25MB, 不需分割")
         return [path]
-    print(f"[split_audio] 檔案大於 25MB, 開始分割...")
     audio = AudioSegment.from_file(path)
     n = int(size / MAX_SIZE) + 1
     chunk_ms = len(audio) / n
-    print(f"[split_audio] 將分割成 {n} 個片段, 每段約 {chunk_ms/1000:.1f} 秒")
     parts = []
     for i in range(n):
         fn = f"chunk_{i+1}.wav"
         audio[int(i*chunk_ms):int((i+1)*chunk_ms)].export(fn, format="wav")
-        print(f"[split_audio] 已產生片段 {i+1}/{n}: {fn}")
         parts.append(fn)
     return parts
 # ====== 轉錄核心 ======
 def transcribe_core(path, model="whisper-1"):
     print(f"\n{'='*60}")
-    print(f"[transcribe_core] 開始轉錄流程")
-    print(f"[transcribe_core] 檔案路徑: {path}")
     print(f"{'='*60}")
     start_time = time.time()
     if path.lower().endswith(".mp4"):
-        print(f"[transcribe_core] 偵測到 .mp4 檔案, 轉換為 .m4a")
         fixed = path[:-4] + ".m4a"
         try:
             shutil.copy(path, fixed)
             path = fixed
-            print(f"[transcribe_core] ✅ 已轉換: {path}")
-        except Exception as e:
-            print(f"[transcribe_core] ⚠️ 轉換失敗: {e}")
-    print(f"\n[transcribe_core] === 步驟 1: 分割音檔 ===")
     chunks = split_audio(path)
-    print(f"[transcribe_core] 共 {len(chunks)} 個片段")
-    print(f"\n[transcribe_core] === 步驟 2: Whisper 轉錄 ===")
     raw = []
     for i, c in enumerate(chunks, 1):
-        print(f"[transcribe_core] 轉錄片段 {i}/{len(chunks)}: {c}")
-        chunk_start = time.time()
         with open(c, "rb") as af:
             txt = client.audio.transcriptions.create(
                 model=model, file=af, response_format="text"
             )
             raw.append(txt)
-        chunk_time = time.time() - chunk_start
-        print(f"[transcribe_core] ✅ 片段 {i} 完成 (耗時 {chunk_time:.1f}秒)")
-        print(f"[transcribe_core] 片段 {i} 內容: {txt[:100]}...")
     raw_txt = "\n".join(raw)
-    print(f"\n[transcribe_core] 原始轉錄總長度: {len(raw_txt)} 字元")
-    print(f"[transcribe_core] 原始內容前200字: {raw_txt[:200]}...")
-    print(f"\n[transcribe_core] === 步驟 3: 簡轉繁 ===")
-    conv_start = time.time()
     conv = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
@@ -154,13 +133,9 @@ def transcribe_core(path, model="whisper-1"):
         temperature=0.0
     )
     trad = conv.choices[0].message.content.strip()
-    conv_time = time.time() - conv_start
-    print(f"[transcribe_core] ✅ 繁體轉換完成 (耗時 {conv_time:.1f}秒)")
-    print(f"[transcribe_core] 繁體內容長度: {len(trad)} 字元")
-    print(f"[transcribe_core] 繁體內容前200字: {trad[:200]}...")
-    print(f"\n[transcribe_core] === 步驟 4: AI 摘要 ===")
-    summ_start = time.time()
     summ = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
@@ -170,92 +145,87 @@ def transcribe_core(path, model="whisper-1"):
         temperature=0.2
     )
     summary = summ.choices[0].message.content.strip()
-    summ_time = time.time() - summ_start
-    print(f"[transcribe_core] ✅ 摘要完成 (耗時 {summ_time:.1f}秒)")
-    print(f"[transcribe_core] 摘要內容: {summary}")
     total_time = time.time() - start_time
     print(f"\n{'='*60}")
-    print(f"[transcribe_core] ✅✅✅ 轉錄流程全部完成!")
-    print(f"[transcribe_core] 總耗時: {total_time:.1f} 秒")
     print(f"{'='*60}\n")
     return trad, summary
 # ====== Gradio UI 函式 ======
 def transcribe_ui(password, file):
-    """網頁版轉錄函式"""
-    print(f"\n{'🌐'*30}")
-    print(f"🎯 [UI] 收到網頁版請求")
-    print(f"🔑 [UI] 密碼: {password[:2] if password else ''}*** (長度: {len(password) if password else 0})")
-    print(f"📁 [UI] 檔案類型: {type(file)}")
-    print(f"{'🌐'*30}")
     if not password or password.strip() != PASSWORD:
-        print(f"❌ [UI] 密碼驗證失敗")
         return "❌ Password incorrect", "", ""
     if not file:
-        print(f"❌ [UI] 未收到檔案")
         return "⚠️ No file uploaded", "", ""
     try:
         path = _extract_effective_path(file)
-        print(f"✅ [UI] 檔案解析成功: {path}")
         text, summary = transcribe_core(path)
-        print(f"✅ [UI] 轉錄完成, 準備返回結果")
         return "✅ Transcription completed", text, summary
     except Exception as e:
         import traceback
-        error_trace = traceback.format_exc()
-        print(f"❌ [UI] 發生錯誤:\n{error_trace}")
         return f"❌ Error: {e}", "", ""
-# ====== API 函式 ======
-def transcribe_api(password, file_data, file_name):
-    """
-    API 版本的轉錄函式
     """
-    print(f"\n{'📱'*30}")
-    print(f"🎯 [API] 收到 API 請求")
-    print(f"🔑 [API] 密碼: {password[:2] if password else ''}*** (長度: {len(password) if password else 0})")
-    print(f"📁 [API] file_data 類型: {type(file_data)}")
-    print(f"📁 [API] file_data 長度: {len(file_data) if file_data else 0}")
-    print(f"📁 [API] file_data 前50字: {str(file_data)[:50] if file_data else 'None'}...")
-    print(f"📁 [API] file_name: {file_name}")
-    print(f"{'📱'*30}")
-    if not password or password.strip() != PASSWORD:
-        result = {
-            "status": "error",
-            "error": "Password incorrect",
-            "transcription": "",
-            "summary": ""
-        }
-        print(f"❌ [API] 密碼驗證失敗")
-        print(f"[API] 返回結果: {json.dumps(result, ensure_ascii=False, indent=2)}")
-        return result
-    if not file_data or not file_data.startswith("data:"):
-        result = {
-            "status": "error",
-            "error": "Invalid file data format. Must be data:audio/...;base64,...",
-            "transcription": "",
-            "summary": ""
-        }
-        print(f"❌ [API] 檔案格式錯誤")
-        print(f"[API] 返回結果: {json.dumps(result, ensure_ascii=False, indent=2)}")
-        return result
     try:
-        file_dict = {
-            "data": file_data,
-            "orig_name": file_name or "recording.m4a"
-        }
-        print(f"[API] 開始解析檔案...")
         path = _extract_effective_path(file_dict)
-        print(f"✅ [API] 檔案解析成功: {path}")
-        print(f"[API] 開始轉錄流程...")
         text, summary = transcribe_core(path)
         result = {
@@ -263,30 +233,24 @@ def transcribe_api(password, file_data, file_name):
             "transcription": text,
             "summary": summary
         }
         print(f"\n{'✅'*30}")
-        print(f"✅✅✅ [API] 全部完成!")
-        print(f"[API] 轉錄長度: {len(text)} 字元")
-        print(f"[API] 摘要長度: {len(summary)} 字元")
-        print(f"[API] 返回結果:")
         print(json.dumps(result, ensure_ascii=False, indent=2))
         print(f"{'✅'*30}\n")
-        return result
     except Exception as e:
         import traceback
         error_trace = traceback.format_exc()
         print(f"\n{'❌'*30}")
-        print(f"❌ [API] 發生錯誤:")
-        print(error_trace)
         print(f"{'❌'*30}\n")
-        result = {
-            "status": "error",
-            "error": str(e),
-            "transcription": "",
-            "summary": ""
-        }
-        print(f"[API] 返回錯誤結果: {json.dumps(result, ensure_ascii=False, indent=2)}")
-        return result
 # ====== Gradio 介面 ======
 with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo:
@@ -296,129 +260,111 @@ with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo
         gr.Markdown("### Upload audio file directly from browser")
         with gr.Row():
             with gr.Column(scale=1):
-                pw_ui = gr.Textbox(
-                    label="Password",
-                    type="password",
-                    placeholder="Enter password"
-                )
-                file_ui = gr.File(
-                    label="Upload Audio File",
-                    file_types=["audio"]
-                )
-                btn_ui = gr.Button(
-                    "Start Transcription 🚀",
-                    variant="primary",
-                    size="lg"
-                )
             with gr.Column(scale=2):
                 status_ui = gr.Textbox(label="Status", interactive=False)
-                transcript_ui = gr.Textbox(
-                    label="Transcription Result",
-                    lines=10,
-                    placeholder="Transcription will appear here..."
-                )
-                summary_ui = gr.Textbox(
-                    label="AI Summary",
-                    lines=6,
-                    placeholder="Summary will appear here..."
-                )
-        btn_ui.click(
-            transcribe_ui,
-            inputs=[pw_ui, file_ui],
-            outputs=[status_ui, transcript_ui, summary_ui]
-        )
-    with gr.Tab("📱 API (iPhone Shortcut)"):
         gr.Markdown("""
-        ### For iPhone Shortcuts & Automation
-        Test the API endpoint here before using in iPhone Shortcuts.
-        """)
-        with gr.Row():
-            with gr.Column(scale=1):
-                pw_api = gr.Textbox(
-                    label="Password",
-                    type="password",
-                    value="chou",
-                    placeholder="Enter password"
-                )
-                file_data_api = gr.Textbox(
-                    label="File Data (Base64)",
-                    placeholder="data:audio/m4a;base64,UklGR...",
-                    lines=3,
-                    info="Paste your base64-encoded audio data URL here"
-                )
-                file_name_api = gr.Textbox(
-                    label="Original Filename",
-                    value="recording.m4a",
-                    placeholder="recording.m4a"
-                )
-                btn_api = gr.Button(
-                    "Test API 🧪",
-                    variant="secondary",
-                    size="lg"
-                )
-            with gr.Column(scale=2):
-                result_api = gr.JSON(
-                    label="API Response",
-                    show_label=True
-                )
-        btn_api.click(
-            transcribe_api,
-            inputs=[pw_api, file_data_api, file_name_api],
-            outputs=[result_api],
-            api_name="transcribe",
-            queue=False  # 🔴 關鍵: 禁用 queue
-        )
-        gr.Markdown("""
         ---
-        ### 📖 iPhone Shortcuts Configuration
-        **Endpoint**: `/gradio_api/call/transcribe`
-        **Request Format (JSON)**:
         ```json
         {
-          "data": [
-            "your_password",
-            "data:audio/m4a;base64,UklGR...",
-            "recording.m4a"
-          ]
         }
         ```
-        **Response Format**:
         ```json
         {
-          "data": {
-            "status": "success",
-            "transcription": "轉錄內容...",
-            "summary": "摘要..."
-          }
         }
         ```
         """)
     gr.Markdown("""
     ---
     💡 **Supported Formats**: MP4, M4A, MP3, WAV, OGG, WEBM
-    📦 **Max File Size**: 25MB per chunk (larger files auto-split)
-    🔒 **Security**: Password-protected access
     """)
 # ====== 啟動 ======
 if __name__ == "__main__":
     print("\n" + "="*60)
-    print("準備啟動 Gradio 應用...")
     print("="*60 + "\n")
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_api=True
-    )

 from pydub import AudioSegment
 from openai import OpenAI
 import gradio as gr
+from fastapi import FastAPI, Request
+from fastapi.responses import JSONResponse
+from fastapi.middleware.cors import CORSMiddleware
 # ====== 基本設定 ======
 PASSWORD = os.getenv("APP_PASSWORD", "chou")
     try:
         header, b64 = data_url.split(",", 1)
     except ValueError:
         raise ValueError("data URL format error")
     mime = header.split(";")[0].split(":", 1)[-1].strip()
     ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")
     fname = orig_name if (orig_name and "." in orig_name) else f"upload_{uuid.uuid4().hex}.{ext}"
+    print(f"  → [_dataurl_to_file] 檔名: {fname}, Base64長度: {len(b64)}")
     with open(fname, "wb") as f:
         f.write(base64.b64decode(b64))
     file_size = os.path.getsize(fname)
 def _extract_effective_path(file_obj) -> str:
     """從各種格式中提取有效檔案路徑"""
     print(f"[_extract_effective_path] 收到類型: {type(file_obj)}")
     # 字串模式
     if isinstance(file_obj, str):
         s = file_obj.strip().strip('"')
         if s.startswith("data:"):
+            print(f"  → 偵測到 data URL")
             return _dataurl_to_file(s, None)
         if os.path.isfile(s):
+            print(f"  → 找到檔案路徑: {s}")
             return s
     # 字典模式
     if isinstance(file_obj, dict):
+        print(f"  → 字典模式, Keys: {list(file_obj.keys())}")
         data = file_obj.get("data")
         if isinstance(data, str) and data.startswith("data:"):
+            print(f"  → 找到 data URL")
             return _dataurl_to_file(data, file_obj.get("orig_name"))
         p = str(file_obj.get("path") or "").strip().strip('"')
         if p and os.path.isfile(p):
             return p
     # 物件模式
     for attr in ("name", "path"):
         p = getattr(file_obj, attr, None)
         if isinstance(p, str):
             s = p.strip().strip('"')
             if os.path.isfile(s):
                 return s
     raise FileNotFoundError("Cannot parse uploaded file")
 # ====== 分段處理 ======
 def split_audio(path):
     size = os.path.getsize(path)
     print(f"[split_audio] 檔案大小: {size} bytes ({size/1024/1024:.2f} MB)")
     if size <= MAX_SIZE:
+        print(f"[split_audio] 不需分割")
         return [path]
+    print(f"[split_audio] 開始分割...")
     audio = AudioSegment.from_file(path)
     n = int(size / MAX_SIZE) + 1
     chunk_ms = len(audio) / n
+    print(f"[split_audio] 分割成 {n} 個片段")
     parts = []
     for i in range(n):
         fn = f"chunk_{i+1}.wav"
         audio[int(i*chunk_ms):int((i+1)*chunk_ms)].export(fn, format="wav")
         parts.append(fn)
     return parts
 # ====== 轉錄核心 ======
 def transcribe_core(path, model="whisper-1"):
     print(f"\n{'='*60}")
+    print(f"[transcribe_core] 開始轉錄: {path}")
     print(f"{'='*60}")
     start_time = time.time()
     if path.lower().endswith(".mp4"):
         fixed = path[:-4] + ".m4a"
         try:
             shutil.copy(path, fixed)
             path = fixed
+        except:
+            pass
     chunks = split_audio(path)
+    print(f"\n[transcribe_core] === Whisper 轉錄 ({len(chunks)} 片段) ===")
     raw = []
     for i, c in enumerate(chunks, 1):
+        print(f"[transcribe_core] 轉錄片段 {i}/{len(chunks)}")
         with open(c, "rb") as af:
             txt = client.audio.transcriptions.create(
                 model=model, file=af, response_format="text"
             )
             raw.append(txt)
+        print(f"[transcribe_core] ✅ 片段 {i} 完成")
     raw_txt = "\n".join(raw)
+    print(f"[transcribe_core] 原始轉錄: {len(raw_txt)} 字元")
+    print(f"\n[transcribe_core] === 簡轉繁 ===")
     conv = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
         temperature=0.0
     )
     trad = conv.choices[0].message.content.strip()
+    print(f"[transcribe_core] ✅ 繁體轉換完成: {len(trad)} 字元")
+    print(f"\n[transcribe_core] === AI 摘要 ===")
     summ = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
         temperature=0.2
     )
     summary = summ.choices[0].message.content.strip()
     total_time = time.time() - start_time
     print(f"\n{'='*60}")
+    print(f"[transcribe_core] ✅✅✅ 全部完成! 總耗時: {total_time:.1f}秒")
     print(f"{'='*60}\n")
     return trad, summary
 # ====== Gradio UI 函式 ======
 def transcribe_ui(password, file):
+    print(f"\n🌐 [UI] 網頁版請求")
     if not password or password.strip() != PASSWORD:
         return "❌ Password incorrect", "", ""
     if not file:
         return "⚠️ No file uploaded", "", ""
     try:
         path = _extract_effective_path(file)
         text, summary = transcribe_core(path)
         return "✅ Transcription completed", text, summary
     except Exception as e:
         import traceback
+        print(f"❌ [UI] 錯誤:\n{traceback.format_exc()}")
         return f"❌ Error: {e}", "", ""
+# ====== 建立 FastAPI 應用 ======
+fastapi_app = FastAPI()
+# CORS 設定
+fastapi_app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# ====== 完全同步的 API 端點 ======
+@fastapi_app.post("/api/transcribe")
+async def api_transcribe_sync(request: Request):
     """
+    完全同步的 API 端點 - 直接返回結果,不用輪詢
+    請求格式:
+    {
+      "password": "chou",
+      "file_data": "data:audio/m4a;base64,...",
+      "file_name": "recording.m4a"
+    }
+    """
     try:
+        body = await request.json()
+        print(f"\n{'📱'*30}")
+        print(f"🎯 [SYNC API] 收到同步 API 請求")
+        print(f"📦 Keys: {list(body.keys())}")
+        print(f"{'📱'*30}")
+        password = body.get("password", "")
+        if password.strip() != PASSWORD:
+            print(f"❌ [SYNC API] 密碼錯誤")
+            return JSONResponse(
+                status_code=401,
+                content={"status": "error", "error": "Password incorrect"}
+            )
+        file_data = body.get("file_data", "")
+        file_name = body.get("file_name", "recording.m4a")
+        if not file_data or not file_data.startswith("data:"):
+            print(f"❌ [SYNC API] 檔案格式錯誤")
+            return JSONResponse(
+                status_code=400,
+                content={"status": "error", "error": "Invalid file data format"}
+            )
+        print(f"[SYNC API] 檔案長度: {len(file_data)}, 檔名: {file_name}")
+        # 直接處理,同步執行
+        file_dict = {"data": file_data, "orig_name": file_name}
         path = _extract_effective_path(file_dict)
+        print(f"✅ [SYNC API] 檔案解析成功: {path}")
         text, summary = transcribe_core(path)
         result = {
             "transcription": text,
             "summary": summary
         }
         print(f"\n{'✅'*30}")
+        print(f"✅✅✅ [SYNC API] 完成! 返回結果")
         print(json.dumps(result, ensure_ascii=False, indent=2))
         print(f"{'✅'*30}\n")
+        return JSONResponse(content=result)
     except Exception as e:
         import traceback
         error_trace = traceback.format_exc()
         print(f"\n{'❌'*30}")
+        print(f"❌ [SYNC API] 錯誤:\n{error_trace}")
         print(f"{'❌'*30}\n")
+        return JSONResponse(
+            status_code=500,
+            content={"status": "error", "error": str(e)}
+        )
 # ====== Gradio 介面 ======
 with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo:
         gr.Markdown("### Upload audio file directly from browser")
         with gr.Row():
             with gr.Column(scale=1):
+                pw_ui = gr.Textbox(label="Password", type="password")
+                file_ui = gr.File(label="Upload Audio File", file_types=["audio"])
+                btn_ui = gr.Button("Start Transcription 🚀", variant="primary", size="lg")
             with gr.Column(scale=2):
                 status_ui = gr.Textbox(label="Status", interactive=False)
+                transcript_ui = gr.Textbox(label="Transcription Result", lines=10)
+                summary_ui = gr.Textbox(label="AI Summary", lines=6)
+        btn_ui.click(transcribe_ui, [pw_ui, file_ui], [status_ui, transcript_ui, summary_ui])
+    with gr.Tab("📱 API Documentation"):
         gr.Markdown("""
+        ### 🚀 Synchronous API (Recommended for iPhone Shortcuts)
+        **Endpoint**: `/api/transcribe` (POST)
+        ✅ **完全同步** - 直接返回結果,無需輪詢
+        ✅ **穩定可靠** - 不受音檔長度影響,自動等待完成
         ---
+        #### Request Format (JSON):
         ```json
         {
+          "password": "your_password",
+          "file_data": "data:audio/m4a;base64,UklGR...",
+          "file_name": "recording.m4a"
         }
         ```
+        #### Response Format:
         ```json
         {
+          "status": "success",
+          "transcription": "轉錄內容...",
+          "summary": "摘要內容..."
         }
         ```
+        ---
+        ### 📱 iPhone Shortcuts 設定
+        **動作流程:**
+        1. **取得檔案** → 語音檔
+        2. **Base64 編碼**
+        3. **文字** (組合 data URL):
+           ```
+           data:audio/m4a;base64,Base64編碼結果
+           ```
+        4. **字典** (請求本文):
+           - 鍵: `password`, 值: `chou`
+           - 鍵: `file_data`, 值: 上一步的文字
+           - 鍵: `file_name`, 值: `recording.m4a`
+        5. **取得 URL 內容**:
+           - URL: `https://你的網址/api/transcribe`
+           - 方法: `POST`
+           - 標頭: `Content-Type` = `application/json`
+           - 請求本文: 上一步的字典
+           - 請求本文類型: `JSON`
+        6. **從字典取得值**:
+           - 鍵: `transcription` → 轉錄結果
+           - 鍵: `summary` → 摘要
+        ---
+        ### 💡 重要提醒
+        - ✅ 這個端點**完全同步**,會等待轉錄完成後才返回
+        - ✅ 無論音檔多長,都會自動處理完成
+        - ✅ 不需要設定等待時間或輪詢機制
+        - ✅ 直接取得最終結果,不會有 `event_id`
+        ### 🧪 測試 API
+        使用 curl 測試:
+        ```bash
+        curl -X POST https://你的網址/api/transcribe \\
+          -H "Content-Type: application/json" \\
+          -d '{
+            "password": "chou",
+            "file_data": "data:audio/m4a;base64,AAAA...",
+            "file_name": "test.m4a"
+          }'
+        ```
         """)
     gr.Markdown("""
     ---
     💡 **Supported Formats**: MP4, M4A, MP3, WAV, OGG, WEBM
+    📦 **Max File Size**: 25MB per chunk (auto-split)
+    🔒 **Security**: Password-protected
     """)
+# ====== 掛載 Gradio 到 FastAPI ======
+app = gr.mount_gradio_app(fastapi_app, demo, path="/")
 # ====== 啟動 ======
 if __name__ == "__main__":
     print("\n" + "="*60)
+    print("🚀 啟動 FastAPI + Gradio 應用")
+    print("📱 同步 API: /api/transcribe")
+    print("🌐 網頁介面: /")
     print("="*60 + "\n")
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)