Spaces:

MichaelChou0806
/

LINE_audio_transcript

Sleeping

App Files Files Community

MichaelChou0806 committed on Oct 8, 2025

Commit

a433c1d

verified ·

1 Parent(s): e402a2d

Update app.py

Browse files

Files changed (1) hide show

app.py +171 -63

app.py CHANGED Viewed

@@ -14,7 +14,7 @@ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 print("===== 🚀 啟動中 =====")
 print(f"APP_PASSWORD: {'✅ 已載入' if PASSWORD else '❌ 未載入'}")
-# ====== 工具函式 ======
 MIME_EXT = {
     "audio/mp4": "m4a", "audio/m4a": "m4a", "audio/aac": "aac",
     "audio/mpeg": "mp3", "audio/wav": "wav", "audio/x-wav": "wav",
@@ -23,7 +23,7 @@ MIME_EXT = {
 }
 def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
-    print(f"  → 處理 data URL, 長度: {len(data_url)}")
     try:
         header, b64 = data_url.split(",", 1)
     except ValueError:
@@ -31,43 +31,60 @@ def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
     mime = header.split(";")[0].split(":", 1)[-1].strip()
     ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")
     fname = orig_name if (orig_name and "." in orig_name) else f"upload_{uuid.uuid4().hex}.{ext}"
     with open(fname, "wb") as f:
         f.write(base64.b64decode(b64))
-    print(f"  → ✅ 檔案建立: {fname}, {os.path.getsize(fname)} bytes")
     return fname
 def _extract_effective_path(file_obj) -> str:
-    print(f"解析檔案, 類型: {type(file_obj)}")
     if isinstance(file_obj, str):
         s = file_obj.strip().strip('"')
         if s.startswith("data:"):
             return _dataurl_to_file(s, None)
         if os.path.isfile(s):
             return s
     if isinstance(file_obj, dict):
         data = file_obj.get("data")
         if isinstance(data, str) and data.startswith("data:"):
             return _dataurl_to_file(data, file_obj.get("orig_name"))
         p = str(file_obj.get("path") or "").strip().strip('"')
         if p and os.path.isfile(p):
             return p
     for attr in ("name", "path"):
         p = getattr(file_obj, attr, None)
         if isinstance(p, str):
             s = p.strip().strip('"')
             if os.path.isfile(s):
                 return s
-    raise FileNotFoundError("Cannot parse file")
 def split_audio(path):
     size = os.path.getsize(path)
-    print(f"檔案大小: {size/1024/1024:.2f} MB")
     if size <= MAX_SIZE:
         return [path]
     audio = AudioSegment.from_file(path)
     n = int(size / MAX_SIZE) + 1
     chunk_ms = len(audio) / n
-    print(f"分割成 {n} 個片段")
     parts = []
     for i in range(n):
         fn = f"chunk_{i+1}.wav"
@@ -75,9 +92,13 @@ def split_audio(path):
         parts.append(fn)
     return parts
 def transcribe_core(path, model="whisper-1"):
-    print(f"\n{'='*50}\n開始轉錄: {path}\n{'='*50}")
-    start = time.time()
     if path.lower().endswith(".mp4"):
         fixed = path[:-4] + ".m4a"
@@ -88,18 +109,21 @@ def transcribe_core(path, model="whisper-1"):
             pass
     chunks = split_audio(path)
-    print(f"Whisper 轉錄 ({len(chunks)} 片段)")
     raw = []
     for i, c in enumerate(chunks, 1):
-        print(f"片段 {i}/{len(chunks)}")
         with open(c, "rb") as af:
             txt = client.audio.transcriptions.create(
                 model=model, file=af, response_format="text"
             )
             raw.append(txt)
     raw_txt = "\n".join(raw)
-    print("簡轉繁")
     conv = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
@@ -109,8 +133,9 @@ def transcribe_core(path, model="whisper-1"):
         temperature=0.0
     )
     trad = conv.choices[0].message.content.strip()
-    print("AI 摘要")
     summ = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
@@ -119,27 +144,35 @@ def transcribe_core(path, model="whisper-1"):
         ],
         temperature=0.2
     )
-    print(f"✅ 完成! 耗時: {time.time()-start:.1f}秒\n{'='*50}\n")
-    return trad, summ.choices[0].message.content.strip()
 # ====== Gradio UI 函式 ======
 def transcribe_ui(password, file):
-    print(f"\n🌐 網頁版請求")
     if not password or password.strip() != PASSWORD:
         return "❌ Password incorrect", "", ""
     if not file:
-        return "⚠️ No file", "", ""
     try:
         path = _extract_effective_path(file)
         text, summary = transcribe_core(path)
-        return "✅ Completed", text, summary
     except Exception as e:
-        print(f"❌ 錯誤: {e}")
         return f"❌ Error: {e}", "", ""
-# ====== FastAPI 應用 ======
 fastapi_app = FastAPI()
 fastapi_app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -148,15 +181,29 @@ fastapi_app.add_middleware(
     allow_headers=["*"],
 )
 @fastapi_app.post("/api/transcribe")
-async def api_transcribe(request: Request):
-    """同步 API 端點"""
     try:
         body = await request.json()
-        print(f"\n📱 API 請求")
         password = body.get("password", "")
         if password.strip() != PASSWORD:
             return JSONResponse(
                 status_code=401,
                 content={"status": "error", "error": "Password incorrect"}
@@ -166,13 +213,19 @@ async def api_transcribe(request: Request):
         file_name = body.get("file_name", "recording.m4a")
         if not file_data or not file_data.startswith("data:"):
             return JSONResponse(
                 status_code=400,
-                content={"status": "error", "error": "Invalid file data"}
             )
         file_dict = {"data": file_data, "orig_name": file_name}
         path = _extract_effective_path(file_dict)
         text, summary = transcribe_core(path)
         result = {
@@ -180,83 +233,138 @@ async def api_transcribe(request: Request):
             "transcription": text,
             "summary": summary
         }
-        print(f"✅ API 完成\n{json.dumps(result, ensure_ascii=False, indent=2)}\n")
         return JSONResponse(content=result)
     except Exception as e:
         import traceback
-        print(f"❌ API 錯誤:\n{traceback.format_exc()}")
         return JSONResponse(
             status_code=500,
             content={"status": "error", "error": str(e)}
         )
 # ====== Gradio 介面 ======
-with gr.Blocks(title="LINE Audio Transcription") as demo:
-    gr.Markdown("# 🎧 LINE Audio Transcription")
-    with gr.Tab("Web Upload"):
-        pw = gr.Textbox(label="Password", type="password", placeholder="Enter password")
-        audio_file = gr.File(label="Upload Audio", file_types=["audio"])
-        btn = gr.Button("🚀 Start Transcription", variant="primary")
-        status = gr.Textbox(label="Status", interactive=False)
-        result = gr.Textbox(label="Transcription", lines=8, show_copy_button=True)
-        summary = gr.Textbox(label="Summary", lines=5, show_copy_button=True)
-        btn.click(transcribe_ui, inputs=[pw, audio_file], outputs=[status, result, summary])
-    with gr.Tab("API Info"):
         gr.Markdown("""
-        ### iPhone Shortcuts Integration
-        **Endpoint:** `POST /api/transcribe`
-        **Request:**
         ```json
         {
-          "password": "chou",
-          "file_data": "data:audio/m4a;base64,...",
           "file_name": "recording.m4a"
         }
         ```
-        **Response:**
         ```json
         {
           "status": "success",
-          "transcription": "...",
-          "summary": "..."
         }
         ```
         ---
-        ✅ Synchronous - returns directly
-        ✅ No polling needed
-        ✅ Works with any audio length
         ---
-        **Setup:**
-        1. Get file → Audio
-        2. Base64 encode
-        3. Text: `data:audio/m4a;base64,[encoded]`
-        4. Dictionary (3 text fields):
-           - `password`: `chou`
-           - `file_data`: Step 3
-           - `file_name`: `recording.m4a`
-        5. Get URL: `/api/transcribe` (POST, JSON)
-        6. Extract `transcription` & `summary`
         """)
-    gr.Markdown("💡 **Formats:** MP4, M4A, MP3, WAV, OGG, WEBM | **Max:** 25MB/chunk")
-# ====== 掛載與啟動 ======
 app = gr.mount_gradio_app(fastapi_app, demo, path="/")
 if __name__ == "__main__":
-    print("\n🚀 啟動應用")
-    print("📱 API: /api/transcribe")
-    print("🌐 Web: /\n")
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)

 print("===== 🚀 啟動中 =====")
 print(f"APP_PASSWORD: {'✅ 已載入' if PASSWORD else '❌ 未載入'}")
+# ====== 工具:把 data:URL 轉成臨時檔 ======
 MIME_EXT = {
     "audio/mp4": "m4a", "audio/m4a": "m4a", "audio/aac": "aac",
     "audio/mpeg": "mp3", "audio/wav": "wav", "audio/x-wav": "wav",
 }
 def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
+    print(f"  → [_dataurl_to_file] 開始處理 data URL...")
     try:
         header, b64 = data_url.split(",", 1)
     except ValueError:
     mime = header.split(";")[0].split(":", 1)[-1].strip()
     ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")
     fname = orig_name if (orig_name and "." in orig_name) else f"upload_{uuid.uuid4().hex}.{ext}"
+    print(f"  → [_dataurl_to_file] 檔名: {fname}, Base64長度: {len(b64)}")
     with open(fname, "wb") as f:
         f.write(base64.b64decode(b64))
+    file_size = os.path.getsize(fname)
+    print(f"  → [_dataurl_to_file] ✅ 檔案已建立, 大小: {file_size} bytes")
     return fname
 def _extract_effective_path(file_obj) -> str:
     """Resolve an uploaded-file value to the path of a file on disk.

     Shapes handled, in order:
       * ``str``  - a ``data:`` URL (written to disk via ``_dataurl_to_file``)
         or the path of an existing file, optionally wrapped in double quotes;
       * ``dict`` - a ``data`` key holding a ``data:`` URL (plus an optional
         ``orig_name``), or a ``path`` key naming an existing file;
       * any other object - a ``name`` or ``path`` string attribute naming an
         existing file.

     ``_dataurl_to_file`` opens ``orig_name`` for writing whenever it contains
     a dot, so the caller-supplied name is first reduced to its final path
     component and given a unique prefix. That keeps an API client from
     writing outside the working directory or over an existing file, and
     keeps two uploads that share a name from overwriting each other.

     Returns:
         Path of the resolved (or newly written) file.

     Raises:
         FileNotFoundError: if no shape yields an existing file.
     """
     import uuid  # function-scope import: only needed for unique upload names

     def _unquote(value: str) -> str:
         # Some clients send the path wrapped in whitespace and/or double quotes.
         return value.strip().strip('"')

     def _safe_upload_name(name):
         # Only a usable string basename is forwarded; anything else becomes
         # None so that _dataurl_to_file generates its own file name.
         if not isinstance(name, str):
             return None
         base = os.path.basename(name.replace("\\", "/")).strip()
         if not base.strip("."):  # "", ".", ".." and similar are not file names
             return None
         return f"upload_{uuid.uuid4().hex}_{base}"

     print(f"[_extract_effective_path] 收到類型: {type(file_obj)}")

     # String: either a data URL or a filesystem path.
     if isinstance(file_obj, str):
         s = _unquote(file_obj)
         if s.startswith("data:"):
             print(f"  → 偵測到 data URL")
             return _dataurl_to_file(s, None)
         if os.path.isfile(s):
             print(f"  → 找到檔案路徑: {s}")
             return s

     # Dict: a data URL under "data", or a filesystem path under "path".
     if isinstance(file_obj, dict):
         print(f"  → 字典模式, Keys: {list(file_obj.keys())}")
         data = file_obj.get("data")
         if isinstance(data, str) and data.startswith("data:"):
             print(f"  → 找到 data URL")
             return _dataurl_to_file(data, _safe_upload_name(file_obj.get("orig_name")))
         p = _unquote(str(file_obj.get("path") or ""))
         if p and os.path.isfile(p):
             return p

     # Object: look for a path-like string attribute.
     for attr in ("name", "path"):
         p = getattr(file_obj, attr, None)
         if isinstance(p, str):
             s = _unquote(p)
             if os.path.isfile(s):
                 return s

     raise FileNotFoundError("Cannot parse uploaded file")
+# ====== 分段處理 ======
 def split_audio(path):
     size = os.path.getsize(path)
+    print(f"[split_audio] 檔案大小: {size} bytes ({size/1024/1024:.2f} MB)")
     if size <= MAX_SIZE:
+        print(f"[split_audio] 不需分割")
         return [path]
+    print(f"[split_audio] 開始分割...")
     audio = AudioSegment.from_file(path)
     n = int(size / MAX_SIZE) + 1
     chunk_ms = len(audio) / n
+    print(f"[split_audio] 分割成 {n} 個片段")
     parts = []
     for i in range(n):
         fn = f"chunk_{i+1}.wav"
         parts.append(fn)
     return parts
+# ====== 轉錄核心 ======
 def transcribe_core(path, model="whisper-1"):
+    print(f"\n{'='*60}")
+    print(f"[transcribe_core] 開始轉錄: {path}")
+    print(f"{'='*60}")
+    start_time = time.time()
     if path.lower().endswith(".mp4"):
         fixed = path[:-4] + ".m4a"
             pass
     chunks = split_audio(path)
+    print(f"\n[transcribe_core] === Whisper 轉錄 ({len(chunks)} 片段) ===")
     raw = []
     for i, c in enumerate(chunks, 1):
+        print(f"[transcribe_core] 轉錄片段 {i}/{len(chunks)}")
         with open(c, "rb") as af:
             txt = client.audio.transcriptions.create(
                 model=model, file=af, response_format="text"
             )
             raw.append(txt)
+        print(f"[transcribe_core] ✅ 片段 {i} 完成")
     raw_txt = "\n".join(raw)
+    print(f"[transcribe_core] 原始轉錄: {len(raw_txt)} 字元")
+    print(f"\n[transcribe_core] === 簡轉繁 ===")
     conv = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
         temperature=0.0
     )
     trad = conv.choices[0].message.content.strip()
+    print(f"[transcribe_core] ✅ 繁體轉換完成: {len(trad)} 字元")
+    print(f"\n[transcribe_core] === AI 摘要 ===")
     summ = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
         ],
         temperature=0.2
     )
+    summary = summ.choices[0].message.content.strip()
+    total_time = time.time() - start_time
+    print(f"\n{'='*60}")
+    print(f"[transcribe_core] ✅✅✅ 全部完成! 總耗時: {total_time:.1f}秒")
+    print(f"{'='*60}\n")
+    return trad, summary
 # ====== Gradio UI 函式 ======
 def transcribe_ui(password, file):
     """Gradio click handler: check the password, then transcribe the upload.

     Returns a ``(status, transcription, summary)`` tuple of strings; the last
     two are empty unless transcription succeeded.
     """
     print("\n🌐 [UI] 網頁版請求")

     authorised = bool(password) and password.strip() == PASSWORD
     if not authorised:
         return "❌ Password incorrect", "", ""
     if not file:
         return "⚠️ No file uploaded", "", ""

     try:
         text, summary = transcribe_core(_extract_effective_path(file))
     except Exception as e:
         # Log the full traceback server-side; the UI only shows the message.
         import traceback
         print(f"❌ [UI] 錯誤:\n{traceback.format_exc()}")
         return f"❌ Error: {e}", "", ""
     return "✅ Transcription completed", text, summary
+# ====== 建立 FastAPI 應用 ======
 fastapi_app = FastAPI()
+# CORS 設定
 fastapi_app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_headers=["*"],
 )
+# ====== 完全同步的 API 端點 ======
 @fastapi_app.post("/api/transcribe")
+async def api_transcribe_sync(request: Request):
+    """
+    完全同步的 API 端點 - 直接返回結果,不用輪詢
+    請求格式:
+    {
+      "password": "chou",
+      "file_data": "data:audio/m4a;base64,...",
+      "file_name": "recording.m4a"
+    }
+    """
     try:
         body = await request.json()
+        print(f"\n{'📱'*30}")
+        print(f"🎯 [SYNC API] 收到同步 API 請求")
+        print(f"📦 Keys: {list(body.keys())}")
+        print(f"{'📱'*30}")
         password = body.get("password", "")
         if password.strip() != PASSWORD:
+            print(f"❌ [SYNC API] 密碼錯誤")
             return JSONResponse(
                 status_code=401,
                 content={"status": "error", "error": "Password incorrect"}
         file_name = body.get("file_name", "recording.m4a")
         if not file_data or not file_data.startswith("data:"):
+            print(f"❌ [SYNC API] 檔案格式錯誤")
             return JSONResponse(
                 status_code=400,
+                content={"status": "error", "error": "Invalid file data format"}
             )
+        print(f"[SYNC API] 檔案長度: {len(file_data)}, 檔名: {file_name}")
+        # 直接處理,同步執行
         file_dict = {"data": file_data, "orig_name": file_name}
         path = _extract_effective_path(file_dict)
+        print(f"✅ [SYNC API] 檔案解析成功: {path}")
         text, summary = transcribe_core(path)
         result = {
             "transcription": text,
             "summary": summary
         }
+        print(f"\n{'✅'*30}")
+        print(f"✅✅✅ [SYNC API] 完成! 返回結果")
+        print(json.dumps(result, ensure_ascii=False, indent=2))
+        print(f"{'✅'*30}\n")
         return JSONResponse(content=result)
     except Exception as e:
         import traceback
+        error_trace = traceback.format_exc()
+        print(f"\n{'❌'*30}")
+        print(f"❌ [SYNC API] 錯誤:\n{error_trace}")
+        print(f"{'❌'*30}\n")
         return JSONResponse(
             status_code=500,
             content={"status": "error", "error": str(e)}
         )
 # ====== Gradio UI ======
 # Two tabs: a browser upload form wired to transcribe_ui, and static API
 # documentation. The documentation uses the placeholder "your_password"
 # everywhere so the served page never shows a credential-looking value.
 with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo:
     gr.Markdown("# 🎧 LINE Audio Transcription & Summary")
     with gr.Tab("🌐 Web Upload"):
         gr.Markdown("### Upload audio file directly from browser")
         with gr.Row():
             # Left column: inputs.
             with gr.Column(scale=1):
                 pw_ui = gr.Textbox(label="Password", type="password")
                 file_ui = gr.File(label="Upload Audio File", file_types=["audio"])
                 btn_ui = gr.Button("Start Transcription 🚀", variant="primary", size="lg")
             # Right column: outputs, in the order transcribe_ui returns them.
             with gr.Column(scale=2):
                 status_ui = gr.Textbox(label="Status", interactive=False)
                 transcript_ui = gr.Textbox(label="Transcription Result", lines=10)
                 summary_ui = gr.Textbox(label="AI Summary", lines=6)
         btn_ui.click(transcribe_ui, [pw_ui, file_ui], [status_ui, transcript_ui, summary_ui])
     with gr.Tab("📱 API Documentation"):
         gr.Markdown("""
         ### 🚀 Synchronous API (Recommended for iPhone Shortcuts)
         **Endpoint**: `/api/transcribe` (POST)
         ✅ **完全同步** - 直接返回結果,無需輪詢
         ✅ **穩定可靠** - 不受音檔長度影響,自動等待完成
         ---
         #### Request Format (JSON):
         ```json
         {
           "password": "your_password",
           "file_data": "data:audio/m4a;base64,UklGR...",
           "file_name": "recording.m4a"
         }
         ```
         #### Response Format:
         ```json
         {
           "status": "success",
           "transcription": "轉錄內容...",
           "summary": "摘要內容..."
         }
         ```
         ---
         ### 📱 iPhone Shortcuts 設定
         **動作流程:**
         1. **取得檔案** → 語音檔
         2. **Base64 編碼**
         3. **文字** (組合 data URL):
            ```
            data:audio/m4a;base64,Base64編碼結果
            ```
         4. **字典** (請求本文):
            - 鍵: `password`, 值: `your_password`
            - 鍵: `file_data`, 值: 上一步的文字
            - 鍵: `file_name`, 值: `recording.m4a`
         5. **取得 URL 內容**:
            - URL: `https://你的網址/api/transcribe`
            - 方法: `POST`
            - 標頭: `Content-Type` = `application/json`
            - 請求本文: 上一步的字典
            - 請求本文類型: `JSON`
         6. **從字典取得值**:
            - 鍵: `transcription` → 轉錄結果
            - 鍵: `summary` → 摘要
         ---
         ### 💡 重要提醒
         - ✅ 這個端點**完全同步**,會等待轉錄完成後才返回
         - ✅ 無論音檔多長,都會自動處理完成
         - ✅ 不需要設定等待時間或輪詢機制
         - ✅ 直接取得最終結果,不會有 `event_id`
         ### 🧪 測試 API
         使用 curl 測試:
         ```bash
         curl -X POST https://你的網址/api/transcribe \\
           -H "Content-Type: application/json" \\
           -d '{
             "password": "your_password",
             "file_data": "data:audio/m4a;base64,AAAA...",
             "file_name": "test.m4a"
           }'
         ```
         """)
     # Footer shown below both tabs.
     gr.Markdown("""
     ---
     💡 **Supported Formats**: MP4, M4A, MP3, WAV, OGG, WEBM
     📦 **Max File Size**: 25MB per chunk (auto-split)
     🔒 **Security**: Password-protected
     """)
+# ====== 掛載 Gradio 到 FastAPI ======
 # Serve the Gradio UI at "/" on the same FastAPI app that exposes /api/transcribe.
 app = gr.mount_gradio_app(fastapi_app, demo, path="/")

 # Script entry point: print a start-up banner, then run the server.
 if __name__ == "__main__":
     rule = "=" * 60
     banner = [
         "\n" + rule,
         "🚀 啟動 FastAPI + Gradio 應用",
         "📱 同步 API: /api/transcribe",
         "🌐 網頁介面: /",
         rule + "\n",
     ]
     print("\n".join(banner))
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)