Spaces:

MichaelChou0806
/

LINE_audio_transcript

Sleeping

App Files Community

MichaelChou0806 committed on Oct 8, 2025

Commit

7de90eb

verified ·

1 Parent(s): e5e8cf2

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -207

app.py CHANGED Viewed

@@ -14,7 +14,7 @@ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 print("===== 🚀 啟動中 =====")
 print(f"APP_PASSWORD: {'✅ 已載入' if PASSWORD else '❌ 未載入'}")
-# ====== 工具:把 data:URL 轉成臨時檔 ======
 MIME_EXT = {
     "audio/mp4": "m4a", "audio/m4a": "m4a", "audio/aac": "aac",
     "audio/mpeg": "mp3", "audio/wav": "wav", "audio/x-wav": "wav",
@@ -23,7 +23,7 @@ MIME_EXT = {
 }
 def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
-    print(f"  → [_dataurl_to_file] 開始處理 data URL...")
     try:
         header, b64 = data_url.split(",", 1)
     except ValueError:
@@ -31,60 +31,43 @@ def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
     mime = header.split(";")[0].split(":", 1)[-1].strip()
     ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")
     fname = orig_name if (orig_name and "." in orig_name) else f"upload_{uuid.uuid4().hex}.{ext}"
-    print(f"  → [_dataurl_to_file] 檔名: {fname}, Base64長度: {len(b64)}")
     with open(fname, "wb") as f:
         f.write(base64.b64decode(b64))
-    file_size = os.path.getsize(fname)
-    print(f"  → [_dataurl_to_file] ✅ 檔案已建立, 大小: {file_size} bytes")
     return fname
 def _extract_effective_path(file_obj) -> str:
-    """從各種格式中提取有效檔案路徑"""
-    print(f"[_extract_effective_path] 收到類型: {type(file_obj)}")
-    # 字串模式
     if isinstance(file_obj, str):
         s = file_obj.strip().strip('"')
         if s.startswith("data:"):
-            print(f"  → 偵測到 data URL")
             return _dataurl_to_file(s, None)
         if os.path.isfile(s):
-            print(f"  → 找到檔案路徑: {s}")
             return s
-    # 字典模式
     if isinstance(file_obj, dict):
-        print(f"  → 字典模式, Keys: {list(file_obj.keys())}")
         data = file_obj.get("data")
         if isinstance(data, str) and data.startswith("data:"):
-            print(f"  → 找到 data URL")
             return _dataurl_to_file(data, file_obj.get("orig_name"))
         p = str(file_obj.get("path") or "").strip().strip('"')
         if p and os.path.isfile(p):
             return p
-    # 物件模式
     for attr in ("name", "path"):
         p = getattr(file_obj, attr, None)
         if isinstance(p, str):
             s = p.strip().strip('"')
             if os.path.isfile(s):
                 return s
-    raise FileNotFoundError("Cannot parse uploaded file")
-# ====== 分段處理 ======
 def split_audio(path):
     size = os.path.getsize(path)
-    print(f"[split_audio] 檔案大小: {size} bytes ({size/1024/1024:.2f} MB)")
     if size <= MAX_SIZE:
-        print(f"[split_audio] 不需分割")
         return [path]
-    print(f"[split_audio] 開始分割...")
     audio = AudioSegment.from_file(path)
     n = int(size / MAX_SIZE) + 1
     chunk_ms = len(audio) / n
-    print(f"[split_audio] 分割成 {n} 個片段")
     parts = []
     for i in range(n):
         fn = f"chunk_{i+1}.wav"
@@ -92,13 +75,9 @@ def split_audio(path):
         parts.append(fn)
     return parts
-# ====== 轉錄核心 ======
 def transcribe_core(path, model="whisper-1"):
-    print(f"\n{'='*60}")
-    print(f"[transcribe_core] 開始轉錄: {path}")
-    print(f"{'='*60}")
-    start_time = time.time()
     if path.lower().endswith(".mp4"):
         fixed = path[:-4] + ".m4a"
@@ -109,21 +88,18 @@ def transcribe_core(path, model="whisper-1"):
             pass
     chunks = split_audio(path)
-    print(f"\n[transcribe_core] === Whisper 轉錄 ({len(chunks)} 片段) ===")
     raw = []
     for i, c in enumerate(chunks, 1):
-        print(f"[transcribe_core] 轉錄片段 {i}/{len(chunks)}")
         with open(c, "rb") as af:
             txt = client.audio.transcriptions.create(
                 model=model, file=af, response_format="text"
             )
             raw.append(txt)
-        print(f"[transcribe_core] ✅ 片段 {i} 完成")
     raw_txt = "\n".join(raw)
-    print(f"[transcribe_core] 原始轉錄: {len(raw_txt)} 字元")
-    print(f"\n[transcribe_core] === 簡轉繁 ===")
     conv = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
@@ -133,9 +109,8 @@ def transcribe_core(path, model="whisper-1"):
         temperature=0.0
     )
     trad = conv.choices[0].message.content.strip()
-    print(f"[transcribe_core] ✅ 繁體轉換完成: {len(trad)} 字元")
-    print(f"\n[transcribe_core] === AI 摘要 ===")
     summ = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
@@ -144,35 +119,27 @@ def transcribe_core(path, model="whisper-1"):
         ],
         temperature=0.2
     )
-    summary = summ.choices[0].message.content.strip()
-    total_time = time.time() - start_time
-    print(f"\n{'='*60}")
-    print(f"[transcribe_core] ✅✅✅ 全部完成! 總耗時: {total_time:.1f}秒")
-    print(f"{'='*60}\n")
-    return trad, summary
 # ====== Gradio UI 函式 ======
 def transcribe_ui(password, file):
-    print(f"\n🌐 [UI] 網頁版請求")
     if not password or password.strip() != PASSWORD:
         return "❌ Password incorrect", "", ""
     if not file:
-        return "⚠️ No file uploaded", "", ""
     try:
         path = _extract_effective_path(file)
         text, summary = transcribe_core(path)
-        return "✅ Transcription completed", text, summary
     except Exception as e:
-        import traceback
-        print(f"❌ [UI] 錯誤:\n{traceback.format_exc()}")
         return f"❌ Error: {e}", "", ""
-# ====== 建立 FastAPI 應用 ======
 fastapi_app = FastAPI()
-# CORS 設定
 fastapi_app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -181,29 +148,15 @@ fastapi_app.add_middleware(
     allow_headers=["*"],
 )
-# ====== 完全同步的 API 端點 ======
 @fastapi_app.post("/api/transcribe")
-async def api_transcribe_sync(request: Request):
-    """
-    完全同步的 API 端點 - 直接返回結果,不用輪詢
-    請求格式:
-    {
-      "password": "chou",
-      "file_data": "data:audio/m4a;base64,...",
-      "file_name": "recording.m4a"
-    }
-    """
     try:
         body = await request.json()
-        print(f"\n{'📱'*30}")
-        print(f"🎯 [SYNC API] 收到同步 API 請求")
-        print(f"📦 Keys: {list(body.keys())}")
-        print(f"{'📱'*30}")
         password = body.get("password", "")
         if password.strip() != PASSWORD:
-            print(f"❌ [SYNC API] 密碼錯誤")
             return JSONResponse(
                 status_code=401,
                 content={"status": "error", "error": "Password incorrect"}
@@ -213,19 +166,13 @@ async def api_transcribe_sync(request: Request):
         file_name = body.get("file_name", "recording.m4a")
         if not file_data or not file_data.startswith("data:"):
-            print(f"❌ [SYNC API] 檔案格式錯誤")
             return JSONResponse(
                 status_code=400,
-                content={"status": "error", "error": "Invalid file data format"}
             )
-        print(f"[SYNC API] 檔案長度: {len(file_data)}, 檔名: {file_name}")
-        # 直接處理,同步執行
         file_dict = {"data": file_data, "orig_name": file_name}
         path = _extract_effective_path(file_dict)
-        print(f"✅ [SYNC API] 檔案解析成功: {path}")
         text, summary = transcribe_core(path)
         result = {
@@ -233,136 +180,44 @@ async def api_transcribe_sync(request: Request):
             "transcription": text,
             "summary": summary
         }
-        print(f"\n{'✅'*30}")
-        print(f"✅✅✅ [SYNC API] 完成! 返回結果")
-        print(json.dumps(result, ensure_ascii=False, indent=2))
-        print(f"{'✅'*30}\n")
         return JSONResponse(content=result)
     except Exception as e:
         import traceback
-        error_trace = traceback.format_exc()
-        print(f"\n{'❌'*30}")
-        print(f"❌ [SYNC API] 錯誤:\n{error_trace}")
-        print(f"{'❌'*30}\n")
         return JSONResponse(
             status_code=500,
             content={"status": "error", "error": str(e)}
         )
 # ====== Gradio 介面 ======
-with gr.Blocks(
-    theme=gr.themes.Soft(),
-    title="LINE Audio Transcription",
-    css="""
-    /* 手機優化樣式 */
-    @media (max-width: 768px) {
-        .gradio-container {
-            padding: 8px !important;
-        }
-        /* 限制檔案上傳區塊高度 */
-        .file-upload {
-            max-height: 180px !important;
-        }
-        /* 限制圖示大小 */
-        .file-upload svg, .file-upload img {
-            max-width: 80px !important;
-            max-height: 80px !important;
-        }
-        /* 調整按鈕 */
-        button {
-            font-size: 16px !important;
-            padding: 14px !important;
-            margin: 8px 0 !important;
-        }
-        /* Tab 標籤 */
-        .tabs button {
-            padding: 10px 14px !important;
-            font-size: 14px !important;
-        }
-        /* 標題更緊湊 */
-        h1 {
-            font-size: 24px !important;
-            margin: 10px 0 !important;
-        }
-        h3 {
-            font-size: 16px !important;
-            margin: 8px 0 !important;
-        }
-        /* 輸入框 */
-        input, textarea {
-            font-size: 16px !important;
-        }
-        /* 減少內邊距 */
-        .block {
-            padding: 8px !important;
-        }
-    }
-    """
-) as demo:
     gr.Markdown("# 🎧 LINE Audio Transcription")
-    with gr.Tab("🌐 Web Upload"):
-        gr.Markdown("### Upload audio from browser")
-        pw_ui = gr.Textbox(
-            label="Password",
-            type="password",
-            placeholder="Enter password",
-            scale=1
-        )
-        file_ui = gr.File(
-            label="Upload Audio File",
-            file_types=["audio"],
-            file_count="single",
-            scale=1,
-            elem_classes=["file-upload"]
-        )
-        btn_ui = gr.Button(
-            "🚀 Start Transcription",
-            variant="primary",
-            size="lg",
-            scale=1
-        )
-        status_ui = gr.Textbox(
-            label="Status",
-            interactive=False,
-            scale=1
-        )
-        transcript_ui = gr.Textbox(
-            label="Transcription",
-            lines=6,
-            placeholder="Transcription will appear here...",
-            show_copy_button=True,
-            scale=1
-        )
-        summary_ui = gr.Textbox(
-            label="Summary",
-            lines=4,
-            placeholder="Summary will appear here...",
-            show_copy_button=True,
-            scale=1
-        )
-        btn_ui.click(
-            transcribe_ui,
-            inputs=[pw_ui, file_ui],
-            outputs=[status_ui, transcript_ui, summary_ui]
-        )
-    with gr.Tab("📱 API"):
         gr.Markdown("""
-        ### iPhone Shortcuts
         **Endpoint:** `POST /api/transcribe`
         **Request:**
         ```json
         {
-          "password": "your_password",
           "file_data": "data:audio/m4a;base64,...",
           "file_name": "recording.m4a"
         }
@@ -379,43 +234,32 @@ with gr.Blocks(
         ---
-        ✅ Fully synchronous
         ✅ No polling needed
         ✅ Works with any audio length
         ---
-        **Setup Steps:**
         1. Get file → Audio
         2. Base64 encode
         3. Text: `data:audio/m4a;base64,[encoded]`
-        4. Dictionary with 3 text fields:
            - `password`: `chou`
            - `file_data`: Step 3
            - `file_name`: `recording.m4a`
-        5. Get URL contents:
-           - URL: `/api/transcribe`
-           - Method: POST
-           - Header: `Content-Type: application/json`
-           - Body: Step 4 (JSON)
-        6. Get `transcription` & `summary`
         """)
-    gr.Markdown("""
-    ---
-    💡 **Formats:** MP4, M4A, MP3, WAV, OGG, WEBM | **Max:** 25MB/chunk | 🔒 Password-protected
-    """)
-# ====== 掛載 Gradio 到 FastAPI ======
 app = gr.mount_gradio_app(fastapi_app, demo, path="/")
-# ====== 啟動 ======
 if __name__ == "__main__":
-    print("\n" + "="*60)
-    print("🚀 啟動 FastAPI + Gradio 應用")
-    print("📱 同步 API: /api/transcribe")
-    print("🌐 網頁介面: /")
-    print("="*60 + "\n")
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)

 print("===== 🚀 啟動中 =====")
 print(f"APP_PASSWORD: {'✅ 已載入' if PASSWORD else '❌ 未載入'}")
+# ====== 工具函式 ======
 MIME_EXT = {
     "audio/mp4": "m4a", "audio/m4a": "m4a", "audio/aac": "aac",
     "audio/mpeg": "mp3", "audio/wav": "wav", "audio/x-wav": "wav",
 }
 def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
+    print(f"  → 處理 data URL, 長度: {len(data_url)}")
     try:
         header, b64 = data_url.split(",", 1)
     except ValueError:
     mime = header.split(";")[0].split(":", 1)[-1].strip()
     ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")
     fname = orig_name if (orig_name and "." in orig_name) else f"upload_{uuid.uuid4().hex}.{ext}"
     with open(fname, "wb") as f:
         f.write(base64.b64decode(b64))
+    print(f"  → ✅ 檔案建立: {fname}, {os.path.getsize(fname)} bytes")
     return fname
 def _extract_effective_path(file_obj) -> str:
+    print(f"解析檔案, 類型: {type(file_obj)}")
     if isinstance(file_obj, str):
         s = file_obj.strip().strip('"')
         if s.startswith("data:"):
             return _dataurl_to_file(s, None)
         if os.path.isfile(s):
             return s
     if isinstance(file_obj, dict):
         data = file_obj.get("data")
         if isinstance(data, str) and data.startswith("data:"):
             return _dataurl_to_file(data, file_obj.get("orig_name"))
         p = str(file_obj.get("path") or "").strip().strip('"')
         if p and os.path.isfile(p):
             return p
     for attr in ("name", "path"):
         p = getattr(file_obj, attr, None)
         if isinstance(p, str):
             s = p.strip().strip('"')
             if os.path.isfile(s):
                 return s
+    raise FileNotFoundError("Cannot parse file")
 def split_audio(path):
     size = os.path.getsize(path)
+    print(f"檔案大小: {size/1024/1024:.2f} MB")
     if size <= MAX_SIZE:
         return [path]
     audio = AudioSegment.from_file(path)
     n = int(size / MAX_SIZE) + 1
     chunk_ms = len(audio) / n
+    print(f"分割成 {n} 個片段")
     parts = []
     for i in range(n):
         fn = f"chunk_{i+1}.wav"
         parts.append(fn)
     return parts
 def transcribe_core(path, model="whisper-1"):
+    print(f"\n{'='*50}\n開始轉錄: {path}\n{'='*50}")
+    start = time.time()
     if path.lower().endswith(".mp4"):
         fixed = path[:-4] + ".m4a"
             pass
     chunks = split_audio(path)
+    print(f"Whisper 轉錄 ({len(chunks)} 片段)")
     raw = []
     for i, c in enumerate(chunks, 1):
+        print(f"片段 {i}/{len(chunks)}")
         with open(c, "rb") as af:
             txt = client.audio.transcriptions.create(
                 model=model, file=af, response_format="text"
             )
             raw.append(txt)
     raw_txt = "\n".join(raw)
+    print("簡轉繁")
     conv = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
         temperature=0.0
     )
     trad = conv.choices[0].message.content.strip()
+    print("AI 摘要")
     summ = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
         ],
         temperature=0.2
     )
+    print(f"✅ 完成! 耗時: {time.time()-start:.1f}秒\n{'='*50}\n")
+    return trad, summ.choices[0].message.content.strip()
 # ====== Gradio UI 函式 ======
 def transcribe_ui(password, file):
+    print(f"\n🌐 網頁版請求")
     if not password or password.strip() != PASSWORD:
         return "❌ Password incorrect", "", ""
     if not file:
+        return "⚠️ No file", "", ""
     try:
         path = _extract_effective_path(file)
         text, summary = transcribe_core(path)
+        return "✅ Completed", text, summary
     except Exception as e:
+        print(f"❌ 錯誤: {e}")
         return f"❌ Error: {e}", "", ""
+# ====== FastAPI 應用 ======
 fastapi_app = FastAPI()
 fastapi_app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_headers=["*"],
 )
 @fastapi_app.post("/api/transcribe")
+async def api_transcribe(request: Request):
+    """同步 API 端點"""
     try:
         body = await request.json()
+        print(f"\n📱 API 請求")
         password = body.get("password", "")
         if password.strip() != PASSWORD:
             return JSONResponse(
                 status_code=401,
                 content={"status": "error", "error": "Password incorrect"}
         file_name = body.get("file_name", "recording.m4a")
         if not file_data or not file_data.startswith("data:"):
             return JSONResponse(
                 status_code=400,
+                content={"status": "error", "error": "Invalid file data"}
             )
         file_dict = {"data": file_data, "orig_name": file_name}
         path = _extract_effective_path(file_dict)
         text, summary = transcribe_core(path)
         result = {
             "transcription": text,
             "summary": summary
         }
+        print(f"✅ API 完成\n{json.dumps(result, ensure_ascii=False, indent=2)}\n")
         return JSONResponse(content=result)
     except Exception as e:
         import traceback
+        print(f"❌ API 錯誤:\n{traceback.format_exc()}")
         return JSONResponse(
             status_code=500,
             content={"status": "error", "error": str(e)}
         )
 # ====== Gradio 介面 ======
+demo = gr.Blocks(title="LINE Audio Transcription")
+with demo:
     gr.Markdown("# 🎧 LINE Audio Transcription")
+    with gr.Tab("Web Upload"):
+        with gr.Column():
+            pw = gr.Textbox(label="Password", type="password", placeholder="Enter password")
+            audio = gr.Audio(label="Upload Audio", type="filepath", sources=["upload"])
+            btn = gr.Button("🚀 Start Transcription", variant="primary")
+            status = gr.Textbox(label="Status", interactive=False)
+            result = gr.Textbox(label="Transcription", lines=8, show_copy_button=True)
+            summary = gr.Textbox(label="Summary", lines=5, show_copy_button=True)
+        btn.click(transcribe_ui, inputs=[pw, audio], outputs=[status, result, summary])
+    with gr.Tab("API Info"):
         gr.Markdown("""
+        ### iPhone Shortcuts Integration
         **Endpoint:** `POST /api/transcribe`
         **Request:**
         ```json
         {
+          "password": "chou",
           "file_data": "data:audio/m4a;base64,...",
           "file_name": "recording.m4a"
         }
         ---
+        ✅ Synchronous - returns directly
         ✅ No polling needed
         ✅ Works with any audio length
         ---
+        **Setup:**
         1. Get file → Audio
         2. Base64 encode
         3. Text: `data:audio/m4a;base64,[encoded]`
+        4. Dictionary (3 text fields):
            - `password`: `chou`
            - `file_data`: Step 3
            - `file_name`: `recording.m4a`
+        5. Get URL: `/api/transcribe` (POST, JSON)
+        6. Extract `transcription` & `summary`
         """)
+    gr.Markdown("💡 **Formats:** MP4, M4A, MP3, WAV, OGG, WEBM | **Max:** 25MB/chunk")
+# ====== 掛載與啟動 ======
 app = gr.mount_gradio_app(fastapi_app, demo, path="/")
 if __name__ == "__main__":
+    print("\n🚀 啟動應用")
+    print("📱 API: /api/transcribe")
+    print("🌐 Web: /\n")
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)