Spaces:

MichaelChou0806
/

LINE_audio_transcript

Sleeping

App Files Files Community

MichaelChou0806 committed on Oct 8, 2025

Commit

6ddb6b7

verified ·

1 Parent(s): 6c57120

Update app.py

Browse files

Files changed (1) hide show

app.py +109 -328

app.py CHANGED Viewed

@@ -37,7 +37,9 @@ def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
 def _extract_effective_path(file_obj) -> str:
     """從各種格式中提取有效檔案路徑"""
-    # 字串模式
     if isinstance(file_obj, str):
         s = file_obj.strip().strip('"')
         if s.startswith("data:"):
@@ -45,8 +47,9 @@ def _extract_effective_path(file_obj) -> str:
         if os.path.isfile(s):
             return s
-    # 字典模式
     if isinstance(file_obj, dict):
         data = file_obj.get("data")
         if isinstance(data, str) and data.startswith("data:"):
             return _dataurl_to_file(data, file_obj.get("orig_name"))
@@ -54,15 +57,23 @@ def _extract_effective_path(file_obj) -> str:
         if p and os.path.isfile(p):
             return p
-    # 物件模式
-    for attr in ("name", "path"):
-        p = getattr(file_obj, attr, None)
-        if isinstance(p, str):
-            s = p.strip().strip('"')
-            if os.path.isfile(s):
-                return s
-    raise FileNotFoundError("Cannot parse uploaded file")
 def split_audio(path):
     """將音訊檔案分割成多個小於 25MB 的片段"""
@@ -139,36 +150,48 @@ def transcribe_core(path, model="whisper-1"):
 # ====== Gradio UI 函式 ======
 def transcribe_web(password, audio_file):
-    """網頁版轉錄處理"""
-    print(f"\n🌐 [WEB] 收到網頁請求")
     # 驗證密碼
-    if not password or password.strip() != PASSWORD:
-        return "❌ Incorrect password. Please try again.", "", ""
     # 檢查檔案
     if not audio_file:
-        return "⚠️ Please upload an audio file first.", "", ""
     try:
         # 處理檔案
         path = _extract_effective_path(audio_file)
-        print(f"[WEB] 檔案路徑: {path}")
         # 轉錄
         text, summary = transcribe_core(path)
         # 統計資訊
         char_count = len(text)
-        status = f"✅ Transcription completed successfully!\n📝 Total characters: {char_count}"
-        print(f"[WEB] ✅ 成功完成")
         return status, text, summary
     except Exception as e:
         import traceback
         error_msg = traceback.format_exc()
-        print(f"❌ [WEB] 錯誤:\n{error_msg}")
         return f"❌ Error: {str(e)}", "", ""
 # ====== FastAPI 應用 ======
@@ -187,11 +210,14 @@ async def api_transcribe(request: Request):
     """API 端點 - 用於手機等外部調用"""
     try:
         body = await request.json()
-        print(f"\n📱 [API] 收到 API 請求")
         # 驗證密碼
         password = body.get("password", "")
         if password.strip() != PASSWORD:
             return JSONResponse(
                 status_code=401,
                 content={"status": "error", "error": "Password incorrect"}
@@ -202,6 +228,7 @@ async def api_transcribe(request: Request):
         file_name = body.get("file_name", "recording.m4a")
         if not file_data or not file_data.startswith("data:"):
             return JSONResponse(
                 status_code=400,
                 content={"status": "error", "error": "Invalid file data format"}
@@ -210,7 +237,7 @@ async def api_transcribe(request: Request):
         # 處理檔案
         file_dict = {"data": file_data, "orig_name": file_name}
         path = _extract_effective_path(file_dict)
-        print(f"[API] 檔案解析成功: {path}")
         # 轉錄
         text, summary = transcribe_core(path)
@@ -221,355 +248,109 @@ async def api_transcribe(request: Request):
             "summary": summary
         }
-        print(f"[API] ✅ 成功完成\n")
         return JSONResponse(content=result)
     except Exception as e:
         import traceback
         error_trace = traceback.format_exc()
-        print(f"❌ [API] 錯誤:\n{error_trace}\n")
         return JSONResponse(
             status_code=500,
             content={"status": "error", "error": str(e)}
         )
-# ====== 自定義樣式 ======
-custom_css = """
-/* 全局設定 */
-* {
-    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
-}
-.gradio-container {
-    max-width: 1400px !important;
-    margin: 0 auto !important;
-}
-/* 主容器 */
-.main-container {
-    padding: 2rem;
-}
-/* 標題區 */
-.hero-section {
-    text-align: center;
-    padding: 3rem 2rem;
-    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-    border-radius: 16px;
-    margin-bottom: 3rem;
-    box-shadow: 0 8px 32px rgba(102, 126, 234, 0.3);
-}
-.hero-section h1 {
-    color: white;
-    font-size: 2.5rem;
-    font-weight: 700;
-    margin: 0 0 0.5rem 0;
-    letter-spacing: -0.02em;
-}
-.hero-section p {
-    color: rgba(255, 255, 255, 0.9);
-    font-size: 1.15rem;
-    margin: 0;
-}
-/* 卡片樣式 */
-.card {
-    background: white;
-    border-radius: 12px;
-    padding: 2rem;
-    box-shadow: 0 4px 16px rgba(0, 0, 0, 0.08);
-    margin-bottom: 1.5rem;
-}
-.card h2 {
-    font-size: 1.5rem;
-    font-weight: 600;
-    margin: 0 0 1.5rem 0;
-    color: #1f2937;
-}
-/* 輸入框樣式 */
-.input-group {
-    margin-bottom: 1.5rem;
-}
-.input-group label {
-    display: block;
-    font-weight: 600;
-    color: #374151;
-    margin-bottom: 0.5rem;
-    font-size: 0.95rem;
-}
-input[type="password"],
-textarea {
-    width: 100%;
-    padding: 0.75rem;
-    border: 2px solid #e5e7eb;
-    border-radius: 8px;
-    font-size: 0.95rem;
-    transition: all 0.2s;
-}
-input[type="password"]:focus,
-textarea:focus {
-    outline: none;
-    border-color: #667eea;
-    box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
-}
-/* 按鈕樣式 */
-button.primary-btn {
-    width: 100%;
-    padding: 1rem 2rem !important;
-    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
-    border: none !important;
-    color: white !important;
-    font-size: 1.1rem !important;
-    font-weight: 600 !important;
-    border-radius: 10px !important;
-    cursor: pointer !important;
-    transition: all 0.3s !important;
-    box-shadow: 0 4px 12px rgba(102, 126, 234, 0.3) !important;
-}
-button.primary-btn:hover {
-    transform: translateY(-2px) !important;
-    box-shadow: 0 6px 20px rgba(102, 126, 234, 0.4) !important;
-}
-/* 檔案上傳區 */
-.file-upload-area {
-    border: 2px dashed #d1d5db;
-    border-radius: 12px;
-    padding: 2.5rem;
-    text-align: center;
-    background: #f9fafb;
-    transition: all 0.3s;
-    cursor: pointer;
-}
-.file-upload-area:hover {
-    border-color: #667eea;
-    background: #f0f4ff;
-}
-/* 狀態框 */
-.status-box {
-    padding: 1rem;
-    border-radius: 8px;
-    margin-bottom: 1rem;
-    font-size: 0.95rem;
-    line-height: 1.5;
-}
-.status-success {
-    background: #d1fae5;
-    border-left: 4px solid #10b981;
-    color: #065f46;
-}
-.status-error {
-    background: #fee2e2;
-    border-left: 4px solid #ef4444;
-    color: #991b1b;
-}
-.status-warning {
-    background: #fef3c7;
-    border-left: 4px solid #f59e0b;
-    color: #92400e;
-}
-/* 結果文字框 */
-textarea.result-text {
-    min-height: 200px !important;
-    font-family: "SF Mono", Monaco, monospace !important;
-    font-size: 0.9rem !important;
-    line-height: 1.6 !important;
-    background: #f9fafb !important;
-}
-/* 資訊提示 */
-.info-banner {
-    background: #eff6ff;
-    border: 1px solid #bfdbfe;
-    border-radius: 8px;
-    padding: 1rem;
-    margin: 1rem 0;
-    font-size: 0.9rem;
-    color: #1e40af;
-}
-/* 分隔線 */
-.divider {
-    height: 1px;
-    background: #e5e7eb;
-    margin: 2rem 0;
-}
-/* API 文檔區 */
-.api-section {
-    background: #f9fafb;
-    border-radius: 12px;
-    padding: 2rem;
-    margin-top: 2rem;
-}
-.api-section h3 {
-    font-size: 1.25rem;
-    font-weight: 600;
-    color: #1f2937;
-    margin: 0 0 1rem 0;
-}
-.api-endpoint {
-    background: #1f2937;
-    color: #f3f4f6;
-    padding: 1rem;
-    border-radius: 8px;
-    font-family: monospace;
-    font-size: 0.9rem;
-    margin: 1rem 0;
-}
-/* 響應式設計 */
-@media (max-width: 768px) {
-    .hero-section h1 {
-        font-size: 2rem;
-    }
-    .card {
-        padding: 1.5rem;
-    }
-}
-"""
 # ====== Gradio 介面 ======
-with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Audio Transcription Service") as demo:
-    # 標題
-    gr.HTML("""
-        <div class="hero-section">
-            <h1>🎧 Audio Transcription Service</h1>
-            <p>AI-Powered Speech Recognition & Summarization</p>
-        </div>
     """)
-    # 主要上傳區域
-    gr.HTML('<div class="card">')
-    gr.Markdown("## 🎵 Upload & Transcribe")
     with gr.Row():
         with gr.Column(scale=1):
             password_input = gr.Textbox(
-                label="🔐 Password",
                 type="password",
-                placeholder="Enter password",
-                elem_classes="input-group"
             )
-            audio_input = gr.File(
-                label="📁 Audio File",
-                file_types=["audio", ".mp4"],
-                file_count="single",
-                elem_classes="file-upload-area"
             )
-            gr.HTML("""
-                <div class="info-banner">
-                    <strong>💡 Supported formats:</strong> MP3, M4A, WAV, OGG, WEBM, MP4<br>
-                    <strong>📦 File size:</strong> Automatic chunking for large files
-                </div>
-            """)
-            submit_button = gr.Button(
                 "🚀 Start Transcription",
                 variant="primary",
-                elem_classes="primary-btn"
             )
         with gr.Column(scale=2):
             status_output = gr.Textbox(
-                label="📊 Status",
                 interactive=False,
-                lines=2,
-                elem_classes="status-box"
             )
             transcription_output = gr.Textbox(
-                label="📝 Transcription Result",
-                lines=15,
-                placeholder="Transcription will appear here...",
-                show_copy_button=True,
-                elem_classes="result-text"
             )
             summary_output = gr.Textbox(
-                label="💡 AI Summary",
                 lines=6,
-                placeholder="AI-generated summary will appear here...",
-                show_copy_button=True,
-                elem_classes="result-text"
             )
-    gr.HTML('</div>')
-    # API 文檔
-    gr.HTML('<div class="api-section">')
-    gr.Markdown("## 📱 API Integration")
     gr.Markdown("""
-### For Mobile Apps & External Services
-**Endpoint:** `POST /api/transcribe`
-**Request Body (JSON):**
-```json
-{
-  "password": "your_password",
-  "file_data": "data:audio/m4a;base64,...",
-  "file_name": "recording.m4a"
-}
-```
-**Response:**
-```json
-{
-  "status": "success",
-  "transcription": "Full text...",
-  "summary": "Summary..."
-}
-```
-**Features:**
-- ✅ Fully synchronous - returns complete results
-- ✅ Automatic file chunking for large files
-- ✅ Traditional Chinese output
-- ✅ AI-powered summarization
-**Use Cases:**
-- iPhone Shortcuts automation
-- Mobile app integration
-- Webhook processing
-- Batch transcription systems
-    """)
-    gr.HTML('</div>')
-    # 頁腳
-    gr.HTML("""
-        <div style="text-align: center; margin-top: 3rem; padding: 1.5rem; color: #6b7280; font-size: 0.9rem;">
-            <p><strong>Audio Transcription Service</strong> v2.0</p>
-            <p>Powered by OpenAI Whisper & GPT-4</p>
-        </div>
     """)
-    # 綁定事件
-    submit_button.click(
         fn=transcribe_web,
         inputs=[password_input, audio_input],
-        outputs=[status_output, transcription_output, summary_output]
     )
 # ====== 掛載到 FastAPI ======
@@ -578,9 +359,9 @@ app = gr.mount_gradio_app(fastapi_app, demo, path="/")
 # ====== 啟動 ======
 if __name__ == "__main__":
     print("\n" + "="*60)
-    print("🚀 啟動服務")
-    print("🌐 網頁介面: http://0.0.0.0:7860")
-    print("📱 API 端點: http://0.0.0.0:7860/api/transcribe")
     print("="*60 + "\n")
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)

 def _extract_effective_path(file_obj) -> str:
     """從各種格式中提取有效檔案路徑"""
+    print(f"[DEBUG] 檔案物件類型: {type(file_obj)}")
+    # 如果是字串路徑
     if isinstance(file_obj, str):
         s = file_obj.strip().strip('"')
         if s.startswith("data:"):
         if os.path.isfile(s):
             return s
+    # 如果是字典
     if isinstance(file_obj, dict):
+        print(f"[DEBUG] 字典 keys: {list(file_obj.keys())}")
         data = file_obj.get("data")
         if isinstance(data, str) and data.startswith("data:"):
             return _dataurl_to_file(data, file_obj.get("orig_name"))
         if p and os.path.isfile(p):
             return p
+    # 如果是物件，嘗試獲取 path 或 name 屬性
+    if hasattr(file_obj, 'name') and file_obj.name:
+        if os.path.isfile(file_obj.name):
+            return file_obj.name
+    if hasattr(file_obj, 'path') and file_obj.path:
+        if os.path.isfile(file_obj.path):
+            return file_obj.path
+    # 最後嘗試：直接當作路徑字串
+    try:
+        if os.path.isfile(str(file_obj)):
+            return str(file_obj)
+    except:
+        pass
+    raise FileNotFoundError(f"Cannot parse uploaded file: {file_obj}")
 def split_audio(path):
     """將音訊檔案分割成多個小於 25MB 的片段"""
 # ====== Gradio UI 函式 ======
 def transcribe_web(password, audio_file):
+    """Handle a transcription request coming from the Gradio web form.
+
+    The password is compared (after stripping whitespace) against the
+    module-level PASSWORD. The uploaded file is resolved to a path with
+    _extract_effective_path and then passed to transcribe_core.
+
+    Always returns a 3-tuple ``(status, transcription, summary)`` so the
+    three bound Gradio outputs are always filled. On any failure the
+    last two items are empty strings and ``status`` carries the message.
+    Exceptions are caught here: the traceback is printed and the
+    exception text is returned in ``status``.
+    """
+    print(f"\n{'='*60}")
+    print(f"🌐 [WEB] 收到網頁請求")
+    print(f"密碼: {'已提供' if password else '未提供'}")
+    print(f"檔案: {audio_file}")
+    print(f"{'='*60}")
+    # Validate the password.
+    if not password:
+        print("[WEB] ❌ 密碼為空")
+        return "❌ Please enter password", "", ""
+    if password.strip() != PASSWORD:
+        # Never write the submitted password or the expected secret to the
+        # log; record only that the check failed.
+        print("[WEB] ❌ 密碼錯誤")
+        return "❌ Incorrect password", "", ""
+    # Make sure a file was uploaded.
     if not audio_file:
+        print("[WEB] ❌ 未上傳檔案")
+        return "⚠️ Please upload an audio file", "", ""
     try:
+        # Resolve the upload to a path on disk.
+        print(f"[WEB] 開始處理檔案...")
         path = _extract_effective_path(audio_file)
+        print(f"[WEB] ✅ 檔案路徑: {path}")
+        # Transcribe and summarize.
+        print(f"[WEB] 開始轉錄...")
         text, summary = transcribe_core(path)
+        # Build the status line from the transcript length.
         char_count = len(text)
+        status = f"✅ Completed! ({char_count} characters)"
+        print(f"[WEB] ✅ 轉錄成功\n")
         return status, text, summary
     except Exception as e:
         import traceback
         error_msg = traceback.format_exc()
+        print(f"❌ [WEB] 發生錯誤:\n{error_msg}\n")
         return f"❌ Error: {str(e)}", "", ""
 # ====== FastAPI 應用 ======
     """API 端點 - 用於手機等外部調用"""
     try:
         body = await request.json()
+        print(f"\n{'='*60}")
+        print(f"📱 [API] 收到 API 請求")
+        print(f"{'='*60}")
         # 驗證密碼
         password = body.get("password", "")
         if password.strip() != PASSWORD:
+            print(f"[API] ❌ 密碼錯誤")
             return JSONResponse(
                 status_code=401,
                 content={"status": "error", "error": "Password incorrect"}
         file_name = body.get("file_name", "recording.m4a")
         if not file_data or not file_data.startswith("data:"):
+            print(f"[API] ❌ 檔案格式錯誤")
             return JSONResponse(
                 status_code=400,
                 content={"status": "error", "error": "Invalid file data format"}
         # 處理檔案
         file_dict = {"data": file_data, "orig_name": file_name}
         path = _extract_effective_path(file_dict)
+        print(f"[API] ✅ 檔案解析成功: {path}")
         # 轉錄
         text, summary = transcribe_core(path)
             "summary": summary
         }
+        print(f"[API] ✅ 轉錄成功\n")
         return JSONResponse(content=result)
     except Exception as e:
         import traceback
         error_trace = traceback.format_exc()
+        print(f"❌ [API] 發生錯誤:\n{error_trace}\n")
         return JSONResponse(
             status_code=500,
             content={"status": "error", "error": str(e)}
         )
 # ====== Gradio 介面 ======
+with gr.Blocks(title="Audio Transcription", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("""
+    # 🎧 Audio Transcription Service
+    ### AI-Powered Speech-to-Text with Summarization
     """)
     with gr.Row():
         with gr.Column(scale=1):
+            gr.Markdown("### 📤 Upload")
             password_input = gr.Textbox(
+                label="Password",
                 type="password",
+                placeholder="Enter password"
             )
+            audio_input = gr.Audio(
+                label="Audio File",
+                type="filepath",
+                sources=["upload"]
             )
+            submit_btn = gr.Button(
                 "🚀 Start Transcription",
                 variant="primary",
+                size="lg"
             )
+            gr.Markdown("""
+            **Supported formats:**
+            MP3, M4A, WAV, OGG, WEBM, MP4
+            **Processing:**
+            Automatic chunking for large files
+            """)
         with gr.Column(scale=2):
+            gr.Markdown("### 📊 Results")
             status_output = gr.Textbox(
+                label="Status",
                 interactive=False,
+                lines=1
             )
             transcription_output = gr.Textbox(
+                label="Transcription",
+                lines=12,
+                show_copy_button=True
             )
             summary_output = gr.Textbox(
+                label="Summary",
                 lines=6,
+                show_copy_button=True
             )
+    gr.Markdown("---")
     gr.Markdown("""
+    ## 📱 API Integration
+    **Endpoint:** `POST /api/transcribe`
+    **Request:**
+    ```json
+    {
+      "password": "your_password",
+      "file_data": "data:audio/m4a;base64,...",
+      "file_name": "recording.m4a"
+    }
+    ```
+    **Response:**
+    ```json
+    {
+      "status": "success",
+      "transcription": "...",
+      "summary": "..."
+    }
+    ```
     """)
+    # 事件綁定 - 這是關鍵！
+    submit_btn.click(
         fn=transcribe_web,
         inputs=[password_input, audio_input],
+        outputs=[status_output, transcription_output, summary_output],
+        api_name="transcribe"
     )
 # ====== 掛載到 FastAPI ======
 # ====== 啟動 ======
 if __name__ == "__main__":
     print("\n" + "="*60)
+    print("🚀 服務啟動")
+    print("🌐 網頁: http://0.0.0.0:7860")
+    print("📱 API: http://0.0.0.0:7860/api/transcribe")
     print("="*60 + "\n")
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)