Spaces:

MichaelChou0806
/

LINE_audio_transcript

Sleeping

App Files Files Community

MichaelChou0806 committed on Oct 8, 2025

Commit

ec64510

verified ·

1 Parent(s): ebd798f

Update app.py

Browse files

Files changed (1) hide show

app.py +143 -36

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import os, shutil, base64, uuid, mimetypes
 from pydub import AudioSegment
 from openai import OpenAI
 import gradio as gr
@@ -20,77 +20,131 @@ MIME_EXT = {
 }
 def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
     """Decode a ``data:`` URL and write its payload to a file in the working directory.

     Args:
         data_url: String of the form ``data:<mime>[;params],<base64 payload>``.
         orig_name: Optional client-supplied file name. Only its final path
             component is used, and only when that component contains a dot;
             otherwise a random ``upload_<hex>.<ext>`` name is generated.

     Returns:
         The name of the file that was written (relative to the working directory).

     Raises:
         ValueError: If ``data_url`` has no comma separating header and payload,
             or if the payload is not valid base64 (``binascii.Error`` is a
             ``ValueError`` subclass).
     """
     try:
         header, b64 = data_url.split(",", 1)
     except ValueError:
         raise ValueError("data URL format error") from None
     # "data:audio/mp4;base64" -> "audio/mp4"
     mime = header.split(";")[0].split(":", 1)[-1].strip()
     # Prefer the module's own MIME table, then the stdlib guess, then "m4a".
     ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")
     # orig_name comes from the client: keep only the last path component so a
     # name such as "../../x.py" or "/etc/x.conf" cannot escape the working dir.
     safe_name = os.path.basename(orig_name.replace("\\", "/")) if orig_name else ""
     if safe_name not in ("", ".", "..") and "." in safe_name:
         fname = safe_name
     else:
         fname = f"upload_{uuid.uuid4().hex}.{ext}"
     # Decode before opening the file so a malformed payload does not leave an
     # empty file behind.
     payload = base64.b64decode(b64)
     with open(fname, "wb") as f:
         f.write(payload)
     return fname
 def _extract_effective_path(file_obj) -> str:
     """從各種格式中提取有效檔案路徑"""
     # 字串模式
     if isinstance(file_obj, str):
         s = file_obj.strip().strip('"')
         if s.startswith("data:"):
             return _dataurl_to_file(s, None)
         if os.path.isfile(s):
             return s
     # 字典模式
     if isinstance(file_obj, dict):
         data = file_obj.get("data")
         if isinstance(data, str) and data.startswith("data:"):
             return _dataurl_to_file(data, file_obj.get("orig_name"))
         p = str(file_obj.get("path") or "").strip().strip('"')
         if p and os.path.isfile(p):
             return p
     # 物件模式
     for attr in ("name", "path"):
         p = getattr(file_obj, attr, None)
         if isinstance(p, str):
             s = p.strip().strip('"')
             if os.path.isfile(s):
                 return s
     raise FileNotFoundError("Cannot parse uploaded file")
 # ====== 分段處理 ======
 def split_audio(path):
     """Split an audio file into WAV chunks when it is larger than ``MAX_SIZE``.

     Args:
         path: Path to the source audio file.

     Returns:
         ``[path]`` unchanged when the file is at most ``MAX_SIZE`` bytes;
         otherwise a list of newly exported WAV chunk file names, in playback
         order.
     """
     size = os.path.getsize(path)
     if size <= MAX_SIZE:
         return [path]
     audio = AudioSegment.from_file(path)
     # Chunks are exported as uncompressed WAV, which is usually much larger
     # than a compressed source. Size the split on the decoded PCM byte count
     # as well as the on-disk size, otherwise each chunk can still exceed
     # MAX_SIZE. A small budget reduction leaves room for the WAV header.
     # NOTE(review): assumes MAX_SIZE is well above 4096 bytes - confirm.
     pcm_bytes = len(audio.raw_data)
     budget = max(MAX_SIZE - 4096, 1)
     n = max(int(size / MAX_SIZE), pcm_bytes // budget) + 1
     chunk_ms = len(audio) / n  # len(AudioSegment) is its duration in ms
     # Per-call token so chunk files from different requests never overwrite
     # each other.
     batch = uuid.uuid4().hex
     parts = []
     for i in range(n):
         fn = f"chunk_{batch}_{i+1}.wav"
         audio[int(i*chunk_ms):int((i+1)*chunk_ms)].export(fn, format="wav")
         parts.append(fn)
     return parts
 # ====== 轉錄核心 ======
 def transcribe_core(path, model="whisper-1"):
     if path.lower().endswith(".mp4"):
         fixed = path[:-4] + ".m4a"
         try:
             shutil.copy(path, fixed)
             path = fixed
-        except:
-            pass
     chunks = split_audio(path)
     raw = []
-    for c in chunks:
         with open(c, "rb") as af:
             txt = client.audio.transcriptions.create(
                 model=model, file=af, response_format="text"
             )
             raw.append(txt)
     raw_txt = "\n".join(raw)
     conv = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
@@ -100,7 +154,13 @@ def transcribe_core(path, model="whisper-1"):
         temperature=0.0
     )
     trad = conv.choices[0].message.content.strip()
     summ = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
@@ -109,74 +169,124 @@ def transcribe_core(path, model="whisper-1"):
         ],
         temperature=0.2
     )
-    return trad, summ.choices[0].message.content.strip()
 # ====== Gradio UI 函式 ======
 def transcribe_ui(password, file):
     """網頁版轉錄函式"""
-    print(f"\n🎯 Web UI Request | Password: {password[:2] if password else ''}***")
     if not password or password.strip() != PASSWORD:
         return "❌ Password incorrect", "", ""
     if not file:
         return "⚠️ No file uploaded", "", ""
     try:
         path = _extract_effective_path(file)
         text, summary = transcribe_core(path)
         return "✅ Transcription completed", text, summary
     except Exception as e:
-        print(f"❌ Error: {e}")
         return f"❌ Error: {e}", "", ""
-# ====== API 函式 (通過 Gradio 端點呼叫) ======
 def transcribe_api(password, file_data, file_name):
     """
     API 版本的轉錄函式
-    參數:
-    - password: 密碼字串
-    - file_data: data:audio/...;base64,... 格式的字串
-    - file_name: 原始檔名
     """
-    print(f"\n🎯 API Request | Password: {password[:2] if password else ''}***")
-    print(f"📁 File data length: {len(file_data) if file_data else 0}")
-    print(f"📁 File name: {file_name}")
     if not password or password.strip() != PASSWORD:
-        return {
             "status": "error",
             "error": "Password incorrect",
             "transcription": "",
             "summary": ""
         }
     if not file_data or not file_data.startswith("data:"):
-        return {
             "status": "error",
             "error": "Invalid file data format. Must be data:audio/...;base64,...",
             "transcription": "",
             "summary": ""
         }
     try:
         file_dict = {
             "data": file_data,
             "orig_name": file_name or "recording.m4a"
         }
         path = _extract_effective_path(file_dict)
         text, summary = transcribe_core(path)
-        return {
             "status": "success",
             "transcription": text,
             "summary": summary
         }
     except Exception as e:
         import traceback
-        print(f"❌ Error:\n{traceback.format_exc()}")
-        return {
             "status": "error",
             "error": str(e),
             "transcription": "",
             "summary": ""
         }
 # ====== Gradio 介面 ======
 with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo:
@@ -224,7 +334,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo
         gr.Markdown("""
         ### For iPhone Shortcuts & Automation
-        This tab provides a Gradio-based API endpoint that accepts Base64-encoded audio.
         """)
         with gr.Row():
@@ -263,17 +373,14 @@ with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo
             inputs=[pw_api, file_data_api, file_name_api],
             outputs=[result_api],
             api_name="transcribe",
-            queue=False
         )
         gr.Markdown("""
         ---
-        ### 📖 How to use with iPhone Shortcuts
-        **Gradio API Endpoint**:
-        ```
-        POST /gradio_api/call/transcribe
-        ```
         **Request Format (JSON)**:
         ```json
@@ -289,16 +396,13 @@ with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo
         **Response Format**:
         ```json
         {
-          "status": "success",
-          "transcription": "轉錄內容...",
-          "summary": "摘要內容..."
         }
         ```
-        💡 **Important**:
-        - The endpoint is `/gradio_api/call/transcribe` (note: `call/transcribe`)
-        - The `data` array must have exactly 3 items: [password, file_data, file_name]
-        - Use `queue=false` parameter or set `api_name="transcribe"` in your request
         """)
     gr.Markdown("""
@@ -310,8 +414,11 @@ with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo
 # ====== 啟動 ======
 if __name__ == "__main__":
     demo.launch(
         server_name="0.0.0.0",
         server_port=7860,
-        show_api=True  # 顯示 API 文件
     )

+import os, shutil, base64, uuid, mimetypes, json, time
 from pydub import AudioSegment
 from openai import OpenAI
 import gradio as gr
 }
 def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
+    print(f"  → [_dataurl_to_file] 開始處理 data URL...")
     try:
         header, b64 = data_url.split(",", 1)
     except ValueError:
+        print(f"  → [_dataurl_to_file] ❌ 錯誤: data URL 格式錯誤")
         raise ValueError("data URL format error")
     mime = header.split(";")[0].split(":", 1)[-1].strip()
     ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")
     fname = orig_name if (orig_name and "." in orig_name) else f"upload_{uuid.uuid4().hex}.{ext}"
+    print(f"  → [_dataurl_to_file] MIME: {mime}, 副檔名: {ext}")
+    print(f"  → [_dataurl_to_file] 目標檔名: {fname}")
+    print(f"  → [_dataurl_to_file] Base64 長度: {len(b64)}")
     with open(fname, "wb") as f:
         f.write(base64.b64decode(b64))
+    file_size = os.path.getsize(fname)
+    print(f"  → [_dataurl_to_file] ✅ 檔案已建立, 大小: {file_size} bytes")
     return fname
 def _extract_effective_path(file_obj) -> str:
     """從各種格式中提取有效檔案路徑"""
+    print(f"\n[_extract_effective_path] 開始解析檔案...")
+    print(f"[_extract_effective_path] 收到類型: {type(file_obj)}")
+    print(f"[_extract_effective_path] 收到內容前100字: {str(file_obj)[:100]}...")
     # 字串模式
     if isinstance(file_obj, str):
         s = file_obj.strip().strip('"')
+        print(f"  → [模式 A] 字串模式")
         if s.startswith("data:"):
+            print(f"  → [模式 A] 偵測到 data URL, 長度: {len(s)}")
             return _dataurl_to_file(s, None)
         if os.path.isfile(s):
+            print(f"  → [模式 A] 找到檔案路徑: {s}")
             return s
     # 字典模式
     if isinstance(file_obj, dict):
+        print(f"  → [模式 B] 字典模式")
+        print(f"  → [模式 B] Keys: {list(file_obj.keys())}")
         data = file_obj.get("data")
         if isinstance(data, str) and data.startswith("data:"):
+            print(f"  → [模式 B] 找到 data URL! 長度: {len(data)}")
             return _dataurl_to_file(data, file_obj.get("orig_name"))
         p = str(file_obj.get("path") or "").strip().strip('"')
         if p and os.path.isfile(p):
+            print(f"  → [模式 B] 找到 path: {p}")
             return p
     # 物件模式
+    print(f"  → [模式 C] 物件模式")
     for attr in ("name", "path"):
         p = getattr(file_obj, attr, None)
         if isinstance(p, str):
             s = p.strip().strip('"')
             if os.path.isfile(s):
+                print(f"  → [模式 C] 找到屬性 {attr}: {s}")
                 return s
+    print(f"[_extract_effective_path] ❌ 無法解析檔案")
     raise FileNotFoundError("Cannot parse uploaded file")
 # ====== 分段處理 ======
 def split_audio(path):
+    print(f"\n[split_audio] 檢查檔案大小...")
     size = os.path.getsize(path)
+    print(f"[split_audio] 檔案大小: {size} bytes ({size/1024/1024:.2f} MB)")
     if size <= MAX_SIZE:
+        print(f"[split_audio] 檔案小於 25MB, 不需分割")
         return [path]
+    print(f"[split_audio] 檔案大於 25MB, 開始分割...")
     audio = AudioSegment.from_file(path)
     n = int(size / MAX_SIZE) + 1
     chunk_ms = len(audio) / n
+    print(f"[split_audio] 將分割成 {n} 個片段, 每段約 {chunk_ms/1000:.1f} 秒")
     parts = []
     for i in range(n):
         fn = f"chunk_{i+1}.wav"
         audio[int(i*chunk_ms):int((i+1)*chunk_ms)].export(fn, format="wav")
+        print(f"[split_audio] 已產生片段 {i+1}/{n}: {fn}")
         parts.append(fn)
     return parts
 # ====== 轉錄核心 ======
 def transcribe_core(path, model="whisper-1"):
+    print(f"\n{'='*60}")
+    print(f"[transcribe_core] 開始轉錄流程")
+    print(f"[transcribe_core] 檔案路徑: {path}")
+    print(f"{'='*60}")
+    start_time = time.time()
     if path.lower().endswith(".mp4"):
+        print(f"[transcribe_core] 偵測到 .mp4 檔案, 轉換為 .m4a")
         fixed = path[:-4] + ".m4a"
         try:
             shutil.copy(path, fixed)
             path = fixed
+            print(f"[transcribe_core] ✅ 已轉換: {path}")
+        except Exception as e:
+            print(f"[transcribe_core] ⚠️ 轉換失敗: {e}")
+    print(f"\n[transcribe_core] === 步驟 1: 分割音檔 ===")
     chunks = split_audio(path)
+    print(f"[transcribe_core] 共 {len(chunks)} 個片段")
+    print(f"\n[transcribe_core] === 步驟 2: Whisper 轉錄 ===")
     raw = []
+    for i, c in enumerate(chunks, 1):
+        print(f"[transcribe_core] 轉錄片段 {i}/{len(chunks)}: {c}")
+        chunk_start = time.time()
         with open(c, "rb") as af:
             txt = client.audio.transcriptions.create(
                 model=model, file=af, response_format="text"
             )
             raw.append(txt)
+        chunk_time = time.time() - chunk_start
+        print(f"[transcribe_core] ✅ 片段 {i} 完成 (耗時 {chunk_time:.1f}秒)")
+        print(f"[transcribe_core] 片段 {i} 內容: {txt[:100]}...")
     raw_txt = "\n".join(raw)
+    print(f"\n[transcribe_core] 原始轉錄總長度: {len(raw_txt)} 字元")
+    print(f"[transcribe_core] 原始內容前200字: {raw_txt[:200]}...")
+    print(f"\n[transcribe_core] === 步驟 3: 簡轉繁 ===")
+    conv_start = time.time()
     conv = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
         temperature=0.0
     )
     trad = conv.choices[0].message.content.strip()
+    conv_time = time.time() - conv_start
+    print(f"[transcribe_core] ✅ 繁體轉換完成 (耗時 {conv_time:.1f}秒)")
+    print(f"[transcribe_core] 繁體內容長度: {len(trad)} 字元")
+    print(f"[transcribe_core] 繁體內容前200字: {trad[:200]}...")
+    print(f"\n[transcribe_core] === 步驟 4: AI 摘要 ===")
+    summ_start = time.time()
     summ = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
         ],
         temperature=0.2
     )
+    summary = summ.choices[0].message.content.strip()
+    summ_time = time.time() - summ_start
+    print(f"[transcribe_core] ✅ 摘要完成 (耗時 {summ_time:.1f}秒)")
+    print(f"[transcribe_core] 摘要內容: {summary}")
+    total_time = time.time() - start_time
+    print(f"\n{'='*60}")
+    print(f"[transcribe_core] ✅✅✅ 轉錄流程全部完成!")
+    print(f"[transcribe_core] 總耗時: {total_time:.1f} 秒")
+    print(f"{'='*60}\n")
+    return trad, summary
 # ====== Gradio UI 函式 ======
 def transcribe_ui(password, file):
     """網頁版轉錄函式"""
+    print(f"\n{'🌐'*30}")
+    print(f"🎯 [UI] 收到網頁版請求")
+    print(f"🔑 [UI] 密碼: {password[:2] if password else ''}*** (長度: {len(password) if password else 0})")
+    print(f"📁 [UI] 檔案類型: {type(file)}")
+    print(f"{'🌐'*30}")
     if not password or password.strip() != PASSWORD:
+        print(f"❌ [UI] 密碼驗證失敗")
         return "❌ Password incorrect", "", ""
     if not file:
+        print(f"❌ [UI] 未收到檔案")
         return "⚠️ No file uploaded", "", ""
     try:
         path = _extract_effective_path(file)
+        print(f"✅ [UI] 檔案解析成功: {path}")
         text, summary = transcribe_core(path)
+        print(f"✅ [UI] 轉錄完成, 準備返回結果")
         return "✅ Transcription completed", text, summary
     except Exception as e:
+        import traceback
+        error_trace = traceback.format_exc()
+        print(f"❌ [UI] 發生錯誤:\n{error_trace}")
         return f"❌ Error: {e}", "", ""
+# ====== API 函式 ======
 def transcribe_api(password, file_data, file_name):
     """
     API 版本的轉錄函式
     """
+    print(f"\n{'📱'*30}")
+    print(f"🎯 [API] 收到 API 請求")
+    print(f"🔑 [API] 密碼: {password[:2] if password else ''}*** (長度: {len(password) if password else 0})")
+    print(f"📁 [API] file_data 類型: {type(file_data)}")
+    print(f"📁 [API] file_data 長度: {len(file_data) if file_data else 0}")
+    print(f"📁 [API] file_data 前50字: {str(file_data)[:50] if file_data else 'None'}...")
+    print(f"📁 [API] file_name: {file_name}")
+    print(f"{'📱'*30}")
     if not password or password.strip() != PASSWORD:
+        result = {
             "status": "error",
             "error": "Password incorrect",
             "transcription": "",
             "summary": ""
         }
+        print(f"❌ [API] 密碼驗證失敗")
+        print(f"[API] 返回結果: {json.dumps(result, ensure_ascii=False, indent=2)}")
+        return result
     if not file_data or not file_data.startswith("data:"):
+        result = {
             "status": "error",
             "error": "Invalid file data format. Must be data:audio/...;base64,...",
             "transcription": "",
             "summary": ""
         }
+        print(f"❌ [API] 檔案格式錯誤")
+        print(f"[API] 返回結果: {json.dumps(result, ensure_ascii=False, indent=2)}")
+        return result
     try:
         file_dict = {
             "data": file_data,
             "orig_name": file_name or "recording.m4a"
         }
+        print(f"[API] 開始解析檔案...")
         path = _extract_effective_path(file_dict)
+        print(f"✅ [API] 檔案解析成功: {path}")
+        print(f"[API] 開始轉錄流程...")
         text, summary = transcribe_core(path)
+        result = {
             "status": "success",
             "transcription": text,
             "summary": summary
         }
+        print(f"\n{'✅'*30}")
+        print(f"✅✅✅ [API] 全部完成!")
+        print(f"[API] 轉錄長度: {len(text)} 字元")
+        print(f"[API] 摘要長度: {len(summary)} 字元")
+        print(f"[API] 返回結果:")
+        print(json.dumps(result, ensure_ascii=False, indent=2))
+        print(f"{'✅'*30}\n")
+        return result
     except Exception as e:
         import traceback
+        error_trace = traceback.format_exc()
+        print(f"\n{'❌'*30}")
+        print(f"❌ [API] 發生錯誤:")
+        print(error_trace)
+        print(f"{'❌'*30}\n")
+        result = {
             "status": "error",
             "error": str(e),
             "transcription": "",
             "summary": ""
         }
+        print(f"[API] 返回錯誤結果: {json.dumps(result, ensure_ascii=False, indent=2)}")
+        return result
 # ====== Gradio 介面 ======
 with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo:
         gr.Markdown("""
         ### For iPhone Shortcuts & Automation
+        Test the API endpoint here before using in iPhone Shortcuts.
         """)
         with gr.Row():
             inputs=[pw_api, file_data_api, file_name_api],
             outputs=[result_api],
             api_name="transcribe",
+            queue=False  # 🔴 關鍵: 禁用 queue
         )
         gr.Markdown("""
         ---
+        ### 📖 iPhone Shortcuts Configuration
+        **Endpoint**: `/gradio_api/call/transcribe`
         **Request Format (JSON)**:
         ```json
         **Response Format**:
         ```json
         {
+          "data": {
+            "status": "success",
+            "transcription": "轉錄內容...",
+            "summary": "摘要..."
+          }
         }
         ```
         """)
     gr.Markdown("""
 # ====== 啟動 ======
 if __name__ == "__main__":
+    print("\n" + "="*60)
+    print("準備啟動 Gradio 應用...")
+    print("="*60 + "\n")
     demo.launch(
         server_name="0.0.0.0",
         server_port=7860,
+        show_api=True
     )