| import os, shutil, base64, uuid, mimetypes, json, time |
| from pydub import AudioSegment |
| from openai import OpenAI |
| import gradio as gr |
| from fastapi import FastAPI, Request |
| from fastapi.responses import JSONResponse |
| from fastapi.middleware.cors import CORSMiddleware |
|
|
| |
# Runtime configuration, read once at import time.
# NOTE: the "chou" fallback is kept so existing deployments keep working;
# set APP_PASSWORD in the environment for anything reachable by others.
PASSWORD = os.getenv("APP_PASSWORD", "chou")
# Size threshold that split_audio() compares an input file against.
MAX_SIZE = 25 * 1024 * 1024
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


print("===== 🚀 啟動中 =====")
# Test the environment variable itself: PASSWORD is always truthy because of
# the fallback above, so checking it would report "loaded" even when
# APP_PASSWORD was never set.
print(f"APP_PASSWORD: {'✅ 已載入' if os.getenv('APP_PASSWORD') else '❌ 未載入'}")
|
|
| |
| MIME_EXT = { |
| "audio/mp4": "m4a", "audio/m4a": "m4a", "audio/aac": "aac", |
| "audio/mpeg": "mp3", "audio/wav": "wav", "audio/x-wav": "wav", |
| "audio/ogg": "ogg", "audio/webm": "webm", "audio/opus": "opus", |
| "video/mp4": "mp4", |
| } |
|
|
| def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str: |
| """將 data URL 轉換為本地檔案""" |
| print(f" → [_dataurl_to_file] 開始處理 data URL...") |
| try: |
| header, b64 = data_url.split(",", 1) |
| except ValueError: |
| raise ValueError("data URL format error") |
| mime = header.split(";")[0].split(":", 1)[-1].strip() |
| ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".") |
| fname = orig_name if (orig_name and "." in orig_name) else f"upload_{uuid.uuid4().hex}.{ext}" |
| print(f" → [_dataurl_to_file] 檔名: {fname}, Base64長度: {len(b64)}") |
| with open(fname, "wb") as f: |
| f.write(base64.b64decode(b64)) |
| file_size = os.path.getsize(fname) |
| print(f" → [_dataurl_to_file] ✅ 檔案已建立, 大小: {file_size} bytes") |
| return fname |
|
|
def _extract_effective_path(file_obj) -> str:
    """Resolve an uploaded-file value to the path of a file that exists.

    Accepted shapes, tried in this order:

    * ``os.PathLike`` - converted with ``os.fspath`` and handled as a string;
    * ``str`` - a ``data:`` URL (decoded to a new file by
      ``_dataurl_to_file``) or the path of an existing file;
    * ``dict`` - a ``data`` key holding a ``data:`` URL (with an optional
      ``orig_name``), otherwise a ``path`` or ``name`` key naming an
      existing file;
    * any other object - a ``name`` or ``path`` string attribute naming an
      existing file.

    Surrounding whitespace and double quotes are stripped from every path
    candidate before it is checked.

    Args:
        file_obj: The value received from the UI component or built by the
            API handler.

    Returns:
        Path of an existing file.

    Raises:
        FileNotFoundError: If no shape yields an existing file.
    """
    print(f"[_extract_effective_path] 收到類型: {type(file_obj)}")

    def existing_file(candidate):
        """Return the cleaned candidate if it names an existing file, else None."""
        if isinstance(candidate, os.PathLike):
            candidate = os.fspath(candidate)
        if not isinstance(candidate, str):
            return None
        cleaned = candidate.strip().strip('"')
        return cleaned if cleaned and os.path.isfile(cleaned) else None

    # A Path object would otherwise be matched through its ``name``
    # attribute, which is only the base name and is resolved against the
    # working directory.
    if isinstance(file_obj, os.PathLike):
        file_obj = os.fspath(file_obj)

    if isinstance(file_obj, str):
        s = file_obj.strip().strip('"')
        if s.startswith("data:"):
            print(" → 偵測到 data URL")
            return _dataurl_to_file(s, None)
        found = existing_file(s)
        if found:
            print(f" → 找到檔案路徑: {found}")
            return found

    if isinstance(file_obj, dict):
        print(f" → 字典模式, Keys: {list(file_obj.keys())}")
        data = file_obj.get("data")
        if isinstance(data, str) and data.startswith("data:"):
            print(" → 找到 data URL")
            return _dataurl_to_file(data, file_obj.get("orig_name"))
        # "name" is accepted as well as "path"; the isfile check keeps a plain
        # display name from being mistaken for a path.
        for key in ("path", "name"):
            found = existing_file(file_obj.get(key))
            if found:
                return found

    for attr in ("name", "path"):
        found = existing_file(getattr(file_obj, attr, None))
        if found:
            return found

    raise FileNotFoundError("Cannot parse uploaded file")
|
|
| |
def _chunk_bounds(total_ms, bytes_per_ms, max_bytes):
    """Return (start_ms, end_ms) spans covering total_ms, each at most max_bytes of PCM."""
    # Keep 5% headroom for the WAV header and for rounding.
    span_ms = max(1, int(max_bytes * 0.95 / bytes_per_ms))
    return [(s, min(s + span_ms, total_ms)) for s in range(0, total_ms, span_ms)]


def split_audio(path, max_size=None):
    """Split an audio file into WAV chunks that each stay under a size limit.

    A file that already fits is returned unchanged. Otherwise the audio is
    decoded, reduced to 16 kHz mono 16-bit PCM and cut into consecutive
    chunks whose *exported* size is within the limit. The chunks are written
    to the system temporary directory under names unique to this call.

    Args:
        path: Audio file to split.
        max_size: Size limit in bytes. Defaults to the module-level
            ``MAX_SIZE``.

    Returns:
        ``[path]`` when no split is needed (or the audio is empty),
        otherwise the chunk file paths in playback order.
    """
    import tempfile

    limit = MAX_SIZE if max_size is None else max_size
    size = os.path.getsize(path)
    print(f"[split_audio] 檔案大小: {size} bytes ({size/1024/1024:.2f} MB)")
    if size <= limit:
        print("[split_audio] 不需分割")
        return [path]

    print("[split_audio] 開始分割...")
    # The chunks are exported as uncompressed WAV, so their size depends on
    # the PCM byte rate, not on the size of the (usually compressed) input.
    # Sizing them from the input size produced chunks far above the limit.
    # 16 kHz mono keeps the byte rate low so each chunk can hold more audio.
    # NOTE(review): assumes 16 kHz mono is sufficient for the transcription
    # model - confirm before relying on it for music or multi-speaker stereo.
    audio = (
        AudioSegment.from_file(path)
        .set_channels(1)
        .set_frame_rate(16000)
        .set_sample_width(2)
    )
    bytes_per_ms = audio.frame_rate * audio.frame_width / 1000
    bounds = _chunk_bounds(len(audio), bytes_per_ms, limit)
    if not bounds:
        # Zero-length audio: nothing to cut, hand the original back.
        return [path]

    print(f"[split_audio] 分割成 {len(bounds)} 個片段")
    out_dir = tempfile.gettempdir()
    token = uuid.uuid4().hex  # keeps concurrent calls from overwriting each other
    parts = []
    for i, (start_ms, end_ms) in enumerate(bounds, 1):
        fn = os.path.join(out_dir, f"chunk_{token}_{i}.wav")
        audio[start_ms:end_ms].export(fn, format="wav")
        parts.append(fn)
    return parts
|
|
| |
def _chat_text(system, user, temperature):
    """Run one gpt-4o-mini chat completion and return its stripped text ('' if none)."""
    reply = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        temperature=temperature,
    )
    # ``content`` may be None; treat that as an empty answer.
    return (reply.choices[0].message.content or "").strip()


def transcribe_core(path, model="whisper-1"):
    """Transcribe an audio file, convert the text to Traditional Chinese and summarise it.

    Steps:
      1. An ``.mp4`` input is copied to a temporary ``.m4a`` file.
      2. The audio is split with ``split_audio``.
      3. Each chunk is sent to the transcription endpoint; the texts are
         joined with newlines.
      4. One chat completion converts the transcript to Taiwan Traditional
         Chinese, a second one summarises the converted text.

    Files created along the way (the ``.m4a`` copy and any chunk files) are
    deleted before returning; ``path`` itself is left untouched.

    Args:
        path: Audio file to transcribe.
        model: Name of the transcription model.

    Returns:
        ``(traditional_text, summary)``. Both are empty strings when the
        transcription produced no text.
    """
    import tempfile

    print(f"\n{'='*60}")
    print(f"[transcribe_core] 開始轉錄: {path}")
    print(f"{'='*60}")

    start_time = time.time()
    temp_files = []  # everything this call creates and must clean up

    try:
        if path.lower().endswith(".mp4"):
            # Upload under an .m4a name instead of .mp4. The copy goes to the
            # temp directory so it cannot overwrite a sibling file.
            fixed = os.path.join(tempfile.gettempdir(), f"upload_{uuid.uuid4().hex}.m4a")
            try:
                shutil.copy(path, fixed)
            except OSError as exc:
                # Best effort: fall back to sending the .mp4 as it is.
                print(f"[transcribe_core] ⚠️ 無法複製為 .m4a,改用原始檔案: {exc}")
            else:
                temp_files.append(fixed)
                path = fixed

        chunks = split_audio(path)
        temp_files.extend(c for c in chunks if c != path)
        print(f"\n[transcribe_core] === Whisper 轉錄 ({len(chunks)} 片段) ===")
        raw = []
        for i, c in enumerate(chunks, 1):
            print(f"[transcribe_core] 轉錄片段 {i}/{len(chunks)}")
            with open(c, "rb") as af:
                txt = client.audio.transcriptions.create(
                    model=model, file=af, response_format="text"
                )
            raw.append(txt)
            print(f"[transcribe_core] ✅ 片段 {i} 完成")

        raw_txt = "\n".join(raw)
        print(f"[transcribe_core] 原始轉錄: {len(raw_txt)} 字元")

        if not raw_txt.strip():
            # Nothing was recognised; asking the chat model to convert or
            # summarise an empty text would only produce made-up content.
            print("[transcribe_core] ⚠️ 轉錄結果為空,略過轉換與摘要")
            return "", ""

        print("\n[transcribe_core] === 簡轉繁 ===")
        trad = _chat_text(
            "你是嚴格的繁體中文轉換器",
            f"將以下內容轉為台灣繁體,不意譯:\n{raw_txt}",
            0.0,
        )
        print(f"[transcribe_core] ✅ 繁體轉換完成: {len(trad)} 字元")

        print("\n[transcribe_core] === AI 摘要 ===")
        summary = _chat_text(
            "你是繁體摘要助手",
            f"請用台灣繁體中文摘要;內容多則條列重點,內容短則一句話:\n{trad}",
            0.2,
        )

        total_time = time.time() - start_time
        print(f"\n{'='*60}")
        print(f"[transcribe_core] ✅✅✅ 全部完成! 總耗時: {total_time:.1f}秒")
        print(f"{'='*60}\n")

        return trad, summary
    finally:
        for leftover in temp_files:
            try:
                os.remove(leftover)
            except OSError:
                # Cleanup is best effort and must not mask the real outcome.
                pass
|
|
| |
def transcribe_ui(password, file):
    """Handle a transcription request from the web page.

    Args:
        password: Text typed into the password field.
        file: Uploaded-file value, in any shape accepted by
            ``_extract_effective_path``.

    Returns:
        ``(status, transcription, summary)``. On a wrong password, a missing
        file or any processing error the last two items are empty strings
        and ``status`` carries the message.
    """
    import hmac
    import traceback

    print("\n🌐 [UI] 網頁版請求")
    # Constant-time comparison so response timing does not reveal how much of
    # the password matched. Encoding to bytes lets non-ASCII text be compared.
    if (
        not isinstance(password, str)
        or not password
        or not hmac.compare_digest(
            password.strip().encode("utf-8"), PASSWORD.encode("utf-8")
        )
    ):
        return "❌ Password incorrect", "", ""
    if not file:
        return "⚠️ No file uploaded", "", ""
    try:
        path = _extract_effective_path(file)
        text, summary = transcribe_core(path)
        return "✅ Transcription completed", text, summary
    except Exception as e:
        # Boundary with the UI: log the traceback, show only the message.
        print(f"❌ [UI] 錯誤:\n{traceback.format_exc()}")
        return f"❌ Error: {e}", "", ""
|
|
| |
fastapi_app = FastAPI()


# CORS policy for the whole application.
# The JSON API authenticates with a password inside the request body, not
# with cookies, so cross-origin requests do not need to carry credentials.
# Wildcard origins together with allow_credentials=True would let any web
# page send credentialed requests here; the FastAPI documentation asks for
# explicit origins in that case, so credentials are switched off instead.
fastapi_app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
| |
@fastapi_app.post("/api/transcribe")
async def api_transcribe_sync(request: Request):
    """Transcribe a base64-encoded recording and return the result in one response.

    Expected JSON body::

        {
            "password": "your_password",
            "file_data": "data:audio/m4a;base64,...",
            "file_name": "recording.m4a"
        }

    Responses:
        200: ``{"status": "success", "transcription": ..., "summary": ...}``
        400: the body is not a JSON object, or ``file_data`` is not a data URL
        401: the password is missing or wrong
        500: any failure while decoding or transcribing
    """
    import asyncio
    import hmac
    import re
    import traceback

    path = None  # file decoded from this request; removed again in ``finally``
    try:
        try:
            body = await request.json()
        except ValueError:
            # Malformed JSON is a client error, not a server failure.
            body = None
        if not isinstance(body, dict):
            print("❌ [SYNC API] 請求內容不是 JSON 物件")
            return JSONResponse(
                status_code=400,
                content={"status": "error", "error": "Invalid JSON body"}
            )

        print(f"\n{'📱'*30}")
        print("🎯 [SYNC API] 收到同步 API 請求")
        print(f"📦 Keys: {list(body.keys())}")
        print(f"{'📱'*30}")

        password = body.get("password", "")
        # Constant-time comparison; a non-string password is simply wrong.
        if not isinstance(password, str) or not hmac.compare_digest(
            password.strip().encode("utf-8"), PASSWORD.encode("utf-8")
        ):
            print("❌ [SYNC API] 密碼錯誤")
            return JSONResponse(
                status_code=401,
                content={"status": "error", "error": "Password incorrect"}
            )

        file_data = body.get("file_data", "")
        file_name = body.get("file_name", "recording.m4a")

        if not isinstance(file_data, str) or not file_data.startswith("data:"):
            print("❌ [SYNC API] 檔案格式錯誤")
            return JSONResponse(
                status_code=400,
                content={"status": "error", "error": "Invalid file data format"}
            )

        print(f"[SYNC API] 檔案長度: {len(file_data)}, 檔名: {file_name}")

        # Never let a client-chosen name become a path: keep only a plain
        # extension and generate the rest. With no usable extension the name
        # is left out and the decoder derives one from the MIME type.
        ext = os.path.splitext(file_name)[1].lower() if isinstance(file_name, str) else ""
        safe_name = (
            f"upload_{uuid.uuid4().hex}{ext}"
            if re.fullmatch(r"\.[a-z0-9]{1,8}", ext)
            else None
        )
        file_dict = {"data": file_data, "orig_name": safe_name}

        # Decoding and transcription are blocking calls; run them in a worker
        # thread so the event loop can keep serving other requests.
        path = await asyncio.to_thread(_extract_effective_path, file_dict)
        print(f"✅ [SYNC API] 檔案解析成功: {path}")

        text, summary = await asyncio.to_thread(transcribe_core, path)

        result = {
            "status": "success",
            "transcription": text,
            "summary": summary
        }

        print(f"\n{'✅'*30}")
        print("✅✅✅ [SYNC API] 完成! 返回結果")
        print(json.dumps(result, ensure_ascii=False, indent=2))
        print(f"{'✅'*30}\n")

        return JSONResponse(content=result)

    except Exception as e:
        # Boundary with the client: log the traceback, return only the message.
        error_trace = traceback.format_exc()
        print(f"\n{'❌'*30}")
        print(f"❌ [SYNC API] 錯誤:\n{error_trace}")
        print(f"{'❌'*30}\n")
        return JSONResponse(
            status_code=500,
            content={"status": "error", "error": str(e)}
        )
    finally:
        # The upload was decoded into a file by this handler, so this handler
        # removes it again.
        if path is not None:
            try:
                os.remove(path)
            except OSError:
                pass
|
|
| |
# Stylesheet passed to gr.Blocks(css=...).
# The final "pre code" rule is needed because fenced code blocks render as
# <pre><code>: without it the inline-code colours set on "code" also apply
# inside "pre" and override the dark code-block colours set there.
custom_css = """
.gradio-container {
    max-width: 1200px !important;
    margin: auto !important;
}

/* 主標題 */
.main-header {
    text-align: center;
    padding: 2.5rem 1rem;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    border-radius: 12px;
    margin-bottom: 2rem;
    color: white;
}

.main-header h1 {
    font-size: 2.2rem;
    margin: 0 0 0.5rem 0;
    font-weight: 700;
}

.main-header p {
    font-size: 1rem;
    margin: 0;
    opacity: 0.95;
}

/* 按鈕 */
.primary-btn {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border: none !important;
    color: white !important;
    font-weight: 600 !important;
    font-size: 1.05rem !important;
}

/* 文字框 */
textarea {
    font-size: 0.95rem !important;
    line-height: 1.6 !important;
}

/* 資訊卡片 */
.info-box {
    background: #f0f9ff;
    border-left: 4px solid #3b82f6;
    padding: 1rem;
    border-radius: 6px;
    margin: 1rem 0;
    font-size: 0.9rem;
}

/* 程式碼 */
pre {
    background: #1f2937 !important;
    color: #f3f4f6 !important;
    padding: 1rem !important;
    border-radius: 6px !important;
    font-size: 0.85rem !important;
}

code {
    background: #e5e7eb !important;
    color: #1f2937 !important;
    padding: 0.2rem 0.4rem !important;
    border-radius: 3px !important;
    font-size: 0.9rem !important;
}

pre code {
    background: transparent !important;
    color: inherit !important;
    padding: 0 !important;
    font-size: inherit !important;
}
"""
|
|
| |
# Markdown shown in the "API Documentation" tab. It is kept flush-left so it
# renders the same whether or not the Markdown component dedents its input.
# The examples use a placeholder password on purpose: this page is visible to
# every visitor, so it must never show a working credential.
_API_DOCS_MD = """
## API Endpoint

**URL:** `/api/transcribe` (POST)
**Type:** Synchronous - returns complete results in one request

### Request Format

```json
{
  "password": "your_password",
  "file_data": "data:audio/m4a;base64,UklGR...",
  "file_name": "recording.m4a"
}
```

### Response Format

```json
{
  "status": "success",
  "transcription": "Full transcription text...",
  "summary": "AI-generated summary..."
}
```

---

## iPhone Shortcuts Setup

1. **Get File** → Audio recording
2. **Base64 Encode** → File content
3. **Text** → Create data URL:
   ```
   data:audio/m4a;base64,[Base64 Result]
   ```
4. **Dictionary** → Request body:
   - `password`: `your_password`
   - `file_data`: [Text from step 3]
   - `file_name`: `recording.m4a`
5. **Get Contents of URL**:
   - URL: `https://your-domain.com/api/transcribe`
   - Method: `POST`
   - Headers: `Content-Type: application/json`
   - Body: [Dictionary], Type: `JSON`
6. **Get Dictionary Value**:
   - `transcription` → Full text
   - `summary` → Summary

---

## Testing with cURL

```bash
curl -X POST https://your-domain.com/api/transcribe \\
  -H "Content-Type: application/json" \\
  -d '{
    "password": "your_password",
    "file_data": "data:audio/m4a;base64,AAAA...",
    "file_name": "test.m4a"
  }'
```

---

## Technical Details

- **Transcription:** OpenAI Whisper (high accuracy)
- **Summarization:** GPT-4o-mini
- **Output:** Traditional Chinese (Taiwan)
- **Processing:** Fully synchronous, no polling needed
- **File Handling:** Auto-split for files > 25MB

---

## Error Codes

- `401` - Incorrect password
- `400` - Invalid file format
- `500` - Processing error

For support, contact your administrator.
"""


# Web interface: an upload tab wired to transcribe_ui, plus a static tab with
# the API documentation.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Audio Transcription") as demo:

    gr.HTML("""
    <div class="main-header">
        <h1>🎧 Audio Transcription Service</h1>
        <p>AI-Powered Speech-to-Text with Summarization</p>
    </div>
    """)

    with gr.Tabs():

        with gr.Tab("🌐 Web Upload"):
            with gr.Row():
                with gr.Column(scale=1):
                    pw = gr.Textbox(label="Password", type="password", placeholder="Enter password")
                    audio_file = gr.File(label="Audio File", file_types=["audio", ".mp4"])
                    submit_btn = gr.Button("🚀 Start Transcription", variant="primary", elem_classes="primary-btn")

                    gr.HTML("""
                    <div class="info-box">
                        <strong>Supported:</strong> MP3, M4A, WAV, OGG, WEBM, MP4<br>
                        <strong>Max Size:</strong> Auto-split for large files
                    </div>
                    """)

                with gr.Column(scale=2):
                    status = gr.Textbox(label="Status", interactive=False)
                    transcription = gr.Textbox(label="Transcription", lines=12, show_copy_button=True)
                    summary = gr.Textbox(label="Summary", lines=5, show_copy_button=True)

            submit_btn.click(transcribe_ui, [pw, audio_file], [status, transcription, summary])

        with gr.Tab("📱 API Documentation"):
            gr.Markdown(_API_DOCS_MD)

    gr.HTML("""
    <div style="text-align: center; margin-top: 2rem; padding: 1.5rem; background: #f9fafb; border-radius: 8px;">
        <p style="color: #6b7280; font-size: 0.9rem; margin: 0;">
            Audio Transcription Service v2.0 | Powered by OpenAI
        </p>
    </div>
    """)
|
|
| |
# Serve the Gradio interface at "/" from the same FastAPI application that
# carries the /api/transcribe route.
app = gr.mount_gradio_app(fastapi_app, demo, path="/")


if __name__ == "__main__":
    # Startup banner, one print per line exactly as before.
    rule = "=" * 60
    banner = (
        "\n" + rule,
        "🚀 啟動 FastAPI + Gradio 應用",
        "📱 同步 API: /api/transcribe",
        "🌐 網頁介面: /",
        rule + "\n",
    )
    for line in banner:
        print(line)

    # Imported here so uvicorn is only needed when run as a script.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)