MichaelChou0806 committed on
Commit
6c57120
·
verified ·
1 Parent(s): bc06406

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +323 -212
app.py CHANGED
@@ -14,7 +14,7 @@ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
14
  print("===== 🚀 啟動中 =====")
15
  print(f"APP_PASSWORD: {'✅ 已載入' if PASSWORD else '❌ 未載入'}")
16
 
17
- # ====== 工具:把 data:URL 轉成臨時檔 ======
18
  MIME_EXT = {
19
  "audio/mp4": "m4a", "audio/m4a": "m4a", "audio/aac": "aac",
20
  "audio/mpeg": "mp3", "audio/wav": "wav", "audio/x-wav": "wav",
@@ -24,41 +24,31 @@ MIME_EXT = {
24
 
25
  def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
26
  """將 data URL 轉換為本地檔案"""
27
- print(f" → [_dataurl_to_file] 開始處理 data URL...")
28
  try:
29
  header, b64 = data_url.split(",", 1)
30
  except ValueError:
31
- raise ValueError("data URL format error")
32
  mime = header.split(";")[0].split(":", 1)[-1].strip()
33
  ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")
34
  fname = orig_name if (orig_name and "." in orig_name) else f"upload_{uuid.uuid4().hex}.{ext}"
35
- print(f" → [_dataurl_to_file] 檔名: {fname}, Base64長度: {len(b64)}")
36
  with open(fname, "wb") as f:
37
  f.write(base64.b64decode(b64))
38
- file_size = os.path.getsize(fname)
39
- print(f" → [_dataurl_to_file] ✅ 檔案已建立, 大小: {file_size} bytes")
40
  return fname
41
 
42
  def _extract_effective_path(file_obj) -> str:
43
  """從各種格式中提取有效檔案路徑"""
44
- print(f"[_extract_effective_path] 收到類型: {type(file_obj)}")
45
-
46
  # 字串模式
47
  if isinstance(file_obj, str):
48
  s = file_obj.strip().strip('"')
49
  if s.startswith("data:"):
50
- print(f" → 偵測到 data URL")
51
  return _dataurl_to_file(s, None)
52
  if os.path.isfile(s):
53
- print(f" → 找到檔案路徑: {s}")
54
  return s
55
 
56
  # 字典模式
57
  if isinstance(file_obj, dict):
58
- print(f" → 字典模式, Keys: {list(file_obj.keys())}")
59
  data = file_obj.get("data")
60
  if isinstance(data, str) and data.startswith("data:"):
61
- print(f" → 找到 data URL")
62
  return _dataurl_to_file(data, file_obj.get("orig_name"))
63
  p = str(file_obj.get("path") or "").strip().strip('"')
64
  if p and os.path.isfile(p):
@@ -74,19 +64,15 @@ def _extract_effective_path(file_obj) -> str:
74
 
75
  raise FileNotFoundError("Cannot parse uploaded file")
76
 
77
- # ====== 分段處理 ======
78
  def split_audio(path):
79
  """將音訊檔案分割成多個小於 25MB 的片段"""
80
  size = os.path.getsize(path)
81
- print(f"[split_audio] 檔案大小: {size} bytes ({size/1024/1024:.2f} MB)")
82
  if size <= MAX_SIZE:
83
- print(f"[split_audio] 不需分割")
84
  return [path]
85
- print(f"[split_audio] 開始分割...")
86
  audio = AudioSegment.from_file(path)
87
  n = int(size / MAX_SIZE) + 1
88
  chunk_ms = len(audio) / n
89
- print(f"[split_audio] 分割成 {n} 個片段")
90
  parts = []
91
  for i in range(n):
92
  fn = f"chunk_{i+1}.wav"
@@ -94,7 +80,6 @@ def split_audio(path):
94
  parts.append(fn)
95
  return parts
96
 
97
- # ====== 轉錄核心 ======
98
  def transcribe_core(path, model="whisper-1"):
99
  """使用 Whisper 進行語音轉錄,並使用 GPT 進行繁簡轉換和摘要"""
100
  print(f"\n{'='*60}")
@@ -112,9 +97,8 @@ def transcribe_core(path, model="whisper-1"):
112
  except:
113
  pass
114
 
115
- # 分割音訊
116
  chunks = split_audio(path)
117
- print(f"\n[transcribe_core] === Whisper 轉錄 ({len(chunks)} 片段) ===")
118
  raw = []
119
  for i, c in enumerate(chunks, 1):
120
  print(f"[transcribe_core] 轉錄片段 {i}/{len(chunks)}")
@@ -123,13 +107,10 @@ def transcribe_core(path, model="whisper-1"):
123
  model=model, file=af, response_format="text"
124
  )
125
  raw.append(txt)
126
- print(f"[transcribe_core] ✅ 片段 {i} 完成")
127
 
128
  raw_txt = "\n".join(raw)
129
- print(f"[transcribe_core] 原始轉錄: {len(raw_txt)} 字元")
130
 
131
  # 簡轉繁
132
- print(f"\n[transcribe_core] === 簡轉繁 ===")
133
  conv = client.chat.completions.create(
134
  model="gpt-4o-mini",
135
  messages=[
@@ -139,10 +120,8 @@ def transcribe_core(path, model="whisper-1"):
139
  temperature=0.0
140
  )
141
  trad = conv.choices[0].message.content.strip()
142
- print(f"[transcribe_core] ✅ 繁體轉換完成: {len(trad)} 字元")
143
 
144
  # AI 摘要
145
- print(f"\n[transcribe_core] === AI 摘要 ===")
146
  summ = client.chat.completions.create(
147
  model="gpt-4o-mini",
148
  messages=[
@@ -154,33 +133,47 @@ def transcribe_core(path, model="whisper-1"):
154
  summary = summ.choices[0].message.content.strip()
155
 
156
  total_time = time.time() - start_time
157
- print(f"\n{'='*60}")
158
- print(f"[transcribe_core] ✅✅✅ 全部完成! 總耗時: {total_time:.1f}秒")
159
- print(f"{'='*60}\n")
160
 
161
  return trad, summary
162
 
163
  # ====== Gradio UI 函式 ======
164
- def transcribe_ui(password, file):
165
- """網頁界面的轉錄處理函式"""
166
- print(f"\n🌐 [UI] 網頁版請求")
 
 
167
  if not password or password.strip() != PASSWORD:
168
- return "❌ Password incorrect", "", ""
169
- if not file:
170
- return "⚠️ No file uploaded", "", ""
 
 
 
171
  try:
172
- path = _extract_effective_path(file)
 
 
 
 
173
  text, summary = transcribe_core(path)
174
- return "✅ Transcription completed", text, summary
 
 
 
 
 
 
 
175
  except Exception as e:
176
  import traceback
177
- print(f"❌ [UI] 錯誤:\n{traceback.format_exc()}")
178
- return f"❌ Error: {e}", "", ""
 
179
 
180
- # ====== 建立 FastAPI 應用 ======
181
  fastapi_app = FastAPI()
182
 
183
- # CORS 設定
184
  fastapi_app.add_middleware(
185
  CORSMiddleware,
186
  allow_origins=["*"],
@@ -189,51 +182,37 @@ fastapi_app.add_middleware(
189
  allow_headers=["*"],
190
  )
191
 
192
- # ====== 完全同步的 API 端點 ======
193
  @fastapi_app.post("/api/transcribe")
194
- async def api_transcribe_sync(request: Request):
195
- """
196
- 完全同步的 API 端點 - 直接返回結果,不用輪詢
197
-
198
- 請求格式:
199
- {
200
- "password": "chou",
201
- "file_data": "data:audio/m4a;base64,...",
202
- "file_name": "recording.m4a"
203
- }
204
- """
205
  try:
206
  body = await request.json()
207
- print(f"\n{'📱'*30}")
208
- print(f"🎯 [SYNC API] 收到同步 API 請求")
209
- print(f"📦 Keys: {list(body.keys())}")
210
- print(f"{'📱'*30}")
211
 
 
212
  password = body.get("password", "")
213
  if password.strip() != PASSWORD:
214
- print(f"❌ [SYNC API] 密碼錯誤")
215
  return JSONResponse(
216
  status_code=401,
217
  content={"status": "error", "error": "Password incorrect"}
218
  )
219
 
 
220
  file_data = body.get("file_data", "")
221
  file_name = body.get("file_name", "recording.m4a")
222
 
223
  if not file_data or not file_data.startswith("data:"):
224
- print(f"❌ [SYNC API] 檔案格式錯誤")
225
  return JSONResponse(
226
  status_code=400,
227
  content={"status": "error", "error": "Invalid file data format"}
228
  )
229
 
230
- print(f"[SYNC API] 檔案長度: {len(file_data)}, 檔名: {file_name}")
231
-
232
- # 直接處理,同步執行
233
  file_dict = {"data": file_data, "orig_name": file_name}
234
  path = _extract_effective_path(file_dict)
235
- print(f"[SYNC API] 檔案解析成功: {path}")
236
 
 
237
  text, summary = transcribe_core(path)
238
 
239
  result = {
@@ -242,234 +221,366 @@ async def api_transcribe_sync(request: Request):
242
  "summary": summary
243
  }
244
 
245
- print(f"\n{'✅'*30}")
246
- print(f"✅✅✅ [SYNC API] 完成! 返回結果")
247
- print(json.dumps(result, ensure_ascii=False, indent=2))
248
- print(f"{'✅'*30}\n")
249
-
250
  return JSONResponse(content=result)
251
 
252
  except Exception as e:
253
  import traceback
254
  error_trace = traceback.format_exc()
255
- print(f"\n{'❌'*30}")
256
- print(f"❌ [SYNC API] 錯誤:\n{error_trace}")
257
- print(f"{'❌'*30}\n")
258
  return JSONResponse(
259
  status_code=500,
260
  content={"status": "error", "error": str(e)}
261
  )
262
 
263
- # ====== 自定義 CSS ======
264
  custom_css = """
 
 
 
 
 
265
  .gradio-container {
266
- max-width: 1200px !important;
267
- margin: auto !important;
268
  }
269
 
270
- /* 主標題 */
271
- .main-header {
 
 
 
 
 
272
  text-align: center;
273
- padding: 2.5rem 1rem;
274
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
275
- border-radius: 12px;
276
- margin-bottom: 2rem;
277
- color: white;
278
  }
279
 
280
- .main-header h1 {
281
- font-size: 2.2rem;
282
- margin: 0 0 0.5rem 0;
283
  font-weight: 700;
 
 
284
  }
285
 
286
- .main-header p {
287
- font-size: 1rem;
 
288
  margin: 0;
289
- opacity: 0.95;
290
  }
291
 
292
- /* 按鈕 */
293
- .primary-btn {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
295
  border: none !important;
296
  color: white !important;
 
297
  font-weight: 600 !important;
298
- font-size: 1.05rem !important;
 
 
 
299
  }
300
 
301
- /* 文字框 */
302
- textarea {
303
- font-size: 0.95rem !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
  line-height: 1.6 !important;
 
305
  }
306
 
307
- /* 資訊卡片 */
308
- .info-box {
309
- background: #f0f9ff;
310
- border-left: 4px solid #3b82f6;
 
311
  padding: 1rem;
312
- border-radius: 6px;
313
  margin: 1rem 0;
314
  font-size: 0.9rem;
 
315
  }
316
 
317
- /* 程式碼 */
318
- pre {
319
- background: #1f2937 !important;
320
- color: #f3f4f6 !important;
321
- padding: 1rem !important;
322
- border-radius: 6px !important;
323
- font-size: 0.85rem !important;
324
  }
325
 
326
- code {
327
- background: #e5e7eb !important;
328
- color: #1f2937 !important;
329
- padding: 0.2rem 0.4rem !important;
330
- border-radius: 3px !important;
331
- font-size: 0.9rem !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
  }
333
  """
334
 
335
- # ====== 建立 Gradio 介面 ======
336
- with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Audio Transcription") as demo:
337
 
338
  # 標題
339
  gr.HTML("""
340
- <div class="main-header">
341
  <h1>🎧 Audio Transcription Service</h1>
342
- <p>AI-Powered Speech-to-Text with Summarization</p>
343
  </div>
344
  """)
345
 
346
- with gr.Tabs():
347
- # ====== Tab 1: Upload ======
348
- with gr.Tab("🌐 Web Upload"):
349
- with gr.Row():
350
- with gr.Column(scale=1):
351
- pw = gr.Textbox(label="Password", type="password", placeholder="Enter password")
352
- audio_file = gr.File(label="Audio File", file_types=["audio", ".mp4"])
353
- submit_btn = gr.Button("🚀 Start Transcription", variant="primary", elem_classes="primary-btn")
354
-
355
- gr.HTML("""
356
- <div class="info-box">
357
- <strong>Supported:</strong> MP3, M4A, WAV, OGG, WEBM, MP4<br>
358
- <strong>Max Size:</strong> Auto-split for large files
359
- </div>
360
- """)
361
-
362
- with gr.Column(scale=2):
363
- status = gr.Textbox(label="Status", interactive=False)
364
- transcription = gr.Textbox(label="Transcription", lines=12, show_copy_button=True)
365
- summary = gr.Textbox(label="Summary", lines=5, show_copy_button=True)
366
 
367
- submit_btn.click(transcribe_ui, [pw, audio_file], [status, transcription, summary])
 
 
 
 
 
 
 
 
 
 
 
368
 
369
- # ====== Tab 2: API ======
370
- with gr.Tab("📱 API Documentation"):
371
- gr.Markdown("""
372
- ## API Endpoint
373
-
374
- **URL:** `/api/transcribe` (POST)
375
- **Type:** Synchronous - returns complete results in one request
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
 
377
- ### Request Format
378
 
 
379
  ```json
380
  {
381
  "password": "your_password",
382
- "file_data": "data:audio/m4a;base64,UklGR...",
383
  "file_name": "recording.m4a"
384
  }
385
  ```
386
 
387
- ### Response Format
388
-
389
  ```json
390
  {
391
  "status": "success",
392
- "transcription": "Full transcription text...",
393
- "summary": "AI-generated summary..."
394
  }
395
  ```
396
 
397
- ---
398
-
399
- ## iPhone Shortcuts Setup
400
-
401
- 1. **Get File** → Audio recording
402
- 2. **Base64 Encode** → File content
403
- 3. **Text** → Create data URL:
404
- ```
405
- data:audio/m4a;base64,[Base64 Result]
406
- ```
407
- 4. **Dictionary** Request body:
408
- - `password`: `chou`
409
- - `file_data`: [Text from step 3]
410
- - `file_name`: `recording.m4a`
411
- 5. **Get Contents of URL**:
412
- - URL: `https://your-domain.com/api/transcribe`
413
- - Method: `POST`
414
- - Headers: `Content-Type: application/json`
415
- - Body: [Dictionary], Type: `JSON`
416
- 6. **Get Dictionary Value**:
417
- - `transcription` → Full text
418
- - `summary` → Summary
419
-
420
- ---
421
-
422
- ## Testing with cURL
423
-
424
- ```bash
425
- curl -X POST https://your-domain.com/api/transcribe \\
426
- -H "Content-Type: application/json" \\
427
- -d '{
428
- "password": "chou",
429
- "file_data": "data:audio/m4a;base64,AAAA...",
430
- "file_name": "test.m4a"
431
- }'
432
- ```
433
-
434
- ---
435
-
436
- ## Technical Details
437
-
438
- - **Transcription:** OpenAI Whisper (high accuracy)
439
- - **Summarization:** GPT-4o-mini
440
- - **Output:** Traditional Chinese (Taiwan)
441
- - **Processing:** Fully synchronous, no polling needed
442
- - **File Handling:** Auto-split for files > 25MB
443
-
444
- ---
445
-
446
- ## Error Codes
447
-
448
- - `401` - Incorrect password
449
- - `400` - Invalid file format
450
- - `500` - Processing error
451
-
452
- For support, contact your administrator.
453
- """)
454
 
455
  # 頁腳
456
  gr.HTML("""
457
- <div style="text-align: center; margin-top: 2rem; padding: 1.5rem; background: #f9fafb; border-radius: 8px;">
458
- <p style="color: #6b7280; font-size: 0.9rem; margin: 0;">
459
- Audio Transcription Service v2.0 | Powered by OpenAI
460
- </p>
461
  </div>
462
  """)
 
 
 
 
 
 
 
463
 
464
- # ====== 掛載 Gradio 到 FastAPI ======
465
  app = gr.mount_gradio_app(fastapi_app, demo, path="/")
466
 
467
  # ====== 啟動 ======
468
  if __name__ == "__main__":
469
  print("\n" + "="*60)
470
- print("🚀 啟動 FastAPI + Gradio 應用")
471
- print("📱 同步 API: /api/transcribe")
472
- print("🌐 網頁介面: /")
473
  print("="*60 + "\n")
474
  import uvicorn
475
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
14
  print("===== 🚀 啟動中 =====")
15
  print(f"APP_PASSWORD: {'✅ 已載入' if PASSWORD else '❌ 未載入'}")
16
 
17
+ # ====== 工具函數 ======
18
  MIME_EXT = {
19
  "audio/mp4": "m4a", "audio/m4a": "m4a", "audio/aac": "aac",
20
  "audio/mpeg": "mp3", "audio/wav": "wav", "audio/x-wav": "wav",
 
24
 
25
  def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
26
  """將 data URL 轉換為本地檔案"""
 
27
  try:
28
  header, b64 = data_url.split(",", 1)
29
  except ValueError:
30
+ raise ValueError("Invalid data URL format")
31
  mime = header.split(";")[0].split(":", 1)[-1].strip()
32
  ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")
33
  fname = orig_name if (orig_name and "." in orig_name) else f"upload_{uuid.uuid4().hex}.{ext}"
 
34
  with open(fname, "wb") as f:
35
  f.write(base64.b64decode(b64))
 
 
36
  return fname
37
 
38
  def _extract_effective_path(file_obj) -> str:
39
  """從各種格式中提取有效檔案路徑"""
 
 
40
  # 字串模式
41
  if isinstance(file_obj, str):
42
  s = file_obj.strip().strip('"')
43
  if s.startswith("data:"):
 
44
  return _dataurl_to_file(s, None)
45
  if os.path.isfile(s):
 
46
  return s
47
 
48
  # 字典模式
49
  if isinstance(file_obj, dict):
 
50
  data = file_obj.get("data")
51
  if isinstance(data, str) and data.startswith("data:"):
 
52
  return _dataurl_to_file(data, file_obj.get("orig_name"))
53
  p = str(file_obj.get("path") or "").strip().strip('"')
54
  if p and os.path.isfile(p):
 
64
 
65
  raise FileNotFoundError("Cannot parse uploaded file")
66
 
 
67
  def split_audio(path):
68
  """將音訊檔案分割成多個小於 25MB 的片段"""
69
  size = os.path.getsize(path)
 
70
  if size <= MAX_SIZE:
 
71
  return [path]
72
+
73
  audio = AudioSegment.from_file(path)
74
  n = int(size / MAX_SIZE) + 1
75
  chunk_ms = len(audio) / n
 
76
  parts = []
77
  for i in range(n):
78
  fn = f"chunk_{i+1}.wav"
 
80
  parts.append(fn)
81
  return parts
82
 
 
83
  def transcribe_core(path, model="whisper-1"):
84
  """使用 Whisper 進行語音轉錄,並使用 GPT 進行繁簡轉換和摘要"""
85
  print(f"\n{'='*60}")
 
97
  except:
98
  pass
99
 
100
+ # 分割並轉錄
101
  chunks = split_audio(path)
 
102
  raw = []
103
  for i, c in enumerate(chunks, 1):
104
  print(f"[transcribe_core] 轉錄片段 {i}/{len(chunks)}")
 
107
  model=model, file=af, response_format="text"
108
  )
109
  raw.append(txt)
 
110
 
111
  raw_txt = "\n".join(raw)
 
112
 
113
  # 簡轉繁
 
114
  conv = client.chat.completions.create(
115
  model="gpt-4o-mini",
116
  messages=[
 
120
  temperature=0.0
121
  )
122
  trad = conv.choices[0].message.content.strip()
 
123
 
124
  # AI 摘要
 
125
  summ = client.chat.completions.create(
126
  model="gpt-4o-mini",
127
  messages=[
 
133
  summary = summ.choices[0].message.content.strip()
134
 
135
  total_time = time.time() - start_time
136
+ print(f"[transcribe_core] ✅ 全部完成! 總耗時: {total_time:.1f}秒\n")
 
 
137
 
138
  return trad, summary
139
 
140
  # ====== Gradio UI 函式 ======
141
def transcribe_web(password, audio_file):
    """Handle one transcription request submitted from the web form.

    Args:
        password: Value typed into the password field (may be empty or None).
        audio_file: Uploaded file object handed over by the UI; it is passed
            to ``_extract_effective_path`` to obtain a local path.

    Returns:
        A ``(status, transcription, summary)`` tuple of strings. The last two
        are empty when the request is rejected or processing fails.
    """
    print("\n🌐 [WEB] 收到網頁請求")

    # Reject a missing or non-matching password before doing any work.
    password_ok = bool(password) and password.strip() == PASSWORD
    if not password_ok:
        return "❌ Incorrect password. Please try again.", "", ""

    # Nothing to transcribe without an upload.
    if not audio_file:
        return "⚠️ Please upload an audio file first.", "", ""

    try:
        local_path = _extract_effective_path(audio_file)
        print(f"[WEB] 檔案路徑: {local_path}")

        transcript, digest = transcribe_core(local_path)

        # Status message reports the transcript length.
        status_lines = (
            "✅ Transcription completed successfully!",
            f"📝 Total characters: {len(transcript)}",
        )
        print("[WEB] ✅ 成功完成")
        return "\n".join(status_lines), transcript, digest
    except Exception as exc:
        # Log the full traceback, but show the user only the message.
        import traceback

        print(f"❌ [WEB] 錯誤:\n{traceback.format_exc()}")
        return f"❌ Error: {exc!s}", "", ""
173
 
174
+ # ====== FastAPI 應用 ======
175
  fastapi_app = FastAPI()
176
 
 
177
  fastapi_app.add_middleware(
178
  CORSMiddleware,
179
  allow_origins=["*"],
 
182
  allow_headers=["*"],
183
  )
184
 
 
185
  @fastapi_app.post("/api/transcribe")
186
+ async def api_transcribe(request: Request):
187
+ """API 端點 - 用於手機等外部調用"""
 
 
 
 
 
 
 
 
 
188
  try:
189
  body = await request.json()
190
+ print(f"\n📱 [API] 收到 API 請求")
 
 
 
191
 
192
+ # 驗證密碼
193
  password = body.get("password", "")
194
  if password.strip() != PASSWORD:
 
195
  return JSONResponse(
196
  status_code=401,
197
  content={"status": "error", "error": "Password incorrect"}
198
  )
199
 
200
+ # 檢查檔案資料
201
  file_data = body.get("file_data", "")
202
  file_name = body.get("file_name", "recording.m4a")
203
 
204
  if not file_data or not file_data.startswith("data:"):
 
205
  return JSONResponse(
206
  status_code=400,
207
  content={"status": "error", "error": "Invalid file data format"}
208
  )
209
 
210
+ # 處理檔案
 
 
211
  file_dict = {"data": file_data, "orig_name": file_name}
212
  path = _extract_effective_path(file_dict)
213
+ print(f"[API] 檔案解析成功: {path}")
214
 
215
+ # 轉錄
216
  text, summary = transcribe_core(path)
217
 
218
  result = {
 
221
  "summary": summary
222
  }
223
 
224
+ print(f"[API] ✅ 成功完成\n")
 
 
 
 
225
  return JSONResponse(content=result)
226
 
227
  except Exception as e:
228
  import traceback
229
  error_trace = traceback.format_exc()
230
+ print(f"❌ [API] 錯誤:\n{error_trace}\n")
 
 
231
  return JSONResponse(
232
  status_code=500,
233
  content={"status": "error", "error": str(e)}
234
  )
235
 
236
+ # ====== 自定義樣式 ======
237
  custom_css = """
238
+ /* 全局設定 */
239
+ * {
240
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
241
+ }
242
+
243
  .gradio-container {
244
+ max-width: 1400px !important;
245
+ margin: 0 auto !important;
246
  }
247
 
248
+ /* 主容器 */
249
+ .main-container {
250
+ padding: 2rem;
251
+ }
252
+
253
+ /* 標題區 */
254
+ .hero-section {
255
  text-align: center;
256
+ padding: 3rem 2rem;
257
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
258
+ border-radius: 16px;
259
+ margin-bottom: 3rem;
260
+ box-shadow: 0 8px 32px rgba(102, 126, 234, 0.3);
261
  }
262
 
263
+ .hero-section h1 {
264
+ color: white;
265
+ font-size: 2.5rem;
266
  font-weight: 700;
267
+ margin: 0 0 0.5rem 0;
268
+ letter-spacing: -0.02em;
269
  }
270
 
271
+ .hero-section p {
272
+ color: rgba(255, 255, 255, 0.9);
273
+ font-size: 1.15rem;
274
  margin: 0;
 
275
  }
276
 
277
+ /* 卡片樣式 */
278
+ .card {
279
+ background: white;
280
+ border-radius: 12px;
281
+ padding: 2rem;
282
+ box-shadow: 0 4px 16px rgba(0, 0, 0, 0.08);
283
+ margin-bottom: 1.5rem;
284
+ }
285
+
286
+ .card h2 {
287
+ font-size: 1.5rem;
288
+ font-weight: 600;
289
+ margin: 0 0 1.5rem 0;
290
+ color: #1f2937;
291
+ }
292
+
293
+ /* 輸入框樣式 */
294
+ .input-group {
295
+ margin-bottom: 1.5rem;
296
+ }
297
+
298
+ .input-group label {
299
+ display: block;
300
+ font-weight: 600;
301
+ color: #374151;
302
+ margin-bottom: 0.5rem;
303
+ font-size: 0.95rem;
304
+ }
305
+
306
+ input[type="password"],
307
+ textarea {
308
+ width: 100%;
309
+ padding: 0.75rem;
310
+ border: 2px solid #e5e7eb;
311
+ border-radius: 8px;
312
+ font-size: 0.95rem;
313
+ transition: all 0.2s;
314
+ }
315
+
316
+ input[type="password"]:focus,
317
+ textarea:focus {
318
+ outline: none;
319
+ border-color: #667eea;
320
+ box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
321
+ }
322
+
323
+ /* 按鈕樣式 */
324
+ button.primary-btn {
325
+ width: 100%;
326
+ padding: 1rem 2rem !important;
327
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
328
  border: none !important;
329
  color: white !important;
330
+ font-size: 1.1rem !important;
331
  font-weight: 600 !important;
332
+ border-radius: 10px !important;
333
+ cursor: pointer !important;
334
+ transition: all 0.3s !important;
335
+ box-shadow: 0 4px 12px rgba(102, 126, 234, 0.3) !important;
336
  }
337
 
338
+ button.primary-btn:hover {
339
+ transform: translateY(-2px) !important;
340
+ box-shadow: 0 6px 20px rgba(102, 126, 234, 0.4) !important;
341
+ }
342
+
343
+ /* 檔案上傳區 */
344
+ .file-upload-area {
345
+ border: 2px dashed #d1d5db;
346
+ border-radius: 12px;
347
+ padding: 2.5rem;
348
+ text-align: center;
349
+ background: #f9fafb;
350
+ transition: all 0.3s;
351
+ cursor: pointer;
352
+ }
353
+
354
+ .file-upload-area:hover {
355
+ border-color: #667eea;
356
+ background: #f0f4ff;
357
+ }
358
+
359
+ /* 狀態框 */
360
+ .status-box {
361
+ padding: 1rem;
362
+ border-radius: 8px;
363
+ margin-bottom: 1rem;
364
+ font-size: 0.95rem;
365
+ line-height: 1.5;
366
+ }
367
+
368
+ .status-success {
369
+ background: #d1fae5;
370
+ border-left: 4px solid #10b981;
371
+ color: #065f46;
372
+ }
373
+
374
+ .status-error {
375
+ background: #fee2e2;
376
+ border-left: 4px solid #ef4444;
377
+ color: #991b1b;
378
+ }
379
+
380
+ .status-warning {
381
+ background: #fef3c7;
382
+ border-left: 4px solid #f59e0b;
383
+ color: #92400e;
384
+ }
385
+
386
+ /* 結果文字框 */
387
+ textarea.result-text {
388
+ min-height: 200px !important;
389
+ font-family: "SF Mono", Monaco, monospace !important;
390
+ font-size: 0.9rem !important;
391
  line-height: 1.6 !important;
392
+ background: #f9fafb !important;
393
  }
394
 
395
+ /* 資訊提示 */
396
+ .info-banner {
397
+ background: #eff6ff;
398
+ border: 1px solid #bfdbfe;
399
+ border-radius: 8px;
400
  padding: 1rem;
 
401
  margin: 1rem 0;
402
  font-size: 0.9rem;
403
+ color: #1e40af;
404
  }
405
 
406
+ /* 分隔線 */
407
+ .divider {
408
+ height: 1px;
409
+ background: #e5e7eb;
410
+ margin: 2rem 0;
 
 
411
  }
412
 
413
+ /* API 文檔區 */
414
+ .api-section {
415
+ background: #f9fafb;
416
+ border-radius: 12px;
417
+ padding: 2rem;
418
+ margin-top: 2rem;
419
+ }
420
+
421
+ .api-section h3 {
422
+ font-size: 1.25rem;
423
+ font-weight: 600;
424
+ color: #1f2937;
425
+ margin: 0 0 1rem 0;
426
+ }
427
+
428
+ .api-endpoint {
429
+ background: #1f2937;
430
+ color: #f3f4f6;
431
+ padding: 1rem;
432
+ border-radius: 8px;
433
+ font-family: monospace;
434
+ font-size: 0.9rem;
435
+ margin: 1rem 0;
436
+ }
437
+
438
+ /* 響應式設計 */
439
+ @media (max-width: 768px) {
440
+ .hero-section h1 {
441
+ font-size: 2rem;
442
+ }
443
+
444
+ .card {
445
+ padding: 1.5rem;
446
+ }
447
  }
448
  """
449
 
450
+ # ====== Gradio 介面 ======
451
+ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Audio Transcription Service") as demo:
452
 
453
  # 標題
454
  gr.HTML("""
455
+ <div class="hero-section">
456
  <h1>🎧 Audio Transcription Service</h1>
457
+ <p>AI-Powered Speech Recognition & Summarization</p>
458
  </div>
459
  """)
460
 
461
+ # 主要上傳區域
462
+ gr.HTML('<div class="card">')
463
+ gr.Markdown("## 🎵 Upload & Transcribe")
464
+
465
+ with gr.Row():
466
+ with gr.Column(scale=1):
467
+ password_input = gr.Textbox(
468
+ label="🔐 Password",
469
+ type="password",
470
+ placeholder="Enter password",
471
+ elem_classes="input-group"
472
+ )
473
+
474
+ audio_input = gr.File(
475
+ label="📁 Audio File",
476
+ file_types=["audio", ".mp4"],
477
+ file_count="single",
478
+ elem_classes="file-upload-area"
479
+ )
 
480
 
481
+ gr.HTML("""
482
+ <div class="info-banner">
483
+ <strong>💡 Supported formats:</strong> MP3, M4A, WAV, OGG, WEBM, MP4<br>
484
+ <strong>📦 File size:</strong> Automatic chunking for large files
485
+ </div>
486
+ """)
487
+
488
+ submit_button = gr.Button(
489
+ "🚀 Start Transcription",
490
+ variant="primary",
491
+ elem_classes="primary-btn"
492
+ )
493
 
494
+ with gr.Column(scale=2):
495
+ status_output = gr.Textbox(
496
+ label="📊 Status",
497
+ interactive=False,
498
+ lines=2,
499
+ elem_classes="status-box"
500
+ )
501
+
502
+ transcription_output = gr.Textbox(
503
+ label="📝 Transcription Result",
504
+ lines=15,
505
+ placeholder="Transcription will appear here...",
506
+ show_copy_button=True,
507
+ elem_classes="result-text"
508
+ )
509
+
510
+ summary_output = gr.Textbox(
511
+ label="💡 AI Summary",
512
+ lines=6,
513
+ placeholder="AI-generated summary will appear here...",
514
+ show_copy_button=True,
515
+ elem_classes="result-text"
516
+ )
517
+
518
+ gr.HTML('</div>')
519
+
520
+ # API 文檔
521
+ gr.HTML('<div class="api-section">')
522
+ gr.Markdown("## 📱 API Integration")
523
+ gr.Markdown("""
524
+ ### For Mobile Apps & External Services
525
 
526
+ **Endpoint:** `POST /api/transcribe`
527
 
528
+ **Request Body (JSON):**
529
  ```json
530
  {
531
  "password": "your_password",
532
+ "file_data": "data:audio/m4a;base64,...",
533
  "file_name": "recording.m4a"
534
  }
535
  ```
536
 
537
+ **Response:**
 
538
  ```json
539
  {
540
  "status": "success",
541
+ "transcription": "Full text...",
542
+ "summary": "Summary..."
543
  }
544
  ```
545
 
546
+ **Features:**
547
+ - ✅ Fully synchronous - returns complete results
548
+ - Automatic file chunking for large files
549
+ - ✅ Traditional Chinese output
550
+ - AI-powered summarization
551
+
552
+ **Use Cases:**
553
+ - iPhone Shortcuts automation
554
+ - Mobile app integration
555
+ - Webhook processing
556
+ - Batch transcription systems
557
+ """)
558
+ gr.HTML('</div>')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
559
 
560
  # 頁腳
561
  gr.HTML("""
562
+ <div style="text-align: center; margin-top: 3rem; padding: 1.5rem; color: #6b7280; font-size: 0.9rem;">
563
+ <p><strong>Audio Transcription Service</strong> v2.0</p>
564
+ <p>Powered by OpenAI Whisper & GPT-4</p>
 
565
  </div>
566
  """)
567
+
568
+ # 綁定事件
569
+ submit_button.click(
570
+ fn=transcribe_web,
571
+ inputs=[password_input, audio_input],
572
+ outputs=[status_output, transcription_output, summary_output]
573
+ )
574
 
575
+ # ====== 掛載到 FastAPI ======
576
  app = gr.mount_gradio_app(fastapi_app, demo, path="/")
577
 
578
  # ====== 啟動 ======
579
if __name__ == "__main__":
    # Print the startup banner one line at a time, then start the server.
    rule = "=" * 60
    for banner_line in (
        "\n" + rule,
        "🚀 啟動服務",
        "🌐 網頁介面: http://0.0.0.0:7860",
        "📱 API 端點: http://0.0.0.0:7860/api/transcribe",
        rule + "\n",
    ):
        print(banner_line)

    # Imported here so uvicorn is only needed when running as a script.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)