MichaelChou0806 committed on
Commit
c76e92c
·
verified ·
1 Parent(s): ec64510

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +162 -216
app.py CHANGED
@@ -2,6 +2,9 @@ import os, shutil, base64, uuid, mimetypes, json, time
2
  from pydub import AudioSegment
3
  from openai import OpenAI
4
  import gradio as gr
 
 
 
5
 
6
  # ====== 基本設定 ======
7
  PASSWORD = os.getenv("APP_PASSWORD", "chou")
@@ -24,14 +27,11 @@ def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
24
  try:
25
  header, b64 = data_url.split(",", 1)
26
  except ValueError:
27
- print(f" → [_dataurl_to_file] ❌ 錯誤: data URL 格式錯誤")
28
  raise ValueError("data URL format error")
29
  mime = header.split(";")[0].split(":", 1)[-1].strip()
30
  ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")
31
  fname = orig_name if (orig_name and "." in orig_name) else f"upload_{uuid.uuid4().hex}.{ext}"
32
- print(f" → [_dataurl_to_file] MIME: {mime}, 副檔名: {ext}")
33
- print(f" → [_dataurl_to_file] 目標檔名: {fname}")
34
- print(f" → [_dataurl_to_file] Base64 長度: {len(b64)}")
35
  with open(fname, "wb") as f:
36
  f.write(base64.b64decode(b64))
37
  file_size = os.path.getsize(fname)
@@ -40,111 +40,90 @@ def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
40
 
41
  def _extract_effective_path(file_obj) -> str:
42
  """從各種格式中提取有效檔案路徑"""
43
- print(f"\n[_extract_effective_path] 開始解析檔案...")
44
  print(f"[_extract_effective_path] 收到類型: {type(file_obj)}")
45
- print(f"[_extract_effective_path] 收到內容前100字: {str(file_obj)[:100]}...")
46
 
47
  # 字串模式
48
  if isinstance(file_obj, str):
49
  s = file_obj.strip().strip('"')
50
- print(f" → [模式 A] 字串模式")
51
  if s.startswith("data:"):
52
- print(f" → [模式 A] 偵測到 data URL, 長度: {len(s)}")
53
  return _dataurl_to_file(s, None)
54
  if os.path.isfile(s):
55
- print(f" → [模式 A] 找到檔案路徑: {s}")
56
  return s
57
 
58
  # 字典模式
59
  if isinstance(file_obj, dict):
60
- print(f" → [模式 B] 字典模式")
61
- print(f" → [模式 B] Keys: {list(file_obj.keys())}")
62
  data = file_obj.get("data")
63
  if isinstance(data, str) and data.startswith("data:"):
64
- print(f" → [模式 B] 找到 data URL! 長度: {len(data)}")
65
  return _dataurl_to_file(data, file_obj.get("orig_name"))
66
  p = str(file_obj.get("path") or "").strip().strip('"')
67
  if p and os.path.isfile(p):
68
- print(f" → [模式 B] 找到 path: {p}")
69
  return p
70
 
71
  # 物件模式
72
- print(f" → [模式 C] 物件模式")
73
  for attr in ("name", "path"):
74
  p = getattr(file_obj, attr, None)
75
  if isinstance(p, str):
76
  s = p.strip().strip('"')
77
  if os.path.isfile(s):
78
- print(f" → [模式 C] 找到屬性 {attr}: {s}")
79
  return s
80
 
81
- print(f"[_extract_effective_path] ❌ 無法解析檔案")
82
  raise FileNotFoundError("Cannot parse uploaded file")
83
 
84
  # ====== 分段處理 ======
85
  def split_audio(path):
86
- print(f"\n[split_audio] 檢查檔案大小...")
87
  size = os.path.getsize(path)
88
  print(f"[split_audio] 檔案大小: {size} bytes ({size/1024/1024:.2f} MB)")
89
  if size <= MAX_SIZE:
90
- print(f"[split_audio] 檔案小於 25MB, 不需分割")
91
  return [path]
92
- print(f"[split_audio] 檔案大於 25MB, 開始分割...")
93
  audio = AudioSegment.from_file(path)
94
  n = int(size / MAX_SIZE) + 1
95
  chunk_ms = len(audio) / n
96
- print(f"[split_audio] 將分割成 {n} 個片段, 每段約 {chunk_ms/1000:.1f} 秒")
97
  parts = []
98
  for i in range(n):
99
  fn = f"chunk_{i+1}.wav"
100
  audio[int(i*chunk_ms):int((i+1)*chunk_ms)].export(fn, format="wav")
101
- print(f"[split_audio] 已產生片段 {i+1}/{n}: {fn}")
102
  parts.append(fn)
103
  return parts
104
 
105
  # ====== 轉錄核心 ======
106
  def transcribe_core(path, model="whisper-1"):
107
  print(f"\n{'='*60}")
108
- print(f"[transcribe_core] 開始轉錄流程")
109
- print(f"[transcribe_core] 檔案路徑: {path}")
110
  print(f"{'='*60}")
111
 
112
  start_time = time.time()
113
 
114
  if path.lower().endswith(".mp4"):
115
- print(f"[transcribe_core] 偵測到 .mp4 檔案, 轉換為 .m4a")
116
  fixed = path[:-4] + ".m4a"
117
  try:
118
  shutil.copy(path, fixed)
119
  path = fixed
120
- print(f"[transcribe_core] ✅ 已轉換: {path}")
121
- except Exception as e:
122
- print(f"[transcribe_core] ⚠️ 轉換失敗: {e}")
123
 
124
- print(f"\n[transcribe_core] === 步驟 1: 分割音檔 ===")
125
  chunks = split_audio(path)
126
- print(f"[transcribe_core] {len(chunks)} 個片段")
127
-
128
- print(f"\n[transcribe_core] === 步驟 2: Whisper 轉錄 ===")
129
  raw = []
130
  for i, c in enumerate(chunks, 1):
131
- print(f"[transcribe_core] 轉錄片段 {i}/{len(chunks)}: {c}")
132
- chunk_start = time.time()
133
  with open(c, "rb") as af:
134
  txt = client.audio.transcriptions.create(
135
  model=model, file=af, response_format="text"
136
  )
137
  raw.append(txt)
138
- chunk_time = time.time() - chunk_start
139
- print(f"[transcribe_core] ✅ 片段 {i} 完成 (耗時 {chunk_time:.1f}秒)")
140
- print(f"[transcribe_core] 片段 {i} 內容: {txt[:100]}...")
141
 
142
  raw_txt = "\n".join(raw)
143
- print(f"\n[transcribe_core] 原始轉錄總長度: {len(raw_txt)} 字元")
144
- print(f"[transcribe_core] 原始內容前200字: {raw_txt[:200]}...")
145
 
146
- print(f"\n[transcribe_core] === 步驟 3: 簡轉繁 ===")
147
- conv_start = time.time()
148
  conv = client.chat.completions.create(
149
  model="gpt-4o-mini",
150
  messages=[
@@ -154,13 +133,9 @@ def transcribe_core(path, model="whisper-1"):
154
  temperature=0.0
155
  )
156
  trad = conv.choices[0].message.content.strip()
157
- conv_time = time.time() - conv_start
158
- print(f"[transcribe_core] ✅ 繁體轉換完成 (耗時 {conv_time:.1f}秒)")
159
- print(f"[transcribe_core] 繁體內容長度: {len(trad)} 字元")
160
- print(f"[transcribe_core] 繁體內容前200字: {trad[:200]}...")
161
 
162
- print(f"\n[transcribe_core] === 步驟 4: AI 摘要 ===")
163
- summ_start = time.time()
164
  summ = client.chat.completions.create(
165
  model="gpt-4o-mini",
166
  messages=[
@@ -170,92 +145,87 @@ def transcribe_core(path, model="whisper-1"):
170
  temperature=0.2
171
  )
172
  summary = summ.choices[0].message.content.strip()
173
- summ_time = time.time() - summ_start
174
- print(f"[transcribe_core] ✅ 摘要完成 (耗時 {summ_time:.1f}秒)")
175
- print(f"[transcribe_core] 摘要內容: {summary}")
176
 
177
  total_time = time.time() - start_time
178
  print(f"\n{'='*60}")
179
- print(f"[transcribe_core] ✅✅✅ 轉錄流程全部完成!")
180
- print(f"[transcribe_core] 總耗時: {total_time:.1f} 秒")
181
  print(f"{'='*60}\n")
182
 
183
  return trad, summary
184
 
185
  # ====== Gradio UI 函式 ======
186
  def transcribe_ui(password, file):
187
- """網頁版轉錄函式"""
188
- print(f"\n{'🌐'*30}")
189
- print(f"🎯 [UI] 收到網頁版請求")
190
- print(f"🔑 [UI] 密碼: {password[:2] if password else ''}*** (長度: {len(password) if password else 0})")
191
- print(f"📁 [UI] 檔案類型: {type(file)}")
192
- print(f"{'🌐'*30}")
193
-
194
  if not password or password.strip() != PASSWORD:
195
- print(f"❌ [UI] 密碼驗證失敗")
196
  return "❌ Password incorrect", "", ""
197
  if not file:
198
- print(f"❌ [UI] 未收到檔案")
199
  return "⚠️ No file uploaded", "", ""
200
-
201
  try:
202
  path = _extract_effective_path(file)
203
- print(f"✅ [UI] 檔案解析成功: {path}")
204
  text, summary = transcribe_core(path)
205
- print(f"✅ [UI] 轉錄完成, 準備返回結果")
206
  return "✅ Transcription completed", text, summary
207
  except Exception as e:
208
  import traceback
209
- error_trace = traceback.format_exc()
210
- print(f"❌ [UI] 發生錯誤:\n{error_trace}")
211
  return f"❌ Error: {e}", "", ""
212
 
213
- # ====== API 函式 ======
214
- def transcribe_api(password, file_data, file_name):
215
- """
216
- API 版本的轉錄函式
 
 
 
 
 
 
 
 
 
 
 
217
  """
218
- print(f"\n{'📱'*30}")
219
- print(f"🎯 [API] 收到 API 請求")
220
- print(f"🔑 [API] 密碼: {password[:2] if password else ''}*** (長度: {len(password) if password else 0})")
221
- print(f"📁 [API] file_data 類型: {type(file_data)}")
222
- print(f"📁 [API] file_data 長度: {len(file_data) if file_data else 0}")
223
- print(f"📁 [API] file_data 前50字: {str(file_data)[:50] if file_data else 'None'}...")
224
- print(f"📁 [API] file_name: {file_name}")
225
- print(f"{'📱'*30}")
226
-
227
- if not password or password.strip() != PASSWORD:
228
- result = {
229
- "status": "error",
230
- "error": "Password incorrect",
231
- "transcription": "",
232
- "summary": ""
233
- }
234
- print(f"❌ [API] 密碼驗證失敗")
235
- print(f"[API] 返回結果: {json.dumps(result, ensure_ascii=False, indent=2)}")
236
- return result
237
-
238
- if not file_data or not file_data.startswith("data:"):
239
- result = {
240
- "status": "error",
241
- "error": "Invalid file data format. Must be data:audio/...;base64,...",
242
- "transcription": "",
243
- "summary": ""
244
- }
245
- print(f"❌ [API] 檔案格式錯誤")
246
- print(f"[API] 返回結果: {json.dumps(result, ensure_ascii=False, indent=2)}")
247
- return result
248
 
 
 
 
 
 
 
 
249
  try:
250
- file_dict = {
251
- "data": file_data,
252
- "orig_name": file_name or "recording.m4a"
253
- }
254
- print(f"[API] 開始解析檔案...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  path = _extract_effective_path(file_dict)
256
- print(f"✅ [API] 檔案解析成功: {path}")
257
 
258
- print(f"[API] 開始轉錄流程...")
259
  text, summary = transcribe_core(path)
260
 
261
  result = {
@@ -263,30 +233,24 @@ def transcribe_api(password, file_data, file_name):
263
  "transcription": text,
264
  "summary": summary
265
  }
 
266
  print(f"\n{'✅'*30}")
267
- print(f"✅✅✅ [API] 全部完成!")
268
- print(f"[API] 轉錄長度: {len(text)} 字元")
269
- print(f"[API] 摘要長度: {len(summary)} 字元")
270
- print(f"[API] 返回結果:")
271
  print(json.dumps(result, ensure_ascii=False, indent=2))
272
  print(f"{'✅'*30}\n")
273
- return result
 
274
 
275
  except Exception as e:
276
  import traceback
277
  error_trace = traceback.format_exc()
278
  print(f"\n{'❌'*30}")
279
- print(f"❌ [API] 發生錯誤:")
280
- print(error_trace)
281
  print(f"{'❌'*30}\n")
282
- result = {
283
- "status": "error",
284
- "error": str(e),
285
- "transcription": "",
286
- "summary": ""
287
- }
288
- print(f"[API] 返回錯誤結果: {json.dumps(result, ensure_ascii=False, indent=2)}")
289
- return result
290
 
291
  # ====== Gradio 介面 ======
292
  with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo:
@@ -296,129 +260,111 @@ with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo
296
  gr.Markdown("### Upload audio file directly from browser")
297
  with gr.Row():
298
  with gr.Column(scale=1):
299
- pw_ui = gr.Textbox(
300
- label="Password",
301
- type="password",
302
- placeholder="Enter password"
303
- )
304
- file_ui = gr.File(
305
- label="Upload Audio File",
306
- file_types=["audio"]
307
- )
308
- btn_ui = gr.Button(
309
- "Start Transcription 🚀",
310
- variant="primary",
311
- size="lg"
312
- )
313
-
314
  with gr.Column(scale=2):
315
  status_ui = gr.Textbox(label="Status", interactive=False)
316
- transcript_ui = gr.Textbox(
317
- label="Transcription Result",
318
- lines=10,
319
- placeholder="Transcription will appear here..."
320
- )
321
- summary_ui = gr.Textbox(
322
- label="AI Summary",
323
- lines=6,
324
- placeholder="Summary will appear here..."
325
- )
326
 
327
- btn_ui.click(
328
- transcribe_ui,
329
- inputs=[pw_ui, file_ui],
330
- outputs=[status_ui, transcript_ui, summary_ui]
331
- )
332
 
333
- with gr.Tab("📱 API (iPhone Shortcut)"):
334
  gr.Markdown("""
335
- ### For iPhone Shortcuts & Automation
336
 
337
- Test the API endpoint here before using in iPhone Shortcuts.
338
- """)
339
 
340
- with gr.Row():
341
- with gr.Column(scale=1):
342
- pw_api = gr.Textbox(
343
- label="Password",
344
- type="password",
345
- value="chou",
346
- placeholder="Enter password"
347
- )
348
- file_data_api = gr.Textbox(
349
- label="File Data (Base64)",
350
- placeholder="data:audio/m4a;base64,UklGR...",
351
- lines=3,
352
- info="Paste your base64-encoded audio data URL here"
353
- )
354
- file_name_api = gr.Textbox(
355
- label="Original Filename",
356
- value="recording.m4a",
357
- placeholder="recording.m4a"
358
- )
359
- btn_api = gr.Button(
360
- "Test API 🧪",
361
- variant="secondary",
362
- size="lg"
363
- )
364
-
365
- with gr.Column(scale=2):
366
- result_api = gr.JSON(
367
- label="API Response",
368
- show_label=True
369
- )
370
 
371
- btn_api.click(
372
- transcribe_api,
373
- inputs=[pw_api, file_data_api, file_name_api],
374
- outputs=[result_api],
375
- api_name="transcribe",
376
- queue=False # 🔴 關鍵: 禁用 queue
377
- )
378
 
379
- gr.Markdown("""
380
  ---
381
- ### 📖 iPhone Shortcuts Configuration
382
 
383
- **Endpoint**: `/gradio_api/call/transcribe`
384
-
385
- **Request Format (JSON)**:
386
  ```json
387
  {
388
- "data": [
389
- "your_password",
390
- "data:audio/m4a;base64,UklGR...",
391
- "recording.m4a"
392
- ]
393
  }
394
  ```
395
 
396
- **Response Format**:
397
  ```json
398
  {
399
- "data": {
400
- "status": "success",
401
- "transcription": "轉錄內容...",
402
- "summary": "摘要..."
403
- }
404
  }
405
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
406
  """)
407
 
408
  gr.Markdown("""
409
  ---
410
  💡 **Supported Formats**: MP4, M4A, MP3, WAV, OGG, WEBM
411
- 📦 **Max File Size**: 25MB per chunk (larger files auto-split)
412
- 🔒 **Security**: Password-protected access
413
  """)
414
 
 
 
 
415
  # ====== 啟動 ======
416
  if __name__ == "__main__":
417
  print("\n" + "="*60)
418
- print("準備啟動 Gradio 應用...")
 
 
419
  print("="*60 + "\n")
420
- demo.launch(
421
- server_name="0.0.0.0",
422
- server_port=7860,
423
- show_api=True
424
- )
 
2
  from pydub import AudioSegment
3
  from openai import OpenAI
4
  import gradio as gr
5
+ from fastapi import FastAPI, Request
6
+ from fastapi.responses import JSONResponse
7
+ from fastapi.middleware.cors import CORSMiddleware
8
 
9
  # ====== 基本設定 ======
10
  PASSWORD = os.getenv("APP_PASSWORD", "chou")
 
27
  try:
28
  header, b64 = data_url.split(",", 1)
29
  except ValueError:
 
30
  raise ValueError("data URL format error")
31
  mime = header.split(";")[0].split(":", 1)[-1].strip()
32
  ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")
33
  fname = orig_name if (orig_name and "." in orig_name) else f"upload_{uuid.uuid4().hex}.{ext}"
34
+ print(f" → [_dataurl_to_file] 檔名: {fname}, Base64長度: {len(b64)}")
 
 
35
  with open(fname, "wb") as f:
36
  f.write(base64.b64decode(b64))
37
  file_size = os.path.getsize(fname)
 
40
 
41
  def _extract_effective_path(file_obj) -> str:
42
  """從各種格式中提取有效檔案路徑"""
 
43
  print(f"[_extract_effective_path] 收到類型: {type(file_obj)}")
 
44
 
45
  # 字串模式
46
  if isinstance(file_obj, str):
47
  s = file_obj.strip().strip('"')
 
48
  if s.startswith("data:"):
49
+ print(f" → 偵測到 data URL")
50
  return _dataurl_to_file(s, None)
51
  if os.path.isfile(s):
52
+ print(f" → 找到檔案路徑: {s}")
53
  return s
54
 
55
  # 字典模式
56
  if isinstance(file_obj, dict):
57
+ print(f" → 字典模式, Keys: {list(file_obj.keys())}")
 
58
  data = file_obj.get("data")
59
  if isinstance(data, str) and data.startswith("data:"):
60
+ print(f" → 找到 data URL")
61
  return _dataurl_to_file(data, file_obj.get("orig_name"))
62
  p = str(file_obj.get("path") or "").strip().strip('"')
63
  if p and os.path.isfile(p):
 
64
  return p
65
 
66
  # 物件模式
 
67
  for attr in ("name", "path"):
68
  p = getattr(file_obj, attr, None)
69
  if isinstance(p, str):
70
  s = p.strip().strip('"')
71
  if os.path.isfile(s):
 
72
  return s
73
 
 
74
  raise FileNotFoundError("Cannot parse uploaded file")
75
 
76
  # ====== 分段處理 ======
77
  def split_audio(path):
 
78
  size = os.path.getsize(path)
79
  print(f"[split_audio] 檔案大小: {size} bytes ({size/1024/1024:.2f} MB)")
80
  if size <= MAX_SIZE:
81
+ print(f"[split_audio] 不需分割")
82
  return [path]
83
+ print(f"[split_audio] 開始分割...")
84
  audio = AudioSegment.from_file(path)
85
  n = int(size / MAX_SIZE) + 1
86
  chunk_ms = len(audio) / n
87
+ print(f"[split_audio] 分割成 {n} 個片段")
88
  parts = []
89
  for i in range(n):
90
  fn = f"chunk_{i+1}.wav"
91
  audio[int(i*chunk_ms):int((i+1)*chunk_ms)].export(fn, format="wav")
 
92
  parts.append(fn)
93
  return parts
94
 
95
  # ====== 轉錄核心 ======
96
  def transcribe_core(path, model="whisper-1"):
97
  print(f"\n{'='*60}")
98
+ print(f"[transcribe_core] 開始轉錄: {path}")
 
99
  print(f"{'='*60}")
100
 
101
  start_time = time.time()
102
 
103
  if path.lower().endswith(".mp4"):
 
104
  fixed = path[:-4] + ".m4a"
105
  try:
106
  shutil.copy(path, fixed)
107
  path = fixed
108
+ except:
109
+ pass
 
110
 
 
111
  chunks = split_audio(path)
112
+ print(f"\n[transcribe_core] === Whisper 轉錄 ({len(chunks)} 片段) ===")
 
 
113
  raw = []
114
  for i, c in enumerate(chunks, 1):
115
+ print(f"[transcribe_core] 轉錄片段 {i}/{len(chunks)}")
 
116
  with open(c, "rb") as af:
117
  txt = client.audio.transcriptions.create(
118
  model=model, file=af, response_format="text"
119
  )
120
  raw.append(txt)
121
+ print(f"[transcribe_core] 片段 {i} 完成")
 
 
122
 
123
  raw_txt = "\n".join(raw)
124
+ print(f"[transcribe_core] 原始轉錄: {len(raw_txt)} 字元")
 
125
 
126
+ print(f"\n[transcribe_core] === 簡轉繁 ===")
 
127
  conv = client.chat.completions.create(
128
  model="gpt-4o-mini",
129
  messages=[
 
133
  temperature=0.0
134
  )
135
  trad = conv.choices[0].message.content.strip()
136
+ print(f"[transcribe_core] 繁體轉換完成: {len(trad)} 字元")
 
 
 
137
 
138
+ print(f"\n[transcribe_core] === AI 摘要 ===")
 
139
  summ = client.chat.completions.create(
140
  model="gpt-4o-mini",
141
  messages=[
 
145
  temperature=0.2
146
  )
147
  summary = summ.choices[0].message.content.strip()
 
 
 
148
 
149
  total_time = time.time() - start_time
150
  print(f"\n{'='*60}")
151
+ print(f"[transcribe_core] ✅✅✅ 全部完成! 總耗時: {total_time:.1f}秒")
 
152
  print(f"{'='*60}\n")
153
 
154
  return trad, summary
155
 
156
  # ====== Gradio UI 函式 ======
157
  def transcribe_ui(password, file):
158
+ print(f"\n🌐 [UI] 網頁版請求")
 
 
 
 
 
 
159
  if not password or password.strip() != PASSWORD:
 
160
  return "❌ Password incorrect", "", ""
161
  if not file:
 
162
  return "⚠️ No file uploaded", "", ""
 
163
  try:
164
  path = _extract_effective_path(file)
 
165
  text, summary = transcribe_core(path)
 
166
  return "✅ Transcription completed", text, summary
167
  except Exception as e:
168
  import traceback
169
+ print(f"❌ [UI] 錯誤:\n{traceback.format_exc()}")
 
170
  return f"❌ Error: {e}", "", ""
171
 
172
+ # ====== 建立 FastAPI 應用 ======
173
+ fastapi_app = FastAPI()
174
+
175
+ # CORS 設定
176
+ fastapi_app.add_middleware(
177
+ CORSMiddleware,
178
+ allow_origins=["*"],
179
+ allow_credentials=True,
180
+ allow_methods=["*"],
181
+ allow_headers=["*"],
182
+ )
183
+
184
+ # ====== 完全同步的 API 端點 ======
185
+ @fastapi_app.post("/api/transcribe")
186
+ async def api_transcribe_sync(request: Request):
187
  """
188
+ 完全同步的 API 端點 - 直接返回結果,不用輪詢
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
 
190
+ 請求格式:
191
+ {
192
+ "password": "chou",
193
+ "file_data": "data:audio/m4a;base64,...",
194
+ "file_name": "recording.m4a"
195
+ }
196
+ """
197
  try:
198
+ body = await request.json()
199
+ print(f"\n{'📱'*30}")
200
+ print(f"🎯 [SYNC API] 收到同步 API 請求")
201
+ print(f"📦 Keys: {list(body.keys())}")
202
+ print(f"{'📱'*30}")
203
+
204
+ password = body.get("password", "")
205
+ if password.strip() != PASSWORD:
206
+ print(f"❌ [SYNC API] 密碼錯誤")
207
+ return JSONResponse(
208
+ status_code=401,
209
+ content={"status": "error", "error": "Password incorrect"}
210
+ )
211
+
212
+ file_data = body.get("file_data", "")
213
+ file_name = body.get("file_name", "recording.m4a")
214
+
215
+ if not file_data or not file_data.startswith("data:"):
216
+ print(f"❌ [SYNC API] 檔案格式錯誤")
217
+ return JSONResponse(
218
+ status_code=400,
219
+ content={"status": "error", "error": "Invalid file data format"}
220
+ )
221
+
222
+ print(f"[SYNC API] 檔案長度: {len(file_data)}, 檔名: {file_name}")
223
+
224
+ # 直接處理,同步執行
225
+ file_dict = {"data": file_data, "orig_name": file_name}
226
  path = _extract_effective_path(file_dict)
227
+ print(f"✅ [SYNC API] 檔案解析成功: {path}")
228
 
 
229
  text, summary = transcribe_core(path)
230
 
231
  result = {
 
233
  "transcription": text,
234
  "summary": summary
235
  }
236
+
237
  print(f"\n{'✅'*30}")
238
+ print(f"✅✅✅ [SYNC API] 完成! 返回結果")
 
 
 
239
  print(json.dumps(result, ensure_ascii=False, indent=2))
240
  print(f"{'✅'*30}\n")
241
+
242
+ return JSONResponse(content=result)
243
 
244
  except Exception as e:
245
  import traceback
246
  error_trace = traceback.format_exc()
247
  print(f"\n{'❌'*30}")
248
+ print(f"❌ [SYNC API] 錯誤:\n{error_trace}")
 
249
  print(f"{'❌'*30}\n")
250
+ return JSONResponse(
251
+ status_code=500,
252
+ content={"status": "error", "error": str(e)}
253
+ )
 
 
 
 
254
 
255
  # ====== Gradio 介面 ======
256
  with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo:
 
260
  gr.Markdown("### Upload audio file directly from browser")
261
  with gr.Row():
262
  with gr.Column(scale=1):
263
+ pw_ui = gr.Textbox(label="Password", type="password")
264
+ file_ui = gr.File(label="Upload Audio File", file_types=["audio"])
265
+ btn_ui = gr.Button("Start Transcription 🚀", variant="primary", size="lg")
 
 
 
 
 
 
 
 
 
 
 
 
266
  with gr.Column(scale=2):
267
  status_ui = gr.Textbox(label="Status", interactive=False)
268
+ transcript_ui = gr.Textbox(label="Transcription Result", lines=10)
269
+ summary_ui = gr.Textbox(label="AI Summary", lines=6)
 
 
 
 
 
 
 
 
270
 
271
+ btn_ui.click(transcribe_ui, [pw_ui, file_ui], [status_ui, transcript_ui, summary_ui])
 
 
 
 
272
 
273
+ with gr.Tab("📱 API Documentation"):
274
  gr.Markdown("""
275
+ ### 🚀 Synchronous API (Recommended for iPhone Shortcuts)
276
 
277
+ **Endpoint**: `/api/transcribe` (POST)
 
278
 
279
+ **完全同步** - 直接返回結果,無需輪詢
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
 
281
+ ✅ **穩定可靠** - 不受音檔長度影響,自動等待完成
 
 
 
 
 
 
282
 
 
283
  ---
 
284
 
285
+ #### Request Format (JSON):
 
 
286
  ```json
287
  {
288
+ "password": "your_password",
289
+ "file_data": "data:audio/m4a;base64,UklGR...",
290
+ "file_name": "recording.m4a"
 
 
291
  }
292
  ```
293
 
294
+ #### Response Format:
295
  ```json
296
  {
297
+ "status": "success",
298
+ "transcription": "轉錄內容...",
299
+ "summary": "摘要內容..."
 
 
300
  }
301
  ```
302
+
303
+ ---
304
+
305
+ ### 📱 iPhone Shortcuts 設定
306
+
307
+ **動作流程:**
308
+
309
+ 1. **取得檔案** → 語音檔
310
+ 2. **Base64 編碼**
311
+ 3. **文字** (組合 data URL):
312
+ ```
313
+ data:audio/m4a;base64,Base64編碼結果
314
+ ```
315
+ 4. **字典** (請求本文):
316
+ - 鍵: `password`, 值: `chou`
317
+ - 鍵: `file_data`, 值: 上一步的文字
318
+ - 鍵: `file_name`, 值: `recording.m4a`
319
+ 5. **取得 URL 內容**:
320
+ - URL: `https://你的網址/api/transcribe`
321
+ - 方法: `POST`
322
+ - 標頭: `Content-Type` = `application/json`
323
+ - 請求本文: 上一步的字典
324
+ - 請求本文類型: `JSON`
325
+ 6. **從字典取得值**:
326
+ - 鍵: `transcription` → 轉錄結果
327
+ - 鍵: `summary` → 摘要
328
+
329
+ ---
330
+
331
+ ### 💡 重要提醒
332
+
333
+ - ✅ 這個端點**完全同步**,會等待轉錄完成後才返回
334
+ - ✅ 無論音檔多長,都會自動處理完成
335
+ - ✅ 不需要設定等待時間或輪詢機制
336
+ - ✅ 直接取得最終結果,不會有 `event_id`
337
+
338
+ ### 🧪 測試 API
339
+
340
+ 使用 curl 測試:
341
+ ```bash
342
+ curl -X POST https://你的網址/api/transcribe \\
343
+ -H "Content-Type: application/json" \\
344
+ -d '{
345
+ "password": "chou",
346
+ "file_data": "data:audio/m4a;base64,AAAA...",
347
+ "file_name": "test.m4a"
348
+ }'
349
+ ```
350
  """)
351
 
352
  gr.Markdown("""
353
  ---
354
  💡 **Supported Formats**: MP4, M4A, MP3, WAV, OGG, WEBM
355
+ 📦 **Max File Size**: 25MB per chunk (auto-split)
356
+ 🔒 **Security**: Password-protected
357
  """)
358
 
359
+ # ====== 掛載 Gradio 到 FastAPI ======
360
+ app = gr.mount_gradio_app(fastapi_app, demo, path="/")
361
+
362
  # ====== 啟動 ======
363
  if __name__ == "__main__":
364
  print("\n" + "="*60)
365
+ print("🚀 啟動 FastAPI + Gradio 應用")
366
+ print("📱 同步 API: /api/transcribe")
367
+ print("🌐 網頁介面: /")
368
  print("="*60 + "\n")
369
+ import uvicorn
370
+ uvicorn.run(app, host="0.0.0.0", port=7860)