MichaelChou0806 committed on
Commit
7de90eb
·
verified ·
1 Parent(s): e5e8cf2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -207
app.py CHANGED
@@ -14,7 +14,7 @@ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
14
  print("===== 🚀 啟動中 =====")
15
  print(f"APP_PASSWORD: {'✅ 已載入' if PASSWORD else '❌ 未載入'}")
16
 
17
- # ====== 工具:把 data:URL 轉成臨時檔 ======
18
  MIME_EXT = {
19
  "audio/mp4": "m4a", "audio/m4a": "m4a", "audio/aac": "aac",
20
  "audio/mpeg": "mp3", "audio/wav": "wav", "audio/x-wav": "wav",
@@ -23,7 +23,7 @@ MIME_EXT = {
23
  }
24
 
25
  def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
26
- print(f" → [_dataurl_to_file] 開始處理 data URL...")
27
  try:
28
  header, b64 = data_url.split(",", 1)
29
  except ValueError:
@@ -31,60 +31,43 @@ def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
31
  mime = header.split(";")[0].split(":", 1)[-1].strip()
32
  ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")
33
  fname = orig_name if (orig_name and "." in orig_name) else f"upload_{uuid.uuid4().hex}.{ext}"
34
- print(f" → [_dataurl_to_file] 檔名: {fname}, Base64長度: {len(b64)}")
35
  with open(fname, "wb") as f:
36
  f.write(base64.b64decode(b64))
37
- file_size = os.path.getsize(fname)
38
- print(f" → [_dataurl_to_file] ✅ 檔案已建立, 大小: {file_size} bytes")
39
  return fname
40
 
41
  def _extract_effective_path(file_obj) -> str:
42
- """從各種格式中提取有效檔案路徑"""
43
- print(f"[_extract_effective_path] 收到類型: {type(file_obj)}")
44
-
45
- # 字串模式
46
  if isinstance(file_obj, str):
47
  s = file_obj.strip().strip('"')
48
  if s.startswith("data:"):
49
- print(f" → 偵測到 data URL")
50
  return _dataurl_to_file(s, None)
51
  if os.path.isfile(s):
52
- print(f" → 找到檔案路徑: {s}")
53
  return s
54
-
55
- # 字典模式
56
  if isinstance(file_obj, dict):
57
- print(f" → 字典模式, Keys: {list(file_obj.keys())}")
58
  data = file_obj.get("data")
59
  if isinstance(data, str) and data.startswith("data:"):
60
- print(f" → 找到 data URL")
61
  return _dataurl_to_file(data, file_obj.get("orig_name"))
62
  p = str(file_obj.get("path") or "").strip().strip('"')
63
  if p and os.path.isfile(p):
64
  return p
65
-
66
- # 物件模式
67
  for attr in ("name", "path"):
68
  p = getattr(file_obj, attr, None)
69
  if isinstance(p, str):
70
  s = p.strip().strip('"')
71
  if os.path.isfile(s):
72
  return s
73
-
74
- raise FileNotFoundError("Cannot parse uploaded file")
75
 
76
- # ====== 分段處理 ======
77
  def split_audio(path):
78
  size = os.path.getsize(path)
79
- print(f"[split_audio] 檔案大小: {size} bytes ({size/1024/1024:.2f} MB)")
80
  if size <= MAX_SIZE:
81
- print(f"[split_audio] 不需分割")
82
  return [path]
83
- print(f"[split_audio] 開始分割...")
84
  audio = AudioSegment.from_file(path)
85
  n = int(size / MAX_SIZE) + 1
86
  chunk_ms = len(audio) / n
87
- print(f"[split_audio] 分割成 {n} 個片段")
88
  parts = []
89
  for i in range(n):
90
  fn = f"chunk_{i+1}.wav"
@@ -92,13 +75,9 @@ def split_audio(path):
92
  parts.append(fn)
93
  return parts
94
 
95
- # ====== 轉錄核心 ======
96
  def transcribe_core(path, model="whisper-1"):
97
- print(f"\n{'='*60}")
98
- print(f"[transcribe_core] 開始轉錄: {path}")
99
- print(f"{'='*60}")
100
-
101
- start_time = time.time()
102
 
103
  if path.lower().endswith(".mp4"):
104
  fixed = path[:-4] + ".m4a"
@@ -109,21 +88,18 @@ def transcribe_core(path, model="whisper-1"):
109
  pass
110
 
111
  chunks = split_audio(path)
112
- print(f"\n[transcribe_core] === Whisper 轉錄 ({len(chunks)} 片段) ===")
113
  raw = []
114
  for i, c in enumerate(chunks, 1):
115
- print(f"[transcribe_core] 轉錄片段 {i}/{len(chunks)}")
116
  with open(c, "rb") as af:
117
  txt = client.audio.transcriptions.create(
118
  model=model, file=af, response_format="text"
119
  )
120
  raw.append(txt)
121
- print(f"[transcribe_core] ✅ 片段 {i} 完成")
122
-
123
  raw_txt = "\n".join(raw)
124
- print(f"[transcribe_core] 原始轉錄: {len(raw_txt)} 字元")
125
 
126
- print(f"\n[transcribe_core] === 簡轉繁 ===")
127
  conv = client.chat.completions.create(
128
  model="gpt-4o-mini",
129
  messages=[
@@ -133,9 +109,8 @@ def transcribe_core(path, model="whisper-1"):
133
  temperature=0.0
134
  )
135
  trad = conv.choices[0].message.content.strip()
136
- print(f"[transcribe_core] ✅ 繁體轉換完成: {len(trad)} 字元")
137
 
138
- print(f"\n[transcribe_core] === AI 摘要 ===")
139
  summ = client.chat.completions.create(
140
  model="gpt-4o-mini",
141
  messages=[
@@ -144,35 +119,27 @@ def transcribe_core(path, model="whisper-1"):
144
  ],
145
  temperature=0.2
146
  )
147
- summary = summ.choices[0].message.content.strip()
148
-
149
- total_time = time.time() - start_time
150
- print(f"\n{'='*60}")
151
- print(f"[transcribe_core] ✅✅✅ 全部完成! 總耗時: {total_time:.1f}秒")
152
- print(f"{'='*60}\n")
153
 
154
- return trad, summary
 
155
 
156
  # ====== Gradio UI 函式 ======
157
  def transcribe_ui(password, file):
158
- print(f"\n🌐 [UI] 網頁版請求")
159
  if not password or password.strip() != PASSWORD:
160
  return "❌ Password incorrect", "", ""
161
  if not file:
162
- return "⚠️ No file uploaded", "", ""
163
  try:
164
  path = _extract_effective_path(file)
165
  text, summary = transcribe_core(path)
166
- return "✅ Transcription completed", text, summary
167
  except Exception as e:
168
- import traceback
169
- print(f"❌ [UI] 錯誤:\n{traceback.format_exc()}")
170
  return f"❌ Error: {e}", "", ""
171
 
172
- # ====== 建立 FastAPI 應用 ======
173
  fastapi_app = FastAPI()
174
-
175
- # CORS 設定
176
  fastapi_app.add_middleware(
177
  CORSMiddleware,
178
  allow_origins=["*"],
@@ -181,29 +148,15 @@ fastapi_app.add_middleware(
181
  allow_headers=["*"],
182
  )
183
 
184
- # ====== 完全同步的 API 端點 ======
185
  @fastapi_app.post("/api/transcribe")
186
- async def api_transcribe_sync(request: Request):
187
- """
188
- 完全同步的 API 端點 - 直接返回結果,不用輪詢
189
-
190
- 請求格式:
191
- {
192
- "password": "chou",
193
- "file_data": "data:audio/m4a;base64,...",
194
- "file_name": "recording.m4a"
195
- }
196
- """
197
  try:
198
  body = await request.json()
199
- print(f"\n{'📱'*30}")
200
- print(f"🎯 [SYNC API] 收到同步 API 請求")
201
- print(f"📦 Keys: {list(body.keys())}")
202
- print(f"{'📱'*30}")
203
 
204
  password = body.get("password", "")
205
  if password.strip() != PASSWORD:
206
- print(f"❌ [SYNC API] 密碼錯誤")
207
  return JSONResponse(
208
  status_code=401,
209
  content={"status": "error", "error": "Password incorrect"}
@@ -213,19 +166,13 @@ async def api_transcribe_sync(request: Request):
213
  file_name = body.get("file_name", "recording.m4a")
214
 
215
  if not file_data or not file_data.startswith("data:"):
216
- print(f"❌ [SYNC API] 檔案格式錯誤")
217
  return JSONResponse(
218
  status_code=400,
219
- content={"status": "error", "error": "Invalid file data format"}
220
  )
221
 
222
- print(f"[SYNC API] 檔案長度: {len(file_data)}, 檔名: {file_name}")
223
-
224
- # 直接處理,同步執行
225
  file_dict = {"data": file_data, "orig_name": file_name}
226
  path = _extract_effective_path(file_dict)
227
- print(f"✅ [SYNC API] 檔案解析成功: {path}")
228
-
229
  text, summary = transcribe_core(path)
230
 
231
  result = {
@@ -233,136 +180,44 @@ async def api_transcribe_sync(request: Request):
233
  "transcription": text,
234
  "summary": summary
235
  }
236
-
237
- print(f"\n{'✅'*30}")
238
- print(f"✅✅✅ [SYNC API] 完成! 返回結果")
239
- print(json.dumps(result, ensure_ascii=False, indent=2))
240
- print(f"{'✅'*30}\n")
241
-
242
  return JSONResponse(content=result)
243
 
244
  except Exception as e:
245
  import traceback
246
- error_trace = traceback.format_exc()
247
- print(f"\n{'❌'*30}")
248
- print(f"❌ [SYNC API] 錯誤:\n{error_trace}")
249
- print(f"{'❌'*30}\n")
250
  return JSONResponse(
251
  status_code=500,
252
  content={"status": "error", "error": str(e)}
253
  )
254
 
255
  # ====== Gradio 介面 ======
256
- with gr.Blocks(
257
- theme=gr.themes.Soft(),
258
- title="LINE Audio Transcription",
259
- css="""
260
- /* 手機優化樣式 */
261
- @media (max-width: 768px) {
262
- .gradio-container {
263
- padding: 8px !important;
264
- }
265
- /* 限制檔案上傳區塊高度 */
266
- .file-upload {
267
- max-height: 180px !important;
268
- }
269
- /* 限制圖示大小 */
270
- .file-upload svg, .file-upload img {
271
- max-width: 80px !important;
272
- max-height: 80px !important;
273
- }
274
- /* 調整按鈕 */
275
- button {
276
- font-size: 16px !important;
277
- padding: 14px !important;
278
- margin: 8px 0 !important;
279
- }
280
- /* Tab 標籤 */
281
- .tabs button {
282
- padding: 10px 14px !important;
283
- font-size: 14px !important;
284
- }
285
- /* 標題更緊湊 */
286
- h1 {
287
- font-size: 24px !important;
288
- margin: 10px 0 !important;
289
- }
290
- h3 {
291
- font-size: 16px !important;
292
- margin: 8px 0 !important;
293
- }
294
- /* 輸入框 */
295
- input, textarea {
296
- font-size: 16px !important;
297
- }
298
- /* 減少內邊距 */
299
- .block {
300
- padding: 8px !important;
301
- }
302
- }
303
- """
304
- ) as demo:
305
  gr.Markdown("# 🎧 LINE Audio Transcription")
306
 
307
- with gr.Tab("🌐 Web Upload"):
308
- gr.Markdown("### Upload audio from browser")
 
 
 
 
 
 
309
 
310
- pw_ui = gr.Textbox(
311
- label="Password",
312
- type="password",
313
- placeholder="Enter password",
314
- scale=1
315
- )
316
- file_ui = gr.File(
317
- label="Upload Audio File",
318
- file_types=["audio"],
319
- file_count="single",
320
- scale=1,
321
- elem_classes=["file-upload"]
322
- )
323
- btn_ui = gr.Button(
324
- "🚀 Start Transcription",
325
- variant="primary",
326
- size="lg",
327
- scale=1
328
- )
329
-
330
- status_ui = gr.Textbox(
331
- label="Status",
332
- interactive=False,
333
- scale=1
334
- )
335
- transcript_ui = gr.Textbox(
336
- label="Transcription",
337
- lines=6,
338
- placeholder="Transcription will appear here...",
339
- show_copy_button=True,
340
- scale=1
341
- )
342
- summary_ui = gr.Textbox(
343
- label="Summary",
344
- lines=4,
345
- placeholder="Summary will appear here...",
346
- show_copy_button=True,
347
- scale=1
348
- )
349
-
350
- btn_ui.click(
351
- transcribe_ui,
352
- inputs=[pw_ui, file_ui],
353
- outputs=[status_ui, transcript_ui, summary_ui]
354
- )
355
 
356
- with gr.Tab("📱 API"):
357
  gr.Markdown("""
358
- ### iPhone Shortcuts
359
 
360
  **Endpoint:** `POST /api/transcribe`
361
 
362
  **Request:**
363
  ```json
364
  {
365
- "password": "your_password",
366
  "file_data": "data:audio/m4a;base64,...",
367
  "file_name": "recording.m4a"
368
  }
@@ -379,43 +234,32 @@ with gr.Blocks(
379
 
380
  ---
381
 
382
- Fully synchronous
383
  ✅ No polling needed
384
  ✅ Works with any audio length
385
 
386
  ---
387
 
388
- **Setup Steps:**
389
-
390
  1. Get file → Audio
391
  2. Base64 encode
392
  3. Text: `data:audio/m4a;base64,[encoded]`
393
- 4. Dictionary with 3 text fields:
394
  - `password`: `chou`
395
  - `file_data`: Step 3
396
  - `file_name`: `recording.m4a`
397
- 5. Get URL contents:
398
- - URL: `/api/transcribe`
399
- - Method: POST
400
- - Header: `Content-Type: application/json`
401
- - Body: Step 4 (JSON)
402
- 6. Get `transcription` & `summary`
403
  """)
404
 
405
- gr.Markdown("""
406
- ---
407
- 💡 **Formats:** MP4, M4A, MP3, WAV, OGG, WEBM | **Max:** 25MB/chunk | 🔒 Password-protected
408
- """)
409
 
410
- # ====== 掛載 Gradio 到 FastAPI ======
411
  app = gr.mount_gradio_app(fastapi_app, demo, path="/")
412
 
413
- # ====== 啟動 ======
414
  if __name__ == "__main__":
415
- print("\n" + "="*60)
416
- print("🚀 啟動 FastAPI + Gradio 應用")
417
- print("📱 同步 API: /api/transcribe")
418
- print("🌐 網頁介面: /")
419
- print("="*60 + "\n")
420
  import uvicorn
421
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
14
  print("===== 🚀 啟動中 =====")
15
  print(f"APP_PASSWORD: {'✅ 已載入' if PASSWORD else '❌ 未載入'}")
16
 
17
+ # ====== 工具函式 ======
18
  MIME_EXT = {
19
  "audio/mp4": "m4a", "audio/m4a": "m4a", "audio/aac": "aac",
20
  "audio/mpeg": "mp3", "audio/wav": "wav", "audio/x-wav": "wav",
 
23
  }
24
 
25
  def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
26
+ print(f" → 處理 data URL, 長度: {len(data_url)}")
27
  try:
28
  header, b64 = data_url.split(",", 1)
29
  except ValueError:
 
31
  mime = header.split(";")[0].split(":", 1)[-1].strip()
32
  ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")
33
  fname = orig_name if (orig_name and "." in orig_name) else f"upload_{uuid.uuid4().hex}.{ext}"
 
34
  with open(fname, "wb") as f:
35
  f.write(base64.b64decode(b64))
36
+ print(f" → 檔案建立: {fname}, {os.path.getsize(fname)} bytes")
 
37
  return fname
38
 
39
  def _extract_effective_path(file_obj) -> str:
40
+ print(f"解析檔案, 類型: {type(file_obj)}")
 
 
 
41
  if isinstance(file_obj, str):
42
  s = file_obj.strip().strip('"')
43
  if s.startswith("data:"):
 
44
  return _dataurl_to_file(s, None)
45
  if os.path.isfile(s):
 
46
  return s
 
 
47
  if isinstance(file_obj, dict):
 
48
  data = file_obj.get("data")
49
  if isinstance(data, str) and data.startswith("data:"):
 
50
  return _dataurl_to_file(data, file_obj.get("orig_name"))
51
  p = str(file_obj.get("path") or "").strip().strip('"')
52
  if p and os.path.isfile(p):
53
  return p
 
 
54
  for attr in ("name", "path"):
55
  p = getattr(file_obj, attr, None)
56
  if isinstance(p, str):
57
  s = p.strip().strip('"')
58
  if os.path.isfile(s):
59
  return s
60
+ raise FileNotFoundError("Cannot parse file")
 
61
 
 
62
  def split_audio(path):
63
  size = os.path.getsize(path)
64
+ print(f"檔案大小: {size/1024/1024:.2f} MB")
65
  if size <= MAX_SIZE:
 
66
  return [path]
 
67
  audio = AudioSegment.from_file(path)
68
  n = int(size / MAX_SIZE) + 1
69
  chunk_ms = len(audio) / n
70
+ print(f"分割成 {n} 個片段")
71
  parts = []
72
  for i in range(n):
73
  fn = f"chunk_{i+1}.wav"
 
75
  parts.append(fn)
76
  return parts
77
 
 
78
  def transcribe_core(path, model="whisper-1"):
79
+ print(f"\n{'='*50}\n開始轉錄: {path}\n{'='*50}")
80
+ start = time.time()
 
 
 
81
 
82
  if path.lower().endswith(".mp4"):
83
  fixed = path[:-4] + ".m4a"
 
88
  pass
89
 
90
  chunks = split_audio(path)
91
+ print(f"Whisper 轉錄 ({len(chunks)} 片段)")
92
  raw = []
93
  for i, c in enumerate(chunks, 1):
94
+ print(f"片段 {i}/{len(chunks)}")
95
  with open(c, "rb") as af:
96
  txt = client.audio.transcriptions.create(
97
  model=model, file=af, response_format="text"
98
  )
99
  raw.append(txt)
 
 
100
  raw_txt = "\n".join(raw)
 
101
 
102
+ print("簡轉繁")
103
  conv = client.chat.completions.create(
104
  model="gpt-4o-mini",
105
  messages=[
 
109
  temperature=0.0
110
  )
111
  trad = conv.choices[0].message.content.strip()
 
112
 
113
+ print("AI 摘要")
114
  summ = client.chat.completions.create(
115
  model="gpt-4o-mini",
116
  messages=[
 
119
  ],
120
  temperature=0.2
121
  )
 
 
 
 
 
 
122
 
123
+ print(f"✅ 完成! 耗時: {time.time()-start:.1f}秒\n{'='*50}\n")
124
+ return trad, summ.choices[0].message.content.strip()
125
 
126
  # ====== Gradio UI 函式 ======
127
  def transcribe_ui(password, file):
128
+ print(f"\n🌐 網頁版請求")
129
  if not password or password.strip() != PASSWORD:
130
  return "❌ Password incorrect", "", ""
131
  if not file:
132
+ return "⚠️ No file", "", ""
133
  try:
134
  path = _extract_effective_path(file)
135
  text, summary = transcribe_core(path)
136
+ return "✅ Completed", text, summary
137
  except Exception as e:
138
+ print(f"❌ 錯誤: {e}")
 
139
  return f"❌ Error: {e}", "", ""
140
 
141
+ # ====== FastAPI 應用 ======
142
  fastapi_app = FastAPI()
 
 
143
  fastapi_app.add_middleware(
144
  CORSMiddleware,
145
  allow_origins=["*"],
 
148
  allow_headers=["*"],
149
  )
150
 
 
151
  @fastapi_app.post("/api/transcribe")
152
+ async def api_transcribe(request: Request):
153
+ """同步 API 端點"""
 
 
 
 
 
 
 
 
 
154
  try:
155
  body = await request.json()
156
+ print(f"\n📱 API 請求")
 
 
 
157
 
158
  password = body.get("password", "")
159
  if password.strip() != PASSWORD:
 
160
  return JSONResponse(
161
  status_code=401,
162
  content={"status": "error", "error": "Password incorrect"}
 
166
  file_name = body.get("file_name", "recording.m4a")
167
 
168
  if not file_data or not file_data.startswith("data:"):
 
169
  return JSONResponse(
170
  status_code=400,
171
+ content={"status": "error", "error": "Invalid file data"}
172
  )
173
 
 
 
 
174
  file_dict = {"data": file_data, "orig_name": file_name}
175
  path = _extract_effective_path(file_dict)
 
 
176
  text, summary = transcribe_core(path)
177
 
178
  result = {
 
180
  "transcription": text,
181
  "summary": summary
182
  }
183
+ print(f"✅ API 完成\n{json.dumps(result, ensure_ascii=False, indent=2)}\n")
 
 
 
 
 
184
  return JSONResponse(content=result)
185
 
186
  except Exception as e:
187
  import traceback
188
+ print(f"❌ API 錯誤:\n{traceback.format_exc()}")
 
 
 
189
  return JSONResponse(
190
  status_code=500,
191
  content={"status": "error", "error": str(e)}
192
  )
193
 
194
  # ====== Gradio 介面 ======
195
+ demo = gr.Blocks(title="LINE Audio Transcription")
196
+
197
+ with demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  gr.Markdown("# 🎧 LINE Audio Transcription")
199
 
200
+ with gr.Tab("Web Upload"):
201
+ with gr.Column():
202
+ pw = gr.Textbox(label="Password", type="password", placeholder="Enter password")
203
+ audio = gr.Audio(label="Upload Audio", type="filepath", sources=["upload"])
204
+ btn = gr.Button("🚀 Start Transcription", variant="primary")
205
+ status = gr.Textbox(label="Status", interactive=False)
206
+ result = gr.Textbox(label="Transcription", lines=8, show_copy_button=True)
207
+ summary = gr.Textbox(label="Summary", lines=5, show_copy_button=True)
208
 
209
+ btn.click(transcribe_ui, inputs=[pw, audio], outputs=[status, result, summary])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
 
211
+ with gr.Tab("API Info"):
212
  gr.Markdown("""
213
+ ### iPhone Shortcuts Integration
214
 
215
  **Endpoint:** `POST /api/transcribe`
216
 
217
  **Request:**
218
  ```json
219
  {
220
+ "password": "chou",
221
  "file_data": "data:audio/m4a;base64,...",
222
  "file_name": "recording.m4a"
223
  }
 
234
 
235
  ---
236
 
237
+ Synchronous - returns directly
238
  ✅ No polling needed
239
  ✅ Works with any audio length
240
 
241
  ---
242
 
243
+ **Setup:**
 
244
  1. Get file → Audio
245
  2. Base64 encode
246
  3. Text: `data:audio/m4a;base64,[encoded]`
247
+ 4. Dictionary (3 text fields):
248
  - `password`: `chou`
249
  - `file_data`: Step 3
250
  - `file_name`: `recording.m4a`
251
+ 5. Get URL: `/api/transcribe` (POST, JSON)
252
+ 6. Extract `transcription` & `summary`
 
 
 
 
253
  """)
254
 
255
+ gr.Markdown("💡 **Formats:** MP4, M4A, MP3, WAV, OGG, WEBM | **Max:** 25MB/chunk")
 
 
 
256
 
257
+ # ====== 掛載與啟動 ======
258
  app = gr.mount_gradio_app(fastapi_app, demo, path="/")
259
 
 
260
  if __name__ == "__main__":
261
+ print("\n🚀 啟動應用")
262
+ print("📱 API: /api/transcribe")
263
+ print("🌐 Web: /\n")
 
 
264
  import uvicorn
265
  uvicorn.run(app, host="0.0.0.0", port=7860)