MichaelChou0806 committed on
Commit
d339fc0
·
verified ·
1 Parent(s): 7ecf1a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -95
app.py CHANGED
@@ -7,7 +7,7 @@ from fastapi.responses import JSONResponse
7
  from fastapi.middleware.cors import CORSMiddleware
8
 
9
  # ====== 基本設定 ======
10
- PASSWORD = os.getenv("APP_PASSWORD") # 從環境變數讀取,不設預設值
11
  if not PASSWORD:
12
  raise ValueError("APP_PASSWORD environment variable is not set!")
13
 
@@ -43,11 +43,10 @@ def _extract_effective_path(file_obj) -> str:
43
  print(f"[DEBUG] 檔案物件類型: {type(file_obj)}")
44
  print(f"[DEBUG] 檔案物件內容: {file_obj}")
45
 
46
- # 處理 None
47
  if file_obj is None:
48
  raise FileNotFoundError("File object is None")
49
 
50
- # 如果是字串路徑
51
  if isinstance(file_obj, str):
52
  s = file_obj.strip().strip('"')
53
  print(f"[DEBUG] 字串路徑: {s}")
@@ -56,44 +55,44 @@ def _extract_effective_path(file_obj) -> str:
56
  if os.path.isfile(s):
57
  return s
58
 
59
- # 如果是字典
60
  if isinstance(file_obj, dict):
61
  print(f"[DEBUG] 字典 keys: {list(file_obj.keys())}")
62
 
63
- # 嘗試 data URL
64
  data = file_obj.get("data")
65
  if isinstance(data, str) and data.startswith("data:"):
66
  return _dataurl_to_file(data, file_obj.get("orig_name"))
67
 
68
- # 嘗試 path
69
  for key in ["path", "name", "file", "filepath"]:
70
  p = file_obj.get(key)
71
  if p and isinstance(p, str):
72
  p = p.strip().strip('"')
73
  if os.path.isfile(p):
74
- print(f"[DEBUG] 找到有效路徑 (key={key}): {p}")
75
  return p
76
 
77
- # 如果是物件,嘗試獲取屬性
78
  for attr in ["name", "path", "file", "filepath"]:
79
  if hasattr(file_obj, attr):
80
  p = getattr(file_obj, attr, None)
81
  if p and isinstance(p, str):
82
  p = p.strip().strip('"')
83
  if os.path.isfile(p):
84
- print(f"[DEBUG] 找到有效路徑 (attr={attr}): {p}")
85
  return p
86
 
87
- # 最後嘗試:直接當作路徑字串
88
  try:
89
  path_str = str(file_obj).strip().strip('"')
90
  if os.path.isfile(path_str):
91
- print(f"[DEBUG] 直接轉換為路徑: {path_str}")
92
  return path_str
93
  except:
94
  pass
95
 
96
- raise FileNotFoundError(f"Cannot parse uploaded file: {type(file_obj)} - {file_obj}")
97
 
98
  def split_audio(path):
99
  """將音訊檔案分割成多個小於 25MB 的片段"""
@@ -119,7 +118,7 @@ def transcribe_core(path, model="whisper-1"):
119
 
120
  start_time = time.time()
121
 
122
- # 處理 MP4 格式
123
  if path.lower().endswith(".mp4"):
124
  fixed = path[:-4] + ".m4a"
125
  try:
@@ -164,15 +163,15 @@ def transcribe_core(path, model="whisper-1"):
164
  summary = summ.choices[0].message.content.strip()
165
 
166
  total_time = time.time() - start_time
167
- print(f"[transcribe_core] ✅ 全部完成! 總耗時: {total_time:.1f}秒\n")
168
 
169
  return trad, summary
170
 
171
  # ====== Gradio UI 函式 ======
172
  def transcribe_web(password, audio_file):
173
- """網頁版轉錄處理 - 必須返回三個值"""
174
  print(f"\n{'='*60}")
175
- print(f"🌐 [WEB] 收到網頁請求")
176
  print(f"密碼: {'已提供' if password else '未提供'}")
177
  print(f"檔案: {audio_file}")
178
  print(f"{'='*60}")
@@ -183,35 +182,35 @@ def transcribe_web(password, audio_file):
183
  return "❌ Please enter password", "", ""
184
 
185
  if password.strip() != PASSWORD:
186
- print(f"[WEB] ❌ 密碼錯誤: '{password}' != '{PASSWORD}'")
187
  return "❌ Incorrect password", "", ""
188
 
189
  # 檢查檔案
190
  if not audio_file:
191
  print("[WEB] ❌ 未上傳檔案")
192
- return "⚠️ Please upload an audio file", "", ""
193
 
194
  try:
195
  # 處理檔案
196
- print(f"[WEB] 開始處理檔案...")
197
  path = _extract_effective_path(audio_file)
198
- print(f"[WEB] ✅ 檔案路徑: {path}")
199
 
200
  # 轉錄
201
  print(f"[WEB] 開始轉錄...")
202
  text, summary = transcribe_core(path)
203
 
204
- # 統計資訊
205
  char_count = len(text)
206
- status = f"✅ Completed! ({char_count} characters)"
207
 
208
- print(f"[WEB] ✅ 轉錄成功\n")
209
  return status, text, summary
210
 
211
  except Exception as e:
212
  import traceback
213
  error_msg = traceback.format_exc()
214
- print(f"❌ [WEB] 發生錯誤:\n{error_msg}\n")
215
  return f"❌ Error: {str(e)}", "", ""
216
 
217
  # ====== FastAPI 應用 ======
@@ -231,7 +230,7 @@ async def api_transcribe(request: Request):
231
  try:
232
  body = await request.json()
233
  print(f"\n{'='*60}")
234
- print(f"📱 [API] 收到 API 請求")
235
  print(f"{'='*60}")
236
 
237
  # 驗證密碼
@@ -243,7 +242,7 @@ async def api_transcribe(request: Request):
243
  content={"status": "error", "error": "Password incorrect"}
244
  )
245
 
246
- # 檢查檔案資料
247
  file_data = body.get("file_data", "")
248
  file_name = body.get("file_name", "recording.m4a")
249
 
@@ -251,13 +250,13 @@ async def api_transcribe(request: Request):
251
  print(f"[API] ❌ 檔案格式錯誤")
252
  return JSONResponse(
253
  status_code=400,
254
- content={"status": "error", "error": "Invalid file data format"}
255
  )
256
 
257
  # 處理檔案
258
  file_dict = {"data": file_data, "orig_name": file_name}
259
  path = _extract_effective_path(file_dict)
260
- print(f"[API] ✅ 檔案解析成功: {path}")
261
 
262
  # 轉錄
263
  text, summary = transcribe_core(path)
@@ -268,86 +267,123 @@ async def api_transcribe(request: Request):
268
  "summary": summary
269
  }
270
 
271
- print(f"[API] ✅ 轉錄成功\n")
272
  return JSONResponse(content=result)
273
 
274
  except Exception as e:
275
  import traceback
276
  error_trace = traceback.format_exc()
277
- print(f"❌ [API] 發生錯誤:\n{error_trace}\n")
278
  return JSONResponse(
279
  status_code=500,
280
  content={"status": "error", "error": str(e)}
281
  )
282
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  # ====== Gradio 介面 ======
284
- with gr.Blocks(title="Audio Transcription", theme=gr.themes.Soft()) as demo:
 
 
 
 
285
 
286
- gr.Markdown("""
287
- # 🎧 Audio Transcription Service
288
- ### AI-Powered Speech-to-Text with Summarization
289
- """)
290
 
291
- with gr.Row():
292
- with gr.Column(scale=1):
293
- gr.Markdown("### 📤 Upload")
294
-
295
- password_input = gr.Textbox(
296
- label="Password",
297
- type="password",
298
- placeholder="Enter password",
299
- max_lines=1
300
- )
301
-
302
- audio_input = gr.File(
303
- label="Audio File",
304
- file_types=["audio/*", ".mp3", ".m4a", ".wav", ".ogg", ".webm", ".mp4"],
305
- file_count="single",
306
- type="filepath"
307
- )
308
-
309
- submit_btn = gr.Button(
310
- "🚀 Start Transcription",
311
- variant="primary",
312
- size="lg"
313
- )
314
-
315
- gr.Markdown("""
316
- **Supported formats:**
317
- MP3, M4A, WAV, OGG, WEBM, MP4
318
-
319
- **Processing:**
320
- Automatic chunking for large files
321
- """)
322
-
323
- with gr.Column(scale=2):
324
- gr.Markdown("### 📊 Results")
325
-
326
- status_output = gr.Textbox(
327
- label="Status",
328
- interactive=False,
329
- lines=1,
330
- max_lines=2
331
- )
332
-
333
- transcription_output = gr.Textbox(
334
- label="Transcription",
335
- lines=12,
336
- interactive=True,
337
- show_copy_button=True
338
- )
339
-
340
- summary_output = gr.Textbox(
341
- label="Summary",
342
- lines=6,
343
- interactive=True,
344
- show_copy_button=True
345
- )
346
 
347
- gr.Markdown("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
  gr.Markdown("""
350
- ## 📱 API Integration
351
 
352
  **Endpoint:** `POST /api/transcribe`
353
 
@@ -370,12 +406,11 @@ with gr.Blocks(title="Audio Transcription", theme=gr.themes.Soft()) as demo:
370
  ```
371
  """)
372
 
373
- # 事件綁定 - 這是關鍵!
374
  submit_btn.click(
375
  fn=transcribe_web,
376
  inputs=[password_input, audio_input],
377
- outputs=[status_output, transcription_output, summary_output],
378
- api_name="transcribe"
379
  )
380
 
381
  # ====== 掛載到 FastAPI ======
 
7
  from fastapi.middleware.cors import CORSMiddleware
8
 
9
  # ====== 基本設定 ======
10
+ PASSWORD = os.getenv("APP_PASSWORD")
11
  if not PASSWORD:
12
  raise ValueError("APP_PASSWORD environment variable is not set!")
13
 
 
43
  print(f"[DEBUG] 檔案物件類型: {type(file_obj)}")
44
  print(f"[DEBUG] 檔案物件內容: {file_obj}")
45
 
 
46
  if file_obj is None:
47
  raise FileNotFoundError("File object is None")
48
 
49
+ # 字串路徑
50
  if isinstance(file_obj, str):
51
  s = file_obj.strip().strip('"')
52
  print(f"[DEBUG] 字串路徑: {s}")
 
55
  if os.path.isfile(s):
56
  return s
57
 
58
+ # 字典格式
59
  if isinstance(file_obj, dict):
60
  print(f"[DEBUG] 字典 keys: {list(file_obj.keys())}")
61
 
62
+ # data URL
63
  data = file_obj.get("data")
64
  if isinstance(data, str) and data.startswith("data:"):
65
  return _dataurl_to_file(data, file_obj.get("orig_name"))
66
 
67
+ # 路徑
68
  for key in ["path", "name", "file", "filepath"]:
69
  p = file_obj.get(key)
70
  if p and isinstance(p, str):
71
  p = p.strip().strip('"')
72
  if os.path.isfile(p):
73
+ print(f"[DEBUG] 找到路徑 (key={key}): {p}")
74
  return p
75
 
76
+ # 物件屬性
77
  for attr in ["name", "path", "file", "filepath"]:
78
  if hasattr(file_obj, attr):
79
  p = getattr(file_obj, attr, None)
80
  if p and isinstance(p, str):
81
  p = p.strip().strip('"')
82
  if os.path.isfile(p):
83
+ print(f"[DEBUG] 找到路徑 (attr={attr}): {p}")
84
  return p
85
 
86
+ # 直接轉換
87
  try:
88
  path_str = str(file_obj).strip().strip('"')
89
  if os.path.isfile(path_str):
90
+ print(f"[DEBUG] 直接路徑: {path_str}")
91
  return path_str
92
  except:
93
  pass
94
 
95
+ raise FileNotFoundError(f"Cannot parse file: {type(file_obj)} - {file_obj}")
96
 
97
  def split_audio(path):
98
  """將音訊檔案分割成多個小於 25MB 的片段"""
 
118
 
119
  start_time = time.time()
120
 
121
+ # 處理 MP4
122
  if path.lower().endswith(".mp4"):
123
  fixed = path[:-4] + ".m4a"
124
  try:
 
163
  summary = summ.choices[0].message.content.strip()
164
 
165
  total_time = time.time() - start_time
166
+ print(f"[transcribe_core] ✅ 完成! 耗時: {total_time:.1f}秒\n")
167
 
168
  return trad, summary
169
 
170
  # ====== Gradio UI 函式 ======
171
  def transcribe_web(password, audio_file):
172
+ """網頁版轉錄處理"""
173
  print(f"\n{'='*60}")
174
+ print(f"🌐 [WEB] 收到請求")
175
  print(f"密碼: {'已提供' if password else '未提供'}")
176
  print(f"檔案: {audio_file}")
177
  print(f"{'='*60}")
 
182
  return "❌ Please enter password", "", ""
183
 
184
  if password.strip() != PASSWORD:
185
+ print(f"[WEB] ❌ 密碼錯誤")
186
  return "❌ Incorrect password", "", ""
187
 
188
  # 檢查檔案
189
  if not audio_file:
190
  print("[WEB] ❌ 未上傳檔案")
191
+ return "⚠️ Please upload audio file", "", ""
192
 
193
  try:
194
  # 處理檔案
195
+ print(f"[WEB] 處理檔案...")
196
  path = _extract_effective_path(audio_file)
197
+ print(f"[WEB] ✅ 檔案: {path}")
198
 
199
  # 轉錄
200
  print(f"[WEB] 開始轉錄...")
201
  text, summary = transcribe_core(path)
202
 
203
+ # 統計
204
  char_count = len(text)
205
+ status = f"✅ Completed! ({char_count} chars)"
206
 
207
+ print(f"[WEB] ✅ 成功\n")
208
  return status, text, summary
209
 
210
  except Exception as e:
211
  import traceback
212
  error_msg = traceback.format_exc()
213
+ print(f"❌ [WEB] 錯誤:\n{error_msg}\n")
214
  return f"❌ Error: {str(e)}", "", ""
215
 
216
  # ====== FastAPI 應用 ======
 
230
  try:
231
  body = await request.json()
232
  print(f"\n{'='*60}")
233
+ print(f"📱 [API] 收到請求")
234
  print(f"{'='*60}")
235
 
236
  # 驗證密碼
 
242
  content={"status": "error", "error": "Password incorrect"}
243
  )
244
 
245
+ # 檢查檔案
246
  file_data = body.get("file_data", "")
247
  file_name = body.get("file_name", "recording.m4a")
248
 
 
250
  print(f"[API] ❌ 檔案格式錯誤")
251
  return JSONResponse(
252
  status_code=400,
253
+ content={"status": "error", "error": "Invalid file format"}
254
  )
255
 
256
  # 處理檔案
257
  file_dict = {"data": file_data, "orig_name": file_name}
258
  path = _extract_effective_path(file_dict)
259
+ print(f"[API] ✅ 檔案: {path}")
260
 
261
  # 轉錄
262
  text, summary = transcribe_core(path)
 
267
  "summary": summary
268
  }
269
 
270
+ print(f"[API] ✅ 成功\n")
271
  return JSONResponse(content=result)
272
 
273
  except Exception as e:
274
  import traceback
275
  error_trace = traceback.format_exc()
276
+ print(f"❌ [API] 錯誤:\n{error_trace}\n")
277
  return JSONResponse(
278
  status_code=500,
279
  content={"status": "error", "error": str(e)}
280
  )
281
 
282
+ # ====== 手機優化 CSS ======
283
+ mobile_css = """
284
+ /* 基本響應式 */
285
+ .gradio-container {
286
+ max-width: 100% !important;
287
+ padding: 0.5rem !important;
288
+ }
289
+
290
+ /* 手機優化 */
291
+ @media (max-width: 768px) {
292
+ .gradio-container {
293
+ padding: 0.25rem !important;
294
+ }
295
+
296
+ /* 標題縮小 */
297
+ h1 {
298
+ font-size: 1.5rem !important;
299
+ }
300
+
301
+ h3 {
302
+ font-size: 1.1rem !important;
303
+ }
304
+
305
+ /* 按鈕加大點擊區域 */
306
+ button {
307
+ min-height: 44px !important;
308
+ font-size: 1rem !important;
309
+ }
310
+
311
+ /* 輸入框 */
312
+ input, textarea {
313
+ font-size: 16px !important; /* 防止手機自動縮放 */
314
+ }
315
+
316
+ /* 行布局改為列布局 */
317
+ .row {
318
+ flex-direction: column !important;
319
+ }
320
+
321
+ .column {
322
+ width: 100% !important;
323
+ max-width: 100% !important;
324
+ }
325
+ }
326
+
327
+ /* 確保文字可選取和複製 */
328
+ textarea {
329
+ user-select: text !important;
330
+ -webkit-user-select: text !important;
331
+ }
332
+ """
333
+
334
  # ====== Gradio 介面 ======
335
+ with gr.Blocks(
336
+ title="Audio Transcription",
337
+ theme=gr.themes.Soft(),
338
+ css=mobile_css
339
+ ) as demo:
340
 
341
+ gr.Markdown("# 🎧 Audio Transcription")
342
+ gr.Markdown("AI-Powered Speech-to-Text")
 
 
343
 
344
+ # 密碼輸入
345
+ password_input = gr.Textbox(
346
+ label="Password",
347
+ type="password",
348
+ placeholder="Enter password"
349
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
 
351
+ # 檔案上傳 - 使用最基本的 File 組件
352
+ audio_input = gr.File(
353
+ label="Audio File (MP3, M4A, WAV, etc.)",
354
+ type="filepath"
355
+ )
356
+
357
+ # 提交按鈕
358
+ submit_btn = gr.Button(
359
+ "🚀 Start Transcription",
360
+ variant="primary",
361
+ size="lg"
362
+ )
363
+
364
+ # 狀態顯示
365
+ status_output = gr.Textbox(
366
+ label="Status",
367
+ interactive=False
368
+ )
369
 
370
+ # 轉錄結果
371
+ transcription_output = gr.Textbox(
372
+ label="Transcription",
373
+ lines=10,
374
+ max_lines=20
375
+ )
376
+
377
+ # 摘要
378
+ summary_output = gr.Textbox(
379
+ label="Summary",
380
+ lines=5,
381
+ max_lines=10
382
+ )
383
+
384
+ gr.Markdown("---")
385
  gr.Markdown("""
386
+ ### 📱 API Integration
387
 
388
  **Endpoint:** `POST /api/transcribe`
389
 
 
406
  ```
407
  """)
408
 
409
+ # 事件綁定
410
  submit_btn.click(
411
  fn=transcribe_web,
412
  inputs=[password_input, audio_input],
413
+ outputs=[status_output, transcription_output, summary_output]
 
414
  )
415
 
416
  # ====== 掛載到 FastAPI ======