MichaelChou0806 committed on
Commit
ec64510
·
verified ·
1 Parent(s): ebd798f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +143 -36
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import os, shutil, base64, uuid, mimetypes
2
  from pydub import AudioSegment
3
  from openai import OpenAI
4
  import gradio as gr
@@ -20,77 +20,131 @@ MIME_EXT = {
20
  }
21
 
def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
    """Decode a Base64 data URL and write it to a file in the working directory.

    Args:
        data_url: String of the form ``data:<mime>;base64,<payload>``.
        orig_name: Optional client-supplied file name. Only its final path
            component is used, and only when it contains an extension.

    Returns:
        The name of the file that was written.

    Raises:
        ValueError: If ``data_url`` has no comma separator or the payload is
            not valid Base64 (``binascii.Error`` is a ``ValueError`` subclass).
    """
    try:
        header, b64 = data_url.split(",", 1)
    except ValueError:
        raise ValueError("data URL format error") from None

    mime = header.split(";")[0].split(":", 1)[-1].strip()
    ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")

    # The name comes from the client: drop any directory part (both separator
    # styles) so the write cannot escape the working directory.
    safe_name = os.path.basename((orig_name or "").replace("\\", "/")).strip()
    if safe_name and "." in safe_name.strip("."):
        # A random prefix keeps concurrent uploads with the same name apart.
        fname = f"{uuid.uuid4().hex[:8]}_{safe_name}"
    else:
        fname = f"upload_{uuid.uuid4().hex}.{ext}"

    with open(fname, "wb") as f:
        f.write(base64.b64decode(b64))
    return fname
33
 
def _extract_effective_path(file_obj) -> str:
    """Resolve an uploaded-file value to a path on disk.

    Accepts a string (data URL or path), a dict (``data`` data URL or
    ``path`` entry), or any object exposing a ``name`` or ``path`` string
    attribute. Raises FileNotFoundError when none of these yields a file.
    """

    def _unquote(text):
        # Trim surrounding whitespace, then surrounding double quotes.
        return text.strip().strip('"')

    # String input: either an inline data URL or a path.
    if isinstance(file_obj, str):
        candidate = _unquote(file_obj)
        if candidate.startswith("data:"):
            return _dataurl_to_file(candidate, None)
        if os.path.isfile(candidate):
            return candidate

    # Dict input: prefer an inline data URL, then fall back to "path".
    if isinstance(file_obj, dict):
        payload = file_obj.get("data")
        if isinstance(payload, str) and payload.startswith("data:"):
            return _dataurl_to_file(payload, file_obj.get("orig_name"))
        candidate = _unquote(str(file_obj.get("path") or ""))
        if candidate and os.path.isfile(candidate):
            return candidate

    # Object input: probe the attributes in a fixed order.
    for attr_name in ("name", "path"):
        value = getattr(file_obj, attr_name, None)
        if not isinstance(value, str):
            continue
        candidate = _unquote(value)
        if os.path.isfile(candidate):
            return candidate

    raise FileNotFoundError("Cannot parse uploaded file")
59
 
60
  # ====== 分段處理 ======
def split_audio(path):
    """Split an audio file into WAV chunks that each fit within MAX_SIZE.

    Args:
        path: Path of the audio file to inspect.

    Returns:
        ``[path]`` unchanged when the file is not larger than MAX_SIZE;
        otherwise a list of newly exported WAV chunk file names, in order.
    """
    size = os.path.getsize(path)
    if size <= MAX_SIZE:
        return [path]

    audio = AudioSegment.from_file(path)

    # Chunks are exported as uncompressed WAV, so their size follows the
    # decoded PCM length, not the (usually compressed) source size. Take the
    # larger of the two estimates; the small reserve covers the WAV header
    # and millisecond rounding at chunk boundaries.
    budget = max(MAX_SIZE - 4096, 1)
    pcm_bytes = len(audio.raw_data)
    n = max(int(size / MAX_SIZE) + 1, -(-pcm_bytes // budget))
    chunk_ms = len(audio) / n

    # A per-call tag keeps concurrent requests from overwriting each other.
    tag = uuid.uuid4().hex
    parts = []
    for i in range(n):
        fn = f"chunk_{tag}_{i+1}.wav"
        audio[int(i * chunk_ms):int((i + 1) * chunk_ms)].export(fn, format="wav")
        parts.append(fn)
    return parts
74
 
75
  # ====== 轉錄核心 ======
76
  def transcribe_core(path, model="whisper-1"):
 
 
 
 
 
 
 
77
  if path.lower().endswith(".mp4"):
 
78
  fixed = path[:-4] + ".m4a"
79
  try:
80
  shutil.copy(path, fixed)
81
  path = fixed
82
- except:
83
- pass
 
 
 
84
  chunks = split_audio(path)
 
 
 
85
  raw = []
86
- for c in chunks:
 
 
87
  with open(c, "rb") as af:
88
  txt = client.audio.transcriptions.create(
89
  model=model, file=af, response_format="text"
90
  )
91
  raw.append(txt)
 
 
 
 
92
  raw_txt = "\n".join(raw)
 
 
93
 
 
 
94
  conv = client.chat.completions.create(
95
  model="gpt-4o-mini",
96
  messages=[
@@ -100,7 +154,13 @@ def transcribe_core(path, model="whisper-1"):
100
  temperature=0.0
101
  )
102
  trad = conv.choices[0].message.content.strip()
 
 
 
 
103
 
 
 
104
  summ = client.chat.completions.create(
105
  model="gpt-4o-mini",
106
  messages=[
@@ -109,74 +169,124 @@ def transcribe_core(path, model="whisper-1"):
109
  ],
110
  temperature=0.2
111
  )
112
- return trad, summ.choices[0].message.content.strip()
 
 
 
 
 
 
 
 
 
 
 
113
 
114
  # ====== Gradio UI 函式 ======
def transcribe_ui(password, file):
    """Handle a transcription request from the web UI.

    Args:
        password: Password typed by the user; surrounding whitespace is ignored.
        file: Uploaded file value, resolved via ``_extract_effective_path``.

    Returns:
        A ``(status_message, transcription, summary)`` tuple. The last two
        items are empty strings on any failure.
    """
    import hmac

    # Never log any part of the password, not even a prefix.
    print("\n🎯 Web UI Request")

    if not isinstance(password, str) or not password:
        return "❌ Password incorrect", "", ""
    # Constant-time comparison; bytes are used because compare_digest rejects
    # non-ASCII str arguments.
    if not (
        isinstance(PASSWORD, str)
        and hmac.compare_digest(password.strip().encode("utf-8"), PASSWORD.encode("utf-8"))
    ):
        return "❌ Password incorrect", "", ""
    if not file:
        return "⚠️ No file uploaded", "", ""

    try:
        path = _extract_effective_path(file)
        text, summary = transcribe_core(path)
        return "✅ Transcription completed", text, summary
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        print(f"❌ Error: {e}")
        return f"❌ Error: {e}", "", ""
129
 
130
- # ====== API 函式 (通過 Gradio 端點呼叫) ======
def transcribe_api(password, file_data, file_name):
    """Handle a transcription request from the API endpoint.

    Args:
        password: Password string; surrounding whitespace is ignored.
        file_data: Data URL string of the form ``data:audio/...;base64,...``.
        file_name: Original file name; ``"recording.m4a"`` is used when empty.

    Returns:
        A dict with ``status`` (``"success"`` or ``"error"``),
        ``transcription`` and ``summary``; error results also carry ``error``.
    """
    import hmac

    def _error(message):
        # Uniform shape for every failure response.
        return {
            "status": "error",
            "error": message,
            "transcription": "",
            "summary": ""
        }

    # Never log any part of the password, not even a prefix.
    print("\n🎯 API Request")
    print(f"📁 File data length: {len(file_data) if isinstance(file_data, str) else 0}")
    print(f"📁 File name: {file_name}")

    if not isinstance(password, str) or not password:
        return _error("Password incorrect")
    # Constant-time comparison; bytes are used because compare_digest rejects
    # non-ASCII str arguments.
    if not (
        isinstance(PASSWORD, str)
        and hmac.compare_digest(password.strip().encode("utf-8"), PASSWORD.encode("utf-8"))
    ):
        return _error("Password incorrect")

    # Guard the type too: a non-string payload has no startswith().
    if not isinstance(file_data, str) or not file_data.startswith("data:"):
        return _error("Invalid file data format. Must be data:audio/...;base64,...")

    path = None
    try:
        file_dict = {
            "data": file_data,
            "orig_name": file_name or "recording.m4a"
        }
        path = _extract_effective_path(file_dict)
        text, summary = transcribe_core(path)
        return {
            "status": "success",
            "transcription": text,
            "summary": summary
        }
    except Exception as e:
        import traceback
        print(f"❌ Error:\n{traceback.format_exc()}")
        return _error(str(e))
    finally:
        # The path always refers to the file decoded from the data URL for
        # this request, so remove it; cleanup is best-effort.
        if path and os.path.isfile(path):
            try:
                os.remove(path)
            except OSError:
                pass
 
 
180
 
181
  # ====== Gradio 介面 ======
182
  with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo:
@@ -224,7 +334,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo
224
  gr.Markdown("""
225
  ### For iPhone Shortcuts & Automation
226
 
227
- This tab provides a Gradio-based API endpoint that accepts Base64-encoded audio.
228
  """)
229
 
230
  with gr.Row():
@@ -263,17 +373,14 @@ with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo
263
  inputs=[pw_api, file_data_api, file_name_api],
264
  outputs=[result_api],
265
  api_name="transcribe",
266
- queue=False
267
  )
268
 
269
  gr.Markdown("""
270
  ---
271
- ### 📖 How to use with iPhone Shortcuts
272
 
273
- **Gradio API Endpoint**:
274
- ```
275
- POST /gradio_api/call/transcribe
276
- ```
277
 
278
  **Request Format (JSON)**:
279
  ```json
@@ -289,16 +396,13 @@ with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo
289
  **Response Format**:
290
  ```json
291
  {
292
- "status": "success",
293
- "transcription": "轉錄內容...",
294
- "summary": "摘要內容..."
 
 
295
  }
296
  ```
297
-
298
- 💡 **Important**:
299
- - The endpoint is `/gradio_api/call/transcribe` (note: `call/transcribe`)
300
- - The `data` array must have exactly 3 items: [password, file_data, file_name]
301
- - Use `queue=false` parameter or set `api_name="transcribe"` in your request
302
  """)
303
 
304
  gr.Markdown("""
@@ -310,8 +414,11 @@ with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo
310
 
311
  # ====== 啟動 ======
if __name__ == "__main__":
    # Launch settings collected in one place; show_api displays the API docs.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_api": True,
    }
    demo.launch(**launch_options)
 
1
+ import os, shutil, base64, uuid, mimetypes, json, time
2
  from pydub import AudioSegment
3
  from openai import OpenAI
4
  import gradio as gr
 
20
  }
21
 
def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
    """Decode a Base64 data URL and write it to a file in the working directory.

    Args:
        data_url: String of the form ``data:<mime>;base64,<payload>``.
        orig_name: Optional client-supplied file name. Only its final path
            component is used, and only when it contains an extension.

    Returns:
        The name of the file that was written.

    Raises:
        ValueError: If ``data_url`` has no comma separator or the payload is
            not valid Base64 (``binascii.Error`` is a ``ValueError`` subclass).
    """
    print(f" → [_dataurl_to_file] 開始處理 data URL...")
    try:
        header, b64 = data_url.split(",", 1)
    except ValueError:
        print(f" → [_dataurl_to_file] ❌ 錯誤: data URL 格式錯誤")
        raise ValueError("data URL format error") from None

    mime = header.split(";")[0].split(":", 1)[-1].strip()
    ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")

    # The name comes from the client: drop any directory part (both separator
    # styles) so the write cannot escape the working directory.
    safe_name = os.path.basename((orig_name or "").replace("\\", "/")).strip()
    if safe_name and "." in safe_name.strip("."):
        # A random prefix keeps concurrent uploads with the same name apart.
        fname = f"{uuid.uuid4().hex[:8]}_{safe_name}"
    else:
        fname = f"upload_{uuid.uuid4().hex}.{ext}"

    print(f" → [_dataurl_to_file] MIME: {mime}, 副檔名: {ext}")
    print(f" → [_dataurl_to_file] 目標檔名: {fname}")
    print(f" → [_dataurl_to_file] Base64 長度: {len(b64)}")
    with open(fname, "wb") as f:
        f.write(base64.b64decode(b64))
    file_size = os.path.getsize(fname)
    print(f" → [_dataurl_to_file] ✅ 檔案已建立, 大小: {file_size} bytes")
    return fname
40
 
def _extract_effective_path(file_obj) -> str:
    """Resolve an uploaded-file value to a path on disk, logging each step.

    Accepts a string (data URL or path), a dict (``data`` data URL or
    ``path`` entry), or any object exposing a ``name`` or ``path`` string
    attribute. Raises FileNotFoundError when none of these yields a file.
    """

    def _unquote(text):
        # Trim surrounding whitespace, then surrounding double quotes.
        return text.strip().strip('"')

    print("\n[_extract_effective_path] 開始解析檔案...")
    print(f"[_extract_effective_path] 收到類型: {type(file_obj)}")
    print(f"[_extract_effective_path] 收到內容前100字: {str(file_obj)[:100]}...")

    # Mode A - string input: either an inline data URL or a path.
    if isinstance(file_obj, str):
        candidate = _unquote(file_obj)
        print(" → [模式 A] 字串模式")
        if candidate.startswith("data:"):
            print(f" → [模式 A] 偵測到 data URL, 長度: {len(candidate)}")
            return _dataurl_to_file(candidate, None)
        if os.path.isfile(candidate):
            print(f" → [模式 A] 找到檔案路徑: {candidate}")
            return candidate

    # Mode B - dict input: prefer an inline data URL, then fall back to "path".
    if isinstance(file_obj, dict):
        print(" → [模式 B] 字典模式")
        print(f" → [模式 B] Keys: {list(file_obj.keys())}")
        payload = file_obj.get("data")
        if isinstance(payload, str) and payload.startswith("data:"):
            print(f" → [模式 B] 找到 data URL! 長度: {len(payload)}")
            return _dataurl_to_file(payload, file_obj.get("orig_name"))
        candidate = _unquote(str(file_obj.get("path") or ""))
        if candidate and os.path.isfile(candidate):
            print(f" → [模式 B] 找到 path: {candidate}")
            return candidate

    # Mode C - object input: probe the attributes in a fixed order.
    print(" → [模式 C] 物件模式")
    for attr_name in ("name", "path"):
        value = getattr(file_obj, attr_name, None)
        if not isinstance(value, str):
            continue
        candidate = _unquote(value)
        if os.path.isfile(candidate):
            print(f" → [模式 C] 找到屬性 {attr_name}: {candidate}")
            return candidate

    print("[_extract_effective_path] ❌ 無法解析檔案")
    raise FileNotFoundError("Cannot parse uploaded file")
83
 
84
  # ====== 分段處理 ======
def split_audio(path):
    """Split an audio file into WAV chunks that each fit within MAX_SIZE.

    Args:
        path: Path of the audio file to inspect.

    Returns:
        ``[path]`` unchanged when the file is not larger than MAX_SIZE;
        otherwise a list of newly exported WAV chunk file names, in order.
    """
    print(f"\n[split_audio] 檢查檔案大小...")
    size = os.path.getsize(path)
    print(f"[split_audio] 檔案大小: {size} bytes ({size/1024/1024:.2f} MB)")
    if size <= MAX_SIZE:
        print(f"[split_audio] 檔案小於 25MB, 不需分割")
        return [path]

    print(f"[split_audio] 檔案大於 25MB, 開始分割...")
    audio = AudioSegment.from_file(path)

    # Chunks are exported as uncompressed WAV, so their size follows the
    # decoded PCM length, not the (usually compressed) source size. Take the
    # larger of the two estimates; the small reserve covers the WAV header
    # and millisecond rounding at chunk boundaries.
    budget = max(MAX_SIZE - 4096, 1)
    pcm_bytes = len(audio.raw_data)
    n = max(int(size / MAX_SIZE) + 1, -(-pcm_bytes // budget))
    chunk_ms = len(audio) / n
    print(f"[split_audio] 將分割成 {n} 個片段, 每段約 {chunk_ms/1000:.1f} 秒")

    # A per-call tag keeps concurrent requests from overwriting each other.
    tag = uuid.uuid4().hex
    parts = []
    for i in range(n):
        fn = f"chunk_{tag}_{i+1}.wav"
        audio[int(i * chunk_ms):int((i + 1) * chunk_ms)].export(fn, format="wav")
        print(f"[split_audio] 已產生片段 {i+1}/{n}: {fn}")
        parts.append(fn)
    return parts
104
 
105
  # ====== 轉錄核心 ======
106
  def transcribe_core(path, model="whisper-1"):
107
+ print(f"\n{'='*60}")
108
+ print(f"[transcribe_core] 開始轉錄流程")
109
+ print(f"[transcribe_core] 檔案路徑: {path}")
110
+ print(f"{'='*60}")
111
+
112
+ start_time = time.time()
113
+
114
  if path.lower().endswith(".mp4"):
115
+ print(f"[transcribe_core] 偵測到 .mp4 檔案, 轉換為 .m4a")
116
  fixed = path[:-4] + ".m4a"
117
  try:
118
  shutil.copy(path, fixed)
119
  path = fixed
120
+ print(f"[transcribe_core] ✅ 已轉換: {path}")
121
+ except Exception as e:
122
+ print(f"[transcribe_core] ⚠️ 轉換失敗: {e}")
123
+
124
+ print(f"\n[transcribe_core] === 步驟 1: 分割音檔 ===")
125
  chunks = split_audio(path)
126
+ print(f"[transcribe_core] 共 {len(chunks)} 個片段")
127
+
128
+ print(f"\n[transcribe_core] === 步驟 2: Whisper 轉錄 ===")
129
  raw = []
130
+ for i, c in enumerate(chunks, 1):
131
+ print(f"[transcribe_core] 轉錄片段 {i}/{len(chunks)}: {c}")
132
+ chunk_start = time.time()
133
  with open(c, "rb") as af:
134
  txt = client.audio.transcriptions.create(
135
  model=model, file=af, response_format="text"
136
  )
137
  raw.append(txt)
138
+ chunk_time = time.time() - chunk_start
139
+ print(f"[transcribe_core] ✅ 片段 {i} 完成 (耗時 {chunk_time:.1f}秒)")
140
+ print(f"[transcribe_core] 片段 {i} 內容: {txt[:100]}...")
141
+
142
  raw_txt = "\n".join(raw)
143
+ print(f"\n[transcribe_core] 原始轉錄總長度: {len(raw_txt)} 字元")
144
+ print(f"[transcribe_core] 原始內容前200字: {raw_txt[:200]}...")
145
 
146
+ print(f"\n[transcribe_core] === 步驟 3: 簡轉繁 ===")
147
+ conv_start = time.time()
148
  conv = client.chat.completions.create(
149
  model="gpt-4o-mini",
150
  messages=[
 
154
  temperature=0.0
155
  )
156
  trad = conv.choices[0].message.content.strip()
157
+ conv_time = time.time() - conv_start
158
+ print(f"[transcribe_core] ✅ 繁體轉換完成 (耗時 {conv_time:.1f}秒)")
159
+ print(f"[transcribe_core] 繁體內容長度: {len(trad)} 字元")
160
+ print(f"[transcribe_core] 繁體內容前200字: {trad[:200]}...")
161
 
162
+ print(f"\n[transcribe_core] === 步驟 4: AI 摘要 ===")
163
+ summ_start = time.time()
164
  summ = client.chat.completions.create(
165
  model="gpt-4o-mini",
166
  messages=[
 
169
  ],
170
  temperature=0.2
171
  )
172
+ summary = summ.choices[0].message.content.strip()
173
+ summ_time = time.time() - summ_start
174
+ print(f"[transcribe_core] ✅ 摘要完成 (耗時 {summ_time:.1f}秒)")
175
+ print(f"[transcribe_core] 摘要內容: {summary}")
176
+
177
+ total_time = time.time() - start_time
178
+ print(f"\n{'='*60}")
179
+ print(f"[transcribe_core] ✅✅✅ 轉錄流程全部完成!")
180
+ print(f"[transcribe_core] 總耗時: {total_time:.1f} 秒")
181
+ print(f"{'='*60}\n")
182
+
183
+ return trad, summary
184
 
185
  # ====== Gradio UI 函式 ======
def transcribe_ui(password, file):
    """Handle a transcription request from the web UI.

    Args:
        password: Password typed by the user; surrounding whitespace is ignored.
        file: Uploaded file value, resolved via ``_extract_effective_path``.

    Returns:
        A ``(status_message, transcription, summary)`` tuple. The last two
        items are empty strings on any failure.
    """
    import hmac

    print(f"\n{'🌐'*30}")
    print(f"🎯 [UI] 收到網頁版請求")
    # Never log any part of the password (neither a prefix nor its length).
    print("🔑 [UI] 密碼: ***")
    print(f"📁 [UI] 檔案類型: {type(file)}")
    print(f"{'🌐'*30}")

    authorized = isinstance(password, str) and bool(password)
    if authorized:
        # Constant-time comparison; bytes are used because compare_digest
        # rejects non-ASCII str arguments.
        authorized = isinstance(PASSWORD, str) and hmac.compare_digest(
            password.strip().encode("utf-8"), PASSWORD.encode("utf-8")
        )
    if not authorized:
        print(f"❌ [UI] 密碼驗證失敗")
        return "❌ Password incorrect", "", ""
    if not file:
        print(f"❌ [UI] 未收到檔案")
        return "⚠️ No file uploaded", "", ""

    try:
        path = _extract_effective_path(file)
        print(f"✅ [UI] 檔案解析成功: {path}")
        text, summary = transcribe_core(path)
        print(f"✅ [UI] 轉錄完成, 準備返回結果")
        return "✅ Transcription completed", text, summary
    except Exception as e:
        # UI boundary: log the traceback and report the failure to the user.
        import traceback
        error_trace = traceback.format_exc()
        print(f"❌ [UI] 發生錯誤:\n{error_trace}")
        return f"❌ Error: {e}", "", ""
212
 
213
+ # ====== API 函式 ======
def transcribe_api(password, file_data, file_name):
    """Handle a transcription request from the API endpoint.

    Args:
        password: Password string; surrounding whitespace is ignored.
        file_data: Data URL string of the form ``data:audio/...;base64,...``.
        file_name: Original file name; ``"recording.m4a"`` is used when empty.

    Returns:
        A dict with ``status`` (``"success"`` or ``"error"``),
        ``transcription`` and ``summary``; error results also carry ``error``.
    """
    import hmac

    def _error(message):
        # Uniform shape for every failure response.
        return {
            "status": "error",
            "error": message,
            "transcription": "",
            "summary": ""
        }

    print(f"\n{'📱'*30}")
    print(f"🎯 [API] 收到 API 請求")
    # Never log any part of the password (neither a prefix nor its length).
    print("🔑 [API] 密碼: ***")
    print(f"📁 [API] file_data 類型: {type(file_data)}")
    print(f"📁 [API] file_data 長度: {len(file_data) if isinstance(file_data, str) else 0}")
    print(f"📁 [API] file_data 前50字: {str(file_data)[:50] if file_data else 'None'}...")
    print(f"📁 [API] file_name: {file_name}")
    print(f"{'📱'*30}")

    authorized = isinstance(password, str) and bool(password)
    if authorized:
        # Constant-time comparison; bytes are used because compare_digest
        # rejects non-ASCII str arguments.
        authorized = isinstance(PASSWORD, str) and hmac.compare_digest(
            password.strip().encode("utf-8"), PASSWORD.encode("utf-8")
        )
    if not authorized:
        result = _error("Password incorrect")
        print(f"❌ [API] 密碼驗證失敗")
        print(f"[API] 返回結果: {json.dumps(result, ensure_ascii=False, indent=2)}")
        return result

    # Guard the type too: a non-string payload has no startswith().
    if not isinstance(file_data, str) or not file_data.startswith("data:"):
        result = _error("Invalid file data format. Must be data:audio/...;base64,...")
        print(f"❌ [API] 檔案格式錯誤")
        print(f"[API] 返回結果: {json.dumps(result, ensure_ascii=False, indent=2)}")
        return result

    path = None
    try:
        file_dict = {
            "data": file_data,
            "orig_name": file_name or "recording.m4a"
        }
        print(f"[API] 開始解析檔案...")
        path = _extract_effective_path(file_dict)
        print(f"✅ [API] 檔案解析成功: {path}")

        print(f"[API] 開始轉錄流程...")
        text, summary = transcribe_core(path)

        result = {
            "status": "success",
            "transcription": text,
            "summary": summary
        }
        print(f"\n{'✅'*30}")
        print(f"✅✅✅ [API] 全部完成!")
        print(f"[API] 轉錄長度: {len(text)} 字元")
        print(f"[API] 摘要長度: {len(summary)} 字元")
        print(f"[API] 返回結果:")
        print(json.dumps(result, ensure_ascii=False, indent=2))
        print(f"{'✅'*30}\n")
        return result

    except Exception as e:
        import traceback
        error_trace = traceback.format_exc()
        print(f"\n{'❌'*30}")
        print(f"❌ [API] 發生錯誤:")
        print(error_trace)
        print(f"{'❌'*30}\n")
        result = _error(str(e))
        print(f"[API] 返回錯誤結果: {json.dumps(result, ensure_ascii=False, indent=2)}")
        return result

    finally:
        # The path always refers to the file decoded from the data URL for
        # this request, so remove it; cleanup is best-effort.
        if path and os.path.isfile(path):
            try:
                os.remove(path)
            except OSError:
                pass
290
 
291
  # ====== Gradio 介面 ======
292
  with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo:
 
334
  gr.Markdown("""
335
  ### For iPhone Shortcuts & Automation
336
 
337
+ Test the API endpoint here before using in iPhone Shortcuts.
338
  """)
339
 
340
  with gr.Row():
 
373
  inputs=[pw_api, file_data_api, file_name_api],
374
  outputs=[result_api],
375
  api_name="transcribe",
376
+ queue=False # 🔴 關鍵: 禁用 queue
377
  )
378
 
379
  gr.Markdown("""
380
  ---
381
+ ### 📖 iPhone Shortcuts Configuration
382
 
383
+ **Endpoint**: `/gradio_api/call/transcribe`
 
 
 
384
 
385
  **Request Format (JSON)**:
386
  ```json
 
396
  **Response Format**:
397
  ```json
398
  {
399
+ "data": {
400
+ "status": "success",
401
+ "transcription": "轉錄內容...",
402
+ "summary": "摘要..."
403
+ }
404
  }
405
  ```
 
 
 
 
 
406
  """)
407
 
408
  gr.Markdown("""
 
414
 
415
  # ====== 啟動 ======
if __name__ == "__main__":
    # Print a startup banner, then launch the Gradio app.
    banner = "=" * 60
    print("\n" + banner)
    print("準備啟動 Gradio 應用...")
    print(banner + "\n")

    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_api": True,
    }
    demo.launch(**launch_options)