MichaelChou0806 committed on
Commit
a433c1d
·
verified ·
1 Parent(s): e402a2d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +171 -63
app.py CHANGED
@@ -14,7 +14,7 @@ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
14
  print("===== 🚀 啟動中 =====")
15
  print(f"APP_PASSWORD: {'✅ 已載入' if PASSWORD else '❌ 未載入'}")
16
 
17
- # ====== 工具函式 ======
18
  MIME_EXT = {
19
  "audio/mp4": "m4a", "audio/m4a": "m4a", "audio/aac": "aac",
20
  "audio/mpeg": "mp3", "audio/wav": "wav", "audio/x-wav": "wav",
@@ -23,7 +23,7 @@ MIME_EXT = {
23
  }
24
 
25
  def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
26
- print(f" → 處理 data URL, 長度: {len(data_url)}")
27
  try:
28
  header, b64 = data_url.split(",", 1)
29
  except ValueError:
@@ -31,43 +31,60 @@ def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
31
  mime = header.split(";")[0].split(":", 1)[-1].strip()
32
  ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")
33
  fname = orig_name if (orig_name and "." in orig_name) else f"upload_{uuid.uuid4().hex}.{ext}"
 
34
  with open(fname, "wb") as f:
35
  f.write(base64.b64decode(b64))
36
- print(f" → 檔案建立: {fname}, {os.path.getsize(fname)} bytes")
 
37
  return fname
38
 
39
  def _extract_effective_path(file_obj) -> str:
40
- print(f"解析檔案, 類型: {type(file_obj)}")
 
 
 
41
  if isinstance(file_obj, str):
42
  s = file_obj.strip().strip('"')
43
  if s.startswith("data:"):
 
44
  return _dataurl_to_file(s, None)
45
  if os.path.isfile(s):
 
46
  return s
 
 
47
  if isinstance(file_obj, dict):
 
48
  data = file_obj.get("data")
49
  if isinstance(data, str) and data.startswith("data:"):
 
50
  return _dataurl_to_file(data, file_obj.get("orig_name"))
51
  p = str(file_obj.get("path") or "").strip().strip('"')
52
  if p and os.path.isfile(p):
53
  return p
 
 
54
  for attr in ("name", "path"):
55
  p = getattr(file_obj, attr, None)
56
  if isinstance(p, str):
57
  s = p.strip().strip('"')
58
  if os.path.isfile(s):
59
  return s
60
- raise FileNotFoundError("Cannot parse file")
 
61
 
 
62
  def split_audio(path):
63
  size = os.path.getsize(path)
64
- print(f"檔案大小: {size/1024/1024:.2f} MB")
65
  if size <= MAX_SIZE:
 
66
  return [path]
 
67
  audio = AudioSegment.from_file(path)
68
  n = int(size / MAX_SIZE) + 1
69
  chunk_ms = len(audio) / n
70
- print(f"分割成 {n} 個片段")
71
  parts = []
72
  for i in range(n):
73
  fn = f"chunk_{i+1}.wav"
@@ -75,9 +92,13 @@ def split_audio(path):
75
  parts.append(fn)
76
  return parts
77
 
 
78
  def transcribe_core(path, model="whisper-1"):
79
- print(f"\n{'='*50}\n開始轉錄: {path}\n{'='*50}")
80
- start = time.time()
 
 
 
81
 
82
  if path.lower().endswith(".mp4"):
83
  fixed = path[:-4] + ".m4a"
@@ -88,18 +109,21 @@ def transcribe_core(path, model="whisper-1"):
88
  pass
89
 
90
  chunks = split_audio(path)
91
- print(f"Whisper 轉錄 ({len(chunks)} 片段)")
92
  raw = []
93
  for i, c in enumerate(chunks, 1):
94
- print(f"片段 {i}/{len(chunks)}")
95
  with open(c, "rb") as af:
96
  txt = client.audio.transcriptions.create(
97
  model=model, file=af, response_format="text"
98
  )
99
  raw.append(txt)
 
 
100
  raw_txt = "\n".join(raw)
 
101
 
102
- print("簡轉繁")
103
  conv = client.chat.completions.create(
104
  model="gpt-4o-mini",
105
  messages=[
@@ -109,8 +133,9 @@ def transcribe_core(path, model="whisper-1"):
109
  temperature=0.0
110
  )
111
  trad = conv.choices[0].message.content.strip()
 
112
 
113
- print("AI 摘要")
114
  summ = client.chat.completions.create(
115
  model="gpt-4o-mini",
116
  messages=[
@@ -119,27 +144,35 @@ def transcribe_core(path, model="whisper-1"):
119
  ],
120
  temperature=0.2
121
  )
 
122
 
123
- print(f"✅ 完成! 耗時: {time.time()-start:.1f}秒\n{'='*50}\n")
124
- return trad, summ.choices[0].message.content.strip()
 
 
 
 
125
 
126
  # ====== Gradio UI 函式 ======
127
  def transcribe_ui(password, file):
128
- print(f"\n🌐 網頁版請求")
129
  if not password or password.strip() != PASSWORD:
130
  return "❌ Password incorrect", "", ""
131
  if not file:
132
- return "⚠️ No file", "", ""
133
  try:
134
  path = _extract_effective_path(file)
135
  text, summary = transcribe_core(path)
136
- return "✅ Completed", text, summary
137
  except Exception as e:
138
- print(f"❌ 錯誤: {e}")
 
139
  return f"❌ Error: {e}", "", ""
140
 
141
- # ====== FastAPI 應用 ======
142
  fastapi_app = FastAPI()
 
 
143
  fastapi_app.add_middleware(
144
  CORSMiddleware,
145
  allow_origins=["*"],
@@ -148,15 +181,29 @@ fastapi_app.add_middleware(
148
  allow_headers=["*"],
149
  )
150
 
 
151
  @fastapi_app.post("/api/transcribe")
152
- async def api_transcribe(request: Request):
153
- """同步 API 端點"""
 
 
 
 
 
 
 
 
 
154
  try:
155
  body = await request.json()
156
- print(f"\n📱 API 請求")
 
 
 
157
 
158
  password = body.get("password", "")
159
  if password.strip() != PASSWORD:
 
160
  return JSONResponse(
161
  status_code=401,
162
  content={"status": "error", "error": "Password incorrect"}
@@ -166,13 +213,19 @@ async def api_transcribe(request: Request):
166
  file_name = body.get("file_name", "recording.m4a")
167
 
168
  if not file_data or not file_data.startswith("data:"):
 
169
  return JSONResponse(
170
  status_code=400,
171
- content={"status": "error", "error": "Invalid file data"}
172
  )
173
 
 
 
 
174
  file_dict = {"data": file_data, "orig_name": file_name}
175
  path = _extract_effective_path(file_dict)
 
 
176
  text, summary = transcribe_core(path)
177
 
178
  result = {
@@ -180,83 +233,138 @@ async def api_transcribe(request: Request):
180
  "transcription": text,
181
  "summary": summary
182
  }
183
- print(f"✅ API 完成\n{json.dumps(result, ensure_ascii=False, indent=2)}\n")
 
 
 
 
 
184
  return JSONResponse(content=result)
185
 
186
  except Exception as e:
187
  import traceback
188
- print(f"❌ API 錯誤:\n{traceback.format_exc()}")
 
 
 
189
  return JSONResponse(
190
  status_code=500,
191
  content={"status": "error", "error": str(e)}
192
  )
193
 
194
  # ====== Gradio 介面 ======
195
- with gr.Blocks(title="LINE Audio Transcription") as demo:
196
- gr.Markdown("# 🎧 LINE Audio Transcription")
197
 
198
- with gr.Tab("Web Upload"):
199
- pw = gr.Textbox(label="Password", type="password", placeholder="Enter password")
200
- audio_file = gr.File(label="Upload Audio", file_types=["audio"])
201
- btn = gr.Button("🚀 Start Transcription", variant="primary")
202
- status = gr.Textbox(label="Status", interactive=False)
203
- result = gr.Textbox(label="Transcription", lines=8, show_copy_button=True)
204
- summary = gr.Textbox(label="Summary", lines=5, show_copy_button=True)
 
 
 
 
205
 
206
- btn.click(transcribe_ui, inputs=[pw, audio_file], outputs=[status, result, summary])
207
 
208
- with gr.Tab("API Info"):
209
  gr.Markdown("""
210
- ### iPhone Shortcuts Integration
 
 
211
 
212
- **Endpoint:** `POST /api/transcribe`
 
 
 
 
213
 
214
- **Request:**
215
  ```json
216
  {
217
- "password": "chou",
218
- "file_data": "data:audio/m4a;base64,...",
219
  "file_name": "recording.m4a"
220
  }
221
  ```
222
 
223
- **Response:**
224
  ```json
225
  {
226
  "status": "success",
227
- "transcription": "...",
228
- "summary": "..."
229
  }
230
  ```
231
 
232
  ---
233
 
234
- Synchronous - returns directly
235
- ✅ No polling needed
236
- ✅ Works with any audio length
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
 
238
  ---
239
 
240
- **Setup:**
241
- 1. Get file → Audio
242
- 2. Base64 encode
243
- 3. Text: `data:audio/m4a;base64,[encoded]`
244
- 4. Dictionary (3 text fields):
245
- - `password`: `chou`
246
- - `file_data`: Step 3
247
- - `file_name`: `recording.m4a`
248
- 5. Get URL: `/api/transcribe` (POST, JSON)
249
- 6. Extract `transcription` & `summary`
 
 
 
 
 
 
 
 
 
250
  """)
251
 
252
- gr.Markdown("💡 **Formats:** MP4, M4A, MP3, WAV, OGG, WEBM | **Max:** 25MB/chunk")
 
 
 
 
 
253
 
254
- # ====== 掛載與啟動 ======
255
  app = gr.mount_gradio_app(fastapi_app, demo, path="/")
256
 
 
257
  if __name__ == "__main__":
258
- print("\n🚀 啟動應用")
259
- print("📱 API: /api/transcribe")
260
- print("🌐 Web: /\n")
 
 
261
  import uvicorn
262
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
14
  print("===== 🚀 啟動中 =====")
15
  print(f"APP_PASSWORD: {'✅ 已載入' if PASSWORD else '❌ 未載入'}")
16
 
17
+ # ====== 工具:把 data:URL 轉成臨時檔 ======
18
  MIME_EXT = {
19
  "audio/mp4": "m4a", "audio/m4a": "m4a", "audio/aac": "aac",
20
  "audio/mpeg": "mp3", "audio/wav": "wav", "audio/x-wav": "wav",
 
23
  }
24
 
25
  def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
26
+ print(f" → [_dataurl_to_file] 開始處理 data URL...")
27
  try:
28
  header, b64 = data_url.split(",", 1)
29
  except ValueError:
 
31
  mime = header.split(";")[0].split(":", 1)[-1].strip()
32
  ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")
33
  fname = orig_name if (orig_name and "." in orig_name) else f"upload_{uuid.uuid4().hex}.{ext}"
34
+ print(f" → [_dataurl_to_file] 檔名: {fname}, Base64長度: {len(b64)}")
35
  with open(fname, "wb") as f:
36
  f.write(base64.b64decode(b64))
37
+ file_size = os.path.getsize(fname)
38
+ print(f" → [_dataurl_to_file] ✅ 檔案已建立, 大小: {file_size} bytes")
39
  return fname
40
 
41
  def _extract_effective_path(file_obj) -> str:
42
+ """從各種格式中提取有效檔案路徑"""
43
+ print(f"[_extract_effective_path] 收到類型: {type(file_obj)}")
44
+
45
+ # 字串模式
46
  if isinstance(file_obj, str):
47
  s = file_obj.strip().strip('"')
48
  if s.startswith("data:"):
49
+ print(f" → 偵測到 data URL")
50
  return _dataurl_to_file(s, None)
51
  if os.path.isfile(s):
52
+ print(f" → 找到檔案路徑: {s}")
53
  return s
54
+
55
+ # 字典模式
56
  if isinstance(file_obj, dict):
57
+ print(f" → 字典模式, Keys: {list(file_obj.keys())}")
58
  data = file_obj.get("data")
59
  if isinstance(data, str) and data.startswith("data:"):
60
+ print(f" → 找到 data URL")
61
  return _dataurl_to_file(data, file_obj.get("orig_name"))
62
  p = str(file_obj.get("path") or "").strip().strip('"')
63
  if p and os.path.isfile(p):
64
  return p
65
+
66
+ # 物件模式
67
  for attr in ("name", "path"):
68
  p = getattr(file_obj, attr, None)
69
  if isinstance(p, str):
70
  s = p.strip().strip('"')
71
  if os.path.isfile(s):
72
  return s
73
+
74
+ raise FileNotFoundError("Cannot parse uploaded file")
75
 
76
+ # ====== 分段處理 ======
77
  def split_audio(path):
78
  size = os.path.getsize(path)
79
+ print(f"[split_audio] 檔案大小: {size} bytes ({size/1024/1024:.2f} MB)")
80
  if size <= MAX_SIZE:
81
+ print(f"[split_audio] 不需分割")
82
  return [path]
83
+ print(f"[split_audio] 開始分割...")
84
  audio = AudioSegment.from_file(path)
85
  n = int(size / MAX_SIZE) + 1
86
  chunk_ms = len(audio) / n
87
+ print(f"[split_audio] 分割成 {n} 個片段")
88
  parts = []
89
  for i in range(n):
90
  fn = f"chunk_{i+1}.wav"
 
92
  parts.append(fn)
93
  return parts
94
 
95
+ # ====== 轉錄核心 ======
96
  def transcribe_core(path, model="whisper-1"):
97
+ print(f"\n{'='*60}")
98
+ print(f"[transcribe_core] 開始轉錄: {path}")
99
+ print(f"{'='*60}")
100
+
101
+ start_time = time.time()
102
 
103
  if path.lower().endswith(".mp4"):
104
  fixed = path[:-4] + ".m4a"
 
109
  pass
110
 
111
  chunks = split_audio(path)
112
+ print(f"\n[transcribe_core] === Whisper 轉錄 ({len(chunks)} 片段) ===")
113
  raw = []
114
  for i, c in enumerate(chunks, 1):
115
+ print(f"[transcribe_core] 轉錄片段 {i}/{len(chunks)}")
116
  with open(c, "rb") as af:
117
  txt = client.audio.transcriptions.create(
118
  model=model, file=af, response_format="text"
119
  )
120
  raw.append(txt)
121
+ print(f"[transcribe_core] ✅ 片段 {i} 完成")
122
+
123
  raw_txt = "\n".join(raw)
124
+ print(f"[transcribe_core] 原始轉錄: {len(raw_txt)} 字元")
125
 
126
+ print(f"\n[transcribe_core] === 簡轉繁 ===")
127
  conv = client.chat.completions.create(
128
  model="gpt-4o-mini",
129
  messages=[
 
133
  temperature=0.0
134
  )
135
  trad = conv.choices[0].message.content.strip()
136
+ print(f"[transcribe_core] ✅ 繁體轉換完成: {len(trad)} 字元")
137
 
138
+ print(f"\n[transcribe_core] === AI 摘要 ===")
139
  summ = client.chat.completions.create(
140
  model="gpt-4o-mini",
141
  messages=[
 
144
  ],
145
  temperature=0.2
146
  )
147
+ summary = summ.choices[0].message.content.strip()
148
 
149
+ total_time = time.time() - start_time
150
+ print(f"\n{'='*60}")
151
+ print(f"[transcribe_core] ✅✅✅ 全部完成! 總耗時: {total_time:.1f}秒")
152
+ print(f"{'='*60}\n")
153
+
154
+ return trad, summary
155
 
156
  # ====== Gradio UI 函式 ======
157
  def transcribe_ui(password, file):
158
+ print(f"\n🌐 [UI] 網頁版請求")
159
  if not password or password.strip() != PASSWORD:
160
  return "❌ Password incorrect", "", ""
161
  if not file:
162
+ return "⚠️ No file uploaded", "", ""
163
  try:
164
  path = _extract_effective_path(file)
165
  text, summary = transcribe_core(path)
166
+ return "✅ Transcription completed", text, summary
167
  except Exception as e:
168
+ import traceback
169
+ print(f"❌ [UI] 錯誤:\n{traceback.format_exc()}")
170
  return f"❌ Error: {e}", "", ""
171
 
172
+ # ====== 建立 FastAPI 應用 ======
173
  fastapi_app = FastAPI()
174
+
175
+ # CORS 設定
176
  fastapi_app.add_middleware(
177
  CORSMiddleware,
178
  allow_origins=["*"],
 
181
  allow_headers=["*"],
182
  )
183
 
184
+ # ====== 完全同步的 API 端點 ======
185
  @fastapi_app.post("/api/transcribe")
186
+ async def api_transcribe_sync(request: Request):
187
+ """
188
+ 完全同步的 API 端點 - 直接返回結果,不用輪詢
189
+
190
+ 請求格式:
191
+ {
192
+ "password": "chou",
193
+ "file_data": "data:audio/m4a;base64,...",
194
+ "file_name": "recording.m4a"
195
+ }
196
+ """
197
  try:
198
  body = await request.json()
199
+ print(f"\n{'📱'*30}")
200
+ print(f"🎯 [SYNC API] 收到同步 API 請求")
201
+ print(f"📦 Keys: {list(body.keys())}")
202
+ print(f"{'📱'*30}")
203
 
204
  password = body.get("password", "")
205
  if password.strip() != PASSWORD:
206
+ print(f"❌ [SYNC API] 密碼錯誤")
207
  return JSONResponse(
208
  status_code=401,
209
  content={"status": "error", "error": "Password incorrect"}
 
213
  file_name = body.get("file_name", "recording.m4a")
214
 
215
  if not file_data or not file_data.startswith("data:"):
216
+ print(f"❌ [SYNC API] 檔案格式錯誤")
217
  return JSONResponse(
218
  status_code=400,
219
+ content={"status": "error", "error": "Invalid file data format"}
220
  )
221
 
222
+ print(f"[SYNC API] 檔案長度: {len(file_data)}, 檔名: {file_name}")
223
+
224
+ # 直接處理,同步執行
225
  file_dict = {"data": file_data, "orig_name": file_name}
226
  path = _extract_effective_path(file_dict)
227
+ print(f"✅ [SYNC API] 檔案解析成功: {path}")
228
+
229
  text, summary = transcribe_core(path)
230
 
231
  result = {
 
233
  "transcription": text,
234
  "summary": summary
235
  }
236
+
237
+ print(f"\n{'✅'*30}")
238
+ print(f"✅✅✅ [SYNC API] 完成! 返回結果")
239
+ print(json.dumps(result, ensure_ascii=False, indent=2))
240
+ print(f"{'✅'*30}\n")
241
+
242
  return JSONResponse(content=result)
243
 
244
  except Exception as e:
245
  import traceback
246
+ error_trace = traceback.format_exc()
247
+ print(f"\n{'❌'*30}")
248
+ print(f"❌ [SYNC API] 錯誤:\n{error_trace}")
249
+ print(f"{'❌'*30}\n")
250
  return JSONResponse(
251
  status_code=500,
252
  content={"status": "error", "error": str(e)}
253
  )
254
 
255
  # ====== Gradio 介面 ======
256
+ with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo:
257
+ gr.Markdown("# 🎧 LINE Audio Transcription & Summary")
258
 
259
+ with gr.Tab("🌐 Web Upload"):
260
+ gr.Markdown("### Upload audio file directly from browser")
261
+ with gr.Row():
262
+ with gr.Column(scale=1):
263
+ pw_ui = gr.Textbox(label="Password", type="password")
264
+ file_ui = gr.File(label="Upload Audio File", file_types=["audio"])
265
+ btn_ui = gr.Button("Start Transcription 🚀", variant="primary", size="lg")
266
+ with gr.Column(scale=2):
267
+ status_ui = gr.Textbox(label="Status", interactive=False)
268
+ transcript_ui = gr.Textbox(label="Transcription Result", lines=10)
269
+ summary_ui = gr.Textbox(label="AI Summary", lines=6)
270
 
271
+ btn_ui.click(transcribe_ui, [pw_ui, file_ui], [status_ui, transcript_ui, summary_ui])
272
 
273
+ with gr.Tab("📱 API Documentation"):
274
  gr.Markdown("""
275
+ ### 🚀 Synchronous API (Recommended for iPhone Shortcuts)
276
+
277
+ **Endpoint**: `/api/transcribe` (POST)
278
 
279
+ **完全同步** - 直接返回結果,無需輪詢
280
+
281
+ ✅ **穩定可靠** - 不受音檔長度影響,自動等待完成
282
+
283
+ ---
284
 
285
+ #### Request Format (JSON):
286
  ```json
287
  {
288
+ "password": "your_password",
289
+ "file_data": "data:audio/m4a;base64,UklGR...",
290
  "file_name": "recording.m4a"
291
  }
292
  ```
293
 
294
+ #### Response Format:
295
  ```json
296
  {
297
  "status": "success",
298
+ "transcription": "轉錄內容...",
299
+ "summary": "摘要內容..."
300
  }
301
  ```
302
 
303
  ---
304
 
305
+ ### 📱 iPhone Shortcuts 設定
306
+
307
+ **動作流程:**
308
+
309
+ 1. **取得檔案** → 語音檔
310
+ 2. **Base64 編碼**
311
+ 3. **文字** (組合 data URL):
312
+ ```
313
+ data:audio/m4a;base64,Base64編碼結果
314
+ ```
315
+ 4. **字典** (請求本文):
316
+ - 鍵: `password`, 值: `chou`
317
+ - 鍵: `file_data`, 值: 上一步的文字
318
+ - 鍵: `file_name`, 值: `recording.m4a`
319
+ 5. **取得 URL 內容**:
320
+ - URL: `https://你的網址/api/transcribe`
321
+ - 方法: `POST`
322
+ - 標頭: `Content-Type` = `application/json`
323
+ - 請求本文: 上一步的字典
324
+ - 請求本文類型: `JSON`
325
+ 6. **從字典取得值**:
326
+ - 鍵: `transcription` → 轉錄結果
327
+ - 鍵: `summary` → 摘要
328
 
329
  ---
330
 
331
+ ### 💡 重要提醒
332
+
333
+ - 這個端點**完全同步**,會等待轉錄完成後才返回
334
+ - 無論音檔多長,都會自動處理完成
335
+ - 不需要設定等待時間或輪詢機制
336
+ - 直接取得最終結果,不會有 `event_id`
337
+
338
+ ### 🧪 測試 API
339
+
340
+ 使用 curl 測試:
341
+ ```bash
342
+ curl -X POST https://你的網址/api/transcribe \\
343
+ -H "Content-Type: application/json" \\
344
+ -d '{
345
+ "password": "chou",
346
+ "file_data": "data:audio/m4a;base64,AAAA...",
347
+ "file_name": "test.m4a"
348
+ }'
349
+ ```
350
  """)
351
 
352
+ gr.Markdown("""
353
+ ---
354
+ 💡 **Supported Formats**: MP4, M4A, MP3, WAV, OGG, WEBM
355
+ 📦 **Max File Size**: 25MB per chunk (auto-split)
356
+ 🔒 **Security**: Password-protected
357
+ """)
358
 
359
+ # ====== 掛載 Gradio 到 FastAPI ======
360
  app = gr.mount_gradio_app(fastapi_app, demo, path="/")
361
 
362
+ # ====== 啟動 ======
363
  if __name__ == "__main__":
364
+ print("\n" + "="*60)
365
+ print("🚀 啟動 FastAPI + Gradio 應用")
366
+ print("📱 同步 API: /api/transcribe")
367
+ print("🌐 網頁介面: /")
368
+ print("="*60 + "\n")
369
  import uvicorn
370
  uvicorn.run(app, host="0.0.0.0", port=7860)