MichaelChou0806 committed on
Commit
1ebe0ef
·
verified ·
1 Parent(s): 20943a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -66
app.py CHANGED
@@ -2,6 +2,8 @@ import os, shutil, base64, uuid, mimetypes, json
2
  from pydub import AudioSegment
3
  from openai import OpenAI
4
  import gradio as gr
 
 
5
 
6
  # ====== 基本設定 ======
7
  PASSWORD = os.getenv("APP_PASSWORD", "chou")
@@ -26,87 +28,75 @@ MIME_EXT = {
26
  }
27
 
28
  def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
29
- # data_url: "data:audio/mp4;base64,AAAA..."
30
  try:
31
  header, b64 = data_url.split(",", 1)
32
  except ValueError:
33
- raise ValueError("data URL 格式錯誤(缺少逗號)")
34
- # 取 MIME
35
  mime = header.split(";")[0].split(":", 1)[-1].strip()
36
  ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")
37
- # 臨時檔名
38
  fname = orig_name if (orig_name and "." in orig_name) else f"upload_{uuid.uuid4().hex}.{ext}"
39
  with open(fname, "wb") as f:
40
  f.write(base64.b64decode(b64))
41
- print(f"✅ 已從 data URL 生成檔案:{fname}, 大小:{os.path.getsize(fname)} bytes")
42
  return fname
43
 
44
  def _extract_effective_path(file_obj) -> str:
45
- """
46
- Gradio File 輸入中,得到真正存在的檔案路徑。
47
- 🔴 加強版:詳細記錄所有嘗試過程
48
- """
49
- print(f"\n🔍 開始解析檔案...")
50
- print(f"📦 收到的類型: {type(file_obj)}")
51
- print(f"📦 收到的內容: {file_obj}")
52
 
53
- # 情況 A:字串(可能是路徑或 data:URL)
54
  if isinstance(file_obj, str):
55
  s = file_obj.strip().strip('"')
56
- print(f" → 情況A:字串模式")
57
  if s.startswith("data:"):
58
- print(f" → 偵測到 data URL,長度:{len(s)}")
59
  return _dataurl_to_file(s, None)
60
  if os.path.isfile(s):
61
- print(f" → 找到有效路徑:{s}")
62
  return s
63
- print(f" → 字串無效,繼續嘗試...")
64
 
65
- # 情況 B:dict
66
  if isinstance(file_obj, dict):
67
- print(f" → 情況B:字典模式")
68
- print(f" → 字典的 keys: {list(file_obj.keys())}")
69
 
70
- # 🔴 優先檢查 data(Base64 模式)
71
  data = file_obj.get("data")
72
  if isinstance(data, str) and data.startswith("data:"):
73
- print(f" → ✅ 找到 data URL! 長度:{len(data)}")
74
  orig_name = file_obj.get("orig_name")
75
- print(f" → 檔名:{orig_name}")
76
  return _dataurl_to_file(data, orig_name)
77
 
78
- # 再檢查 path
79
  p = str(file_obj.get("path") or "").strip().strip('"')
80
  if p and os.path.isfile(p):
81
- print(f" → 找到 path:{p}")
82
  return p
83
 
84
- # 檢查 url
85
  u = str(file_obj.get("url") or "").strip().strip('"')
86
  if u and os.path.isfile(u):
87
- print(f" → 找到 url:{u}")
88
  return u
89
-
90
- print(f" → ❌ 字典內沒有有效的 data/path/url")
91
 
92
- # 情況 C:物件
93
- print(f" → 情況C:物件模式")
94
  for attr in ("name", "path"):
95
  p = getattr(file_obj, attr, None)
96
  if isinstance(p, str):
97
  s = p.strip().strip('"')
98
  if os.path.isfile(s):
99
- print(f" → 找到物件屬性 {attr}:{s}")
100
  return s
101
 
102
- # 物件上的 data:URL
103
  data = getattr(file_obj, "data", None)
104
  if isinstance(data, str) and data.startswith("data:"):
105
- print(f" → 找到物件的 data URL")
106
  return _dataurl_to_file(data, getattr(file_obj, "orig_name", None))
107
 
108
- print(f" 所有方法都失敗!")
109
- raise FileNotFoundError(f"無法解析上傳檔案。收到的類型:{type(file_obj)}, 內容:{str(file_obj)[:200]}")
110
 
111
  # ====== 分段處理 ======
112
  def split_audio(path):
@@ -125,14 +115,13 @@ def split_audio(path):
125
 
126
  # ====== 轉錄核心 ======
127
  def transcribe_core(path, model="whisper-1"):
128
- # iPhone LINE 常見:mp4(其實是音訊容器)
129
  if path.lower().endswith(".mp4"):
130
  fixed = path[:-4] + ".m4a"
131
  try:
132
  shutil.copy(path, fixed)
133
  path = fixed
134
  except Exception as e:
135
- print(f"⚠️ mp4→m4a 失敗: {e}")
136
 
137
  chunks = split_audio(path)
138
  raw = []
@@ -146,7 +135,6 @@ def transcribe_core(path, model="whisper-1"):
146
  raw.append(txt)
147
  raw_txt = "\n".join(raw)
148
 
149
- # 簡轉繁(不意譯)
150
  conv = client.chat.completions.create(
151
  model="gpt-4o-mini",
152
  messages=[
@@ -157,7 +145,6 @@ def transcribe_core(path, model="whisper-1"):
157
  )
158
  trad = conv.choices[0].message.content.strip()
159
 
160
- # 摘要(內容多→條列;內容少→一句話)
161
  summ = client.chat.completions.create(
162
  model="gpt-4o-mini",
163
  messages=[
@@ -168,55 +155,99 @@ def transcribe_core(path, model="whisper-1"):
168
  )
169
  return trad, summ.choices[0].message.content.strip()
170
 
171
- # ====== 對外函式(UI / API 共用) ======
172
- def transcribe(password, file):
173
  print("\n" + "="*50)
174
- print("🎯 新的轉錄請求")
175
- print(f"🔑 密碼: {password[:2]}*** (長度:{len(password)})")
176
- print(f"📁 檔案: {type(file)}")
177
  print("="*50)
178
 
179
  if password.strip() != PASSWORD:
180
- print("❌ 密碼驗證失敗")
181
  return "❌ Password incorrect", "", ""
182
-
183
  if not file:
184
- print("❌ 未收到檔案")
185
  return "⚠️ No file uploaded", "", ""
186
 
187
  try:
188
  path = _extract_effective_path(file)
189
- print(f"✅ 成功解析檔案:{path}")
190
- print(f"📊 檔案大小:{os.path.getsize(path)} bytes")
191
- except Exception as e:
192
- import traceback
193
- error_msg = traceback.format_exc()
194
- print(f"❌ 檔案解析失敗:\n{error_msg}")
195
- return f"❌ File parsing failed: {e}", "", ""
196
-
197
- try:
198
  text, summary = transcribe_core(path)
199
- print("✅ 轉錄完成")
200
  return "✅ Transcription completed", text, summary
201
  except Exception as e:
202
  import traceback
203
- error_msg = traceback.format_exc()
204
- print(f"❌ 轉錄失敗:\n{error_msg}")
205
- return f"❌ Transcription failed: {e}", "", ""
206
 
207
  # ====== Gradio UI ======
208
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
209
- gr.Markdown("## 🎧 LINE Audio Transcription & Summary (Enhanced Debug)")
210
  pw = gr.Textbox(label="Password", type="password")
211
  f = gr.File(label="Upload Audio File")
212
  run = gr.Button("Start Transcription 🚀")
213
  s = gr.Textbox(label="Status", interactive=False)
214
  t = gr.Textbox(label="Transcription Result", lines=10)
215
  su = gr.Textbox(label="AI Summary", lines=8)
216
- # 關鍵:queue=False 讓 API 直接回應
217
- run.click(transcribe, [pw, f], [s, t, su], queue=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
219
- app = demo
 
220
 
221
  if __name__ == "__main__":
222
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
2
  from pydub import AudioSegment
3
  from openai import OpenAI
4
  import gradio as gr
5
+ from fastapi import FastAPI, Request, HTTPException
6
+ from fastapi.responses import JSONResponse
7
 
8
  # ====== 基本設定 ======
9
  PASSWORD = os.getenv("APP_PASSWORD", "chou")
 
28
  }
29
 
30
  def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
 
31
  try:
32
  header, b64 = data_url.split(",", 1)
33
  except ValueError:
34
+ raise ValueError("data URL format error (missing comma).")
 
35
  mime = header.split(";")[0].split(":", 1)[-1].strip()
36
  ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")
 
37
  fname = orig_name if (orig_name and "." in orig_name) else f"upload_{uuid.uuid4().hex}.{ext}"
38
  with open(fname, "wb") as f:
39
  f.write(base64.b64decode(b64))
40
+ print(f"✅ Created file from data URL: {fname}, size: {os.path.getsize(fname)} bytes")
41
  return fname
42
 
43
  def _extract_effective_path(file_obj) -> str:
44
+ """從各種格式中提取有效檔案路徑"""
45
+ print(f"\n🔍 Starting file parsing...")
46
+ print(f"📦 Received type: {type(file_obj)}")
47
+ print(f"📦 Content: {str(file_obj)[:200]}...")
 
 
 
48
 
49
+ # 字串模式
50
  if isinstance(file_obj, str):
51
  s = file_obj.strip().strip('"')
52
+ print(f" → Mode A: String")
53
  if s.startswith("data:"):
54
+ print(f" → Detected data URL, length: {len(s)}")
55
  return _dataurl_to_file(s, None)
56
  if os.path.isfile(s):
57
+ print(f" → Found valid path: {s}")
58
  return s
 
59
 
60
+ # 字典模式
61
  if isinstance(file_obj, dict):
62
+ print(f" → Mode B: Dictionary")
63
+ print(f" → Keys: {list(file_obj.keys())}")
64
 
65
+ # 優先 data URL
66
  data = file_obj.get("data")
67
  if isinstance(data, str) and data.startswith("data:"):
68
+ print(f" → ✅ Found data URL! Length: {len(data)}")
69
  orig_name = file_obj.get("orig_name")
 
70
  return _dataurl_to_file(data, orig_name)
71
 
72
+ # path
73
  p = str(file_obj.get("path") or "").strip().strip('"')
74
  if p and os.path.isfile(p):
75
+ print(f" → Found path: {p}")
76
  return p
77
 
78
+ # url
79
  u = str(file_obj.get("url") or "").strip().strip('"')
80
  if u and os.path.isfile(u):
81
+ print(f" → Found url: {u}")
82
  return u
 
 
83
 
84
+ # 物件模式
85
+ print(f" → Mode C: Object")
86
  for attr in ("name", "path"):
87
  p = getattr(file_obj, attr, None)
88
  if isinstance(p, str):
89
  s = p.strip().strip('"')
90
  if os.path.isfile(s):
91
+ print(f" → Found object attr {attr}: {s}")
92
  return s
93
 
 
94
  data = getattr(file_obj, "data", None)
95
  if isinstance(data, str) and data.startswith("data:"):
96
+ print(f" → Found object data URL")
97
  return _dataurl_to_file(data, getattr(file_obj, "orig_name", None))
98
 
99
+ raise FileNotFoundError(f"Cannot parse uploaded file. Type: {type(file_obj)}")
 
100
 
101
  # ====== 分段處理 ======
102
  def split_audio(path):
 
115
 
116
  # ====== 轉錄核心 ======
117
  def transcribe_core(path, model="whisper-1"):
 
118
  if path.lower().endswith(".mp4"):
119
  fixed = path[:-4] + ".m4a"
120
  try:
121
  shutil.copy(path, fixed)
122
  path = fixed
123
  except Exception as e:
124
+ print(f"⚠️ mp4→m4a failed: {e}")
125
 
126
  chunks = split_audio(path)
127
  raw = []
 
135
  raw.append(txt)
136
  raw_txt = "\n".join(raw)
137
 
 
138
  conv = client.chat.completions.create(
139
  model="gpt-4o-mini",
140
  messages=[
 
145
  )
146
  trad = conv.choices[0].message.content.strip()
147
 
 
148
  summ = client.chat.completions.create(
149
  model="gpt-4o-mini",
150
  messages=[
 
155
  )
156
  return trad, summ.choices[0].message.content.strip()
157
 
# ====== Gradio UI handler ======
def transcribe_ui(password, file):
    """Handle a transcription request submitted from the Gradio form.

    Args:
        password: Value of the password box. It is stripped of surrounding
            whitespace and compared against the module-level ``PASSWORD``.
            ``None`` is treated as an empty password.
        file: Value of the file input; passed to ``_extract_effective_path``
            to obtain a path on disk.

    Returns:
        A ``(status, transcription, summary)`` tuple of strings. On any
        failure the last two elements are empty strings.
    """
    import hmac
    import traceback

    print("\n" + "=" * 50)
    print("🎯 New transcription request (UI)")
    # Do not echo any part (or the length) of the secret into the logs.
    print(f"🔑 Password provided: {bool(password)}")
    print(f"📁 File: {type(file)}")
    print("=" * 50)

    # Constant-time comparison; bytes are used so non-ASCII input cannot raise.
    supplied = (password or "").strip()
    if not hmac.compare_digest(supplied.encode("utf-8"), PASSWORD.encode("utf-8")):
        return "❌ Password incorrect", "", ""
    if not file:
        return "⚠️ No file uploaded", "", ""

    try:
        path = _extract_effective_path(file)
        text, summary = transcribe_core(path)
        return "✅ Transcription completed", text, summary
    except Exception as e:
        # UI boundary: log the full traceback server-side, show a short message.
        print(traceback.format_exc())
        return f"❌ Error: {e}", "", ""
 
179
 
# ====== Gradio UI ======
# Single-page form: password and audio upload in; status, transcript and
# summary out. Components are created in display order.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🎧 LINE Audio Transcription & Summary")

    # Inputs and trigger
    pw = gr.Textbox(label="Password", type="password")
    f = gr.File(label="Upload Audio File")
    run = gr.Button("Start Transcription 🚀")

    # Outputs
    s = gr.Textbox(label="Status", interactive=False)
    t = gr.Textbox(label="Transcription Result", lines=10)
    su = gr.Textbox(label="AI Summary", lines=8)

    # queue=False: the click handler is registered without Gradio's queue.
    run.click(
        fn=transcribe_ui,
        inputs=[pw, f],
        outputs=[s, t, su],
        queue=False,
    )
190
+
# ====== Custom API endpoint (bypasses Gradio's input validation) ======
app = FastAPI()


@app.post("/api/transcribe")
async def transcribe_api(request: Request):
    """Transcribe audio sent as a base64 data URL in a JSON request body.

    Expected request body::

        {
            "password": "<app password>",
            "file": {
                "data": "data:audio/m4a;base64,UklGR...",
                "orig_name": "recording.m4a"
            }
        }

    Responses:
        200: ``{"status": "success", "transcription": ..., "summary": ...}``
        400: body is not a JSON object, ``file`` is missing, or the file
            value cannot be turned into a file on disk.
        401: password missing or wrong.
        500: any other failure. The traceback is written to the server log
            only; the response carries the error message and a fixed
            ``detail`` string, never the traceback itself.
    """
    import asyncio
    import traceback

    def _fail(status_code: int, message: str) -> JSONResponse:
        return JSONResponse(status_code=status_code, content={"error": message})

    try:
        try:
            body = await request.json()
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass.
            return _fail(400, "Request body must be valid JSON")
        if not isinstance(body, dict):
            return _fail(400, "Request body must be a JSON object")

        print("\n" + "=" * 50)
        print("🎯 New transcription request (Custom API)")
        print(f"📦 Request body keys: {list(body.keys())}")
        print("=" * 50)

        password = body.get("password", "")
        if not isinstance(password, str) or password.strip() != PASSWORD:
            return _fail(401, "Password incorrect")

        file_obj = body.get("file")
        if not file_obj:
            return _fail(400, "No file provided")

        # Decoding and transcription are blocking; run them in a worker
        # thread so the event loop (which also serves the mounted UI) stays
        # responsive.
        try:
            path = await asyncio.to_thread(_extract_effective_path, file_obj)
        except (ValueError, FileNotFoundError) as e:
            # Malformed data URL or unusable file value: a client error.
            print(f"❌ API Error (bad file): {e}")
            return _fail(400, str(e))

        text, summary = await asyncio.to_thread(transcribe_core, path)

        return JSONResponse(content={
            "status": "success",
            "transcription": text,
            "summary": summary,
        })

    except Exception as e:
        # API boundary: keep the traceback in the server log, not the reply.
        print(f"❌ API Error:\n{traceback.format_exc()}")
        return JSONResponse(
            status_code=500,
            content={"error": str(e), "detail": "See server logs for details."},
        )
247
 
# Mount the Gradio UI on the FastAPI app at the site root; the returned
# application replaces ``app``.
app = gr.mount_gradio_app(app, demo, path="/")


if __name__ == "__main__":
    # Script entry point: serve the combined app with uvicorn.
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )