MichaelChou0806 committed on
Commit
c8ee0eb
·
verified ·
1 Parent(s): 7bf71ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -117
app.py CHANGED
@@ -1,8 +1,8 @@
1
- import os, shutil, base64, uuid, mimetypes, json
2
  from pydub import AudioSegment
3
  from openai import OpenAI
4
  import gradio as gr
5
- from fastapi import FastAPI, Request, HTTPException
6
  from fastapi.responses import JSONResponse
7
 
8
  # ====== 基本設定 ======
@@ -15,15 +15,9 @@ print(f"APP_PASSWORD: {'✅ 已載入' if PASSWORD else '❌ 未載入'}")
15
 
16
  # ====== 工具:把 data:URL 轉成臨時檔 ======
17
  MIME_EXT = {
18
- "audio/mp4": "m4a",
19
- "audio/m4a": "m4a",
20
- "audio/aac": "aac",
21
- "audio/mpeg": "mp3",
22
- "audio/wav": "wav",
23
- "audio/x-wav": "wav",
24
- "audio/ogg": "ogg",
25
- "audio/webm": "webm",
26
- "audio/opus": "opus",
27
  "video/mp4": "mp4",
28
  }
29
 
@@ -31,72 +25,39 @@ def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
31
  try:
32
  header, b64 = data_url.split(",", 1)
33
  except ValueError:
34
- raise ValueError("data URL format error (missing comma).")
35
  mime = header.split(";")[0].split(":", 1)[-1].strip()
36
  ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")
37
  fname = orig_name if (orig_name and "." in orig_name) else f"upload_{uuid.uuid4().hex}.{ext}"
38
  with open(fname, "wb") as f:
39
  f.write(base64.b64decode(b64))
40
- print(f"✅ Created file from data URL: {fname}, size: {os.path.getsize(fname)} bytes")
41
  return fname
42
 
43
  def _extract_effective_path(file_obj) -> str:
44
  """從各種格式中提取有效檔案路徑"""
45
- print(f"\n🔍 Starting file parsing...")
46
- print(f"📦 Received type: {type(file_obj)}")
47
- print(f"📦 Content: {str(file_obj)[:200]}...")
48
-
49
  # 字串模式
50
  if isinstance(file_obj, str):
51
  s = file_obj.strip().strip('"')
52
- print(f" → Mode A: String")
53
  if s.startswith("data:"):
54
- print(f" → Detected data URL, length: {len(s)}")
55
  return _dataurl_to_file(s, None)
56
  if os.path.isfile(s):
57
- print(f" → Found valid path: {s}")
58
  return s
59
-
60
  # 字典模式
61
  if isinstance(file_obj, dict):
62
- print(f" → Mode B: Dictionary")
63
- print(f" → Keys: {list(file_obj.keys())}")
64
-
65
- # 優先 data URL
66
  data = file_obj.get("data")
67
  if isinstance(data, str) and data.startswith("data:"):
68
- print(f" → ✅ Found data URL! Length: {len(data)}")
69
- orig_name = file_obj.get("orig_name")
70
- return _dataurl_to_file(data, orig_name)
71
-
72
- # path
73
  p = str(file_obj.get("path") or "").strip().strip('"')
74
  if p and os.path.isfile(p):
75
- print(f" → Found path: {p}")
76
  return p
77
-
78
- # url
79
- u = str(file_obj.get("url") or "").strip().strip('"')
80
- if u and os.path.isfile(u):
81
- print(f" → Found url: {u}")
82
- return u
83
-
84
  # 物件模式
85
- print(f" → Mode C: Object")
86
  for attr in ("name", "path"):
87
  p = getattr(file_obj, attr, None)
88
  if isinstance(p, str):
89
  s = p.strip().strip('"')
90
  if os.path.isfile(s):
91
- print(f" → Found object attr {attr}: {s}")
92
  return s
93
-
94
- data = getattr(file_obj, "data", None)
95
- if isinstance(data, str) and data.startswith("data:"):
96
- print(f" → Found object data URL")
97
- return _dataurl_to_file(data, getattr(file_obj, "orig_name", None))
98
-
99
- raise FileNotFoundError(f"Cannot parse uploaded file. Type: {type(file_obj)}")
100
 
101
  # ====== 分段處理 ======
102
  def split_audio(path):
@@ -120,21 +81,18 @@ def transcribe_core(path, model="whisper-1"):
120
  try:
121
  shutil.copy(path, fixed)
122
  path = fixed
123
- except Exception as e:
124
- print(f"⚠️ mp4→m4a failed: {e}")
125
-
126
  chunks = split_audio(path)
127
  raw = []
128
  for c in chunks:
129
  with open(c, "rb") as af:
130
  txt = client.audio.transcriptions.create(
131
- model=model,
132
- file=af,
133
- response_format="text"
134
  )
135
  raw.append(txt)
136
  raw_txt = "\n".join(raw)
137
-
138
  conv = client.chat.completions.create(
139
  model="gpt-4o-mini",
140
  messages=[
@@ -144,7 +102,7 @@ def transcribe_core(path, model="whisper-1"):
144
  temperature=0.0
145
  )
146
  trad = conv.choices[0].message.content.strip()
147
-
148
  summ = client.chat.completions.create(
149
  model="gpt-4o-mini",
150
  messages=[
@@ -157,76 +115,80 @@ def transcribe_core(path, model="whisper-1"):
157
 
158
  # ====== Gradio UI 函式 ======
159
  def transcribe_ui(password, file):
160
- print("\n" + "="*50)
161
- print("🎯 New transcription request (UI)")
162
- print(f"🔑 Password: {password[:2]}*** (length: {len(password)})")
163
- print(f"📁 File: {type(file)}")
164
- print("="*50)
165
-
166
- if password.strip() != PASSWORD:
167
  return "❌ Password incorrect", "", ""
168
  if not file:
169
  return "⚠️ No file uploaded", "", ""
170
-
171
  try:
172
  path = _extract_effective_path(file)
173
  text, summary = transcribe_core(path)
174
  return "✅ Transcription completed", text, summary
175
  except Exception as e:
176
- import traceback
177
- print(traceback.format_exc())
178
  return f"❌ Error: {e}", "", ""
179
 
180
- # ====== Gradio UI ======
181
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
182
- gr.Markdown("## 🎧 LINE Audio Transcription & Summary")
183
- pw = gr.Textbox(label="Password", type="password")
184
- f = gr.File(label="Upload Audio File")
185
- run = gr.Button("Start Transcription 🚀")
186
- s = gr.Textbox(label="Status", interactive=False)
187
- t = gr.Textbox(label="Transcription Result", lines=10)
188
- su = gr.Textbox(label="AI Summary", lines=8)
189
- run.click(transcribe_ui, [pw, f], [s, t, su], queue=False)
190
-
191
- # ====== 🔴 新增:自訂 API 端點(繞過 Gradio validation) ======
192
- app = FastAPI()
193
-
194
- @app.post("/api/transcribe")
195
- async def transcribe_api(request: Request):
196
- """
197
- 直接接受 Base64 data URL 的自訂 API
198
 
199
- 請求格式:
200
- {
201
- "password": "chou",
202
- "file": {
203
- "data": "data:audio/m4a;base64,UklGR...",
204
- "orig_name": "recording.m4a"
205
- }
206
- }
207
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  try:
209
  body = await request.json()
210
- print("\n" + "="*50)
211
- print("🎯 New transcription request (Custom API)")
212
- print(f"📦 Request body keys: {list(body.keys())}")
213
- print("="*50)
214
 
215
  password = body.get("password", "")
216
  if password.strip() != PASSWORD:
217
- return JSONResponse(
218
- status_code=401,
219
- content={"error": "Password incorrect"}
220
- )
221
 
222
  file_obj = body.get("file")
223
  if not file_obj:
224
- return JSONResponse(
225
- status_code=400,
226
- content={"error": "No file provided"}
227
- )
228
 
229
- # 直接處理 Base64
230
  path = _extract_effective_path(file_obj)
231
  text, summary = transcribe_core(path)
232
 
@@ -235,19 +197,11 @@ async def transcribe_api(request: Request):
235
  "transcription": text,
236
  "summary": summary
237
  })
238
-
239
  except Exception as e:
240
  import traceback
241
- error_detail = traceback.format_exc()
242
- print(f"❌ API Error:\n{error_detail}")
243
- return JSONResponse(
244
- status_code=500,
245
- content={"error": str(e), "detail": error_detail}
246
- )
247
-
248
- # 掛載 Gradio 到 FastAPI
249
- app = gr.mount_gradio_app(app, demo, path="/")
250
 
 
251
  if __name__ == "__main__":
252
- import uvicorn
253
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
+ import os, shutil, base64, uuid, mimetypes
2
  from pydub import AudioSegment
3
  from openai import OpenAI
4
  import gradio as gr
5
+ from fastapi import Request
6
  from fastapi.responses import JSONResponse
7
 
8
  # ====== 基本設定 ======
 
15
 
16
  # ====== 工具:把 data:URL 轉成臨時檔 ======
17
  MIME_EXT = {
18
+ "audio/mp4": "m4a", "audio/m4a": "m4a", "audio/aac": "aac",
19
+ "audio/mpeg": "mp3", "audio/wav": "wav", "audio/x-wav": "wav",
20
+ "audio/ogg": "ogg", "audio/webm": "webm", "audio/opus": "opus",
 
 
 
 
 
 
21
  "video/mp4": "mp4",
22
  }
23
 
 
25
  try:
26
  header, b64 = data_url.split(",", 1)
27
  except ValueError:
28
+ raise ValueError("data URL format error")
29
  mime = header.split(";")[0].split(":", 1)[-1].strip()
30
  ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")
31
  fname = orig_name if (orig_name and "." in orig_name) else f"upload_{uuid.uuid4().hex}.{ext}"
32
  with open(fname, "wb") as f:
33
  f.write(base64.b64decode(b64))
 
34
  return fname
35
 
36
  def _extract_effective_path(file_obj) -> str:
37
  """從各種格式中提取有效檔案路徑"""
 
 
 
 
38
  # 字串模式
39
  if isinstance(file_obj, str):
40
  s = file_obj.strip().strip('"')
 
41
  if s.startswith("data:"):
 
42
  return _dataurl_to_file(s, None)
43
  if os.path.isfile(s):
 
44
  return s
 
45
  # 字典模式
46
  if isinstance(file_obj, dict):
 
 
 
 
47
  data = file_obj.get("data")
48
  if isinstance(data, str) and data.startswith("data:"):
49
+ return _dataurl_to_file(data, file_obj.get("orig_name"))
 
 
 
 
50
  p = str(file_obj.get("path") or "").strip().strip('"')
51
  if p and os.path.isfile(p):
 
52
  return p
 
 
 
 
 
 
 
53
  # 物件模式
 
54
  for attr in ("name", "path"):
55
  p = getattr(file_obj, attr, None)
56
  if isinstance(p, str):
57
  s = p.strip().strip('"')
58
  if os.path.isfile(s):
 
59
  return s
60
+ raise FileNotFoundError("Cannot parse uploaded file")
 
 
 
 
 
 
61
 
62
  # ====== 分段處理 ======
63
  def split_audio(path):
 
81
  try:
82
  shutil.copy(path, fixed)
83
  path = fixed
84
+ except:
85
+ pass
 
86
  chunks = split_audio(path)
87
  raw = []
88
  for c in chunks:
89
  with open(c, "rb") as af:
90
  txt = client.audio.transcriptions.create(
91
+ model=model, file=af, response_format="text"
 
 
92
  )
93
  raw.append(txt)
94
  raw_txt = "\n".join(raw)
95
+
96
  conv = client.chat.completions.create(
97
  model="gpt-4o-mini",
98
  messages=[
 
102
  temperature=0.0
103
  )
104
  trad = conv.choices[0].message.content.strip()
105
+
106
  summ = client.chat.completions.create(
107
  model="gpt-4o-mini",
108
  messages=[
 
115
 
116
  # ====== Gradio UI 函式 ======
117
  def transcribe_ui(password, file):
118
+ print(f"\n🎯 Web UI Request | Password: {password[:2] if password else ''}***")
119
+ if not password or password.strip() != PASSWORD:
 
 
 
 
 
120
  return "❌ Password incorrect", "", ""
121
  if not file:
122
  return "⚠️ No file uploaded", "", ""
 
123
  try:
124
  path = _extract_effective_path(file)
125
  text, summary = transcribe_core(path)
126
  return "✅ Transcription completed", text, summary
127
  except Exception as e:
128
+ print(f"❌ Error: {e}")
 
129
  return f"❌ Error: {e}", "", ""
130
 
131
+ # ====== Gradio 介面 ======
132
+ with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo:
133
+ gr.Markdown("# 🎧 LINE Audio Transcription & Summary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
+ with gr.Row():
136
+ with gr.Column(scale=1):
137
+ pw_ui = gr.Textbox(label="Password", type="password", placeholder="Enter password")
138
+ file_ui = gr.File(label="Upload Audio File", file_types=["audio"])
139
+ btn_ui = gr.Button("Start Transcription 🚀", variant="primary", size="lg")
140
+
141
+ with gr.Column(scale=2):
142
+ status_ui = gr.Textbox(label="Status", interactive=False)
143
+ transcript_ui = gr.Textbox(label="Transcription Result", lines=10)
144
+ summary_ui = gr.Textbox(label="AI Summary", lines=6)
145
+
146
+ btn_ui.click(transcribe_ui, [pw_ui, file_ui], [status_ui, transcript_ui, summary_ui])
147
+
148
+ with gr.Accordion("📱 API Documentation (iPhone Shortcut)", open=False):
149
+ gr.Markdown("""
150
+ ### API Endpoint
151
+ **POST** `/api/transcribe`
152
+
153
+ ### Request Format (JSON)
154
+ ```json
155
+ {
156
+ "password": "your_password",
157
+ "file": {
158
+ "data": "data:audio/m4a;base64,UklGR...",
159
+ "orig_name": "recording.m4a"
160
+ }
161
+ }
162
+ ```
163
+
164
+ ### Response Format
165
+ ```json
166
+ {
167
+ "status": "success",
168
+ "transcription": "轉錄內容...",
169
+ "summary": "摘要內容..."
170
+ }
171
+ ```
172
+
173
+ 💡 **Tip**: Use this endpoint in iPhone Shortcuts for automated transcription
174
+ """)
175
+
176
+ # ====== 自訂 API 路由 ======
177
+ @demo.fastapi_app.post("/api/transcribe")
178
+ async def api_transcribe(request: Request):
179
+ """iPhone 捷徑專用的 API 端點"""
180
  try:
181
  body = await request.json()
182
+ print(f"\n🎯 API Request | Keys: {list(body.keys())}")
 
 
 
183
 
184
  password = body.get("password", "")
185
  if password.strip() != PASSWORD:
186
+ return JSONResponse(status_code=401, content={"error": "Password incorrect"})
 
 
 
187
 
188
  file_obj = body.get("file")
189
  if not file_obj:
190
+ return JSONResponse(status_code=400, content={"error": "No file provided"})
 
 
 
191
 
 
192
  path = _extract_effective_path(file_obj)
193
  text, summary = transcribe_core(path)
194
 
 
197
  "transcription": text,
198
  "summary": summary
199
  })
 
200
  except Exception as e:
201
  import traceback
202
+ print(f"❌ API Error:\n{traceback.format_exc()}")
203
+ return JSONResponse(status_code=500, content={"error": str(e)})
 
 
 
 
 
 
 
204
 
205
+ # ====== 啟動 ======
206
  if __name__ == "__main__":
207
+ demo.launch(server_name="0.0.0.0", server_port=7860)