MichaelChou0806 committed on
Commit
678d6a4
·
verified ·
1 Parent(s): c8ee0eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +163 -54
app.py CHANGED
@@ -2,8 +2,6 @@ import os, shutil, base64, uuid, mimetypes
2
  from pydub import AudioSegment
3
  from openai import OpenAI
4
  import gradio as gr
5
- from fastapi import Request
6
- from fastapi.responses import JSONResponse
7
 
8
  # ====== 基本設定 ======
9
  PASSWORD = os.getenv("APP_PASSWORD", "chou")
@@ -115,6 +113,7 @@ def transcribe_core(path, model="whisper-1"):
115
 
116
  # ====== Gradio UI 函式 ======
117
  def transcribe_ui(password, file):
 
118
  print(f"\n🎯 Web UI Request | Password: {password[:2] if password else ''}***")
119
  if not password or password.strip() != PASSWORD:
120
  return "❌ Password incorrect", "", ""
@@ -128,40 +127,165 @@ def transcribe_ui(password, file):
128
  print(f"❌ Error: {e}")
129
  return f"❌ Error: {e}", "", ""
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  # ====== Gradio 介面 ======
132
  with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo:
133
  gr.Markdown("# 🎧 LINE Audio Transcription & Summary")
134
 
135
- with gr.Row():
136
- with gr.Column(scale=1):
137
- pw_ui = gr.Textbox(label="Password", type="password", placeholder="Enter password")
138
- file_ui = gr.File(label="Upload Audio File", file_types=["audio"])
139
- btn_ui = gr.Button("Start Transcription 🚀", variant="primary", size="lg")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
 
141
- with gr.Column(scale=2):
142
- status_ui = gr.Textbox(label="Status", interactive=False)
143
- transcript_ui = gr.Textbox(label="Transcription Result", lines=10)
144
- summary_ui = gr.Textbox(label="AI Summary", lines=6)
145
-
146
- btn_ui.click(transcribe_ui, [pw_ui, file_ui], [status_ui, transcript_ui, summary_ui])
147
 
148
- with gr.Accordion("📱 API Documentation (iPhone Shortcut)", open=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  gr.Markdown("""
150
- ### API Endpoint
151
- **POST** `/api/transcribe`
 
 
 
 
 
152
 
153
- ### Request Format (JSON)
154
  ```json
155
  {
156
- "password": "your_password",
157
- "file": {
158
- "data": "data:audio/m4a;base64,UklGR...",
159
- "orig_name": "recording.m4a"
160
- }
161
  }
162
  ```
163
 
164
- ### Response Format
165
  ```json
166
  {
167
  "status": "success",
@@ -170,38 +294,23 @@ with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo
170
  }
171
  ```
172
 
173
- 💡 **Tip**: Use this endpoint in iPhone Shortcuts for automated transcription
 
 
 
174
  """)
175
-
176
- # ====== 自訂 API 路由 ======
177
- @demo.fastapi_app.post("/api/transcribe")
178
- async def api_transcribe(request: Request):
179
- """iPhone 捷徑專用的 API 端點"""
180
- try:
181
- body = await request.json()
182
- print(f"\n🎯 API Request | Keys: {list(body.keys())}")
183
-
184
- password = body.get("password", "")
185
- if password.strip() != PASSWORD:
186
- return JSONResponse(status_code=401, content={"error": "Password incorrect"})
187
-
188
- file_obj = body.get("file")
189
- if not file_obj:
190
- return JSONResponse(status_code=400, content={"error": "No file provided"})
191
-
192
- path = _extract_effective_path(file_obj)
193
- text, summary = transcribe_core(path)
194
-
195
- return JSONResponse(content={
196
- "status": "success",
197
- "transcription": text,
198
- "summary": summary
199
- })
200
- except Exception as e:
201
- import traceback
202
- print(f"❌ API Error:\n{traceback.format_exc()}")
203
- return JSONResponse(status_code=500, content={"error": str(e)})
204
 
205
  # ====== 啟動 ======
206
  if __name__ == "__main__":
207
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
2
  from pydub import AudioSegment
3
  from openai import OpenAI
4
  import gradio as gr
 
 
5
 
6
  # ====== 基本設定 ======
7
  PASSWORD = os.getenv("APP_PASSWORD", "chou")
 
113
 
114
  # ====== Gradio UI 函式 ======
115
  def transcribe_ui(password, file):
116
+ """網頁版轉錄函式"""
117
  print(f"\n🎯 Web UI Request | Password: {password[:2] if password else ''}***")
118
  if not password or password.strip() != PASSWORD:
119
  return "❌ Password incorrect", "", ""
 
127
  print(f"❌ Error: {e}")
128
  return f"❌ Error: {e}", "", ""
129
 
130
+ # ====== API 函式 (通過 Gradio 端點呼叫) ======
131
+ def transcribe_api(password, file_data, file_name):
132
+ """
133
+ API 版本的轉錄函式
134
+ 參數:
135
+ - password: 密碼字串
136
+ - file_data: data:audio/...;base64,... 格式的字串
137
+ - file_name: 原始檔名
138
+ """
139
+ print(f"\n🎯 API Request | Password: {password[:2] if password else ''}***")
140
+ print(f"📁 File data length: {len(file_data) if file_data else 0}")
141
+ print(f"📁 File name: {file_name}")
142
+
143
+ if not password or password.strip() != PASSWORD:
144
+ return {
145
+ "status": "error",
146
+ "error": "Password incorrect",
147
+ "transcription": "",
148
+ "summary": ""
149
+ }
150
+
151
+ if not file_data or not file_data.startswith("data:"):
152
+ return {
153
+ "status": "error",
154
+ "error": "Invalid file data format. Must be data:audio/...;base64,...",
155
+ "transcription": "",
156
+ "summary": ""
157
+ }
158
+
159
+ try:
160
+ file_dict = {
161
+ "data": file_data,
162
+ "orig_name": file_name or "recording.m4a"
163
+ }
164
+ path = _extract_effective_path(file_dict)
165
+ text, summary = transcribe_core(path)
166
+ return {
167
+ "status": "success",
168
+ "transcription": text,
169
+ "summary": summary
170
+ }
171
+ except Exception as e:
172
+ import traceback
173
+ print(f"❌ Error:\n{traceback.format_exc()}")
174
+ return {
175
+ "status": "error",
176
+ "error": str(e),
177
+ "transcription": "",
178
+ "summary": ""
179
+ }
180
+
181
  # ====== Gradio 介面 ======
182
  with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo:
183
  gr.Markdown("# 🎧 LINE Audio Transcription & Summary")
184
 
185
+ with gr.Tab("🌐 Web Upload"):
186
+ gr.Markdown("### Upload audio file directly from browser")
187
+ with gr.Row():
188
+ with gr.Column(scale=1):
189
+ pw_ui = gr.Textbox(
190
+ label="Password",
191
+ type="password",
192
+ placeholder="Enter password"
193
+ )
194
+ file_ui = gr.File(
195
+ label="Upload Audio File",
196
+ file_types=["audio"]
197
+ )
198
+ btn_ui = gr.Button(
199
+ "Start Transcription 🚀",
200
+ variant="primary",
201
+ size="lg"
202
+ )
203
+
204
+ with gr.Column(scale=2):
205
+ status_ui = gr.Textbox(label="Status", interactive=False)
206
+ transcript_ui = gr.Textbox(
207
+ label="Transcription Result",
208
+ lines=10,
209
+ placeholder="Transcription will appear here..."
210
+ )
211
+ summary_ui = gr.Textbox(
212
+ label="AI Summary",
213
+ lines=6,
214
+ placeholder="Summary will appear here..."
215
+ )
216
 
217
+ btn_ui.click(
218
+ transcribe_ui,
219
+ inputs=[pw_ui, file_ui],
220
+ outputs=[status_ui, transcript_ui, summary_ui]
221
+ )
 
222
 
223
+ with gr.Tab("📱 API (iPhone Shortcut)"):
224
+ gr.Markdown("""
225
+ ### For iPhone Shortcuts & Automation
226
+
227
+ This tab provides a Gradio-based API endpoint that accepts Base64-encoded audio.
228
+ """)
229
+
230
+ with gr.Row():
231
+ with gr.Column(scale=1):
232
+ pw_api = gr.Textbox(
233
+ label="Password",
234
+ type="password",
235
+ value="chou",
236
+ placeholder="Enter password"
237
+ )
238
+ file_data_api = gr.Textbox(
239
+ label="File Data (Base64)",
240
+ placeholder="data:audio/m4a;base64,UklGR...",
241
+ lines=3,
242
+ info="Paste your base64-encoded audio data URL here"
243
+ )
244
+ file_name_api = gr.Textbox(
245
+ label="Original Filename",
246
+ value="recording.m4a",
247
+ placeholder="recording.m4a"
248
+ )
249
+ btn_api = gr.Button(
250
+ "Test API 🧪",
251
+ variant="secondary",
252
+ size="lg"
253
+ )
254
+
255
+ with gr.Column(scale=2):
256
+ result_api = gr.JSON(
257
+ label="API Response",
258
+ show_label=True
259
+ )
260
+
261
+ btn_api.click(
262
+ transcribe_api,
263
+ inputs=[pw_api, file_data_api, file_name_api],
264
+ outputs=[result_api],
265
+ api_name="transcribe" # 🔴 關鍵:設定 api_name
266
+ )
267
+
268
  gr.Markdown("""
269
+ ---
270
+ ### 📖 How to use with iPhone Shortcuts
271
+
272
+ **Gradio API Endpoint**:
273
+ ```
274
+ POST /gradio_api/call/transcribe
275
+ ```
276
 
277
+ **Request Format (JSON)**:
278
  ```json
279
  {
280
+ "data": [
281
+ "your_password",
282
+ "data:audio/m4a;base64,UklGR...",
283
+ "recording.m4a"
284
+ ]
285
  }
286
  ```
287
 
288
+ **Response Format**:
289
  ```json
290
  {
291
  "status": "success",
 
294
  }
295
  ```
296
 
297
+ 💡 **Important**:
298
+ - The endpoint is `/gradio_api/call/transcribe` (note: `call/transcribe`)
299
+ - The `data` array must have exactly 3 items: [password, file_data, file_name]
300
+ - Use `queue=false` parameter or set `api_name="transcribe"` in your request
301
  """)
302
+
303
+ gr.Markdown("""
304
+ ---
305
+ 💡 **Supported Formats**: MP4, M4A, MP3, WAV, OGG, WEBM
306
+ 📦 **Max File Size**: 25MB per chunk (larger files auto-split)
307
+ 🔒 **Security**: Password-protected access
308
+ """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
 
310
  # ====== 啟動 ======
311
  if __name__ == "__main__":
312
+ demo.launch(
313
+ server_name="0.0.0.0",
314
+ server_port=7860,
315
+ show_api=True # 顯示 API 文件
316
+ )