MichaelChou0806 committed on
Commit
35b1d50
·
verified ·
1 Parent(s): a433c1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +555 -118
app.py CHANGED
@@ -23,6 +23,7 @@ MIME_EXT = {
23
  }
24
 
25
  def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
 
26
  print(f" โ†’ [_dataurl_to_file] ้–‹ๅง‹่™•็† data URL...")
27
  try:
28
  header, b64 = data_url.split(",", 1)
@@ -75,6 +76,7 @@ def _extract_effective_path(file_obj) -> str:
75
 
76
  # ====== ๅˆ†ๆฎต่™•็† ======
77
  def split_audio(path):
 
78
  size = os.path.getsize(path)
79
  print(f"[split_audio] ๆช”ๆกˆๅคงๅฐ: {size} bytes ({size/1024/1024:.2f} MB)")
80
  if size <= MAX_SIZE:
@@ -94,12 +96,14 @@ def split_audio(path):
94
 
95
  # ====== ่ฝ‰้Œ„ๆ ธๅฟƒ ======
96
  def transcribe_core(path, model="whisper-1"):
 
97
  print(f"\n{'='*60}")
98
  print(f"[transcribe_core] ้–‹ๅง‹่ฝ‰้Œ„: {path}")
99
  print(f"{'='*60}")
100
 
101
  start_time = time.time()
102
 
 
103
  if path.lower().endswith(".mp4"):
104
  fixed = path[:-4] + ".m4a"
105
  try:
@@ -108,6 +112,7 @@ def transcribe_core(path, model="whisper-1"):
108
  except:
109
  pass
110
 
 
111
  chunks = split_audio(path)
112
  print(f"\n[transcribe_core] === Whisper ่ฝ‰้Œ„ ({len(chunks)} ็‰‡ๆฎต) ===")
113
  raw = []
@@ -123,6 +128,7 @@ def transcribe_core(path, model="whisper-1"):
123
  raw_txt = "\n".join(raw)
124
  print(f"[transcribe_core] ๅŽŸๅง‹่ฝ‰้Œ„: {len(raw_txt)} ๅญ—ๅ…ƒ")
125
 
 
126
  print(f"\n[transcribe_core] === ็ฐก่ฝ‰็น ===")
127
  conv = client.chat.completions.create(
128
  model="gpt-4o-mini",
@@ -135,6 +141,7 @@ def transcribe_core(path, model="whisper-1"):
135
  trad = conv.choices[0].message.content.strip()
136
  print(f"[transcribe_core] โœ… ็น้ซ”่ฝ‰ๆ›ๅฎŒๆˆ: {len(trad)} ๅญ—ๅ…ƒ")
137
 
 
138
  print(f"\n[transcribe_core] === AI ๆ‘˜่ฆ ===")
139
  summ = client.chat.completions.create(
140
  model="gpt-4o-mini",
@@ -155,19 +162,38 @@ def transcribe_core(path, model="whisper-1"):
155
 
156
  # ====== Gradio UI ๅ‡ฝๅผ ======
157
  def transcribe_ui(password, file):
 
158
  print(f"\n๐ŸŒ [UI] ็ถฒ้ ็‰ˆ่ซ‹ๆฑ‚")
159
  if not password or password.strip() != PASSWORD:
160
- return "โŒ Password incorrect", "", ""
161
  if not file:
162
- return "โš ๏ธ No file uploaded", "", ""
163
  try:
 
 
 
164
  path = _extract_effective_path(file)
 
 
 
 
 
 
 
165
  text, summary = transcribe_core(path)
166
- return "โœ… Transcription completed", text, summary
 
 
 
 
 
 
 
167
  except Exception as e:
168
  import traceback
169
- print(f"โŒ [UI] ้Œฏ่ชค:\n{traceback.format_exc()}")
170
- return f"โŒ Error: {e}", "", ""
 
171
 
172
  # ====== ๅปบ็ซ‹ FastAPI ๆ‡‰็”จ ======
173
  fastapi_app = FastAPI()
@@ -252,119 +278,530 @@ async def api_transcribe_sync(request: Request):
252
  content={"status": "error", "error": str(e)}
253
  )
254
 
255
- # ====== Gradio ไป‹้ข ======
256
- with gr.Blocks(theme=gr.themes.Soft(), title="LINE Audio Transcription") as demo:
257
- gr.Markdown("# ๐ŸŽง LINE Audio Transcription & Summary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
 
259
- with gr.Tab("๐ŸŒ Web Upload"):
260
- gr.Markdown("### Upload audio file directly from browser")
261
- with gr.Row():
262
- with gr.Column(scale=1):
263
- pw_ui = gr.Textbox(label="Password", type="password")
264
- file_ui = gr.File(label="Upload Audio File", file_types=["audio"])
265
- btn_ui = gr.Button("Start Transcription ๐Ÿš€", variant="primary", size="lg")
266
- with gr.Column(scale=2):
267
- status_ui = gr.Textbox(label="Status", interactive=False)
268
- transcript_ui = gr.Textbox(label="Transcription Result", lines=10)
269
- summary_ui = gr.Textbox(label="AI Summary", lines=6)
270
-
271
- btn_ui.click(transcribe_ui, [pw_ui, file_ui], [status_ui, transcript_ui, summary_ui])
272
 
273
- with gr.Tab("๐Ÿ“ฑ API Documentation"):
274
- gr.Markdown("""
275
- ### ๐Ÿš€ Synchronous API (Recommended for iPhone Shortcuts)
276
-
277
- **Endpoint**: `/api/transcribe` (POST)
278
-
279
- โœ… **ๅฎŒๅ…จๅŒๆญฅ** - ็›ดๆŽฅ่ฟ”ๅ›ž็ตๆžœ,็„ก้œ€่ผช่ฉข
280
-
281
- โœ… **็ฉฉๅฎšๅฏ้ ** - ไธๅ—้Ÿณๆช”้•ทๅบฆๅฝฑ้Ÿฟ,่‡ชๅ‹•็ญ‰ๅพ…ๅฎŒๆˆ
282
-
283
- ---
284
-
285
- #### Request Format (JSON):
286
- ```json
287
- {
288
- "password": "your_password",
289
- "file_data": "data:audio/m4a;base64,UklGR...",
290
- "file_name": "recording.m4a"
291
- }
292
- ```
293
-
294
- #### Response Format:
295
- ```json
296
- {
297
- "status": "success",
298
- "transcription": "่ฝ‰้Œ„ๅ…งๅฎน...",
299
- "summary": "ๆ‘˜่ฆๅ…งๅฎน..."
300
- }
301
- ```
302
-
303
- ---
304
-
305
- ### ๐Ÿ“ฑ iPhone Shortcuts ่จญๅฎš
306
-
307
- **ๅ‹•ไฝœๆต็จ‹:**
308
-
309
- 1. **ๅ–ๅพ—ๆช”ๆกˆ** โ†’ ่ชž้Ÿณๆช”
310
- 2. **Base64 ็ทจ็ขผ**
311
- 3. **ๆ–‡ๅญ—** (็ต„ๅˆ data URL):
312
- ```
313
- data:audio/m4a;base64,Base64็ทจ็ขผ็ตๆžœ
314
- ```
315
- 4. **ๅญ—ๅ…ธ** (่ซ‹ๆฑ‚ๆœฌๆ–‡):
316
- - ้ต: `password`, ๅ€ผ: `chou`
317
- - ้ต: `file_data`, ๅ€ผ: ไธŠไธ€ๆญฅ็š„ๆ–‡ๅญ—
318
- - ้ต: `file_name`, ๅ€ผ: `recording.m4a`
319
- 5. **ๅ–ๅพ— URL ๅ…งๅฎน**:
320
- - URL: `https://ไฝ ็š„็ถฒๅ€/api/transcribe`
321
- - ๆ–นๆณ•: `POST`
322
- - ๆจ™้ ญ: `Content-Type` = `application/json`
323
- - ่ซ‹ๆฑ‚ๆœฌๆ–‡: ไธŠไธ€ๆญฅ็š„ๅญ—ๅ…ธ
324
- - ่ซ‹ๆฑ‚ๆœฌๆ–‡้กžๅž‹: `JSON`
325
- 6. **ๅพžๅญ—ๅ…ธๅ–ๅพ—ๅ€ผ**:
326
- - ้ต: `transcription` โ†’ ่ฝ‰้Œ„็ตๆžœ
327
- - ้ต: `summary` โ†’ ๆ‘˜่ฆ
328
-
329
- ---
330
-
331
- ### ๐Ÿ’ก ้‡่ฆๆ้†’
332
-
333
- - โœ… ้€™ๅ€‹็ซฏ้ปž**ๅฎŒๅ…จๅŒๆญฅ**,ๆœƒ็ญ‰ๅพ…่ฝ‰้Œ„ๅฎŒๆˆๅพŒๆ‰่ฟ”ๅ›ž
334
- - โœ… ็„ก่ซ–้Ÿณๆช”ๅคš้•ท,้ƒฝๆœƒ่‡ชๅ‹•่™•็†ๅฎŒๆˆ
335
- - โœ… ไธ้œ€่ฆ่จญๅฎš็ญ‰ๅพ…ๆ™‚้–“ๆˆ–่ผช่ฉขๆฉŸๅˆถ
336
- - โœ… ็›ดๆŽฅๅ–ๅพ—ๆœ€็ต‚็ตๆžœ,ไธๆœƒๆœ‰ `event_id`
337
-
338
- ### ๐Ÿงช ๆธฌ่ฉฆ API
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
 
340
- ไฝฟ็”จ curl ๆธฌ่ฉฆ:
341
- ```bash
342
- curl -X POST https://ไฝ ็š„็ถฒๅ€/api/transcribe \\
343
- -H "Content-Type: application/json" \\
344
- -d '{
345
- "password": "chou",
346
- "file_data": "data:audio/m4a;base64,AAAA...",
347
- "file_name": "test.m4a"
348
- }'
349
- ```
350
- """)
351
-
352
- gr.Markdown("""
353
- ---
354
- ๐Ÿ’ก **Supported Formats**: MP4, M4A, MP3, WAV, OGG, WEBM
355
- ๐Ÿ“ฆ **Max File Size**: 25MB per chunk (auto-split)
356
- ๐Ÿ”’ **Security**: Password-protected
357
- """)
358
-
359
- # ====== ๆŽ›่ผ‰ Gradio ๅˆฐ FastAPI ======
360
- app = gr.mount_gradio_app(fastapi_app, demo, path="/")
361
-
362
- # ====== ๅ•Ÿๅ‹• ======
363
- if __name__ == "__main__":
364
- print("\n" + "="*60)
365
- print("๐Ÿš€ ๅ•Ÿๅ‹• FastAPI + Gradio ๆ‡‰็”จ")
366
- print("๐Ÿ“ฑ ๅŒๆญฅ API: /api/transcribe")
367
- print("๐ŸŒ ็ถฒ้ ไป‹้ข: /")
368
- print("="*60 + "\n")
369
- import uvicorn
370
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  }
24
 
25
  def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
26
+ """ๅฐ‡ data URL ่ฝ‰ๆ›็‚บๆœฌๅœฐๆช”ๆกˆ"""
27
  print(f" โ†’ [_dataurl_to_file] ้–‹ๅง‹่™•็† data URL...")
28
  try:
29
  header, b64 = data_url.split(",", 1)
 
76
 
77
  # ====== ๅˆ†ๆฎต่™•็† ======
78
  def split_audio(path):
79
+ """ๅฐ‡้Ÿณ่จŠๆช”ๆกˆๅˆ†ๅ‰ฒๆˆๅคšๅ€‹ๅฐๆ–ผ 25MB ็š„็‰‡ๆฎต"""
80
  size = os.path.getsize(path)
81
  print(f"[split_audio] ๆช”ๆกˆๅคงๅฐ: {size} bytes ({size/1024/1024:.2f} MB)")
82
  if size <= MAX_SIZE:
 
96
 
97
  # ====== ่ฝ‰้Œ„ๆ ธๅฟƒ ======
98
  def transcribe_core(path, model="whisper-1"):
99
+ """ไฝฟ็”จ Whisper ้€ฒ่กŒ่ชž้Ÿณ่ฝ‰้Œ„๏ผŒไธฆไฝฟ็”จ GPT ้€ฒ่กŒ็น็ฐก่ฝ‰ๆ›ๅ’Œๆ‘˜่ฆ"""
100
  print(f"\n{'='*60}")
101
  print(f"[transcribe_core] ้–‹ๅง‹่ฝ‰้Œ„: {path}")
102
  print(f"{'='*60}")
103
 
104
  start_time = time.time()
105
 
106
+ # ่™•็† MP4 ๆ ผๅผ
107
  if path.lower().endswith(".mp4"):
108
  fixed = path[:-4] + ".m4a"
109
  try:
 
112
  except:
113
  pass
114
 
115
+ # ๅˆ†ๅ‰ฒ้Ÿณ่จŠ
116
  chunks = split_audio(path)
117
  print(f"\n[transcribe_core] === Whisper ่ฝ‰้Œ„ ({len(chunks)} ็‰‡ๆฎต) ===")
118
  raw = []
 
128
  raw_txt = "\n".join(raw)
129
  print(f"[transcribe_core] ๅŽŸๅง‹่ฝ‰้Œ„: {len(raw_txt)} ๅญ—ๅ…ƒ")
130
 
131
+ # ็ฐก่ฝ‰็น
132
  print(f"\n[transcribe_core] === ็ฐก่ฝ‰็น ===")
133
  conv = client.chat.completions.create(
134
  model="gpt-4o-mini",
 
141
  trad = conv.choices[0].message.content.strip()
142
  print(f"[transcribe_core] โœ… ็น้ซ”่ฝ‰ๆ›ๅฎŒๆˆ: {len(trad)} ๅญ—ๅ…ƒ")
143
 
144
+ # AI ๆ‘˜่ฆ
145
  print(f"\n[transcribe_core] === AI ๆ‘˜่ฆ ===")
146
  summ = client.chat.completions.create(
147
  model="gpt-4o-mini",
 
162
 
163
  # ====== Gradio UI ๅ‡ฝๅผ ======
164
  def transcribe_ui(password, file):
165
+ """็ถฒ้ ็•Œ้ข็š„่ฝ‰้Œ„่™•็†ๅ‡ฝๅผ"""
166
  print(f"\n๐ŸŒ [UI] ็ถฒ้ ็‰ˆ่ซ‹ๆฑ‚")
167
  if not password or password.strip() != PASSWORD:
168
+ return "๐Ÿ”’ Authentication", "โŒ Incorrect password. Please check and try again.", "", ""
169
  if not file:
170
+ return "โš ๏ธ No File", "Please upload an audio file first.", "", ""
171
  try:
172
+ # ๆ›ดๆ–ฐ็‹€ๆ…‹็‚บ่™•็†ไธญ
173
+ yield "โณ Processing", "๐ŸŽต Audio file received, starting transcription...", "", ""
174
+
175
  path = _extract_effective_path(file)
176
+
177
+ # ็ฒๅ–ๆ–‡ไปถไฟกๆฏ
178
+ file_size = os.path.getsize(path)
179
+ file_size_mb = file_size / 1024 / 1024
180
+
181
+ yield "๐ŸŽฏ Transcribing", f"๐Ÿ“Š File size: {file_size_mb:.2f} MB\n๐Ÿ”„ Processing with Whisper AI...", "", ""
182
+
183
  text, summary = transcribe_core(path)
184
+
185
+ # ่จˆ็ฎ—ๅญ—ๆ•ธ
186
+ char_count = len(text)
187
+ word_estimate = char_count // 2 # ไธญๆ–‡ไผฐ็ฎ—
188
+
189
+ status_msg = f"โœ… Transcription Complete\n๐Ÿ“ {char_count} characters ({word_estimate} words approx.)"
190
+
191
+ return "โœ… Success", status_msg, text, summary
192
  except Exception as e:
193
  import traceback
194
+ error_trace = traceback.format_exc()
195
+ print(f"โŒ [UI] ้Œฏ่ชค:\n{error_trace}")
196
+ return "โŒ Error", f"An error occurred during processing:\n{str(e)}", "", ""
197
 
198
  # ====== ๅปบ็ซ‹ FastAPI ๆ‡‰็”จ ======
199
  fastapi_app = FastAPI()
 
278
  content={"status": "error", "error": str(e)}
279
  )
280
 
281
+ # ====== ่‡ชๅฎš็พฉ CSS ======
282
+ custom_css = """
283
+ /* ๅ…จๅฑ€ๆจฃๅผ */
284
+ .gradio-container {
285
+ max-width: 1400px !important;
286
+ margin: auto !important;
287
+ }
288
+
289
+ /* ๆจ™้กŒๅ€ๅŸŸ */
290
+ .main-title {
291
+ text-align: center;
292
+ padding: 2rem 0;
293
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
294
+ border-radius: 15px;
295
+ margin-bottom: 2rem;
296
+ color: white;
297
+ }
298
+
299
+ .main-title h1 {
300
+ font-size: 2.5rem;
301
+ margin-bottom: 0.5rem;
302
+ font-weight: 700;
303
+ }
304
+
305
+ .main-title p {
306
+ font-size: 1.1rem;
307
+ opacity: 0.9;
308
+ }
309
+
310
+ /* ๅก็‰‡ๆจฃๅผ */
311
+ .upload-card, .result-card {
312
+ background: white;
313
+ border-radius: 12px;
314
+ padding: 1.5rem;
315
+ box-shadow: 0 4px 6px rgba(0,0,0,0.07);
316
+ margin-bottom: 1.5rem;
317
+ }
318
+
319
+ /* ๆŒ‰้ˆ•ๆจฃๅผ */
320
+ .custom-button {
321
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
322
+ border: none !important;
323
+ color: white !important;
324
+ font-weight: 600 !important;
325
+ padding: 0.75rem 2rem !important;
326
+ font-size: 1.1rem !important;
327
+ border-radius: 8px !important;
328
+ transition: transform 0.2s !important;
329
+ }
330
+
331
+ .custom-button:hover {
332
+ transform: translateY(-2px) !important;
333
+ box-shadow: 0 6px 12px rgba(102, 126, 234, 0.4) !important;
334
+ }
335
+
336
+ /* ็‹€ๆ…‹ๆจ™็ฑค */
337
+ .status-badge {
338
+ display: inline-block;
339
+ padding: 0.5rem 1rem;
340
+ border-radius: 20px;
341
+ font-weight: 600;
342
+ margin-bottom: 0.5rem;
343
+ }
344
+
345
+ .status-success { background: #10b981; color: white; }
346
+ .status-processing { background: #3b82f6; color: white; }
347
+ .status-error { background: #ef4444; color: white; }
348
+ .status-warning { background: #f59e0b; color: white; }
349
+
350
+ /* ๆ–‡ๅญ—ๅ€ๅŸŸ */
351
+ textarea {
352
+ border: 2px solid #e5e7eb !important;
353
+ border-radius: 8px !important;
354
+ font-size: 0.95rem !important;
355
+ line-height: 1.6 !important;
356
+ }
357
+
358
+ /* ๆช”ๆกˆไธŠๅ‚ณๅ€ๅŸŸ */
359
+ .file-upload {
360
+ border: 2px dashed #d1d5db !important;
361
+ border-radius: 12px !important;
362
+ padding: 2rem !important;
363
+ text-align: center !important;
364
+ transition: all 0.3s !important;
365
+ }
366
+
367
+ .file-upload:hover {
368
+ border-color: #667eea !important;
369
+ background: #f9fafb !important;
370
+ }
371
+
372
+ /* ่ณ‡่จŠๅก็‰‡ */
373
+ .info-card {
374
+ background: #f0f9ff;
375
+ border-left: 4px solid #3b82f6;
376
+ padding: 1rem;
377
+ border-radius: 8px;
378
+ margin: 1rem 0;
379
+ }
380
+
381
+ /* Tab ๆจฃๅผ */
382
+ .tab-nav button {
383
+ font-size: 1.05rem !important;
384
+ font-weight: 600 !important;
385
+ padding: 0.75rem 1.5rem !important;
386
+ }
387
+
388
+ .tab-nav button.selected {
389
+ border-bottom: 3px solid #667eea !important;
390
+ }
391
+
392
+ /* ็จ‹ๅผ็ขผๅ€ๅกŠ */
393
+ pre {
394
+ background: #1f2937 !important;
395
+ color: #f3f4f6 !important;
396
+ padding: 1rem !important;
397
+ border-radius: 8px !important;
398
+ overflow-x: auto !important;
399
+ font-size: 0.9rem !important;
400
+ }
401
+
402
+ code {
403
+ background: #1f2937 !important;
404
+ color: #f3f4f6 !important;
405
+ padding: 0.2rem 0.4rem !important;
406
+ border-radius: 4px !important;
407
+ font-family: 'Monaco', 'Menlo', monospace !important;
408
+ }
409
+
410
+ /* ๅŠŸ่ƒฝๅˆ—่กจ */
411
+ .feature-list {
412
+ display: grid;
413
+ grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
414
+ gap: 1rem;
415
+ margin: 1.5rem 0;
416
+ }
417
+
418
+ .feature-item {
419
+ background: white;
420
+ padding: 1.25rem;
421
+ border-radius: 10px;
422
+ border: 1px solid #e5e7eb;
423
+ transition: all 0.3s;
424
+ }
425
+
426
+ .feature-item:hover {
427
+ transform: translateY(-4px);
428
+ box-shadow: 0 8px 16px rgba(0,0,0,0.1);
429
+ }
430
+
431
+ .feature-icon {
432
+ font-size: 2rem;
433
+ margin-bottom: 0.5rem;
434
+ }
435
+
436
+ /* ้Ÿฟๆ‡‰ๅผ่จญ่จˆ */
437
+ @media (max-width: 768px) {
438
+ .main-title h1 { font-size: 1.8rem; }
439
+ .main-title p { font-size: 1rem; }
440
+ }
441
+ """
442
+
443
+ # ====== ๅปบ็ซ‹ Gradio ไป‹้ข ======
444
+ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Audio Transcription Service") as demo:
445
 
446
+ # ไธปๆจ™้กŒ
447
+ gr.HTML("""
448
+ <div class="main-title">
449
+ <h1>๐ŸŽง Audio Transcription Service</h1>
450
+ <p>AI-Powered Speech-to-Text with Smart Summarization</p>
451
+ </div>
452
+ """)
 
 
 
 
 
 
453
 
454
+ with gr.Tabs() as tabs:
455
+ # ====== Tab 1: Web Upload ======
456
+ with gr.Tab("๐ŸŒ Web Interface", id="upload"):
457
+ gr.Markdown("### Upload and transcribe audio files directly from your browser")
458
+
459
+ with gr.Row():
460
+ # ๅทฆๅด๏ผšไธŠๅ‚ณๅ€ๅŸŸ
461
+ with gr.Column(scale=1):
462
+ gr.HTML('<div class="upload-card">')
463
+ gr.Markdown("#### ๐Ÿ” Authentication")
464
+ pw_ui = gr.Textbox(
465
+ label="Password",
466
+ type="password",
467
+ placeholder="Enter your password...",
468
+ show_label=False
469
+ )
470
+
471
+ gr.Markdown("#### ๐Ÿ“ Upload Audio File")
472
+ file_ui = gr.File(
473
+ label="",
474
+ file_types=["audio", ".mp4"],
475
+ file_count="single",
476
+ show_label=False
477
+ )
478
+
479
+ gr.Markdown("""
480
+ <div class="info-card">
481
+ <strong>๐Ÿ’ก Supported Formats:</strong><br>
482
+ MP3, M4A, WAV, OGG, WEBM, MP4
483
+ </div>
484
+ """)
485
+
486
+ btn_ui = gr.Button(
487
+ "๐Ÿš€ Start Transcription",
488
+ variant="primary",
489
+ size="lg",
490
+ elem_classes="custom-button"
491
+ )
492
+ gr.HTML('</div>')
493
+
494
+ # ๅณๅด๏ผš็ตๆžœๅ€ๅŸŸ
495
+ with gr.Column(scale=2):
496
+ gr.HTML('<div class="result-card">')
497
+ gr.Markdown("#### ๐Ÿ“Š Processing Status")
498
+ status_label = gr.Textbox(
499
+ label="",
500
+ value="โธ๏ธ Ready",
501
+ interactive=False,
502
+ show_label=False,
503
+ max_lines=1
504
+ )
505
+ status_detail = gr.Textbox(
506
+ label="",
507
+ value="Upload an audio file and click 'Start Transcription' to begin",
508
+ interactive=False,
509
+ show_label=False,
510
+ lines=2
511
+ )
512
+
513
+ gr.Markdown("#### ๐Ÿ“ Transcription Result")
514
+ transcript_ui = gr.Textbox(
515
+ label="",
516
+ lines=12,
517
+ placeholder="Transcription will appear here...",
518
+ show_label=False,
519
+ show_copy_button=True
520
+ )
521
+
522
+ gr.Markdown("#### ๐Ÿ’ก AI Summary")
523
+ summary_ui = gr.Textbox(
524
+ label="",
525
+ lines=6,
526
+ placeholder="AI-generated summary will appear here...",
527
+ show_label=False,
528
+ show_copy_button=True
529
+ )
530
+ gr.HTML('</div>')
531
+
532
+ # ็ถๅฎšไบ‹ไปถ
533
+ btn_ui.click(
534
+ transcribe_ui,
535
+ inputs=[pw_ui, file_ui],
536
+ outputs=[status_label, status_detail, transcript_ui, summary_ui]
537
+ )
538
 
539
+ # ====== Tab 2: API Documentation ======
540
+ with gr.Tab("๐Ÿ“ฑ API Documentation", id="api"):
541
+ gr.Markdown("""
542
+ ## ๐Ÿš€ API Overview
543
+
544
+ This service provides a **synchronous REST API** for audio transcription, perfect for integration with iPhone Shortcuts, mobile apps, or any HTTP client.
545
+ """)
546
+
547
+ gr.HTML("""
548
+ <div class="feature-list">
549
+ <div class="feature-item">
550
+ <div class="feature-icon">โšก</div>
551
+ <h3>Fully Synchronous</h3>
552
+ <p>Returns complete results in a single request - no polling required</p>
553
+ </div>
554
+ <div class="feature-item">
555
+ <div class="feature-icon">๐Ÿ”„</div>
556
+ <h3>Auto-Processing</h3>
557
+ <p>Handles files of any length automatically with intelligent chunking</p>
558
+ </div>
559
+ <div class="feature-item">
560
+ <div class="feature-icon">๐Ÿ›ก๏ธ</div>
561
+ <h3>Reliable & Stable</h3>
562
+ <p>Waits for complete processing before returning results</p>
563
+ </div>
564
+ <div class="feature-item">
565
+ <div class="feature-icon">๐ŸŒ</div>
566
+ <h3>Universal Access</h3>
567
+ <p>Works with any HTTP client or programming language</p>
568
+ </div>
569
+ </div>
570
+ """)
571
+
572
+ gr.Markdown("""
573
+ ---
574
+
575
+ ## ๐Ÿ“ก API Endpoint
576
+
577
+ **URL:** `/api/transcribe`
578
+ **Method:** `POST`
579
+ **Content-Type:** `application/json`
580
+
581
+ ### Request Format
582
+
583
+ ```json
584
+ {
585
+ "password": "your_password_here",
586
+ "file_data": "data:audio/m4a;base64,UklGRiQAAABXQVZFZm10...",
587
+ "file_name": "recording.m4a"
588
+ }
589
+ ```
590
+
591
+ **Parameters:**
592
+ - `password` (string, required): Authentication password
593
+ - `file_data` (string, required): Base64-encoded audio file in data URL format
594
+ - `file_name` (string, optional): Original filename (default: "recording.m4a")
595
+
596
+ ### Response Format
597
+
598
+ **Success Response (200 OK):**
599
+ ```json
600
+ {
601
+ "status": "success",
602
+ "transcription": "ๅฎŒๆ•ด็š„่ชž้Ÿณ่ฝ‰ๆ–‡ๅญ—ๅ…งๅฎน...",
603
+ "summary": "AI ็”Ÿๆˆ็š„ๅ…งๅฎนๆ‘˜่ฆ..."
604
+ }
605
+ ```
606
+
607
+ **Error Response (401/400/500):**
608
+ ```json
609
+ {
610
+ "status": "error",
611
+ "error": "Error message description"
612
+ }
613
+ ```
614
+
615
+ ---
616
+
617
+ ## ๐Ÿ“ฑ iPhone Shortcuts Setup Guide
618
+
619
+ ### Step-by-Step Configuration:
620
+
621
+ 1. **Get File** โ†’ Select your audio recording
622
+
623
+ 2. **Base64 Encode** โ†’ Encode the file content
624
+
625
+ 3. **Text** โ†’ Create data URL format:
626
+ ```
627
+ data:audio/m4a;base64,[Base64 Encode Result]
628
+ ```
629
+
630
+ 4. **Dictionary** โ†’ Build request body:
631
+ - Key: `password`, Value: `chou`
632
+ - Key: `file_data`, Value: [Text from step 3]
633
+ - Key: `file_name`, Value: `recording.m4a`
634
+
635
+ 5. **Get Contents of URL**:
636
+ - URL: `https://your-domain.com/api/transcribe`
637
+ - Method: `POST`
638
+ - Headers:
639
+ - `Content-Type`: `application/json`
640
+ - Request Body: [Dictionary from step 4]
641
+ - Request Body Type: `JSON`
642
+
643
+ 6. **Get Dictionary Value**:
644
+ - Key: `transcription` โ†’ Get transcription result
645
+ - Key: `summary` โ†’ Get AI summary
646
+
647
+ 7. **Show Result** or **Copy to Clipboard**
648
+
649
+ ---
650
+
651
+ ## ๐Ÿงช Testing the API
652
+
653
+ ### Using cURL:
654
+
655
+ ```bash
656
+ curl -X POST https://your-domain.com/api/transcribe \\
657
+ -H "Content-Type: application/json" \\
658
+ -d '{
659
+ "password": "chou",
660
+ "file_data": "data:audio/m4a;base64,AAAA...",
661
+ "file_name": "test.m4a"
662
+ }'
663
+ ```
664
+
665
+ ### Using Python:
666
+
667
+ ```python
668
+ import requests
669
+ import base64
670
+
671
+ # Read and encode audio file
672
+ with open("audio.m4a", "rb") as f:
673
+ audio_b64 = base64.b64encode(f.read()).decode()
674
+
675
+ # Prepare request
676
+ url = "https://your-domain.com/api/transcribe"
677
+ payload = {
678
+ "password": "chou",
679
+ "file_data": f"data:audio/m4a;base64,{audio_b64}",
680
+ "file_name": "audio.m4a"
681
+ }
682
+
683
+ # Send request
684
+ response = requests.post(url, json=payload)
685
+ result = response.json()
686
+
687
+ if result["status"] == "success":
688
+ print("Transcription:", result["transcription"])
689
+ print("Summary:", result["summary"])
690
+ else:
691
+ print("Error:", result["error"])
692
+ ```
693
+
694
+ ### Using JavaScript (Node.js):
695
+
696
+ ```javascript
697
+ const fs = require('fs');
698
+ const axios = require('axios');
699
+
700
+ // Read and encode audio file
701
+ const audioBuffer = fs.readFileSync('audio.m4a');
702
+ const audioB64 = audioBuffer.toString('base64');
703
+
704
+ // Send request
705
+ axios.post('https://your-domain.com/api/transcribe', {
706
+ password: 'chou',
707
+ file_data: `data:audio/m4a;base64,${audioB64}`,
708
+ file_name: 'audio.m4a'
709
+ })
710
+ .then(response => {
711
+ const { transcription, summary } = response.data;
712
+ console.log('Transcription:', transcription);
713
+ console.log('Summary:', summary);
714
+ })
715
+ .catch(error => {
716
+ console.error('Error:', error.response.data);
717
+ });
718
+ ```
719
+
720
+ ---
721
+
722
+ ## โš™๏ธ Technical Specifications
723
+
724
+ | Feature | Details |
725
+ |---------|---------|
726
+ | **Max File Size** | 25 MB per chunk (automatically splits larger files) |
727
+ | **Supported Formats** | MP3, M4A, MP4, WAV, OGG, WEBM, AAC, OPUS |
728
+ | **Processing Model** | OpenAI Whisper (high accuracy) |
729
+ | **Summary Model** | GPT-4o-mini (intelligent summarization) |
730
+ | **Language Support** | Traditional Chinese (Taiwan) output |
731
+ | **Response Time** | Varies by file length (typically 5-30 seconds) |
732
+ | **Authentication** | Password-based security |
733
+
734
+ ---
735
+
736
+ ## ๐Ÿ’ก Important Notes
737
+
738
+ - โœ… **Fully synchronous:** The API waits for complete processing before responding
739
+ - โœ… **No polling needed:** Single request returns final results
740
+ - โœ… **Auto-chunking:** Large files are automatically split and processed
741
+ - โœ… **Reliable:** Connection remains open until processing completes
742
+ - โš ๏ธ **Timeout considerations:** Ensure your HTTP client has sufficient timeout settings (recommended: 300 seconds)
743
+ - ๐Ÿ”’ **Security:** Always use HTTPS in production environments
744
+
745
+ ---
746
+
747
+ ## ๐Ÿ†˜ Troubleshooting
748
+
749
+ **Problem:** 401 Unauthorized
750
+ **Solution:** Check that your password is correct
751
+
752
+ **Problem:** 400 Bad Request
753
+ **Solution:** Verify that `file_data` is in correct data URL format
754
+
755
+ **Problem:** 500 Internal Server Error
756
+ **Solution:** Check server logs for details; ensure audio file is valid
757
+
758
+ **Problem:** Request timeout
759
+ **Solution:** Increase HTTP client timeout setting or split audio into smaller files
760
+
761
+ **Problem:** Base64 encoding issues
762
+ **Solution:** Ensure proper encoding and data URL format: `data:audio/m4a;base64,[encoded_data]`
763
+
764
+ ---
765
+
766
+ ## ๐Ÿ“Š Example Response Times
767
+
768
+ | File Duration | Approximate Processing Time |
769
+ |---------------|---------------------------|
770
+ | 0-30 seconds | 5-10 seconds |
771
+ | 30-60 seconds | 10-15 seconds |
772
+ | 1-3 minutes | 15-30 seconds |
773
+ | 3-5 minutes | 30-60 seconds |
774
+ | 5-10 minutes | 1-2 minutes |
775
+
776
+ *Note: Processing time includes transcription, language conversion, and AI summarization.*
777
+
778
+ ---
779
+
780
+ ## ๐Ÿ”— Integration Examples
781
+
782
+ ### Zapier Integration
783
+ 1. Trigger: New file in storage
784
+ 2. Action: Webhooks by Zapier (POST request)
785
+ 3. Configure endpoint with password and base64 encoded file
786
+
787
+ ### Make.com Integration
788
+ 1. Add HTTP module
789
+ 2. Configure POST request with JSON payload
790
+ 3. Parse response and route to desired action
791
+
792
+ ### iOS Shortcuts Tips
793
+ - Use "Get Contents of URL" action
794
+ - Set request timeout to at least 120 seconds
795
+ - Add error handling for network issues
796
+ - Consider showing progress notification
797
+
798
+ ---
799
+
800
+ ## ๐Ÿ“ž Support & Resources
801
+
802
+ For additional help or feature requests, please contact your service administrator.
803
+
804
+ **Useful Links:**
805
+ - OpenAI Whisper Documentation
806
+ - Base64 Encoding Tools
807
+ - iPhone Shortcuts Gallery