MichaelChou0806 committed on
Commit
bc06406
·
verified ·
1 Parent(s): 35b1d50

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -487
app.py CHANGED
@@ -165,35 +165,17 @@ def transcribe_ui(password, file):
165
  """網頁界面的轉錄處理函式"""
166
  print(f"\n🌐 [UI] 網頁版請求")
167
  if not password or password.strip() != PASSWORD:
168
- return "🔒 Authentication", "Incorrect password. Please check and try again.", "", ""
169
  if not file:
170
- return "⚠️ No File", "Please upload an audio file first.", "", ""
171
  try:
172
- # 更新狀態為處理中
173
- yield "⏳ Processing", "🎵 Audio file received, starting transcription...", "", ""
174
-
175
  path = _extract_effective_path(file)
176
-
177
- # 獲取文件信息
178
- file_size = os.path.getsize(path)
179
- file_size_mb = file_size / 1024 / 1024
180
-
181
- yield "🎯 Transcribing", f"📊 File size: {file_size_mb:.2f} MB\n🔄 Processing with Whisper AI...", "", ""
182
-
183
  text, summary = transcribe_core(path)
184
-
185
- # 計算字數
186
- char_count = len(text)
187
- word_estimate = char_count // 2 # 中文估算
188
-
189
- status_msg = f"✅ Transcription Complete\n📝 {char_count} characters ({word_estimate} words approx.)"
190
-
191
- return "✅ Success", status_msg, text, summary
192
  except Exception as e:
193
  import traceback
194
- error_trace = traceback.format_exc()
195
- print(f"❌ [UI] 錯誤:\n{error_trace}")
196
- return "❌ Error", f"An error occurred during processing:\n{str(e)}", "", ""
197
 
198
  # ====== 建立 FastAPI 應用 ======
199
  fastapi_app = FastAPI()
@@ -280,528 +262,214 @@ async def api_transcribe_sync(request: Request):
280
 
281
  # ====== 自定義 CSS ======
282
  custom_css = """
283
- /* 全局樣式 */
284
  .gradio-container {
285
- max-width: 1400px !important;
286
  margin: auto !important;
287
  }
288
 
289
- /* 標題區域 */
290
- .main-title {
291
  text-align: center;
292
- padding: 2rem 0;
293
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
294
- border-radius: 15px;
295
  margin-bottom: 2rem;
296
  color: white;
297
  }
298
 
299
- .main-title h1 {
300
- font-size: 2.5rem;
301
- margin-bottom: 0.5rem;
302
  font-weight: 700;
303
  }
304
 
305
- .main-title p {
306
- font-size: 1.1rem;
307
- opacity: 0.9;
308
- }
309
-
310
- /* 卡片樣式 */
311
- .upload-card, .result-card {
312
- background: white;
313
- border-radius: 12px;
314
- padding: 1.5rem;
315
- box-shadow: 0 4px 6px rgba(0,0,0,0.07);
316
- margin-bottom: 1.5rem;
317
  }
318
 
319
- /* 按鈕樣式 */
320
- .custom-button {
321
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
322
  border: none !important;
323
  color: white !important;
324
  font-weight: 600 !important;
325
- padding: 0.75rem 2rem !important;
326
- font-size: 1.1rem !important;
327
- border-radius: 8px !important;
328
- transition: transform 0.2s !important;
329
- }
330
-
331
- .custom-button:hover {
332
- transform: translateY(-2px) !important;
333
- box-shadow: 0 6px 12px rgba(102, 126, 234, 0.4) !important;
334
- }
335
-
336
- /* 狀態標籤 */
337
- .status-badge {
338
- display: inline-block;
339
- padding: 0.5rem 1rem;
340
- border-radius: 20px;
341
- font-weight: 600;
342
- margin-bottom: 0.5rem;
343
  }
344
 
345
- .status-success { background: #10b981; color: white; }
346
- .status-processing { background: #3b82f6; color: white; }
347
- .status-error { background: #ef4444; color: white; }
348
- .status-warning { background: #f59e0b; color: white; }
349
-
350
- /* 文字區域 */
351
  textarea {
352
- border: 2px solid #e5e7eb !important;
353
- border-radius: 8px !important;
354
  font-size: 0.95rem !important;
355
  line-height: 1.6 !important;
356
  }
357
 
358
- /* 檔案上傳區域 */
359
- .file-upload {
360
- border: 2px dashed #d1d5db !important;
361
- border-radius: 12px !important;
362
- padding: 2rem !important;
363
- text-align: center !important;
364
- transition: all 0.3s !important;
365
- }
366
-
367
- .file-upload:hover {
368
- border-color: #667eea !important;
369
- background: #f9fafb !important;
370
- }
371
-
372
  /* 資訊卡片 */
373
- .info-card {
374
  background: #f0f9ff;
375
  border-left: 4px solid #3b82f6;
376
  padding: 1rem;
377
- border-radius: 8px;
378
  margin: 1rem 0;
 
379
  }
380
 
381
- /* Tab 樣式 */
382
- .tab-nav button {
383
- font-size: 1.05rem !important;
384
- font-weight: 600 !important;
385
- padding: 0.75rem 1.5rem !important;
386
- }
387
-
388
- .tab-nav button.selected {
389
- border-bottom: 3px solid #667eea !important;
390
- }
391
-
392
- /* 程式碼區塊 */
393
  pre {
394
  background: #1f2937 !important;
395
  color: #f3f4f6 !important;
396
  padding: 1rem !important;
397
- border-radius: 8px !important;
398
- overflow-x: auto !important;
399
- font-size: 0.9rem !important;
400
  }
401
 
402
  code {
403
- background: #1f2937 !important;
404
- color: #f3f4f6 !important;
405
  padding: 0.2rem 0.4rem !important;
406
- border-radius: 4px !important;
407
- font-family: 'Monaco', 'Menlo', monospace !important;
408
- }
409
-
410
- /* 功能列表 */
411
- .feature-list {
412
- display: grid;
413
- grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
414
- gap: 1rem;
415
- margin: 1.5rem 0;
416
- }
417
-
418
- .feature-item {
419
- background: white;
420
- padding: 1.25rem;
421
- border-radius: 10px;
422
- border: 1px solid #e5e7eb;
423
- transition: all 0.3s;
424
- }
425
-
426
- .feature-item:hover {
427
- transform: translateY(-4px);
428
- box-shadow: 0 8px 16px rgba(0,0,0,0.1);
429
- }
430
-
431
- .feature-icon {
432
- font-size: 2rem;
433
- margin-bottom: 0.5rem;
434
- }
435
-
436
- /* 響應式設計 */
437
- @media (max-width: 768px) {
438
- .main-title h1 { font-size: 1.8rem; }
439
- .main-title p { font-size: 1rem; }
440
  }
441
  """
442
 
443
  # ====== 建立 Gradio 介面 ======
444
- with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Audio Transcription Service") as demo:
445
 
446
- # 主標題
447
  gr.HTML("""
448
- <div class="main-title">
449
  <h1>🎧 Audio Transcription Service</h1>
450
- <p>AI-Powered Speech-to-Text with Smart Summarization</p>
451
  </div>
452
  """)
453
 
454
- with gr.Tabs() as tabs:
455
- # ====== Tab 1: Web Upload ======
456
- with gr.Tab("🌐 Web Interface", id="upload"):
457
- gr.Markdown("### Upload and transcribe audio files directly from your browser")
458
-
459
  with gr.Row():
460
- # 左側:上傳區域
461
  with gr.Column(scale=1):
462
- gr.HTML('<div class="upload-card">')
463
- gr.Markdown("#### 🔐 Authentication")
464
- pw_ui = gr.Textbox(
465
- label="Password",
466
- type="password",
467
- placeholder="Enter your password...",
468
- show_label=False
469
- )
470
 
471
- gr.Markdown("#### 📁 Upload Audio File")
472
- file_ui = gr.File(
473
- label="",
474
- file_types=["audio", ".mp4"],
475
- file_count="single",
476
- show_label=False
477
- )
478
-
479
- gr.Markdown("""
480
- <div class="info-card">
481
- <strong>💡 Supported Formats:</strong><br>
482
- MP3, M4A, WAV, OGG, WEBM, MP4
483
- </div>
484
  """)
485
-
486
- btn_ui = gr.Button(
487
- "🚀 Start Transcription",
488
- variant="primary",
489
- size="lg",
490
- elem_classes="custom-button"
491
- )
492
- gr.HTML('</div>')
493
 
494
- # 右側:結果區域
495
  with gr.Column(scale=2):
496
- gr.HTML('<div class="result-card">')
497
- gr.Markdown("#### 📊 Processing Status")
498
- status_label = gr.Textbox(
499
- label="",
500
- value="⏸️ Ready",
501
- interactive=False,
502
- show_label=False,
503
- max_lines=1
504
- )
505
- status_detail = gr.Textbox(
506
- label="",
507
- value="Upload an audio file and click 'Start Transcription' to begin",
508
- interactive=False,
509
- show_label=False,
510
- lines=2
511
- )
512
-
513
- gr.Markdown("#### 📝 Transcription Result")
514
- transcript_ui = gr.Textbox(
515
- label="",
516
- lines=12,
517
- placeholder="Transcription will appear here...",
518
- show_label=False,
519
- show_copy_button=True
520
- )
521
-
522
- gr.Markdown("#### 💡 AI Summary")
523
- summary_ui = gr.Textbox(
524
- label="",
525
- lines=6,
526
- placeholder="AI-generated summary will appear here...",
527
- show_label=False,
528
- show_copy_button=True
529
- )
530
- gr.HTML('</div>')
531
 
532
- # 綁定事件
533
- btn_ui.click(
534
- transcribe_ui,
535
- inputs=[pw_ui, file_ui],
536
- outputs=[status_label, status_detail, transcript_ui, summary_ui]
537
- )
538
 
539
- # ====== Tab 2: API Documentation ======
540
- with gr.Tab("📱 API Documentation", id="api"):
541
  gr.Markdown("""
542
- ## 🚀 API Overview
543
-
544
- This service provides a **synchronous REST API** for audio transcription, perfect for integration with iPhone Shortcuts, mobile apps, or any HTTP client.
545
- """)
546
-
547
- gr.HTML("""
548
- <div class="feature-list">
549
- <div class="feature-item">
550
- <div class="feature-icon">⚡</div>
551
- <h3>Fully Synchronous</h3>
552
- <p>Returns complete results in a single request - no polling required</p>
553
- </div>
554
- <div class="feature-item">
555
- <div class="feature-icon">🔄</div>
556
- <h3>Auto-Processing</h3>
557
- <p>Handles files of any length automatically with intelligent chunking</p>
558
- </div>
559
- <div class="feature-item">
560
- <div class="feature-icon">🛡️</div>
561
- <h3>Reliable & Stable</h3>
562
- <p>Waits for complete processing before returning results</p>
563
- </div>
564
- <div class="feature-item">
565
- <div class="feature-icon">🌍</div>
566
- <h3>Universal Access</h3>
567
- <p>Works with any HTTP client or programming language</p>
568
- </div>
569
- </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
  """)
571
-
572
- gr.Markdown("""
573
- ---
574
-
575
- ## 📡 API Endpoint
576
-
577
- **URL:** `/api/transcribe`
578
- **Method:** `POST`
579
- **Content-Type:** `application/json`
580
-
581
- ### Request Format
582
-
583
- ```json
584
- {
585
- "password": "your_password_here",
586
- "file_data": "data:audio/m4a;base64,UklGRiQAAABXQVZFZm10...",
587
- "file_name": "recording.m4a"
588
- }
589
- ```
590
-
591
- **Parameters:**
592
- - `password` (string, required): Authentication password
593
- - `file_data` (string, required): Base64-encoded audio file in data URL format
594
- - `file_name` (string, optional): Original filename (default: "recording.m4a")
595
-
596
- ### Response Format
597
-
598
- **Success Response (200 OK):**
599
- ```json
600
- {
601
- "status": "success",
602
- "transcription": "完整的語音轉文字內容...",
603
- "summary": "AI 生成的內容摘要..."
604
- }
605
- ```
606
-
607
- **Error Response (401/400/500):**
608
- ```json
609
- {
610
- "status": "error",
611
- "error": "Error message description"
612
- }
613
- ```
614
-
615
- ---
616
-
617
- ## 📱 iPhone Shortcuts Setup Guide
618
-
619
- ### Step-by-Step Configuration:
620
-
621
- 1. **Get File** → Select your audio recording
622
-
623
- 2. **Base64 Encode** → Encode the file content
624
-
625
- 3. **Text** → Create data URL format:
626
- ```
627
- data:audio/m4a;base64,[Base64 Encode Result]
628
- ```
629
-
630
- 4. **Dictionary** → Build request body:
631
- - Key: `password`, Value: `chou`
632
- - Key: `file_data`, Value: [Text from step 3]
633
- - Key: `file_name`, Value: `recording.m4a`
634
-
635
- 5. **Get Contents of URL**:
636
- - URL: `https://your-domain.com/api/transcribe`
637
- - Method: `POST`
638
- - Headers:
639
- - `Content-Type`: `application/json`
640
- - Request Body: [Dictionary from step 4]
641
- - Request Body Type: `JSON`
642
-
643
- 6. **Get Dictionary Value**:
644
- - Key: `transcription` → Get transcription result
645
- - Key: `summary` → Get AI summary
646
-
647
- 7. **Show Result** or **Copy to Clipboard**
648
-
649
- ---
650
-
651
- ## 🧪 Testing the API
652
-
653
- ### Using cURL:
654
-
655
- ```bash
656
- curl -X POST https://your-domain.com/api/transcribe \\
657
- -H "Content-Type: application/json" \\
658
- -d '{
659
- "password": "chou",
660
- "file_data": "data:audio/m4a;base64,AAAA...",
661
- "file_name": "test.m4a"
662
- }'
663
- ```
664
-
665
- ### Using Python:
666
-
667
- ```python
668
- import requests
669
- import base64
670
-
671
- # Read and encode audio file
672
- with open("audio.m4a", "rb") as f:
673
- audio_b64 = base64.b64encode(f.read()).decode()
674
-
675
- # Prepare request
676
- url = "https://your-domain.com/api/transcribe"
677
- payload = {
678
- "password": "chou",
679
- "file_data": f"data:audio/m4a;base64,{audio_b64}",
680
- "file_name": "audio.m4a"
681
- }
682
-
683
- # Send request
684
- response = requests.post(url, json=payload)
685
- result = response.json()
686
-
687
- if result["status"] == "success":
688
- print("Transcription:", result["transcription"])
689
- print("Summary:", result["summary"])
690
- else:
691
- print("Error:", result["error"])
692
- ```
693
-
694
- ### Using JavaScript (Node.js):
695
-
696
- ```javascript
697
- const fs = require('fs');
698
- const axios = require('axios');
699
-
700
- // Read and encode audio file
701
- const audioBuffer = fs.readFileSync('audio.m4a');
702
- const audioB64 = audioBuffer.toString('base64');
703
-
704
- // Send request
705
- axios.post('https://your-domain.com/api/transcribe', {
706
- password: 'chou',
707
- file_data: `data:audio/m4a;base64,${audioB64}`,
708
- file_name: 'audio.m4a'
709
- })
710
- .then(response => {
711
- const { transcription, summary } = response.data;
712
- console.log('Transcription:', transcription);
713
- console.log('Summary:', summary);
714
- })
715
- .catch(error => {
716
- console.error('Error:', error.response.data);
717
- });
718
- ```
719
-
720
- ---
721
-
722
- ## ⚙️ Technical Specifications
723
-
724
- | Feature | Details |
725
- |---------|---------|
726
- | **Max File Size** | 25 MB per chunk (automatically splits larger files) |
727
- | **Supported Formats** | MP3, M4A, MP4, WAV, OGG, WEBM, AAC, OPUS |
728
- | **Processing Model** | OpenAI Whisper (high accuracy) |
729
- | **Summary Model** | GPT-4o-mini (intelligent summarization) |
730
- | **Language Support** | Traditional Chinese (Taiwan) output |
731
- | **Response Time** | Varies by file length (typically 5-30 seconds) |
732
- | **Authentication** | Password-based security |
733
-
734
- ---
735
-
736
- ## 💡 Important Notes
737
-
738
- - ✅ **Fully synchronous:** The API waits for complete processing before responding
739
- - ✅ **No polling needed:** Single request returns final results
740
- - ✅ **Auto-chunking:** Large files are automatically split and processed
741
- - ✅ **Reliable:** Connection remains open until processing completes
742
- - ⚠️ **Timeout considerations:** Ensure your HTTP client has sufficient timeout settings (recommended: 300 seconds)
743
- - 🔒 **Security:** Always use HTTPS in production environments
744
-
745
- ---
746
-
747
- ## 🆘 Troubleshooting
748
-
749
- **Problem:** 401 Unauthorized
750
- **Solution:** Check that your password is correct
751
-
752
- **Problem:** 400 Bad Request
753
- **Solution:** Verify that `file_data` is in correct data URL format
754
-
755
- **Problem:** 500 Internal Server Error
756
- **Solution:** Check server logs for details; ensure audio file is valid
757
-
758
- **Problem:** Request timeout
759
- **Solution:** Increase HTTP client timeout setting or split audio into smaller files
760
-
761
- **Problem:** Base64 encoding issues
762
- **Solution:** Ensure proper encoding and data URL format: `data:audio/m4a;base64,[encoded_data]`
763
-
764
- ---
765
-
766
- ## 📊 Example Response Times
767
-
768
- | File Duration | Approximate Processing Time |
769
- |---------------|---------------------------|
770
- | 0-30 seconds | 5-10 seconds |
771
- | 30-60 seconds | 10-15 seconds |
772
- | 1-3 minutes | 15-30 seconds |
773
- | 3-5 minutes | 30-60 seconds |
774
- | 5-10 minutes | 1-2 minutes |
775
-
776
- *Note: Processing time includes transcription, language conversion, and AI summarization.*
777
-
778
- ---
779
-
780
- ## 🔗 Integration Examples
781
-
782
- ### Zapier Integration
783
- 1. Trigger: New file in storage
784
- 2. Action: Webhooks by Zapier (POST request)
785
- 3. Configure endpoint with password and base64 encoded file
786
-
787
- ### Make.com Integration
788
- 1. Add HTTP module
789
- 2. Configure POST request with JSON payload
790
- 3. Parse response and route to desired action
791
-
792
- ### iOS Shortcuts Tips
793
- - Use "Get Contents of URL" action
794
- - Set request timeout to at least 120 seconds
795
- - Add error handling for network issues
796
- - Consider showing progress notification
797
-
798
- ---
799
-
800
- ## 📞 Support & Resources
801
-
802
- For additional help or feature requests, please contact your service administrator.
803
-
804
- **Useful Links:**
805
- - OpenAI Whisper Documentation
806
- - Base64 Encoding Tools
807
- - iPhone Shortcuts Gallery
 
165
  """網頁界面的轉錄處理函式"""
166
  print(f"\n🌐 [UI] 網頁版請求")
167
  if not password or password.strip() != PASSWORD:
168
+ return "❌ Password incorrect", "", ""
169
  if not file:
170
+ return "⚠️ No file uploaded", "", ""
171
  try:
 
 
 
172
  path = _extract_effective_path(file)
 
 
 
 
 
 
 
173
  text, summary = transcribe_core(path)
174
+ return "✅ Transcription completed", text, summary
 
 
 
 
 
 
 
175
  except Exception as e:
176
  import traceback
177
+ print(f"❌ [UI] 錯誤:\n{traceback.format_exc()}")
178
+ return f"❌ Error: {e}", "", ""
 
179
 
180
  # ====== 建立 FastAPI 應用 ======
181
  fastapi_app = FastAPI()
 
262
 
263
  # ====== 自定義 CSS ======
264
  custom_css = """
 
265
  .gradio-container {
266
+ max-width: 1200px !important;
267
  margin: auto !important;
268
  }
269
 
270
+ /* 主標題 */
271
+ .main-header {
272
  text-align: center;
273
+ padding: 2.5rem 1rem;
274
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
275
+ border-radius: 12px;
276
  margin-bottom: 2rem;
277
  color: white;
278
  }
279
 
280
+ .main-header h1 {
281
+ font-size: 2.2rem;
282
+ margin: 0 0 0.5rem 0;
283
  font-weight: 700;
284
  }
285
 
286
+ .main-header p {
287
+ font-size: 1rem;
288
+ margin: 0;
289
+ opacity: 0.95;
 
 
 
 
 
 
 
 
290
  }
291
 
292
+ /* 按鈕 */
293
+ .primary-btn {
294
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
295
  border: none !important;
296
  color: white !important;
297
  font-weight: 600 !important;
298
+ font-size: 1.05rem !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  }
300
 
301
+ /* 文字框 */
 
 
 
 
 
302
  textarea {
 
 
303
  font-size: 0.95rem !important;
304
  line-height: 1.6 !important;
305
  }
306
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  /* 資訊卡片 */
308
+ .info-box {
309
  background: #f0f9ff;
310
  border-left: 4px solid #3b82f6;
311
  padding: 1rem;
312
+ border-radius: 6px;
313
  margin: 1rem 0;
314
+ font-size: 0.9rem;
315
  }
316
 
317
+ /* 程式碼 */
 
 
 
 
 
 
 
 
 
 
 
318
  pre {
319
  background: #1f2937 !important;
320
  color: #f3f4f6 !important;
321
  padding: 1rem !important;
322
+ border-radius: 6px !important;
323
+ font-size: 0.85rem !important;
 
324
  }
325
 
326
  code {
327
+ background: #e5e7eb !important;
328
+ color: #1f2937 !important;
329
  padding: 0.2rem 0.4rem !important;
330
+ border-radius: 3px !important;
331
+ font-size: 0.9rem !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
  }
333
  """
334
 
335
  # ====== 建立 Gradio 介面 ======
336
+ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Audio Transcription") as demo:
337
 
338
+ # 標題
339
  gr.HTML("""
340
+ <div class="main-header">
341
  <h1>🎧 Audio Transcription Service</h1>
342
+ <p>AI-Powered Speech-to-Text with Summarization</p>
343
  </div>
344
  """)
345
 
346
+ with gr.Tabs():
347
+ # ====== Tab 1: Upload ======
348
+ with gr.Tab("🌐 Web Upload"):
 
 
349
  with gr.Row():
 
350
  with gr.Column(scale=1):
351
+ pw = gr.Textbox(label="Password", type="password", placeholder="Enter password")
352
+ audio_file = gr.File(label="Audio File", file_types=["audio", ".mp4"])
353
+ submit_btn = gr.Button("🚀 Start Transcription", variant="primary", elem_classes="primary-btn")
 
 
 
 
 
354
 
355
+ gr.HTML("""
356
+ <div class="info-box">
357
+ <strong>Supported:</strong> MP3, M4A, WAV, OGG, WEBM, MP4<br>
358
+ <strong>Max Size:</strong> Auto-split for large files
359
+ </div>
 
 
 
 
 
 
 
 
360
  """)
 
 
 
 
 
 
 
 
361
 
 
362
  with gr.Column(scale=2):
363
+ status = gr.Textbox(label="Status", interactive=False)
364
+ transcription = gr.Textbox(label="Transcription", lines=12, show_copy_button=True)
365
+ summary = gr.Textbox(label="Summary", lines=5, show_copy_button=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
 
367
+ submit_btn.click(transcribe_ui, [pw, audio_file], [status, transcription, summary])
 
 
 
 
 
368
 
369
+ # ====== Tab 2: API ======
370
+ with gr.Tab("📱 API Documentation"):
371
  gr.Markdown("""
372
+ ## API Endpoint
373
+
374
+ **URL:** `/api/transcribe` (POST)
375
+ **Type:** Synchronous - returns complete results in one request
376
+
377
+ ### Request Format
378
+
379
+ ```json
380
+ {
381
+ "password": "your_password",
382
+ "file_data": "data:audio/m4a;base64,UklGR...",
383
+ "file_name": "recording.m4a"
384
+ }
385
+ ```
386
+
387
+ ### Response Format
388
+
389
+ ```json
390
+ {
391
+ "status": "success",
392
+ "transcription": "Full transcription text...",
393
+ "summary": "AI-generated summary..."
394
+ }
395
+ ```
396
+
397
+ ---
398
+
399
+ ## iPhone Shortcuts Setup
400
+
401
+ 1. **Get File** → Audio recording
402
+ 2. **Base64 Encode** → File content
403
+ 3. **Text** → Create data URL:
404
+ ```
405
+ data:audio/m4a;base64,[Base64 Result]
406
+ ```
407
+ 4. **Dictionary** → Request body:
408
+ - `password`: `your_password`
409
+ - `file_data`: [Text from step 3]
410
+ - `file_name`: `recording.m4a`
411
+ 5. **Get Contents of URL**:
412
+ - URL: `https://your-domain.com/api/transcribe`
413
+ - Method: `POST`
414
+ - Headers: `Content-Type: application/json`
415
+ - Body: [Dictionary], Type: `JSON`
416
+ 6. **Get Dictionary Value**:
417
+ - `transcription` → Full text
418
+ - `summary` → Summary
419
+
420
+ ---
421
+
422
+ ## Testing with cURL
423
+
424
+ ```bash
425
+ curl -X POST https://your-domain.com/api/transcribe \\
426
+ -H "Content-Type: application/json" \\
427
+ -d '{
428
+ "password": "your_password",
429
+ "file_data": "data:audio/m4a;base64,AAAA...",
430
+ "file_name": "test.m4a"
431
+ }'
432
+ ```
433
+
434
+ ---
435
+
436
+ ## Technical Details
437
+
438
+ - **Transcription:** OpenAI Whisper (high accuracy)
439
+ - **Summarization:** GPT-4o-mini
440
+ - **Output:** Traditional Chinese (Taiwan)
441
+ - **Processing:** Fully synchronous, no polling needed
442
+ - **File Handling:** Auto-split for files > 25MB
443
+
444
+ ---
445
+
446
+ ## Error Codes
447
+
448
+ - `401` - Incorrect password
449
+ - `400` - Invalid file format
450
+ - `500` - Processing error
451
+
452
+ For support, contact your administrator.
453
  """)
454
+
455
+ # 頁腳
456
+ gr.HTML("""
457
+ <div style="text-align: center; margin-top: 2rem; padding: 1.5rem; background: #f9fafb; border-radius: 8px;">
458
+ <p style="color: #6b7280; font-size: 0.9rem; margin: 0;">
459
+ Audio Transcription Service v2.0 | Powered by OpenAI
460
+ </p>
461
+ </div>
462
+ """)
463
+
464
+ # ====== 掛載 Gradio 到 FastAPI ======
465
+ app = gr.mount_gradio_app(fastapi_app, demo, path="/")
466
+
467
+ # ====== 啟動 ======
468
+ if __name__ == "__main__":
469
+ print("\n" + "="*60)
470
+ print("🚀 啟動 FastAPI + Gradio 應用")
471
+ print("📱 同步 API: /api/transcribe")
472
+ print("🌐 網頁介面: /")
473
+ print("="*60 + "\n")
474
+ import uvicorn
475
+ uvicorn.run(app, host="0.0.0.0", port=7860)