Spaces:

MichaelChou0806
/

LINE_audio_transcript

Sleeping

App Files Community

MichaelChou0806 committed on Oct 8, 2025

Commit

bc06406

verified ·

1 Parent(s): 35b1d50

Update app.py

Browse files

Files changed (1) hide show

app.py +155 -487

app.py CHANGED Viewed

@@ -165,35 +165,17 @@ def transcribe_ui(password, file):
     """網頁界面的轉錄處理函式"""
     print(f"\n🌐 [UI] 網頁版請求")
     if not password or password.strip() != PASSWORD:
-        return "🔒 Authentication", "❌ Incorrect password. Please check and try again.", "", ""
     if not file:
-        return "⚠️ No File", "Please upload an audio file first.", "", ""
     try:
-        # 更新狀態為處理中
-        yield "⏳ Processing", "🎵 Audio file received, starting transcription...", "", ""
         path = _extract_effective_path(file)
-        # 獲取文件信息
-        file_size = os.path.getsize(path)
-        file_size_mb = file_size / 1024 / 1024
-        yield "🎯 Transcribing", f"📊 File size: {file_size_mb:.2f} MB\n🔄 Processing with Whisper AI...", "", ""
         text, summary = transcribe_core(path)
-        # 計算字數
-        char_count = len(text)
-        word_estimate = char_count // 2  # 中文估算
-        status_msg = f"✅ Transcription Complete\n📝 {char_count} characters ({word_estimate} words approx.)"
-        return "✅ Success", status_msg, text, summary
     except Exception as e:
         import traceback
-        error_trace = traceback.format_exc()
-        print(f"❌ [UI] 錯誤:\n{error_trace}")
-        return "❌ Error", f"An error occurred during processing:\n{str(e)}", "", ""
 # ====== 建立 FastAPI 應用 ======
 fastapi_app = FastAPI()
@@ -280,528 +262,214 @@ async def api_transcribe_sync(request: Request):
 # ====== 自定義 CSS ======
 custom_css = """
-/* 全局樣式 */
 .gradio-container {
-    max-width: 1400px !important;
     margin: auto !important;
 }
-/* 標題區域 */
-.main-title {
     text-align: center;
-    padding: 2rem 0;
     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-    border-radius: 15px;
     margin-bottom: 2rem;
     color: white;
 }
-.main-title h1 {
-    font-size: 2.5rem;
-    margin-bottom: 0.5rem;
     font-weight: 700;
 }
-.main-title p {
-    font-size: 1.1rem;
-    opacity: 0.9;
-}
-/* 卡片樣式 */
-.upload-card, .result-card {
-    background: white;
-    border-radius: 12px;
-    padding: 1.5rem;
-    box-shadow: 0 4px 6px rgba(0,0,0,0.07);
-    margin-bottom: 1.5rem;
 }
-/* 按鈕樣式 */
-.custom-button {
     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
     border: none !important;
     color: white !important;
     font-weight: 600 !important;
-    padding: 0.75rem 2rem !important;
-    font-size: 1.1rem !important;
-    border-radius: 8px !important;
-    transition: transform 0.2s !important;
-}
-.custom-button:hover {
-    transform: translateY(-2px) !important;
-    box-shadow: 0 6px 12px rgba(102, 126, 234, 0.4) !important;
-}
-/* 狀態標籤 */
-.status-badge {
-    display: inline-block;
-    padding: 0.5rem 1rem;
-    border-radius: 20px;
-    font-weight: 600;
-    margin-bottom: 0.5rem;
 }
-.status-success { background: #10b981; color: white; }
-.status-processing { background: #3b82f6; color: white; }
-.status-error { background: #ef4444; color: white; }
-.status-warning { background: #f59e0b; color: white; }
-/* 文字區域 */
 textarea {
-    border: 2px solid #e5e7eb !important;
-    border-radius: 8px !important;
     font-size: 0.95rem !important;
     line-height: 1.6 !important;
 }
-/* 檔案上傳區域 */
-.file-upload {
-    border: 2px dashed #d1d5db !important;
-    border-radius: 12px !important;
-    padding: 2rem !important;
-    text-align: center !important;
-    transition: all 0.3s !important;
-}
-.file-upload:hover {
-    border-color: #667eea !important;
-    background: #f9fafb !important;
-}
 /* 資訊卡片 */
-.info-card {
     background: #f0f9ff;
     border-left: 4px solid #3b82f6;
     padding: 1rem;
-    border-radius: 8px;
     margin: 1rem 0;
 }
-/* Tab 樣式 */
-.tab-nav button {
-    font-size: 1.05rem !important;
-    font-weight: 600 !important;
-    padding: 0.75rem 1.5rem !important;
-}
-.tab-nav button.selected {
-    border-bottom: 3px solid #667eea !important;
-}
-/* 程式碼區塊 */
 pre {
     background: #1f2937 !important;
     color: #f3f4f6 !important;
     padding: 1rem !important;
-    border-radius: 8px !important;
-    overflow-x: auto !important;
-    font-size: 0.9rem !important;
 }
 code {
-    background: #1f2937 !important;
-    color: #f3f4f6 !important;
     padding: 0.2rem 0.4rem !important;
-    border-radius: 4px !important;
-    font-family: 'Monaco', 'Menlo', monospace !important;
-}
-/* 功能列表 */
-.feature-list {
-    display: grid;
-    grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
-    gap: 1rem;
-    margin: 1.5rem 0;
-}
-.feature-item {
-    background: white;
-    padding: 1.25rem;
-    border-radius: 10px;
-    border: 1px solid #e5e7eb;
-    transition: all 0.3s;
-}
-.feature-item:hover {
-    transform: translateY(-4px);
-    box-shadow: 0 8px 16px rgba(0,0,0,0.1);
-}
-.feature-icon {
-    font-size: 2rem;
-    margin-bottom: 0.5rem;
-}
-/* 響應式設計 */
-@media (max-width: 768px) {
-    .main-title h1 { font-size: 1.8rem; }
-    .main-title p { font-size: 1rem; }
 }
 """
 # ====== 建立 Gradio 介面 ======
-with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Audio Transcription Service") as demo:
-    # 主標題
     gr.HTML("""
-        <div class="main-title">
             <h1>🎧 Audio Transcription Service</h1>
-            <p>AI-Powered Speech-to-Text with Smart Summarization</p>
         </div>
     """)
-    with gr.Tabs() as tabs:
-        # ====== Tab 1: Web Upload ======
-        with gr.Tab("🌐 Web Interface", id="upload"):
-            gr.Markdown("### Upload and transcribe audio files directly from your browser")
             with gr.Row():
-                # 左側：上傳區域
                 with gr.Column(scale=1):
-                    gr.HTML('<div class="upload-card">')
-                    gr.Markdown("#### 🔐 Authentication")
-                    pw_ui = gr.Textbox(
-                        label="Password",
-                        type="password",
-                        placeholder="Enter your password...",
-                        show_label=False
-                    )
-                    gr.Markdown("#### 📁 Upload Audio File")
-                    file_ui = gr.File(
-                        label="",
-                        file_types=["audio", ".mp4"],
-                        file_count="single",
-                        show_label=False
-                    )
-                    gr.Markdown("""
-                    <div class="info-card">
-                        <strong>💡 Supported Formats:</strong><br>
-                        MP3, M4A, WAV, OGG, WEBM, MP4
-                    </div>
                     """)
-                    btn_ui = gr.Button(
-                        "🚀 Start Transcription",
-                        variant="primary",
-                        size="lg",
-                        elem_classes="custom-button"
-                    )
-                    gr.HTML('</div>')
-                # 右側：結果區域
                 with gr.Column(scale=2):
-                    gr.HTML('<div class="result-card">')
-                    gr.Markdown("#### 📊 Processing Status")
-                    status_label = gr.Textbox(
-                        label="",
-                        value="⏸️ Ready",
-                        interactive=False,
-                        show_label=False,
-                        max_lines=1
-                    )
-                    status_detail = gr.Textbox(
-                        label="",
-                        value="Upload an audio file and click 'Start Transcription' to begin",
-                        interactive=False,
-                        show_label=False,
-                        lines=2
-                    )
-                    gr.Markdown("#### 📝 Transcription Result")
-                    transcript_ui = gr.Textbox(
-                        label="",
-                        lines=12,
-                        placeholder="Transcription will appear here...",
-                        show_label=False,
-                        show_copy_button=True
-                    )
-                    gr.Markdown("#### 💡 AI Summary")
-                    summary_ui = gr.Textbox(
-                        label="",
-                        lines=6,
-                        placeholder="AI-generated summary will appear here...",
-                        show_label=False,
-                        show_copy_button=True
-                    )
-                    gr.HTML('</div>')
-            # 綁定事件
-            btn_ui.click(
-                transcribe_ui,
-                inputs=[pw_ui, file_ui],
-                outputs=[status_label, status_detail, transcript_ui, summary_ui]
-            )
-        # ====== Tab 2: API Documentation ======
-        with gr.Tab("📱 API Documentation", id="api"):
             gr.Markdown("""
-            ## 🚀 API Overview
-            This service provides a **synchronous REST API** for audio transcription, perfect for integration with iPhone Shortcuts, mobile apps, or any HTTP client.
-            """)
-            gr.HTML("""
-            <div class="feature-list">
-                <div class="feature-item">
-                    <div class="feature-icon">⚡</div>
-                    <h3>Fully Synchronous</h3>
-                    <p>Returns complete results in a single request - no polling required</p>
-                </div>
-                <div class="feature-item">
-                    <div class="feature-icon">🔄</div>
-                    <h3>Auto-Processing</h3>
-                    <p>Handles files of any length automatically with intelligent chunking</p>
-                </div>
-                <div class="feature-item">
-                    <div class="feature-icon">🛡️</div>
-                    <h3>Reliable & Stable</h3>
-                    <p>Waits for complete processing before returning results</p>
-                </div>
-                <div class="feature-item">
-                    <div class="feature-icon">🌍</div>
-                    <h3>Universal Access</h3>
-                    <p>Works with any HTTP client or programming language</p>
-                </div>
-            </div>
             """)
-            gr.Markdown("""
-            ---
-            ## 📡 API Endpoint
-            **URL:** `/api/transcribe`
-            **Method:** `POST`
-            **Content-Type:** `application/json`
-            ### Request Format
-            ```json
-            {
-              "password": "your_password_here",
-              "file_data": "data:audio/m4a;base64,UklGRiQAAABXQVZFZm10...",
-              "file_name": "recording.m4a"
-            }
-            ```
-            **Parameters:**
-            - `password` (string, required): Authentication password
-            - `file_data` (string, required): Base64-encoded audio file in data URL format
-            - `file_name` (string, optional): Original filename (default: "recording.m4a")
-            ### Response Format
-            **Success Response (200 OK):**
-            ```json
-            {
-              "status": "success",
-              "transcription": "完整的語音轉文字內容...",
-              "summary": "AI 生成的內容摘要..."
-            }
-            ```
-            **Error Response (401/400/500):**
-            ```json
-            {
-              "status": "error",
-              "error": "Error message description"
-            }
-            ```
-            ---
-            ## 📱 iPhone Shortcuts Setup Guide
-            ### Step-by-Step Configuration:
-            1. **Get File** → Select your audio recording
-            2. **Base64 Encode** → Encode the file content
-            3. **Text** → Create data URL format:
-               ```
-               data:audio/m4a;base64,[Base64 Encode Result]
-               ```
-            4. **Dictionary** → Build request body:
-               - Key: `password`, Value: `chou`
-               - Key: `file_data`, Value: [Text from step 3]
-               - Key: `file_name`, Value: `recording.m4a`
-            5. **Get Contents of URL**:
-               - URL: `https://your-domain.com/api/transcribe`
-               - Method: `POST`
-               - Headers:
-                 - `Content-Type`: `application/json`
-               - Request Body: [Dictionary from step 4]
-               - Request Body Type: `JSON`
-            6. **Get Dictionary Value**:
-               - Key: `transcription` → Get transcription result
-               - Key: `summary` → Get AI summary
-            7. **Show Result** or **Copy to Clipboard**
-            ---
-            ## 🧪 Testing the API
-            ### Using cURL:
-            ```bash
-            curl -X POST https://your-domain.com/api/transcribe \\
-              -H "Content-Type: application/json" \\
-              -d '{
-                "password": "chou",
-                "file_data": "data:audio/m4a;base64,AAAA...",
-                "file_name": "test.m4a"
-              }'
-            ```
-            ### Using Python:
-            ```python
-            import requests
-            import base64
-            # Read and encode audio file
-            with open("audio.m4a", "rb") as f:
-                audio_b64 = base64.b64encode(f.read()).decode()
-            # Prepare request
-            url = "https://your-domain.com/api/transcribe"
-            payload = {
-                "password": "chou",
-                "file_data": f"data:audio/m4a;base64,{audio_b64}",
-                "file_name": "audio.m4a"
-            }
-            # Send request
-            response = requests.post(url, json=payload)
-            result = response.json()
-            if result["status"] == "success":
-                print("Transcription:", result["transcription"])
-                print("Summary:", result["summary"])
-            else:
-                print("Error:", result["error"])
-            ```
-            ### Using JavaScript (Node.js):
-            ```javascript
-            const fs = require('fs');
-            const axios = require('axios');
-            // Read and encode audio file
-            const audioBuffer = fs.readFileSync('audio.m4a');
-            const audioB64 = audioBuffer.toString('base64');
-            // Send request
-            axios.post('https://your-domain.com/api/transcribe', {
-              password: 'chou',
-              file_data: `data:audio/m4a;base64,${audioB64}`,
-              file_name: 'audio.m4a'
-            })
-            .then(response => {
-              const { transcription, summary } = response.data;
-              console.log('Transcription:', transcription);
-              console.log('Summary:', summary);
-            })
-            .catch(error => {
-              console.error('Error:', error.response.data);
-            });
-            ```
-            ---
-            ## ⚙️ Technical Specifications
-            | Feature | Details |
-            |---------|---------|
-            | **Max File Size** | 25 MB per chunk (automatically splits larger files) |
-            | **Supported Formats** | MP3, M4A, MP4, WAV, OGG, WEBM, AAC, OPUS |
-            | **Processing Model** | OpenAI Whisper (high accuracy) |
-            | **Summary Model** | GPT-4o-mini (intelligent summarization) |
-            | **Language Support** | Traditional Chinese (Taiwan) output |
-            | **Response Time** | Varies by file length (typically 5-30 seconds) |
-            | **Authentication** | Password-based security |
-            ---
-            ## 💡 Important Notes
-            - ✅ **Fully synchronous:** The API waits for complete processing before responding
-            - ✅ **No polling needed:** Single request returns final results
-            - ✅ **Auto-chunking:** Large files are automatically split and processed
-            - ✅ **Reliable:** Connection remains open until processing completes
-            - ⚠️ **Timeout considerations:** Ensure your HTTP client has sufficient timeout settings (recommended: 300 seconds)
-            - 🔒 **Security:** Always use HTTPS in production environments
-            ---
-            ## 🆘 Troubleshooting
-            **Problem:** 401 Unauthorized
-            **Solution:** Check that your password is correct
-            **Problem:** 400 Bad Request
-            **Solution:** Verify that `file_data` is in correct data URL format
-            **Problem:** 500 Internal Server Error
-            **Solution:** Check server logs for details; ensure audio file is valid
-            **Problem:** Request timeout
-            **Solution:** Increase HTTP client timeout setting or split audio into smaller files
-            **Problem:** Base64 encoding issues
-            **Solution:** Ensure proper encoding and data URL format: `data:audio/m4a;base64,[encoded_data]`
-            ---
-            ## 📊 Example Response Times
-            | File Duration | Approximate Processing Time |
-            |---------------|---------------------------|
-            | 0-30 seconds | 5-10 seconds |
-            | 30-60 seconds | 10-15 seconds |
-            | 1-3 minutes | 15-30 seconds |
-            | 3-5 minutes | 30-60 seconds |
-            | 5-10 minutes | 1-2 minutes |
-            *Note: Processing time includes transcription, language conversion, and AI summarization.*
-            ---
-            ## 🔗 Integration Examples
-            ### Zapier Integration
-            1. Trigger: New file in storage
-            2. Action: Webhooks by Zapier (POST request)
-            3. Configure endpoint with password and base64 encoded file
-            ### Make.com Integration
-            1. Add HTTP module
-            2. Configure POST request with JSON payload
-            3. Parse response and route to desired action
-            ### iOS Shortcuts Tips
-            - Use "Get Contents of URL" action
-            - Set request timeout to at least 120 seconds
-            - Add error handling for network issues
-            - Consider showing progress notification
-            ---
-            ## 📞 Support & Resources
-            For additional help or feature requests, please contact your service administrator.
-            **Useful Links:**
-            - OpenAI Whisper Documentation
-            - Base64 Encoding Tools
-            - iPhone Shortcuts Gallery

     """網頁界面的轉錄處理函式"""
     print(f"\n🌐 [UI] 網頁版請求")
     if not password or password.strip() != PASSWORD:
+        return "❌ Password incorrect", "", ""
     if not file:
+        return "⚠️ No file uploaded", "", ""
     try:
         path = _extract_effective_path(file)
         text, summary = transcribe_core(path)
+        return "✅ Transcription completed", text, summary
     except Exception as e:
         import traceback
+        print(f"❌ [UI] 錯誤:\n{traceback.format_exc()}")
+        return f"❌ Error: {e}", "", ""
 # ====== 建立 FastAPI 應用 ======
 fastapi_app = FastAPI()
 # ====== 自定義 CSS ======
 custom_css = """
 .gradio-container {
+    max-width: 1200px !important;
     margin: auto !important;
 }
+/* 主標題 */
+.main-header {
     text-align: center;
+    padding: 2.5rem 1rem;
     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    border-radius: 12px;
     margin-bottom: 2rem;
     color: white;
 }
+.main-header h1 {
+    font-size: 2.2rem;
+    margin: 0 0 0.5rem 0;
     font-weight: 700;
 }
+.main-header p {
+    font-size: 1rem;
+    margin: 0;
+    opacity: 0.95;
 }
+/* 按鈕 */
+.primary-btn {
     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
     border: none !important;
     color: white !important;
     font-weight: 600 !important;
+    font-size: 1.05rem !important;
 }
+/* 文字框 */
 textarea {
     font-size: 0.95rem !important;
     line-height: 1.6 !important;
 }
 /* 資訊卡片 */
+.info-box {
     background: #f0f9ff;
     border-left: 4px solid #3b82f6;
     padding: 1rem;
+    border-radius: 6px;
     margin: 1rem 0;
+    font-size: 0.9rem;
 }
+/* 程式碼 */
 pre {
     background: #1f2937 !important;
     color: #f3f4f6 !important;
     padding: 1rem !important;
+    border-radius: 6px !important;
+    font-size: 0.85rem !important;
 }
 code {
+    background: #e5e7eb !important;
+    color: #1f2937 !important;
     padding: 0.2rem 0.4rem !important;
+    border-radius: 3px !important;
+    font-size: 0.9rem !important;
 }
 """
 # ====== 建立 Gradio 介面 ======
+with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Audio Transcription") as demo:
+    # 標題
     gr.HTML("""
+        <div class="main-header">
             <h1>🎧 Audio Transcription Service</h1>
+            <p>AI-Powered Speech-to-Text with Summarization</p>
         </div>
     """)
+    with gr.Tabs():
+        # ====== Tab 1: Upload ======
+        with gr.Tab("🌐 Web Upload"):
             with gr.Row():
                 with gr.Column(scale=1):
+                    pw = gr.Textbox(label="Password", type="password", placeholder="Enter password")
+                    audio_file = gr.File(label="Audio File", file_types=["audio", ".mp4"])
+                    submit_btn = gr.Button("🚀 Start Transcription", variant="primary", elem_classes="primary-btn")
+                    gr.HTML("""
+                        <div class="info-box">
+                            <strong>Supported:</strong> MP3, M4A, WAV, OGG, WEBM, MP4<br>
+                            <strong>Max Size:</strong> Auto-split for large files
+                        </div>
                     """)
                 with gr.Column(scale=2):
+                    status = gr.Textbox(label="Status", interactive=False)
+                    transcription = gr.Textbox(label="Transcription", lines=12, show_copy_button=True)
+                    summary = gr.Textbox(label="Summary", lines=5, show_copy_button=True)
+            submit_btn.click(transcribe_ui, [pw, audio_file], [status, transcription, summary])
+        # ====== Tab 2: API ======
+        with gr.Tab("📱 API Documentation"):
             gr.Markdown("""
+## API Endpoint
+**URL:** `/api/transcribe` (POST)
+**Type:** Synchronous - returns complete results in one request
+### Request Format
+```json
+{
+  "password": "your_password",
+  "file_data": "data:audio/m4a;base64,UklGR...",
+  "file_name": "recording.m4a"
+}
+```
+### Response Format
+```json
+{
+  "status": "success",
+  "transcription": "Full transcription text...",
+  "summary": "AI-generated summary..."
+}
+```
+---
+## iPhone Shortcuts Setup
+1. **Get File** → Audio recording
+2. **Base64 Encode** → File content
+3. **Text** → Create data URL:
+   ```
+   data:audio/m4a;base64,[Base64 Result]
+   ```
+4. **Dictionary** → Request body:
+   - `password`: `chou`
+   - `file_data`: [Text from step 3]
+   - `file_name`: `recording.m4a`
+5. **Get Contents of URL**:
+   - URL: `https://your-domain.com/api/transcribe`
+   - Method: `POST`
+   - Headers: `Content-Type: application/json`
+   - Body: [Dictionary], Type: `JSON`
+6. **Get Dictionary Value**:
+   - `transcription` → Full text
+   - `summary` → Summary
+---
+## Testing with cURL
+```bash
+curl -X POST https://your-domain.com/api/transcribe \\
+  -H "Content-Type: application/json" \\
+  -d '{
+    "password": "chou",
+    "file_data": "data:audio/m4a;base64,AAAA...",
+    "file_name": "test.m4a"
+  }'
+```
+---
+## Technical Details
+- **Transcription:** OpenAI Whisper (high accuracy)
+- **Summarization:** GPT-4o-mini
+- **Output:** Traditional Chinese (Taiwan)
+- **Processing:** Fully synchronous, no polling needed
+- **File Handling:** Auto-split for files > 25MB
+---
+## Error Codes
+- `401` - Incorrect password
+- `400` - Invalid file format
+- `500` - Processing error
+For support, contact your administrator.
             """)
+    # 頁腳
+    gr.HTML("""
+        <div style="text-align: center; margin-top: 2rem; padding: 1.5rem; background: #f9fafb; border-radius: 8px;">
+            <p style="color: #6b7280; font-size: 0.9rem; margin: 0;">
+                Audio Transcription Service v2.0 | Powered by OpenAI
+            </p>
+        </div>
+    """)
+# ====== 掛載 Gradio 到 FastAPI ======
+app = gr.mount_gradio_app(fastapi_app, demo, path="/")
+# ====== 啟動 ======
+if __name__ == "__main__":
+    print("\n" + "="*60)
+    print("🚀 啟動 FastAPI + Gradio 應用")
+    print("📱 同步 API: /api/transcribe")
+    print("🌐 網頁介面: /")
+    print("="*60 + "\n")
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)