File size: 12,478 Bytes
ec64510
e7d0f5a
 
 
c76e92c
 
 
e7d0f5a
70cf2d7
d339fc0
7ecf1a3
 
 
c69672a
 
70cf2d7
89d8668
7ecf1a3
70cf2d7
6c57120
70cf2d7
c8ee0eb
 
 
70cf2d7
 
 
 
35b1d50
70cf2d7
 
 
6c57120
70cf2d7
 
 
 
 
 
89d8668
70cf2d7
a433c1d
6ddb6b7
b1c58b3
 
 
 
6ddb6b7
d339fc0
70cf2d7
 
b1c58b3
70cf2d7
 
 
 
a433c1d
d339fc0
70cf2d7
6ddb6b7
b1c58b3
d339fc0
20943a5
 
c8ee0eb
b1c58b3
d339fc0
b1c58b3
 
 
 
 
d339fc0
b1c58b3
6ddb6b7
d339fc0
b1c58b3
 
 
 
 
 
d339fc0
b1c58b3
6ddb6b7
d339fc0
6ddb6b7
b1c58b3
 
d339fc0
b1c58b3
6ddb6b7
 
a433c1d
d339fc0
70cf2d7
c69672a
35b1d50
e7d0f5a
70cf2d7
 
6c57120
e7d0f5a
 
c69672a
70cf2d7
e7d0f5a
 
c69672a
70cf2d7
 
89d8668
c69672a
35b1d50
a433c1d
 
 
 
 
ec64510
d339fc0
e7d0f5a
 
70cf2d7
 
 
c76e92c
 
ec64510
6c57120
c69672a
70cf2d7
ec64510
a433c1d
70cf2d7
 
c8ee0eb
70cf2d7
 
a433c1d
70cf2d7
c8ee0eb
35b1d50
70cf2d7
e7d0f5a
 
c69672a
20943a5
70cf2d7
 
 
 
c8ee0eb
35b1d50
c69672a
e7d0f5a
 
c69672a
20943a5
70cf2d7
 
 
a433c1d
ec64510
a433c1d
d339fc0
a433c1d
 
e7d0f5a
1ebe0ef
6c57120
d339fc0
6ddb6b7
d339fc0
6ddb6b7
 
 
6c57120
 
6ddb6b7
 
f317cf1
6ddb6b7
 
d339fc0
f317cf1
6c57120
 
 
6ddb6b7
f317cf1
6c57120
34eab1f
6c57120
d339fc0
6c57120
d339fc0
6c57120
f317cf1
 
 
6c57120
6ddb6b7
20943a5
6c57120
d339fc0
6c57120
d339fc0
6c57120
d339fc0
f317cf1
6c57120
20943a5
a433c1d
6c57120
d339fc0
f317cf1
e7d0f5a
6c57120
c76e92c
a433c1d
c76e92c
 
 
 
 
 
 
 
 
6c57120
 
678d6a4
c76e92c
6ddb6b7
d339fc0
6ddb6b7
c76e92c
6c57120
c76e92c
 
6ddb6b7
c76e92c
 
 
 
 
d339fc0
c76e92c
 
 
 
6ddb6b7
c76e92c
 
d339fc0
c76e92c
 
6c57120
c76e92c
678d6a4
d339fc0
a433c1d
6c57120
678d6a4
ec64510
 
678d6a4
 
 
 
a433c1d
d339fc0
c76e92c
ec64510
678d6a4
 
a433c1d
d339fc0
c76e92c
 
 
 
678d6a4
d339fc0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c57120
d339fc0
 
 
 
 
1ebe0ef
d339fc0
 
0d4c4bd
d339fc0
 
 
 
 
 
6c57120
f317cf1
 
 
 
 
 
 
 
 
 
 
 
d339fc0
 
 
 
 
 
 
 
 
 
 
 
 
 
6c57120
d339fc0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c57120
d339fc0
6ddb6b7
 
bc06406
6ddb6b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc06406
6c57120
dba472f
6ddb6b7
6c57120
 
f317cf1
6c57120
bc06406
6c57120
bc06406
 
 
 
 
6ddb6b7
 
 
bc06406
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
import os, shutil, base64, uuid, mimetypes, json, time
from pydub import AudioSegment
from openai import OpenAI
import gradio as gr
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware

# ====== Basic configuration ======
# Shared secret checked by both the web UI handler and the JSON API.
# Startup is aborted when it is missing or empty.
PASSWORD = os.environ.get("APP_PASSWORD")
if PASSWORD is None or PASSWORD == "":
    raise ValueError("APP_PASSWORD environment variable is not set!")

# Size threshold in bytes (25 MiB) above which split_audio() cuts a file
# into several chunks.
MAX_SIZE = 25 * 1024 ** 2
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Startup banner; the password value itself is never printed.
print("===== 🚀 啟動中 =====")
print("APP_PASSWORD: ✅ 已載入")

# ====== 工具函數 ======
MIME_EXT = {
    "audio/mp4": "m4a", "audio/m4a": "m4a", "audio/aac": "aac",
    "audio/mpeg": "mp3", "audio/wav": "wav", "audio/x-wav": "wav",
    "audio/ogg": "ogg", "audio/webm": "webm", "audio/opus": "opus",
    "video/mp4": "mp4",
}

def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
    """將 data URL 轉換為本地檔案"""
    try:
        header, b64 = data_url.split(",", 1)
    except ValueError:
        raise ValueError("Invalid data URL format")
    mime = header.split(";")[0].split(":", 1)[-1].strip()
    ext = MIME_EXT.get(mime) or (mimetypes.guess_extension(mime) or "m4a").lstrip(".")
    fname = orig_name if (orig_name and "." in orig_name) else f"upload_{uuid.uuid4().hex}.{ext}"
    with open(fname, "wb") as f:
        f.write(base64.b64decode(b64))
    return fname

def _extract_effective_path(file_obj) -> str:
    """從各種格式中提取有效檔案路徑"""
    print(f"[DEBUG] 檔案物件類型: {type(file_obj)}")
    print(f"[DEBUG] 檔案物件內容: {file_obj}")
    
    if file_obj is None:
        raise FileNotFoundError("File object is None")
    
    # 字串路徑
    if isinstance(file_obj, str):
        s = file_obj.strip().strip('"')
        print(f"[DEBUG] 字串路徑: {s}")
        if s.startswith("data:"):
            return _dataurl_to_file(s, None)
        if os.path.isfile(s):
            return s
    
    # 字典格式
    if isinstance(file_obj, dict):
        print(f"[DEBUG] 字典 keys: {list(file_obj.keys())}")
        
        # data URL
        data = file_obj.get("data")
        if isinstance(data, str) and data.startswith("data:"):
            return _dataurl_to_file(data, file_obj.get("orig_name"))
        
        # 路徑
        for key in ["path", "name", "file", "filepath"]:
            p = file_obj.get(key)
            if p and isinstance(p, str):
                p = p.strip().strip('"')
                if os.path.isfile(p):
                    print(f"[DEBUG] 找到路徑 (key={key}): {p}")
                    return p
    
    # 物件屬性
    for attr in ["name", "path", "file", "filepath"]:
        if hasattr(file_obj, attr):
            p = getattr(file_obj, attr, None)
            if p and isinstance(p, str):
                p = p.strip().strip('"')
                if os.path.isfile(p):
                    print(f"[DEBUG] 找到路徑 (attr={attr}): {p}")
                    return p
    
    # 直接轉換
    try:
        path_str = str(file_obj).strip().strip('"')
        if os.path.isfile(path_str):
            print(f"[DEBUG] 直接路徑: {path_str}")
            return path_str
    except:
        pass
    
    raise FileNotFoundError(f"Cannot parse file: {type(file_obj)} - {file_obj}")

def split_audio(path):
    """Split an audio file into WAV chunks that each fit within ``MAX_SIZE``.

    A file whose size on disk is already within ``MAX_SIZE`` is returned
    unchanged as a one-element list. Otherwise the audio is decoded and
    exported as consecutive WAV files in the current working directory.

    Args:
        path: Path of the audio file to split.

    Returns:
        A list of file paths: either ``[path]`` or the newly written chunk
        files in playback order. The caller owns (and should delete) any
        chunk files.
    """
    size = os.path.getsize(path)
    if size <= MAX_SIZE:
        return [path]

    audio = AudioSegment.from_file(path)

    # Chunks are written as uncompressed WAV, so their size is governed by
    # the decoded PCM byte count, not by the (possibly compressed) source
    # size. Sizing from the source size would let chunks of an MP3/M4A
    # input grow far beyond MAX_SIZE once exported.
    pcm_bytes = len(audio.raw_data)
    budget = MAX_SIZE - 64 * 1024  # headroom for the WAV header and ms rounding
    n = max(1, -(-pcm_bytes // budget))  # ceiling division

    total_ms = len(audio)
    # Unique prefix per call so concurrent requests never overwrite each
    # other's chunk files.
    batch = uuid.uuid4().hex
    parts = []
    for i in range(n):
        # Integer boundaries: consecutive chunks tile the audio exactly,
        # with no millisecond lost to float rounding at the end.
        start_ms = i * total_ms // n
        end_ms = (i + 1) * total_ms // n
        fn = f"chunk_{batch}_{i+1}.wav"
        audio[start_ms:end_ms].export(fn, format="wav")
        parts.append(fn)
    return parts

def transcribe_core(path, model="whisper-1"):
    """Transcribe audio, convert the text to Traditional Chinese, and summarize it.

    The file is split with ``split_audio`` and each piece is sent to the
    OpenAI transcription endpoint. The joined transcript is then passed
    through two ``gpt-4o-mini`` chat completions: one converting it to
    Taiwan Traditional Chinese, one summarizing the converted text.

    Args:
        path: Path of the audio (or ``.mp4``) file to transcribe. The file
            itself is never deleted by this function.
        model: Model name passed to the transcription endpoint.

    Returns:
        A ``(traditional_text, summary)`` tuple of strings. Both are empty
        strings when the transcription produced no text.
    """
    print(f"\n{'='*60}")
    print(f"[transcribe_core] 開始轉錄: {path}")
    print(f"{'='*60}")

    start_time = time.time()
    # Files created by this call (mp4 copy, audio chunks) that must be
    # removed again whether or not transcription succeeds.
    temp_files = []

    try:
        # MP4 handling: work on a copy carrying an .m4a extension.
        # NOTE(review): presumably so the upload is treated as audio - confirm.
        # The copy gets a unique name so it cannot overwrite an existing file.
        if path.lower().endswith(".mp4"):
            fixed = f"{path[:-4]}_{uuid.uuid4().hex}.m4a"
            try:
                shutil.copy(path, fixed)
            except OSError as exc:
                # Best-effort: fall back to the original file.
                print(f"[transcribe_core] mp4 copy failed, using original: {exc}")
            else:
                temp_files.append(fixed)
                path = fixed

        # Split and transcribe
        chunks = split_audio(path)
        temp_files.extend(c for c in chunks if c != path)
        raw = []
        for i, c in enumerate(chunks, 1):
            print(f"[transcribe_core] 轉錄片段 {i}/{len(chunks)}")
            with open(c, "rb") as af:
                txt = client.audio.transcriptions.create(
                    model=model, file=af, response_format="text"
                )
                raw.append(txt)
    finally:
        for tmp in temp_files:
            try:
                os.remove(tmp)
            except OSError as exc:
                print(f"[transcribe_core] could not remove temp file {tmp}: {exc}")

    raw_txt = "\n".join(raw)

    if not raw_txt.strip():
        # Nothing was recognised: skip the GPT calls, which would otherwise
        # answer an empty prompt with invented text.
        trad, summary = "", ""
    else:
        # Simplified -> Traditional conversion
        conv = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role":"system","content":"你是嚴格的繁體中文轉換器"},
                {"role":"user","content":f"將以下內容轉為台灣繁體,不意譯:\n{raw_txt}"}
            ],
            temperature=0.0
        )
        # message.content may be None; treat that as empty text.
        trad = (conv.choices[0].message.content or "").strip()

        # AI summary
        summ = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role":"system","content":"你是繁體摘要助手"},
                {"role":"user","content":f"請用台灣繁體中文摘要;內容多則條列重點,內容短則一句話:\n{trad}"}
            ],
            temperature=0.2
        )
        summary = (summ.choices[0].message.content or "").strip()

    total_time = time.time() - start_time
    print(f"[transcribe_core] ✅ 完成! 耗時: {total_time:.1f}秒\n")

    return trad, summary

# ====== Gradio UI handlers ======
def transcribe_web(password, audio_file):
    """Handle a transcription request coming from the Gradio web page.

    Args:
        password: Text entered in the password field.
        audio_file: Upload value supplied by Gradio; it is resolved to a
            local path with ``_extract_effective_path``.

    Returns:
        A 4-tuple ``(file_name, status, transcription, summary)`` of
        strings. On any failure only ``status`` is non-empty and carries
        the message to show to the user.
    """
    # Local import, in the same way traceback is imported locally below.
    import hmac

    print(f"\n{'='*60}")
    print("🌐 [WEB] 收到請求")
    print(f"密碼: {'已提供' if password else '未提供'}")
    print(f"檔案: {audio_file}")
    print(f"{'='*60}")

    # Validate password
    if not password:
        print("[WEB] ❌ 密碼為空")
        return "", "❌ Please enter password", "", ""

    # Constant-time comparison so response timing does not reveal how many
    # leading characters of a guess were correct. Bytes are compared because
    # compare_digest rejects non-ASCII str arguments.
    supplied = password.strip().encode("utf-8")
    if not hmac.compare_digest(supplied, PASSWORD.encode("utf-8")):
        print("[WEB] ❌ 密碼錯誤")
        return "", "❌ Incorrect password", "", ""

    # Check file
    if not audio_file:
        print("[WEB] ❌ 未上傳檔案")
        return "", "⚠️ Please upload audio file", "", ""

    try:
        # Resolve the upload to a local path
        print("[WEB] 處理檔案...")
        path = _extract_effective_path(audio_file)
        print(f"[WEB] ✅ 檔案: {path}")

        # File name shown back to the user
        file_name = os.path.basename(path)

        # Transcribe
        print("[WEB] 開始轉錄...")
        text, summary = transcribe_core(path)

        # Statistics
        char_count = len(text)
        status = f"✅ Completed! ({char_count} chars)"

        print("[WEB] ✅ 成功\n")
        return file_name, status, text, summary

    except Exception as e:
        # UI boundary: log the full traceback, show only the message.
        import traceback
        error_msg = traceback.format_exc()
        print(f"❌ [WEB] 錯誤:\n{error_msg}\n")
        return "", f"❌ Error: {str(e)}", "", ""

# ====== FastAPI application ======
fastapi_app = FastAPI()

# CORS: the API may be called from any origin. Credentials are disabled
# because a wildcard origin must not be combined with
# allow_credentials=True, and /api/transcribe authenticates through the
# "password" field of the JSON body rather than through cookies.
fastapi_app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

@fastapi_app.post("/api/transcribe")
async def api_transcribe(request: Request):
    """JSON API endpoint for external callers such as mobile clients.

    Expects a JSON object with the keys ``password``, ``file_data`` (a
    base64 ``data:`` URL) and optionally ``file_name``.

    Returns:
        A ``JSONResponse``:

        * 200 ``{"status": "success", "transcription": ..., "summary": ...}``
        * 400 when the body is not a JSON object or ``file_data`` is not a
          data URL
        * 401 when the password does not match
        * 500 with the exception message for any other failure
    """
    import asyncio
    import hmac

    path = None  # upload written to disk for this request, removed in finally
    try:
        try:
            body = await request.json()
        except ValueError:
            # Covers malformed JSON and undecodable request bytes.
            body = None
        if not isinstance(body, dict):
            print("[API] ❌ invalid JSON body")
            return JSONResponse(
                status_code=400,
                content={"status": "error", "error": "Invalid JSON body"}
            )

        print(f"\n{'='*60}")
        print("📱 [API] 收到請求")
        print(f"{'='*60}")

        # Validate password (constant-time; non-string values are rejected
        # instead of raising AttributeError on .strip()).
        password = body.get("password", "")
        if not isinstance(password, str) or not hmac.compare_digest(
            password.strip().encode("utf-8"), PASSWORD.encode("utf-8")
        ):
            print("[API] ❌ 密碼錯誤")
            return JSONResponse(
                status_code=401,
                content={"status": "error", "error": "Password incorrect"}
            )

        # Check file
        file_data = body.get("file_data", "")
        file_name = body.get("file_name", "recording.m4a")
        if not isinstance(file_name, str):
            file_name = "recording.m4a"

        if not isinstance(file_data, str) or not file_data.startswith("data:"):
            print("[API] ❌ 檔案格式錯誤")
            return JSONResponse(
                status_code=400,
                content={"status": "error", "error": "Invalid file format"}
            )

        # Decode the upload to a local file
        file_dict = {"data": file_data, "orig_name": file_name}
        path = _extract_effective_path(file_dict)
        print(f"[API] ✅ 檔案: {path}")

        # Transcribe in a worker thread: transcribe_core is synchronous, and
        # calling it directly here would block the event loop (and with it
        # every other request) for the whole transcription.
        text, summary = await asyncio.to_thread(transcribe_core, path)

        result = {
            "status": "success",
            "transcription": text,
            "summary": summary
        }

        print("[API] ✅ 成功\n")
        return JSONResponse(content=result)

    except Exception as e:
        # API boundary: log the full traceback, return only the message.
        import traceback
        error_trace = traceback.format_exc()
        print(f"❌ [API] 錯誤:\n{error_trace}\n")
        return JSONResponse(
            status_code=500,
            content={"status": "error", "error": str(e)}
        )
    finally:
        # The path always refers to a file decoded from the request's data
        # URL, so it is safe to remove once the request is finished.
        if path:
            try:
                os.remove(path)
            except OSError as exc:
                print(f"[API] could not remove upload {path}: {exc}")

# ====== Mobile-optimised CSS ======
# Stylesheet text passed to gr.Blocks(css=...) below. The string is runtime
# content and is kept verbatim (including its embedded CSS comments).
mobile_css = """
/* 基本響應式 */
.gradio-container {
    max-width: 100% !important;
    padding: 0.5rem !important;
}

/* 手機優化 */
@media (max-width: 768px) {
    .gradio-container {
        padding: 0.25rem !important;
    }
    
    /* 標題縮小 */
    h1 {
        font-size: 1.5rem !important;
    }
    
    h3 {
        font-size: 1.1rem !important;
    }
    
    /* 按鈕加大點擊區域 */
    button {
        min-height: 44px !important;
        font-size: 1rem !important;
    }
    
    /* 輸入框 */
    input, textarea {
        font-size: 16px !important; /* 防止手機自動縮放 */
    }
    
    /* 行布局改為列布局 */
    .row {
        flex-direction: column !important;
    }
    
    .column {
        width: 100% !important;
        max-width: 100% !important;
    }
}

/* 確保文字可選取和複製 */
textarea {
    user-select: text !important;
    -webkit-user-select: text !important;
}
"""

# ====== Gradio interface ======
# Single-column page: password, file upload, submit button, then the status,
# transcription and summary outputs, followed by static API documentation.
with gr.Blocks(
    title="Audio Transcription",
    theme=gr.themes.Soft(),
    css=mobile_css
) as demo:
    
    gr.Markdown("# 🎧 Audio Transcription")
    gr.Markdown("AI-Powered Speech-to-Text")
    
    # Password input
    password_input = gr.Textbox(
        label="Password",
        type="password",
        placeholder="Enter password"
    )
    
    # File upload - uses the most basic UploadButton
    audio_input = gr.UploadButton(
        label="📁 Choose Audio File",
        file_types=["audio/*", ".mp3", ".m4a", ".wav", ".ogg", ".webm", ".mp4"],
        file_count="single"
    )
    
    # Displays the selected file
    # NOTE(review): the only event bound in this block is submit_btn.click,
    # so this box appears to be filled in only once transcribe_web returns,
    # not at the moment a file is chosen - confirm this is intended.
    file_display = gr.Textbox(
        label="Selected File",
        interactive=False,
        placeholder="No file selected"
    )
    
    # Submit button
    submit_btn = gr.Button(
        "🚀 Start Transcription",
        variant="primary",
        size="lg"
    )
    
    # Status display
    status_output = gr.Textbox(
        label="Status",
        interactive=False
    )
    
    # Transcription result
    transcription_output = gr.Textbox(
        label="Transcription",
        lines=10,
        max_lines=20
    )
    
    # Summary
    summary_output = gr.Textbox(
        label="Summary",
        lines=5,
        max_lines=10
    )
    
    gr.Markdown("---")
    gr.Markdown("""
    ### 📱 API Integration
    
    **Endpoint:** `POST /api/transcribe`
    
    **Request:**
    ```json
    {
      "password": "your_password",
      "file_data": "data:audio/m4a;base64,...",
      "file_name": "recording.m4a"
    }
    ```
    
    **Response:**
    ```json
    {
      "status": "success",
      "transcription": "...",
      "summary": "..."
    }
    ```
    """)
    
    # Event binding - processing happens only when the submit button is
    # clicked. The four outputs receive, in order, the 4-tuple returned by
    # transcribe_web.
    submit_btn.click(
        fn=transcribe_web,
        inputs=[password_input, audio_input],
        outputs=[file_display, status_output, transcription_output, summary_output]
    )

# ====== Mount onto FastAPI ======
# Attach the Gradio UI at the root path of the FastAPI app that also serves
# the /api/transcribe route.
app = gr.mount_gradio_app(fastapi_app, demo, path="/")

# ====== Startup ======
if __name__ == "__main__":
    # Startup banner, emitted as one print call with newline separators.
    print(
        "\n" + "="*60,
        "🚀 服務啟動",
        "🌐 網頁: http://0.0.0.0:7860",
        "📱 API: http://0.0.0.0:7860/api/transcribe",
        "="*60 + "\n",
        sep="\n",
    )
    # Imported here so uvicorn is only needed when run as a script.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)