Spaces:

MichaelChou0806
/

LINE_audio_transcript

Running

App Files Files Community

MichaelChou0806 committed on Oct 8, 2025

Commit

bcbcc58

verified ·

1 Parent(s): e7d0f5a

Update app.py

Browse files

Files changed (1) hide show

app.py +143 -59

app.py CHANGED Viewed

@@ -1,28 +1,35 @@
 import os
 import time
 import shutil
 from pydub import AudioSegment
 from openai import OpenAI
 import gradio as gr
-from fastapi import FastAPI, UploadFile, File, Form
-from threading import Thread
-import uvicorn
-# ======================================================
-# 🔐 設定區
-# ======================================================
 PASSWORD = os.getenv("APP_PASSWORD", "chou")
-MAX_SIZE = 25 * 1024 * 1024
-client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 print("===== 🚀 啟動中 =====")
 print(f"APP_PASSWORD: {'✅ 已載入' if PASSWORD else '❌ 未載入'}")
 print(f"目前密碼內容：{PASSWORD}")
-# ======================================================
-# 🎧 音訊轉錄核心
-# ======================================================
-def split_audio_if_needed(path):
     size = os.path.getsize(path)
     if size <= MAX_SIZE:
         return [path]
@@ -36,41 +43,51 @@ def split_audio_if_needed(path):
         parts.append(fn)
     return parts
-def transcribe_core(path, model="whisper-1"):
     if path.lower().endswith(".mp4"):
         fixed = path[:-4] + ".m4a"
         try:
             shutil.copy(path, fixed)
             path = fixed
         except Exception as e:
-            print(f"⚠️ mp4→m4a 轉檔失敗：{e}")
     chunks = split_audio_if_needed(path)
-    txts = []
     for f in chunks:
         with open(f, "rb") as af:
-            res = client.audio.transcriptions.create(model=model, file=af, response_format="text")
-            txts.append(res)
-    full_raw = "\n".join(txts)
     conv_prompt = (
         "請將以下內容完整轉換為「繁體中文（台灣用語）」：\n"
-        "規則：1) 僅做簡→繁字形轉換；2) 不要意譯或改寫；3) 不要添加任何前後綴。\n-----\n" + full_raw
     )
-    trad = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
-            {"role": "system", "content": "你是嚴格的繁體中文轉換器。"},
             {"role": "user", "content": conv_prompt}
         ],
         temperature=0.0,
     ).choices[0].message.content.strip()
     sum_prompt = (
         "請用台灣繁體中文撰寫摘要。若內容資訊多，可條列出重點；"
-        "若內容簡短，請用一句話概述即可。\n\n" + trad
     )
-    summ = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
             {"role": "system", "content": "你是一位精準且嚴格使用台灣繁體中文的摘要助手。"},
@@ -79,53 +96,120 @@ def transcribe_core(path, model="whisper-1"):
         temperature=0.2,
     ).choices[0].message.content.strip()
-    return trad, summ
-# ======================================================
-# 🌐 FastAPI for 捷徑
-# ======================================================
-api_app = FastAPI()
-@api_app.post("/api/transcribe")
-async def api_transcribe(file: UploadFile = File(...), token: str = Form(...)):
     if token != PASSWORD:
-        return {"error": "Invalid token"}
-    temp = file.filename
-    with open(temp, "wb") as f:
-        f.write(await file.read())
-    text, summary = transcribe_core(temp)
-    os.remove(temp)
-    return {"text": text, "summary": summary}
-# ======================================================
-# 💬 Gradio 介面
-# ======================================================
-def transcribe_with_password(password, file):
     if password.strip() != PASSWORD:
-        return "❌ 密碼錯誤", "", ""
     if not file:
-        return "⚠️ 未選擇檔案", "", ""
-    text, summary = transcribe_core(file.name)
-    return "✅ 完成", text, summary
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("## 🎧 LINE 語音轉錄與摘要（支援 .m4a / .mp4）")
-    pw = gr.Textbox(label="輸入密碼", type="password")
-    f = gr.File(label="上傳音訊檔")
     run = gr.Button("開始轉錄 🚀")
     s = gr.Textbox(label="狀態", interactive=False)
-    t = gr.Textbox(label="轉錄結果", lines=10)
     su = gr.Textbox(label="AI 摘要", lines=8)
-    run.click(transcribe_with_password, [pw, f], [s, t, su])
-# ======================================================
-# 🚀 啟動
-# ======================================================
-def run_api():
-    uvicorn.run(api_app, host="0.0.0.0", port=7861)
-Thread(target=run_api, daemon=True).start()
-app = demo  # ✅ Hugging Face 主入口使用 Gradio
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 import os
 import time
 import shutil
+import tempfile
+from typing import Tuple
 from pydub import AudioSegment
 from openai import OpenAI
 import gradio as gr
+from fastapi import FastAPI, UploadFile, File, Form, HTTPException
+# ========================
+# 🔐 設定
+# ========================
 PASSWORD = os.getenv("APP_PASSWORD", "chou")
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+MAX_SIZE = 25 * 1024 * 1024  # 25MB
+if not OPENAI_API_KEY:
+    raise RuntimeError("OPENAI_API_KEY 未設定（請到 HF 的 Secrets 設定）")
+client = OpenAI(api_key=OPENAI_API_KEY)
 print("===== 🚀 啟動中 =====")
 print(f"APP_PASSWORD: {'✅ 已載入' if PASSWORD else '❌ 未載入'}")
 print(f"目前密碼內容：{PASSWORD}")
+# ========================
+# 🎧 轉錄核心
+# ========================
+def split_audio_if_needed(path: str) -> list:
     size = os.path.getsize(path)
     if size <= MAX_SIZE:
         return [path]
         parts.append(fn)
     return parts
+def transcribe_core(path: str, model: str = "whisper-1") -> Tuple[str, str]:
+    # iPhone LINE 部分 mp4 其實是 audio-only，這裡只改副檔名避免 MIME 阻擋
     if path.lower().endswith(".mp4"):
         fixed = path[:-4] + ".m4a"
         try:
             shutil.copy(path, fixed)
             path = fixed
+            print("🔧 已自動修正 mp4 → m4a")
         except Exception as e:
+            print(f"⚠️ mp4→m4a 複製失敗：{e}")
+    # 1) Whisper 逐段轉錄（原始：可能有簡體）
     chunks = split_audio_if_needed(path)
+    raw_parts = []
     for f in chunks:
         with open(f, "rb") as af:
+            res = client.audio.transcriptions.create(
+                model=model,
+                file=af,
+                response_format="text"
+            )
+            raw_parts.append(res)
+    full_raw = "\n".join(raw_parts)
+    # 2) 僅簡→繁（不意譯）
     conv_prompt = (
         "請將以下內容完整轉換為「繁體中文（台灣用語）」：\n"
+        "規則：1) 僅做簡→繁字形轉換；2) 不要意譯或改寫；3) 不要添加任何前後綴。\n"
+        "-----\n" + full_raw
     )
+    full_trad = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
+            {"role": "system", "content": "你是嚴格的繁體中文轉換器，只進行字形轉換。"},
             {"role": "user", "content": conv_prompt}
         ],
         temperature=0.0,
     ).choices[0].message.content.strip()
+    # 3) 摘要（長就條列、短就一句話）
     sum_prompt = (
         "請用台灣繁體中文撰寫摘要。若內容資訊多，可條列出重點；"
+        "若內容簡短，請用一句話概述即可。\n\n" + full_trad
     )
+    summary = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
             {"role": "system", "content": "你是一位精準且嚴格使用台灣繁體中文的摘要助手。"},
         temperature=0.2,
     ).choices[0].message.content.strip()
+    return full_trad, summary
+# ========================
+# 🌐 FastAPI 主應用
+# ========================
+app = FastAPI(title="LINE Transcription (Gradio + API)")
@app.get("/health")
def health():
    """Liveness probe: report an "ok" status and the current Unix time in whole seconds."""
    now = int(time.time())
    payload = {"status": "ok", "time": now}
    return payload
@app.post("/api/transcribe")
async def api_transcribe(
    file: UploadFile = File(...),
    token: str = Form(...),
    model: str = Form("whisper-1"),
):
    """Transcribe an uploaded audio file and return the transcript plus a summary.

    Args:
        file: Uploaded audio. Its extension (if any) is kept on the temporary
            copy that is handed to ``transcribe_core``.
        token: Shared secret; must equal ``PASSWORD``.
        model: Transcription model name forwarded to ``transcribe_core``.

    Returns:
        ``{"text": ..., "summary": ...}`` built from the two values returned by
        ``transcribe_core``.

    Raises:
        HTTPException: 403 when ``token`` does not match ``PASSWORD``.
    """
    import hmac

    # Constant-time comparison. Encode first: compare_digest rejects str
    # arguments that contain non-ASCII characters.
    if not hmac.compare_digest(token.encode("utf-8"), PASSWORD.encode("utf-8")):
        raise HTTPException(status_code=403, detail="Invalid token")

    # Keep the original extension on the temp file so later steps can tell the
    # format. The client-supplied name may be missing, and only its final path
    # component is trusted so separators can never end up in the suffix.
    client_name = os.path.basename(file.filename or "")
    suffix = ""
    if "." in client_name:
        suffix = "." + client_name.rsplit(".", 1)[-1]

    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name

    try:
        text, summary = transcribe_core(tmp_path, model=model)
        return {"text": text, "summary": summary}
    finally:
        # transcribe_core copies "*.mp4" inputs to a sibling "*.m4a" file, so
        # that copy has to be removed together with the upload itself.
        leftovers = [tmp_path]
        if tmp_path.lower().endswith(".mp4"):
            leftovers.append(tmp_path[:-4] + ".m4a")
        for leftover in leftovers:
            try:
                os.remove(leftover)
            except FileNotFoundError:
                pass  # nothing to clean up
            except OSError as exc:
                # Best-effort cleanup: report the failure but never mask the response.
                print(f"⚠️ 暫存檔刪除失敗：{exc}")
+# ========================
+# 💬 Gradio UI（掛在 /）
+# ========================
def transcribe_with_password(password, file, model_choice, question):
    """Gradio handler: check the password, transcribe the file, optionally answer a question.

    Returns a 4-tuple ``(status, transcript, summary, follow-up answer)``.
    On a wrong password or a missing file only the status is filled in. The
    follow-up answer stays empty unless ``question`` contains non-blank text.
    """
    if password.strip() != PASSWORD:
        return "❌ 密碼錯誤", "", "", ""
    if not file:
        return "⚠️ 未選擇檔案", "", "", ""

    transcript, digest = transcribe_core(file.name, model=model_choice)

    asked = question.strip() if question else ""
    if not asked:
        return "✅ 完成", transcript, digest, ""

    # Follow-up: answer the user's question using the full transcript as context.
    followup_prompt = (
        "以下是逐字轉錄內容，請用台灣繁體中文回答我的問題：\n\n"
        f"【逐字稿】\n{transcript}\n\n"
        f"【問題】\n{asked}"
    )
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": followup_prompt}],
        temperature=0.6,
    )
    reply = completion.choices[0].message.content.strip()
    return "✅ 完成", transcript, digest, reply
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("## 🎧 LINE 語音轉錄與摘要工具（支援 .m4a / .mp4｜API + UI）")
+    with gr.Row():
+        pw = gr.Textbox(label="輸入密碼", type="password", placeholder="請輸入英文數字")
+        model_dd = gr.Dropdown(
+            ["whisper-1", "gpt-4o-mini-transcribe"],
+            value="whisper-1",
+            label="選擇模型"
+        )
+    file_u = gr.File(label="上傳音訊檔（.m4a/.mp3/.wav/.mp4）")
     run = gr.Button("開始轉錄 🚀")
     s = gr.Textbox(label="狀態", interactive=False)
+    t = gr.Textbox(label="轉錄結果（已轉繁體）", lines=10)
     su = gr.Textbox(label="AI 摘要", lines=8)
+    with gr.Accordion("💬 進一步問 AI（針對上述逐字稿）", open=False):
+        q = gr.Textbox(label="輸入問題", lines=2, placeholder="例如：幫我整理我該如何回覆對方？")
+        ask = gr.Button("詢問 AI 🤔")
+        ans = gr.Textbox(label="AI 回覆", lines=8)
+    # 複製按鈕
+    copy_js = """
+    async (txt) => {
+        try { await navigator.clipboard.writeText(txt); alert("✅ 已複製到剪貼簿！"); }
+        catch(e){ alert("❌ 複製失敗：" + e); }
+    }
+    """
+    copy_t = gr.Button("📋 複製逐字稿")
+    copy_su = gr.Button("📋 複製摘要")
+    copy_ans = gr.Button("📋 複製 AI 回覆")
+    run.click(transcribe_with_password, [pw, file_u, model_dd, gr.State("")], [s, t, su, ans])
+    ask.click(
+        lambda text, question, pwd, model: transcribe_with_password(pwd, gr.State(None), model, question)[3],
+        [t, q, pw, model_dd],
+        [ans]
+    )
+    copy_t.click(fn=None, inputs=t, outputs=None, js=copy_js)
+    copy_su.click(fn=None, inputs=su, outputs=None, js=copy_js)
+    copy_ans.click(fn=None, inputs=ans, outputs=None, js=copy_js)
+# 把 Gradio 掛在 FastAPI 根路徑（/）
+app = gr.mount_gradio_app(app, demo, path="/")
+# 本地測試才會啟動 uvicorn；在 HF 上不需要
 if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)