Update app.py
Browse files
app.py
CHANGED
|
@@ -89,7 +89,7 @@ def split_audio_if_needed(path):
|
|
| 89 |
return files
|
| 90 |
|
| 91 |
def transcribe_core(path, model):
|
| 92 |
-
# 1
|
| 93 |
if path and path.lower().endswith(".mp4"):
|
| 94 |
fixed_path = path[:-4] + ".m4a"
|
| 95 |
try:
|
|
@@ -99,20 +99,20 @@ def transcribe_core(path, model):
|
|
| 99 |
except Exception as e:
|
| 100 |
print(f"⚠️ mp4→m4a 複製失敗:{e},改用原檔嘗試")
|
| 101 |
|
| 102 |
-
# 2
|
| 103 |
chunks = split_audio_if_needed(path)
|
| 104 |
raw_parts = []
|
| 105 |
for f in chunks:
|
| 106 |
with open(f, "rb") as af:
|
| 107 |
res = client.audio.transcriptions.create(
|
| 108 |
-
model=model,
|
| 109 |
file=af,
|
| 110 |
response_format="text"
|
| 111 |
)
|
| 112 |
raw_parts.append(res)
|
| 113 |
-
full_raw = "\n".join(raw_parts)
|
| 114 |
|
| 115 |
-
# 3
|
| 116 |
conv_prompt = (
|
| 117 |
"請將以下內容完整轉換為「繁體中文(台灣用語)」:\n"
|
| 118 |
"規則:1) 僅做簡→繁字形轉換;2) 不要意譯或改寫;3) 不要添加任何前後綴。\n"
|
|
@@ -126,29 +126,28 @@ def transcribe_core(path, model):
|
|
| 126 |
],
|
| 127 |
temperature=0.0,
|
| 128 |
)
|
| 129 |
-
full_trad = trad_resp.choices[0].message.content.strip()
|
| 130 |
|
| 131 |
-
# 4
|
| 132 |
sum_prompt = (
|
| 133 |
-
"
|
| 134 |
-
|
|
|
|
| 135 |
)
|
| 136 |
sum_resp = client.chat.completions.create(
|
| 137 |
model="gpt-4o-mini",
|
| 138 |
messages=[
|
| 139 |
-
{"role": "system", "content": "
|
| 140 |
{"role": "user", "content": sum_prompt}
|
| 141 |
],
|
| 142 |
temperature=0.2,
|
| 143 |
)
|
| 144 |
summ = sum_resp.choices[0].message.content.strip()
|
| 145 |
|
| 146 |
-
# 依原介面回傳兩個值:完整逐字稿(已繁體)、摘要
|
| 147 |
return full_trad, summ
|
| 148 |
|
| 149 |
-
|
| 150 |
# ======================================================
|
| 151 |
-
# 🌐 FastAPI API
|
| 152 |
# ======================================================
|
| 153 |
app = FastAPI(title="LINE Transcription API")
|
| 154 |
|
|
@@ -175,7 +174,7 @@ def health():
|
|
| 175 |
return {"status": "ok", "time": int(time.time())}
|
| 176 |
|
| 177 |
# ======================================================
|
| 178 |
-
# 💬 Gradio
|
| 179 |
# ======================================================
|
| 180 |
def _normalize_upload_path(file_input):
|
| 181 |
if not file_input:
|
|
@@ -254,10 +253,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 254 |
copy_reply.click(fn=None, inputs=ai_reply, outputs=None, js=copy_js)
|
| 255 |
|
| 256 |
# ======================================================
|
| 257 |
-
# 🚀
|
| 258 |
# ======================================================
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
|
|
|
|
|
|
|
|
|
| 262 |
import uvicorn
|
| 263 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
|
| 89 |
return files
|
| 90 |
|
| 91 |
def transcribe_core(path, model):
|
| 92 |
+
# 1️⃣ 修正 LINE 語音 mp4 假副檔名
|
| 93 |
if path and path.lower().endswith(".mp4"):
|
| 94 |
fixed_path = path[:-4] + ".m4a"
|
| 95 |
try:
|
|
|
|
| 99 |
except Exception as e:
|
| 100 |
print(f"⚠️ mp4→m4a 複製失敗:{e},改用原檔嘗試")
|
| 101 |
|
| 102 |
+
# 2️⃣ Whisper 轉錄
|
| 103 |
chunks = split_audio_if_needed(path)
|
| 104 |
raw_parts = []
|
| 105 |
for f in chunks:
|
| 106 |
with open(f, "rb") as af:
|
| 107 |
res = client.audio.transcriptions.create(
|
| 108 |
+
model=model,
|
| 109 |
file=af,
|
| 110 |
response_format="text"
|
| 111 |
)
|
| 112 |
raw_parts.append(res)
|
| 113 |
+
full_raw = "\n".join(raw_parts)
|
| 114 |
|
| 115 |
+
# 3️⃣ 簡轉繁(不改寫內容)
|
| 116 |
conv_prompt = (
|
| 117 |
"請將以下內容完整轉換為「繁體中文(台灣用語)」:\n"
|
| 118 |
"規則:1) 僅做簡→繁字形轉換;2) 不要意譯或改寫;3) 不要添加任何前後綴。\n"
|
|
|
|
| 126 |
],
|
| 127 |
temperature=0.0,
|
| 128 |
)
|
| 129 |
+
full_trad = trad_resp.choices[0].message.content.strip()
|
| 130 |
|
| 131 |
+
# 4️⃣ 生成繁體摘要(自動決定條列與否)
|
| 132 |
sum_prompt = (
|
| 133 |
+
"請用台灣繁體中文撰寫摘要。"
|
| 134 |
+
"若內容資訊較多,可條列出重點;若內容簡短,請用一句話概述即可。"
|
| 135 |
+
"請勿添加前綴或評論,僅輸出摘要。\n\n" + full_trad
|
| 136 |
)
|
| 137 |
sum_resp = client.chat.completions.create(
|
| 138 |
model="gpt-4o-mini",
|
| 139 |
messages=[
|
| 140 |
+
{"role": "system", "content": "你是一位精準且嚴格使用台灣繁體中文的摘要助手。"},
|
| 141 |
{"role": "user", "content": sum_prompt}
|
| 142 |
],
|
| 143 |
temperature=0.2,
|
| 144 |
)
|
| 145 |
summ = sum_resp.choices[0].message.content.strip()
|
| 146 |
|
|
|
|
| 147 |
return full_trad, summ
|
| 148 |
|
|
|
|
| 149 |
# ======================================================
|
| 150 |
+
# 🌐 FastAPI API(捷徑用)
|
| 151 |
# ======================================================
|
| 152 |
app = FastAPI(title="LINE Transcription API")
|
| 153 |
|
|
|
|
| 174 |
return {"status": "ok", "time": int(time.time())}
|
| 175 |
|
| 176 |
# ======================================================
|
| 177 |
+
# 💬 Gradio 主介面
|
| 178 |
# ======================================================
|
| 179 |
def _normalize_upload_path(file_input):
|
| 180 |
if not file_input:
|
|
|
|
| 253 |
copy_reply.click(fn=None, inputs=ai_reply, outputs=None, js=copy_js)
|
| 254 |
|
| 255 |
# ======================================================
# 🚀 Startup (auto-detect: Hugging Face Space vs. local)
# ======================================================
# The Gradio UI is served from the FastAPI app root in BOTH
# environments, so mount it once unconditionally instead of
# duplicating the identical call in each branch (the original
# if/else ran the same mount on both paths).
gr.mount_gradio_app(app, demo, path="/")

# On Hugging Face Spaces the platform starts the server itself
# (SPACE_ID is set automatically there), so only launch uvicorn
# when running locally.
if not os.getenv("SPACE_ID"):
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
|