# app.py — password-protected audio transcription & summary app (Hugging Face Space)
import math
import os
import shutil
import tempfile
import time

import gradio as gr
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from openai import OpenAI
from pydub import AudioSegment
# ======================================================
# 🔐 Configuration
# ======================================================
# Shared secret for both the Gradio UI and the /api/transcribe endpoint.
PASSWORD = os.getenv("APP_PASSWORD", "defaultpass")
# OpenAI's audio endpoints reject uploads larger than 25 MB.
MAX_SIZE = 25 * 1024 * 1024
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
print("===== 🚀 啟動中 =====")
print(f"APP_PASSWORD: {'✅ 已載入' if PASSWORD else '❌ 未載入'}")
# SECURITY FIX: the original code printed the password itself here.
# Space build/runtime logs can be read by anyone with access to the Space,
# so the secret must never be echoed — only whether it was loaded.
# ======================================================
# ⚔️ Brute-force protection
# ======================================================
# Policy: a session is locked for 24 h after 10 failures inside a rolling
# 24 h window, and for 5 minutes after 5 failures inside one minute.
MAX_FAILED_IN_WINDOW = 10
WINDOW_SECONDS = 24 * 3600
LOCK_DURATION_SECONDS = 24 * 3600
SHORT_BURST_LIMIT = 5
SHORT_BURST_SECONDS = 60

# session id -> list of failure timestamps (epoch seconds)
attempts = {}
# session id -> epoch second at which the lock expires
locked = {}


def _now():
    """Current time as whole epoch seconds."""
    return int(time.time())


def prune_old_attempts(sid):
    """Forget failures for *sid* that fell out of the rolling window."""
    cutoff = _now() - WINDOW_SECONDS
    history = attempts.get(sid)
    if history is None:
        return
    fresh = [t for t in history if t >= cutoff]
    if fresh:
        attempts[sid] = fresh
    else:
        del attempts[sid]


def check_lock(sid):
    """Return (is_locked, message) for *sid*, expiring stale locks on the way."""
    expiry = locked.get(sid)
    if expiry is not None:
        if _now() < expiry:
            remain = expiry - _now()
            return True, f"🔒 已被鎖定,請 {remain // 60} 分鐘後再試。"
        # Lock has expired: drop it together with the failure history.
        locked.pop(sid, None)
        attempts.pop(sid, None)
    prune_old_attempts(sid)
    if len(attempts.get(sid, [])) >= MAX_FAILED_IN_WINDOW:
        locked[sid] = _now() + LOCK_DURATION_SECONDS
        return True, "🔒 嘗試過多,已鎖定 24 小時。"
    return False, ""


def record_failed_attempt(sid):
    """Register one failed login for *sid*.

    Returns (total_failures_in_window, message); the message is non-empty
    when a rapid burst triggered the short 5-minute lock.
    """
    now = _now()
    attempts.setdefault(sid, []).append(now)
    prune_old_attempts(sid)
    burst = [t for t in attempts[sid] if t >= now - SHORT_BURST_SECONDS]
    total = len(attempts[sid])
    if len(burst) >= SHORT_BURST_LIMIT:
        # Rapid-fire failures: short cool-down instead of the full 24 h lock.
        locked[sid] = now + 300
        return total, "⚠️ 多次快速嘗試,暫時鎖定5分鐘。"
    return total, ""


def clear_attempts(sid):
    """Wipe all failure history and locks for *sid* (after a successful login)."""
    attempts.pop(sid, None)
    locked.pop(sid, None)
# ======================================================
# 🎧 音訊轉錄核心
# ======================================================
def split_audio_if_needed(path, max_size=None):
    """Return a list of audio file paths, each at most *max_size* bytes.

    If the file already fits, it is returned unchanged as a one-element
    list. Otherwise it is split into roughly equal time slices exported
    as unique temporary .wav files; the caller owns (and should delete)
    those chunk files.

    max_size defaults to the module-level MAX_SIZE (OpenAI's 25 MB cap).
    """
    limit = MAX_SIZE if max_size is None else max_size
    size = os.path.getsize(path)
    if size <= limit:
        return [path]
    audio = AudioSegment.from_file(path)
    # Smallest chunk count keeping every chunk under the limit.
    # (The original int(size/limit)+1 produced a spurious extra chunk
    # when size was an exact multiple of the limit.)
    num = math.ceil(size / limit)
    chunk_ms = len(audio) / num
    files = []
    for i in range(num):
        start, end = int(i * chunk_ms), int((i + 1) * chunk_ms)
        segment = audio[start:end]
        # Unique temp names: the original fixed "chunk_N.wav" names in the
        # CWD clobbered each other under concurrent requests.
        fd, fn = tempfile.mkstemp(prefix=f"chunk_{i + 1}_", suffix=".wav")
        os.close(fd)
        segment.export(fn, format="wav")
        files.append(fn)
    return files
def transcribe_core(path, model):
    """Transcribe *path* with OpenAI, force Traditional Chinese, summarize.

    Args:
        path: local audio file path (any format ffmpeg/pydub can read).
        model: transcription model name ("whisper-1" or
            "gpt-4o-mini-transcribe").

    Returns:
        (transcript, summary) — both in Traditional Chinese.

    FIX over original: intermediate files (the .m4a copy and any chunk
    .wav files) were never deleted; they are now removed in a finally
    block. The caller's original file is never touched.
    """
    temp_files = []  # intermediates this call created and must delete
    # 1) iPhone LINE voice notes arrive as ".mp4" that is really m4a audio;
    #    copy under the right extension so the API accepts it.
    if path and path.lower().endswith(".mp4"):
        fixed_path = path[:-4] + ".m4a"
        try:
            shutil.copy(path, fixed_path)
            path = fixed_path
            temp_files.append(fixed_path)
            print("🔧 已自動修正 mp4 → m4a")
        except Exception as e:
            print(f"⚠️ mp4→m4a 複製失敗:{e},改用原檔嘗試")
    try:
        # 2) Whisper transcription, chunked to stay under the 25 MB API
        #    limit. The raw transcript may contain Simplified Chinese.
        chunks = split_audio_if_needed(path)
        temp_files.extend(c for c in chunks if c != path)
        raw_parts = []
        for chunk_path in chunks:
            with open(chunk_path, "rb") as af:
                res = client.audio.transcriptions.create(
                    model=model,  # "whisper-1" or "gpt-4o-mini-transcribe"
                    file=af,
                    response_format="text"
                )
            raw_parts.append(res)
        full_raw = "\n".join(raw_parts)  # raw transcript (may be Simplified)
        # 3) Character-level conversion to Traditional Chinese (Taiwan) —
        #    no paraphrasing, no additions.
        conv_prompt = (
            "請將以下內容完整轉換為「繁體中文(台灣用語)」:\n"
            "規則:1) 僅做簡→繁字形轉換;2) 不要意譯或改寫;3) 不要添加任何前後綴。\n"
            "-----\n" + full_raw
        )
        trad_resp = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "你是嚴格的繁體中文轉換器,只進行字形轉換。"},
                {"role": "user", "content": conv_prompt}
            ],
            temperature=0.0,  # deterministic: pure character conversion
        )
        full_trad = trad_resp.choices[0].message.content.strip()
        # 4) Summarize the Traditional-Chinese transcript (also forced
        #    Traditional Chinese).
        sum_prompt = (
            "請用台灣繁體中文 (避免使用簡體字), 條列式呈現精簡重點 (如果需要的話)。\n\n"
            + full_trad
        )
        sum_resp = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "你是一位精準且嚴格使用繁體中文(台灣)的摘要助手。"},
                {"role": "user", "content": sum_prompt}
            ],
            temperature=0.2,
        )
        summ = sum_resp.choices[0].message.content.strip()
        # Same two return values as the original UI expects:
        # full transcript (Traditional) and summary.
        return full_trad, summ
    finally:
        # Clean up chunk files and the m4a copy; never the caller's file.
        for tmp in temp_files:
            try:
                os.remove(tmp)
            except OSError:
                pass
# ======================================================
# 🌐 FastAPI API (usable from iPhone Shortcuts)
# ======================================================
# The FastAPI instance also hosts the Gradio UI (mounted at startup below).
app = FastAPI(title="LINE Transcription API")
@app.post("/api/transcribe")
async def api_transcribe(
    file: UploadFile = File(...),
    token: str = Form(default=None)
):
    """Shortcut upload endpoint: accept audio + token, return JSON result.

    Returns {"text": transcript, "summary": summary}; raises HTTP 403
    when the token does not match APP_PASSWORD.
    """
    if token != PASSWORD:
        raise HTTPException(status_code=403, detail="Invalid token")
    # SECURITY FIX: the original wrote to the client-supplied filename
    # directly (path traversal + collisions between concurrent uploads).
    # Keep only the extension so transcribe_core can detect the format,
    # and write to a unique temp file instead.
    suffix = os.path.splitext(file.filename or "")[1]
    fd, temp = tempfile.mkstemp(suffix=suffix or ".bin")
    try:
        with os.fdopen(fd, "wb") as out:
            out.write(await file.read())
        text, summary = transcribe_core(temp, "whisper-1")
    finally:
        # Remove the upload even when transcription fails (original leaked it).
        try:
            os.remove(temp)
        except OSError:
            pass
    return {"text": text, "summary": summary}
@app.get("/health")
def health():
    """Liveness probe — a Shortcut can ping this to confirm the service is up."""
    timestamp = int(time.time())
    return {"status": "ok", "time": timestamp}
# ======================================================
# 💬 Gradio 介面(完整舊版)
# ======================================================
def _normalize_upload_path(file_input):
if not file_input:
return None
if isinstance(file_input, str):
return file_input
if isinstance(file_input, list) and file_input:
return _normalize_upload_path(file_input[0])
path = getattr(file_input, "name", None)
if not path and isinstance(file_input, dict):
path = file_input.get("name") or file_input.get("path")
return path
def transcribe_with_password(session_id, password, file_input, model_choice):
    """Gradio handler: verify the password, then transcribe the upload.

    Returns a (status_message, transcript, summary) triple matching the
    three output widgets; on any failure the last two values are empty.
    """
    # Strip spaces and zero-width characters that mobile keyboards sneak in.
    password = password.strip().replace(" ", "").replace("\u200b", "")
    is_locked, lock_msg = check_lock(session_id)
    if is_locked:
        return lock_msg, "", ""
    if password != PASSWORD:
        count, burst_msg = record_failed_attempt(session_id)
        return burst_msg or f"密碼錯誤(第 {count} 次)", "", ""
    path = _normalize_upload_path(file_input)
    if not path or not os.path.exists(path):
        return "找不到上傳檔案,請重新選擇。", "", ""
    # Successful login: reset the brute-force counters for this session.
    clear_attempts(session_id)
    transcript, summary = transcribe_core(path, model_choice)
    return "✅ 轉錄完成", transcript, summary
def ask_about_transcript(full_text, q):
    """Answer a follow-up question about the transcript via gpt-4o-mini.

    Returns the model's reply, or a warning string when there is no
    transcript yet / no question was entered.
    """
    if not full_text.strip():
        return "⚠️ 尚未有轉錄內容"
    if not q.strip():
        return "請輸入問題"
    prompt = f"以下是轉錄內容:\n{full_text}\n\n問題:{q}\n請用繁體中文回答。"
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.6,
    )
    reply = response.choices[0].message.content
    return reply.strip()
# ======================================================
# 🖥️ Gradio UI
# ======================================================
# All widget construction and event wiring happens inside this context.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🎧 語音轉錄與摘要工具(私人API勿轉傳|支援 iPhone LINE .mp4)")
    # Per-page-load random id; used as the key in the brute-force maps.
    session_state = gr.State(value=None)
    with gr.Row():
        password_input = gr.Textbox(label="輸入密碼", placeholder="請輸入英文與數字(請切換成英文輸入法)", type="password", max_lines=1)
        model_choice = gr.Dropdown(["whisper-1", "gpt-4o-mini-transcribe"], value="whisper-1", label="選擇模型")
    file_input = gr.File(label="上傳音訊 / LINE 語音檔(支援 .m4a, .aac, .wav, .mp4)", file_count="single", file_types=["audio", ".mp4", ".m4a", ".aac", ".wav"])
    transcribe_btn = gr.Button("開始轉錄與摘要 🚀")
    status_box = gr.Textbox(label="狀態", interactive=False)
    transcript_box = gr.Textbox(label="完整轉錄文字", lines=10)
    copy_transcript = gr.Button("📋 複製轉錄文字")
    summary_box = gr.Textbox(label="摘要結果", lines=10)
    copy_summary = gr.Button("📋 複製摘要結果")
    with gr.Accordion("💬 進一步問 AI", open=False):
        user_q = gr.Textbox(label="輸入問題", lines=2)
        ask_btn = gr.Button("詢問 AI 🤔")
        ai_reply = gr.Textbox(label="AI 回覆", lines=6)
        copy_reply = gr.Button("📋 複製 AI 回覆")

    def init_session():
        # Fresh random session id on every page load.
        import uuid
        return str(uuid.uuid4())

    demo.load(init_session, None, session_state)
    transcribe_btn.click(transcribe_with_password, [session_state, password_input, file_input, model_choice], [status_box, transcript_box, summary_box])
    ask_btn.click(ask_about_transcript, [transcript_box, user_q], [ai_reply])
    # Clipboard copy runs entirely client-side (fn=None, js callback only).
    copy_js = """async (text) => {try {await navigator.clipboard.writeText(text); alert("✅ 已複製到剪貼簿!");} catch (e) {alert("❌ 複製失敗:" + e);}}"""
    copy_transcript.click(fn=None, inputs=transcript_box, outputs=None, js=copy_js)
    copy_summary.click(fn=None, inputs=summary_box, outputs=None, js=copy_js)
    copy_reply.click(fn=None, inputs=ai_reply, outputs=None, js=copy_js)
# ======================================================
# 🚀 Startup (Hugging Face)
# ======================================================
# Mount the Gradio UI at "/" on top of the FastAPI app so the web UI and
# the /api/* + /health routes share one server process.
gr.mount_gradio_app(app, demo, path="/")
if __name__ == "__main__":
    import uvicorn
    # 7860 is the port Hugging Face Spaces expects the app to listen on.
    uvicorn.run(app, host="0.0.0.0", port=7860)