透過 gpt-4o-mini 將語音轉錄文字轉換成繁體中文
Browse files
app.py
CHANGED
|
@@ -89,6 +89,7 @@ def split_audio_if_needed(path):
|
|
| 89 |
return files
|
| 90 |
|
| 91 |
def transcribe_core(path, model):
|
|
|
|
| 92 |
if path and path.lower().endswith(".mp4"):
|
| 93 |
fixed_path = path[:-4] + ".m4a"
|
| 94 |
try:
|
|
@@ -98,22 +99,53 @@ def transcribe_core(path, model):
|
|
| 98 |
except Exception as e:
|
| 99 |
print(f"⚠️ mp4→m4a 複製失敗:{e},改用原檔嘗試")
|
| 100 |
|
|
|
|
| 101 |
chunks = split_audio_if_needed(path)
|
| 102 |
-
|
| 103 |
for f in chunks:
|
| 104 |
with open(f, "rb") as af:
|
| 105 |
res = client.audio.transcriptions.create(
|
| 106 |
-
model=model,
|
|
|
|
|
|
|
| 107 |
)
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
model="gpt-4o-mini",
|
| 112 |
-
messages=[
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
)
|
| 115 |
-
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
# ======================================================
|
| 119 |
# 🌐 FastAPI API(iPhone 捷徑可用)
|
|
|
|
| 89 |
return files
|
| 90 |
|
| 91 |
def transcribe_core(path, model):
|
| 92 |
+
# 1) iPhone LINE .mp4 → 假副檔名修正
|
| 93 |
if path and path.lower().endswith(".mp4"):
|
| 94 |
fixed_path = path[:-4] + ".m4a"
|
| 95 |
try:
|
|
|
|
| 99 |
except Exception as e:
|
| 100 |
print(f"⚠️ mp4→m4a 複製失敗:{e},改用原檔嘗試")
|
| 101 |
|
| 102 |
+
# 2) Whisper 逐段轉錄(原始:可能含簡體)
|
| 103 |
chunks = split_audio_if_needed(path)
|
| 104 |
+
raw_parts = []
|
| 105 |
for f in chunks:
|
| 106 |
with open(f, "rb") as af:
|
| 107 |
res = client.audio.transcriptions.create(
|
| 108 |
+
model=model, # "whisper-1" 或 "gpt-4o-mini-transcribe"
|
| 109 |
+
file=af,
|
| 110 |
+
response_format="text"
|
| 111 |
)
|
| 112 |
+
raw_parts.append(res)
|
| 113 |
+
full_raw = "\n".join(raw_parts) # 原始逐字稿(可能有簡體)
|
| 114 |
+
|
| 115 |
+
# 3) 將全文「只做字形轉換」成繁體(不意譯、不刪減)
|
| 116 |
+
conv_prompt = (
|
| 117 |
+
"請將以下內容完整轉換為「繁體中文(台灣用語)」:\n"
|
| 118 |
+
"規則:1) 僅做簡→繁字形轉換;2) 不要意譯或改寫;3) 保留標點、數字與段落;4) 不要添加任何前後綴。\n"
|
| 119 |
+
"-----\n" + full_raw
|
| 120 |
+
)
|
| 121 |
+
trad_resp = client.chat.completions.create(
|
| 122 |
model="gpt-4o-mini",
|
| 123 |
+
messages=[
|
| 124 |
+
{"role": "system", "content": "你是嚴格的繁體中文轉換器,只進行字形轉換。"},
|
| 125 |
+
{"role": "user", "content": conv_prompt}
|
| 126 |
+
],
|
| 127 |
+
temperature=0.0,
|
| 128 |
+
)
|
| 129 |
+
full_trad = trad_resp.choices[0].message.content.strip() # ✅ 確保為繁體
|
| 130 |
+
|
| 131 |
+
# 4) 以繁體全文產生摘要(也強制繁體)
|
| 132 |
+
sum_prompt = (
|
| 133 |
+
"請用繁體中文(台灣用語)撰寫重點式摘要(2–6 點),必要時保留時間、人名、數字;避免使用簡體字。\n\n"
|
| 134 |
+
+ full_trad
|
| 135 |
)
|
| 136 |
+
sum_resp = client.chat.completions.create(
|
| 137 |
+
model="gpt-4o-mini",
|
| 138 |
+
messages=[
|
| 139 |
+
{"role": "system", "content": "你是一位精準且嚴格使用繁體中文(台灣)的摘要助手。"},
|
| 140 |
+
{"role": "user", "content": sum_prompt}
|
| 141 |
+
],
|
| 142 |
+
temperature=0.3,
|
| 143 |
+
)
|
| 144 |
+
summ = sum_resp.choices[0].message.content.strip()
|
| 145 |
+
|
| 146 |
+
# 依原介面回傳兩個值:完整逐字稿(已繁體)、摘要
|
| 147 |
+
return full_trad, summ
|
| 148 |
+
|
| 149 |
|
| 150 |
# ======================================================
|
| 151 |
# 🌐 FastAPI API(iPhone 捷徑可用)
|