MichaelChou0806 committed on
Commit
c69672a
·
verified ·
1 Parent(s): bcbcc58

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -174
app.py CHANGED
@@ -1,215 +1,78 @@
1
- import os
2
- import time
3
- import shutil
4
- import tempfile
5
- from typing import Tuple
6
-
7
  from pydub import AudioSegment
8
  from openai import OpenAI
9
-
10
  import gradio as gr
11
- from fastapi import FastAPI, UploadFile, File, Form, HTTPException
12
 
13
- # ========================
14
- # 🔐 設定
15
- # ========================
16
  PASSWORD = os.getenv("APP_PASSWORD", "chou")
17
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
18
- MAX_SIZE = 25 * 1024 * 1024 # 25MB
19
-
20
- if not OPENAI_API_KEY:
21
- raise RuntimeError("OPENAI_API_KEY 未設定(請到 HF 的 Secrets 設定)")
22
-
23
- client = OpenAI(api_key=OPENAI_API_KEY)
24
 
25
- print("===== 🚀 啟動中 =====")
26
- print(f"APP_PASSWORD: {'✅ 已載入' if PASSWORD else '❌ 未載入'}")
27
- print(f"目前密碼內容:{PASSWORD}")
28
-
29
- # ========================
30
- # 🎧 轉錄核心
31
- # ========================
32
- def split_audio_if_needed(path: str) -> list:
33
  size = os.path.getsize(path)
34
  if size <= MAX_SIZE:
35
  return [path]
36
  audio = AudioSegment.from_file(path)
37
  n = int(size / MAX_SIZE) + 1
38
- chunk_ms = len(audio) / n
39
  parts = []
 
40
  for i in range(n):
41
  fn = f"chunk_{i+1}.wav"
42
- audio[int(i * chunk_ms):int((i + 1) * chunk_ms)].export(fn, format="wav")
43
  parts.append(fn)
44
  return parts
45
 
46
- def transcribe_core(path: str, model: str = "whisper-1") -> Tuple[str, str]:
47
- # iPhone LINE 部分 mp4 其實是 audio-only,這裡只改副檔名避免 MIME 阻擋
48
  if path.lower().endswith(".mp4"):
49
  fixed = path[:-4] + ".m4a"
50
  try:
51
  shutil.copy(path, fixed)
52
  path = fixed
53
- print("🔧 已自動修正 mp4 → m4a")
54
  except Exception as e:
55
- print(f"⚠️ mp4→m4a 複製失敗:{e}")
56
 
57
- # 1) Whisper 逐段轉錄(原始:可能有簡體)
58
- chunks = split_audio_if_needed(path)
59
- raw_parts = []
60
- for f in chunks:
61
- with open(f, "rb") as af:
62
- res = client.audio.transcriptions.create(
63
- model=model,
64
- file=af,
65
- response_format="text"
66
- )
67
- raw_parts.append(res)
68
- full_raw = "\n".join(raw_parts)
69
 
70
- # 2) 僅簡→繁(不意譯)
71
- conv_prompt = (
72
- "請將以下內容完整轉換為「繁體中文(台灣用語)」:\n"
73
- "規則:1) 僅做簡→繁字形轉換;2) 不要意譯或改寫;3) 不要添加任何前後綴。\n"
74
- "-----\n" + full_raw
75
- )
76
- full_trad = client.chat.completions.create(
77
  model="gpt-4o-mini",
78
  messages=[
79
- {"role": "system", "content": "你是嚴格的繁體中文轉換器,只進行字形轉換。"},
80
- {"role": "user", "content": conv_prompt}
81
- ],
82
- temperature=0.0,
83
- ).choices[0].message.content.strip()
84
 
85
- # 3) 摘要(長就條列、短就一句話)
86
- sum_prompt = (
87
- "請用台灣繁體中文撰寫摘要。若內容資訊多,可條列出重點;"
88
- "若內容簡短,請用一句話概述即可。\n\n" + full_trad
89
- )
90
- summary = client.chat.completions.create(
91
  model="gpt-4o-mini",
92
  messages=[
93
- {"role": "system", "content": "你是一位精準且嚴格使用台灣繁體中文的摘要助手。"},
94
- {"role": "user", "content": sum_prompt}
95
- ],
96
- temperature=0.2,
97
- ).choices[0].message.content.strip()
98
 
99
- return full_trad, summary
100
-
101
- # ========================
102
- # 🌐 FastAPI 主應用
103
- # ========================
104
- app = FastAPI(title="LINE Transcription (Gradio + API)")
105
-
106
- @app.get("/health")
107
- def health():
108
- return {"status": "ok", "time": int(time.time())}
109
-
110
- @app.post("/api/transcribe")
111
- async def api_transcribe(
112
- file: UploadFile = File(...),
113
- token: str = Form(...),
114
- model: str = Form("whisper-1")
115
- ):
116
- if token != PASSWORD:
117
- raise HTTPException(status_code=403, detail="Invalid token")
118
-
119
- # 以原副檔名建立臨時檔,避免沒有副檔名導致 pydub 判斷錯誤
120
- suffix = ""
121
- if "." in file.filename:
122
- suffix = "." + file.filename.rsplit(".", 1)[-1]
123
- with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
124
- tmp.write(await file.read())
125
- tmp_path = tmp.name
126
-
127
- try:
128
- text, summary = transcribe_core(tmp_path, model=model)
129
- return {"text": text, "summary": summary}
130
- finally:
131
- try:
132
- os.remove(tmp_path)
133
- except Exception:
134
- pass
135
-
136
- # ========================
137
- # 💬 Gradio UI(掛在 /)
138
- # ========================
139
- def transcribe_with_password(password, file, model_choice, question):
140
  if password.strip() != PASSWORD:
141
- return "❌ 密碼錯誤", "", "", ""
142
  if not file:
143
- return "⚠️ 未選擇檔案", "", "", ""
144
-
145
- text, summary = transcribe_core(file.name, model=model_choice)
146
-
147
- # 「進一步問 AI」:若使用者有填問題,就用轉錄全文回答
148
- followup = ""
149
- if question and question.strip():
150
- prompt = (
151
- "以下是逐字轉錄內容,請用台灣繁體中文回答我的問題:\n\n"
152
- f"【逐字稿】\n{text}\n\n"
153
- f"【問題】\n{question.strip()}"
154
- )
155
- followup = client.chat.completions.create(
156
- model="gpt-4o-mini",
157
- messages=[{"role": "user", "content": prompt}],
158
- temperature=0.6,
159
- ).choices[0].message.content.strip()
160
-
161
- return "✅ 完成", text, summary, followup
162
 
163
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
164
- gr.Markdown("## 🎧 LINE 語音轉錄與摘要工具(支援 .m4a / .mp4|API + UI)")
165
-
166
- with gr.Row():
167
- pw = gr.Textbox(label="輸入密碼", type="password", placeholder="請輸入英文數字")
168
- model_dd = gr.Dropdown(
169
- ["whisper-1", "gpt-4o-mini-transcribe"],
170
- value="whisper-1",
171
- label="選擇模型"
172
- )
173
-
174
- file_u = gr.File(label="上傳音訊檔(.m4a/.mp3/.wav/.mp4)")
175
  run = gr.Button("開始轉錄 🚀")
176
-
177
  s = gr.Textbox(label="狀態", interactive=False)
178
- t = gr.Textbox(label="轉錄結果(已轉繁體)", lines=10)
179
  su = gr.Textbox(label="AI 摘要", lines=8)
 
180
 
181
- with gr.Accordion("💬 進一步問 AI(針對上述逐字稿)", open=False):
182
- q = gr.Textbox(label="輸入問題", lines=2, placeholder="例如:幫我整理我該如何回覆對方?")
183
- ask = gr.Button("詢問 AI 🤔")
184
- ans = gr.Textbox(label="AI 回覆", lines=8)
185
-
186
- # 複製按鈕
187
- copy_js = """
188
- async (txt) => {
189
- try { await navigator.clipboard.writeText(txt); alert("✅ 已複製到剪貼簿!"); }
190
- catch(e){ alert("❌ 複製失敗:" + e); }
191
- }
192
- """
193
-
194
- copy_t = gr.Button("📋 複製逐字稿")
195
- copy_su = gr.Button("📋 複製摘要")
196
- copy_ans = gr.Button("📋 複製 AI 回覆")
197
-
198
- run.click(transcribe_with_password, [pw, file_u, model_dd, gr.State("")], [s, t, su, ans])
199
- ask.click(
200
- lambda text, question, pwd, model: transcribe_with_password(pwd, gr.State(None), model, question)[3],
201
- [t, q, pw, model_dd],
202
- [ans]
203
- )
204
-
205
- copy_t.click(fn=None, inputs=t, outputs=None, js=copy_js)
206
- copy_su.click(fn=None, inputs=su, outputs=None, js=copy_js)
207
- copy_ans.click(fn=None, inputs=ans, outputs=None, js=copy_js)
208
-
209
- # 把 Gradio 掛在 FastAPI 根路徑(/)
210
- app = gr.mount_gradio_app(app, demo, path="/")
211
 
212
- # 本地測試才會啟動 uvicorn;在 HF 上不需要
213
  if __name__ == "__main__":
214
- import uvicorn
215
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
+ import os, shutil
 
 
 
 
 
2
  from pydub import AudioSegment
3
  from openai import OpenAI
 
4
  import gradio as gr
 
5
 
 
 
 
# Shared secret the UI asks for before it runs any (billable) transcription.
# NOTE(review): the fallback value is committed in this public source file, so
# anyone can read it. Set APP_PASSWORD in the deployment's secrets.
PASSWORD = os.getenv("APP_PASSWORD", "chou")
if "APP_PASSWORD" not in os.environ:
    # Make the insecure fallback visible in the logs instead of using it silently.
    print("⚠️ APP_PASSWORD is not set; falling back to the built-in default password.")

# Largest file, in bytes, that is sent to the transcription API in one request.
MAX_SIZE = 25 * 1024 * 1024

# One OpenAI client shared by every request; the key comes from the environment.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
 
 
 
 
9
 
10
def split_audio(path):
    """Return a list of audio file paths, each small enough to upload.

    A file of at most ``MAX_SIZE`` bytes is returned unchanged as ``[path]``.
    A larger file is decoded and re-exported as equal-length WAV chunks.

    The chunk length is derived from the decoded PCM byte rate, not from the
    size of the (usually compressed) input file: WAV is uncompressed, so
    splitting by the input's size ratio can produce chunks far above
    ``MAX_SIZE``.

    Args:
        path: Path of the audio file to inspect.

    Returns:
        ``[path]`` when no split is needed, otherwise the paths of freshly
        created temporary ``.wav`` files, in playback order. The caller owns
        those temporary files and should delete them when done.
    """
    import math
    import tempfile

    if os.path.getsize(path) <= MAX_SIZE:
        return [path]

    audio = AudioSegment.from_file(path)
    total_ms = len(audio)  # pydub measures length in milliseconds

    # Exported WAV size ~= duration * frame_rate * frame_width + a small header.
    bytes_per_ms = audio.frame_rate * audio.frame_width / 1000
    header_allowance = 4096
    max_chunk_ms = max(1, int((MAX_SIZE - header_allowance) / bytes_per_ms))

    # Equal-length chunks avoid a tiny trailing remainder.
    n = max(1, math.ceil(total_ms / max_chunk_ms))
    chunk_ms = math.ceil(total_ms / n)

    parts = []
    for i in range(n):
        # Unique temp names so concurrent requests never overwrite each other.
        fd, fn = tempfile.mkstemp(prefix=f"chunk_{i + 1}_", suffix=".wav")
        os.close(fd)
        audio[i * chunk_ms:(i + 1) * chunk_ms].export(fn, format="wav")
        parts.append(fn)
    return parts
23
 
24
def _chat_text(system_prompt, user_prompt, temperature):
    """Run one gpt-4o-mini chat completion and return the reply text, stripped."""
    reply = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=temperature,
    )
    # The message content can be None; treat that as an empty reply.
    return (reply.choices[0].message.content or "").strip()


def transcribe_core(path, model="whisper-1"):
    """Transcribe an audio file, convert it to Traditional Chinese, and summarise it.

    Steps:
      1. A ``.mp4`` input is copied to a temporary ``.m4a`` file so it is
         uploaded with an audio extension (best effort; on failure the
         original path is used).
      2. The audio is split with ``split_audio`` and each part is transcribed.
      3. The joined transcript is converted to Taiwanese Traditional Chinese.
      4. The converted transcript is summarised.

    Every temporary file created along the way is deleted before returning,
    including when an API call raises.

    Args:
        path: Path of the audio file to transcribe.
        model: Name of the transcription model passed to the API.

    Returns:
        ``(transcript, summary)`` as strings. Both are empty when the
        transcription produced no text, so the chat model is never asked to
        convert or summarise nothing.
    """
    import tempfile

    scratch = []  # temporary files this call is responsible for removing
    try:
        if path.lower().endswith(".mp4"):
            try:
                fd, fixed = tempfile.mkstemp(suffix=".m4a")
                os.close(fd)
                scratch.append(fixed)
                shutil.copy(path, fixed)
            except OSError as e:
                print(f"⚠️ mp4→m4a 失敗: {e}")
            else:
                path = fixed

        chunks = split_audio(path)
        # Anything other than the input itself was created by the split.
        scratch.extend(c for c in chunks if c != path)

        raw = []
        for c in chunks:
            with open(c, "rb") as af:
                txt = client.audio.transcriptions.create(
                    model=model, file=af, response_format="text"
                )
            raw.append(txt)
        raw_txt = "\n".join(raw)

        if not raw_txt.strip():
            return "", ""

        trad = _chat_text(
            "你是嚴格的繁體中文轉換器",
            f"將以下內容轉為台灣繁體,不意譯:\n{raw_txt}",
            0.0,
        )
        summary = _chat_text(
            "你是繁體摘要助手",
            f"用條列或一句話摘要:\n{trad}",
            0.2,
        )
        return trad, summary
    finally:
        for leftover in scratch:
            try:
                os.remove(leftover)
            except OSError:
                # Best-effort cleanup: a missing temp file must not mask the
                # real result or the real exception.
                pass
 
56
 
57
def transcribe(password, file):
    """Gradio click handler: check the password, then transcribe and summarise.

    Args:
        password: Text typed into the password box; ``None`` is treated as
            an empty string.
        file: Value from the file-upload input. Either an object exposing
            the path as ``.name`` or a plain path string; falsy when nothing
            was uploaded.

    Returns:
        ``(status, transcript, summary)`` strings for the three output boxes.
        On a wrong password, a missing file, or a failed transcription, the
        transcript and summary are empty and the status explains why.
    """
    import hmac
    import traceback

    supplied = (password or "").strip()
    # Constant-time comparison; compare bytes because compare_digest rejects
    # non-ASCII str arguments.
    if not hmac.compare_digest(supplied.encode("utf-8"), PASSWORD.encode("utf-8")):
        return "❌ 密碼錯誤", "", ""
    if not file:
        return "⚠️ 未選擇檔案", "", ""

    # Accept both a file wrapper with .name and a bare path string.
    path = getattr(file, "name", file)
    try:
        text, summary = transcribe_core(path)
    except Exception:
        # UI boundary: keep details in the server log, because API error
        # messages may contain information that should not reach the browser.
        traceback.print_exc()
        return "❌ 轉錄失敗,請稍後再試", "", ""
    return "✅ 完成", text, summary
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
# Single-page UI: password and audio upload in; status, transcript and summary out.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🎧 LINE 語音轉錄與摘要(Hugging Face 版)")

    # Inputs
    password_box = gr.Textbox(label="密碼", type="password")
    audio_file = gr.File(label="上傳音訊檔")
    start_button = gr.Button("開始轉錄 🚀")

    # Outputs, in the same order as the tuple returned by `transcribe`
    status_box = gr.Textbox(label="狀態", interactive=False)
    transcript_box = gr.Textbox(label="轉錄結果", lines=10)
    summary_box = gr.Textbox(label="AI 摘要", lines=8)

    start_button.click(
        fn=transcribe,
        inputs=[password_box, audio_file],
        outputs=[status_box, transcript_box, summary_box],
    )

# Module-level alias for the Blocks object.
app = demo

# Direct execution: serve on all interfaces, port 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)