MichaelChou0806 committed on
Commit
bcbcc58
·
verified ·
1 Parent(s): e7d0f5a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +143 -59
app.py CHANGED
@@ -1,28 +1,35 @@
1
  import os
2
  import time
3
  import shutil
 
 
 
4
  from pydub import AudioSegment
5
  from openai import OpenAI
 
6
  import gradio as gr
7
- from fastapi import FastAPI, UploadFile, File, Form
8
- from threading import Thread
9
- import uvicorn
10
 
11
- # ======================================================
12
- # 🔐 設定區
13
- # ======================================================
14
  PASSWORD = os.getenv("APP_PASSWORD", "chou")
15
- MAX_SIZE = 25 * 1024 * 1024
16
- client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
 
 
 
 
17
 
18
  print("===== 🚀 啟動中 =====")
19
  print(f"APP_PASSWORD: {'✅ 已載入' if PASSWORD else '❌ 未載入'}")
20
  print(f"目前密碼內容:{PASSWORD}")
21
 
22
- # ======================================================
23
- # 🎧 音訊轉錄核心
24
- # ======================================================
25
- def split_audio_if_needed(path):
26
  size = os.path.getsize(path)
27
  if size <= MAX_SIZE:
28
  return [path]
@@ -36,41 +43,51 @@ def split_audio_if_needed(path):
36
  parts.append(fn)
37
  return parts
38
 
39
- def transcribe_core(path, model="whisper-1"):
 
40
  if path.lower().endswith(".mp4"):
41
  fixed = path[:-4] + ".m4a"
42
  try:
43
  shutil.copy(path, fixed)
44
  path = fixed
 
45
  except Exception as e:
46
- print(f"⚠️ mp4→m4a 轉檔失敗:{e}")
47
 
 
48
  chunks = split_audio_if_needed(path)
49
- txts = []
50
  for f in chunks:
51
  with open(f, "rb") as af:
52
- res = client.audio.transcriptions.create(model=model, file=af, response_format="text")
53
- txts.append(res)
54
- full_raw = "\n".join(txts)
 
 
 
 
55
 
 
56
  conv_prompt = (
57
  "請將以下內容完整轉換為「繁體中文(台灣用語)」:\n"
58
- "規則:1) 僅做簡→繁字形轉換;2) 不要意譯或改寫;3) 不要添加任何前後綴。\n-----\n" + full_raw
 
59
  )
60
- trad = client.chat.completions.create(
61
  model="gpt-4o-mini",
62
  messages=[
63
- {"role": "system", "content": "你是嚴格的繁體中文轉換器。"},
64
  {"role": "user", "content": conv_prompt}
65
  ],
66
  temperature=0.0,
67
  ).choices[0].message.content.strip()
68
 
 
69
  sum_prompt = (
70
  "請用台灣繁體中文撰寫摘要。若內容資訊多,可條列出重點;"
71
- "若內容簡短,請用一句話概述即可。\n\n" + trad
72
  )
73
- summ = client.chat.completions.create(
74
  model="gpt-4o-mini",
75
  messages=[
76
  {"role": "system", "content": "你是一位精準且嚴格使用台灣繁體中文的摘要助手。"},
@@ -79,53 +96,120 @@ def transcribe_core(path, model="whisper-1"):
79
  temperature=0.2,
80
  ).choices[0].message.content.strip()
81
 
82
- return trad, summ
 
 
 
 
 
83
 
84
- # ======================================================
85
- # 🌐 FastAPI for 捷徑
86
- # ======================================================
87
- api_app = FastAPI()
88
 
89
- @api_app.post("/api/transcribe")
90
- async def api_transcribe(file: UploadFile = File(...), token: str = Form(...)):
 
 
 
 
91
  if token != PASSWORD:
92
- return {"error": "Invalid token"}
93
- temp = file.filename
94
- with open(temp, "wb") as f:
95
- f.write(await file.read())
96
- text, summary = transcribe_core(temp)
97
- os.remove(temp)
98
- return {"text": text, "summary": summary}
99
-
100
- # ======================================================
101
- # 💬 Gradio 介面
102
- # ======================================================
103
- def transcribe_with_password(password, file):
 
 
 
 
 
 
 
 
 
 
 
104
  if password.strip() != PASSWORD:
105
- return "❌ 密碼錯誤", "", ""
106
  if not file:
107
- return "⚠️ 未選擇檔案", "", ""
108
- text, summary = transcribe_core(file.name)
109
- return "✅ 完成", text, summary
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
112
- gr.Markdown("## 🎧 LINE 語音轉錄與摘要(支援 .m4a / .mp4)")
113
- pw = gr.Textbox(label="輸入密碼", type="password")
114
- f = gr.File(label="上傳音訊檔")
 
 
 
 
 
 
 
 
115
  run = gr.Button("開始轉錄 🚀")
 
116
  s = gr.Textbox(label="狀態", interactive=False)
117
- t = gr.Textbox(label="轉錄結果", lines=10)
118
  su = gr.Textbox(label="AI 摘要", lines=8)
119
- run.click(transcribe_with_password, [pw, f], [s, t, su])
120
 
121
- # ======================================================
122
- # 🚀 啟動
123
- # ======================================================
124
- def run_api():
125
- uvicorn.run(api_app, host="0.0.0.0", port=7861)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
- Thread(target=run_api, daemon=True).start()
128
- app = demo # ✅ Hugging Face 主入口使用 Gradio
129
 
 
130
  if __name__ == "__main__":
131
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
1
  import os
2
  import time
3
  import shutil
4
+ import tempfile
5
+ from typing import Tuple
6
+
7
  from pydub import AudioSegment
8
  from openai import OpenAI
9
+
10
  import gradio as gr
11
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
 
 
12
 
13
# ========================
# Settings
# ========================
# Shared secret checked by both the Gradio UI and the HTTP API.
# NOTE(review): "chou" is a hard-coded fallback; set APP_PASSWORD in the
# deployment secrets so the fallback is never what protects the app.
PASSWORD = os.getenv("APP_PASSWORD", "chou")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Files at or below this size are sent as-is; larger ones go through
# split_audio_if_needed().
MAX_SIZE = 25 * 1024 * 1024  # 25MB

if not OPENAI_API_KEY:
    # Fail fast at import time instead of on the first transcription request.
    raise RuntimeError("OPENAI_API_KEY 未設定(請到 HF 的 Secrets 設定)")

client = OpenAI(api_key=OPENAI_API_KEY)

print("===== 🚀 啟動中 =====")
# Report only whether the secret was supplied through the environment.
# The value itself must never be written to the logs, and PASSWORD is always
# truthy because of the fallback, so the environment is checked directly.
print(f"APP_PASSWORD: {'✅ 已載入' if os.getenv('APP_PASSWORD') else '❌ 未載入'}")
28
 
29
+ # ========================
30
+ # 🎧 轉錄核心
31
+ # ========================
32
+ def split_audio_if_needed(path: str) -> list:
33
  size = os.path.getsize(path)
34
  if size <= MAX_SIZE:
35
  return [path]
 
43
  parts.append(fn)
44
  return parts
45
 
46
def _chat_once(system_prompt: str, user_prompt: str, temperature: float) -> str:
    """Send one system+user exchange to gpt-4o-mini and return the stripped reply."""
    reply = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=temperature,
    )
    # The SDK may return None as content; treat that as an empty answer.
    return (reply.choices[0].message.content or "").strip()


def transcribe_core(path: str, model: str = "whisper-1") -> Tuple[str, str]:
    """Transcribe an audio file and summarise it in Traditional Chinese.

    Steps:
      1. Transcribe every chunk returned by ``split_audio_if_needed``.
      2. Ask gpt-4o-mini to convert the raw text to Traditional Chinese
         (character conversion only).
      3. Ask gpt-4o-mini for a summary of the converted text.

    Args:
        path: Path of the audio file on disk.
        model: Transcription model name passed to the OpenAI audio API.

    Returns:
        ``(full_trad, summary)``: the converted transcript and its summary.

    Raises:
        Whatever the OpenAI client or file I/O raises; nothing is swallowed
        except a failure to create or delete the temporary ``.m4a`` copy.
    """
    # Some mp4 files sent from LINE on iPhone are really audio-only. The file
    # is copied under an .m4a name (no re-encoding) to avoid MIME rejection.
    renamed_copy = None
    if path.lower().endswith(".mp4"):
        fixed = path[:-4] + ".m4a"
        try:
            shutil.copy(path, fixed)
        except OSError as e:
            # Best effort: fall back to the original path.
            print(f"⚠️ mp4→m4a 複製失敗:{e}")
        else:
            path = fixed
            renamed_copy = fixed
            print("🔧 已自動修正 mp4 → m4a")

    try:
        # 1) Chunk-by-chunk transcription (raw text; may contain Simplified Chinese).
        chunks = split_audio_if_needed(path)
        raw_parts = []
        for chunk_path in chunks:
            with open(chunk_path, "rb") as af:
                res = client.audio.transcriptions.create(
                    model=model,
                    file=af,
                    response_format="text",
                )
            raw_parts.append(res)
        full_raw = "\n".join(raw_parts)
    finally:
        # Remove the copy created above so repeated uploads do not pile up.
        # NOTE(review): chunk files produced by split_audio_if_needed are not
        # removed here because their ownership is not visible from this block.
        if renamed_copy is not None:
            try:
                os.remove(renamed_copy)
            except OSError as e:
                print(f"⚠️ 暫存檔刪除失敗:{e}")

    # 2) Simplified -> Traditional conversion only (no paraphrasing).
    conv_prompt = (
        "請將以下內容完整轉換為「繁體中文(台灣用語)」:\n"
        "規則:1) 僅做簡→繁字形轉換;2) 不要意譯或改寫;3) 不要添加任何前後綴。\n"
        "-----\n" + full_raw
    )
    full_trad = _chat_once(
        "你是嚴格的繁體中文轉換器,只進行字形轉換。",
        conv_prompt,
        0.0,
    )

    # 3) Summary (bullet points when long, one sentence when short).
    sum_prompt = (
        "請用台灣繁體中文撰寫摘要。若內容資訊多,可條列出重點;"
        "若內容簡短,請用一句話概述即可。\n\n" + full_trad
    )
    summary = _chat_once(
        "你是一位精準且嚴格使用台灣繁體中文的摘要助手。",
        sum_prompt,
        0.2,
    )

    return full_trad, summary
100
+
101
# ========================
# FastAPI main application
# ========================
app = FastAPI(title="LINE Transcription (Gradio + API)")


@app.get("/health")
def health():
    """Liveness probe: report a fixed status plus the current Unix time."""
    now = int(time.time())
    return {"status": "ok", "time": now}
 
109
 
110
@app.post("/api/transcribe")
async def api_transcribe(
    file: UploadFile = File(...),
    token: str = Form(...),
    model: str = Form("whisper-1"),
):
    """Transcribe an uploaded audio file and return transcript plus summary.

    Form fields:
        file: The audio upload.
        token: Must equal the configured ``PASSWORD``.
        model: Transcription model name forwarded to ``transcribe_core``.

    Returns:
        ``{"text": <transcript>, "summary": <summary>}``.

    Raises:
        HTTPException: 403 when ``token`` does not match.
    """
    import hmac

    from fastapi.concurrency import run_in_threadpool

    # Constant-time comparison; bytes are used because compare_digest rejects
    # non-ASCII str arguments.
    if not hmac.compare_digest(token.encode("utf-8"), PASSWORD.encode("utf-8")):
        raise HTTPException(status_code=403, detail="Invalid token")

    # Keep the original extension so downstream format detection still works.
    # The filename is client-controlled and may be missing, so only the
    # extension of its basename is used (no path separators reach tempfile).
    suffix = os.path.splitext(os.path.basename(file.filename or ""))[1]

    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp_path = tmp.name
            tmp.write(await file.read())

        # transcribe_core blocks on network calls; run it in a worker thread
        # so the event loop (and the UI mounted on the same app) stays responsive.
        text, summary = await run_in_threadpool(
            transcribe_core, tmp_path, model=model
        )
        return {"text": text, "summary": summary}
    finally:
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError as e:
                # Cleanup is best effort, but the failure is still reported.
                print(f"⚠️ 暫存檔刪除失敗:{e}")
135
+
136
+ # ========================
137
+ # 💬 Gradio UI(掛在 /)
138
+ # ========================
139
def transcribe_with_password(password, file, model_choice, question):
    """Gradio handler: check the password, transcribe, optionally answer a question.

    Returns a 4-tuple ``(status, transcript, summary, follow-up answer)``.
    The last three items are empty strings when the password is wrong or no
    file was given; the answer is empty when ``question`` is blank.
    """
    empty_outputs = ("", "", "")

    if password.strip() != PASSWORD:
        return ("❌ 密碼錯誤", *empty_outputs)
    if not file:
        return ("⚠️ 未選擇檔案", *empty_outputs)

    transcript, digest = transcribe_core(file.name, model=model_choice)

    # "Ask the AI further": only when the user actually typed a question.
    asked = question.strip() if question else ""
    if not asked:
        return "✅ 完成", transcript, digest, ""

    qa_prompt = (
        "以下是逐字轉錄內容,請用台灣繁體中文回答我的問題:\n\n"
        f"【逐字稿】\n{transcript}\n\n"
        f"【問題】\n{asked}"
    )
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": qa_prompt}],
        temperature=0.6,
    )
    answer = completion.choices[0].message.content.strip()

    return "✅ 完成", transcript, digest, answer
162
 
def _answer_followup(transcript, question, password):
    """Answer a follow-up question using the transcript already on screen.

    Returns the text for the "AI reply" box: either the model's answer or a
    short status message when the password, transcript or question is missing.
    """
    if (password or "").strip() != PASSWORD:
        return "❌ 密碼錯誤"
    if not (transcript and transcript.strip()):
        return "⚠️ 尚無逐字稿,請先完成轉錄"
    if not (question and question.strip()):
        return "⚠️ 請輸入問題"

    prompt = (
        "以下是逐字轉錄內容,請用台灣繁體中文回答我的問題:\n\n"
        f"【逐字稿】\n{transcript}\n\n"
        f"【問題】\n{question.strip()}"
    )
    reply = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.6,
    )
    return (reply.choices[0].message.content or "").strip()


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🎧 LINE 語音轉錄與摘要工具(支援 .m4a / .mp4|API + UI)")

    with gr.Row():
        pw = gr.Textbox(label="輸入密碼", type="password", placeholder="請輸入英文數字")
        model_dd = gr.Dropdown(
            ["whisper-1", "gpt-4o-mini-transcribe"],
            value="whisper-1",
            label="選擇模型",
        )

    file_u = gr.File(label="上傳音訊檔(.m4a/.mp3/.wav/.mp4)")
    run = gr.Button("開始轉錄 🚀")

    s = gr.Textbox(label="狀態", interactive=False)
    t = gr.Textbox(label="轉錄結果(已轉繁體)", lines=10)
    su = gr.Textbox(label="AI 摘要", lines=8)

    with gr.Accordion("💬 進一步問 AI(針對上述逐字稿)", open=False):
        q = gr.Textbox(label="輸入問題", lines=2, placeholder="例如:幫我整理我該如何回覆對方?")
        ask = gr.Button("詢問 AI 🤔")
        ans = gr.Textbox(label="AI 回覆", lines=8)

    # Copy buttons: the JS runs in the browser and receives the textbox value.
    copy_js = """
    async (txt) => {
      try { await navigator.clipboard.writeText(txt); alert("✅ 已複製到剪貼簿!"); }
      catch(e){ alert("❌ 複製失敗:" + e); }
    }
    """

    copy_t = gr.Button("📋 複製逐字稿")
    copy_su = gr.Button("📋 複製摘要")
    copy_ans = gr.Button("📋 複製 AI 回覆")

    # The main run never carries a follow-up question, so an empty State
    # fills the handler's fourth argument.
    no_question = gr.State("")
    run.click(
        transcribe_with_password,
        [pw, file_u, model_dd, no_question],
        [s, t, su, ans],
    )

    # Follow-up questions are answered from the transcript textbox. Calling
    # the transcription handler again with a component object in place of
    # the uploaded file cannot work, because event handlers receive values.
    ask.click(_answer_followup, [t, q, pw], [ans])

    copy_t.click(fn=None, inputs=t, outputs=None, js=copy_js)
    copy_su.click(fn=None, inputs=su, outputs=None, js=copy_js)
    copy_ans.click(fn=None, inputs=ans, outputs=None, js=copy_js)

# Mount the Gradio UI on the FastAPI app at the root path (/).
app = gr.mount_gradio_app(app, demo, path="/")

# uvicorn is started here only for local runs; it is not needed on HF.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)