MichaelChou0806 committed on
Commit
1feca42
·
verified ·
1 Parent(s): e7bb2ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -202
app.py CHANGED
@@ -1,247 +1,103 @@
1
  import os
2
  import time
3
  import shutil
4
- from fastapi import FastAPI, File, UploadFile
5
  from pydub import AudioSegment
6
  from openai import OpenAI
7
  import gradio as gr
 
8
 
9
  # ========================
10
- # 🔐 設定
11
  # ========================
12
  PASSWORD = os.getenv("APP_PASSWORD", "defaultpass")
13
- MAX_SIZE = 25 * 1024 * 1024
14
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
15
 
16
  # ========================
17
- # ⚔️ 防暴力破解
18
  # ========================
19
- MAX_FAILED_IN_WINDOW = 10
20
- WINDOW_SECONDS = 24 * 3600
21
- LOCK_DURATION_SECONDS = 24 * 3600
22
- SHORT_BURST_LIMIT = 5
23
- SHORT_BURST_SECONDS = 60
24
-
25
- attempts = {}
26
- locked = {}
27
-
28
- def _now(): return int(time.time())
29
-
30
- def prune_old_attempts(sid):
31
- cutoff = _now() - WINDOW_SECONDS
32
- if sid in attempts:
33
- attempts[sid] = [t for t in attempts[sid] if t >= cutoff]
34
- if not attempts[sid]:
35
- del attempts[sid]
36
-
37
- def check_lock(sid):
38
- if sid in locked:
39
- if _now() < locked[sid]:
40
- remain = locked[sid] - _now()
41
- return True, f"🔒 已被鎖定,請 {remain // 60} 分鐘後再試。"
42
- else:
43
- locked.pop(sid, None)
44
- attempts.pop(sid, None)
45
- prune_old_attempts(sid)
46
- cnt = len(attempts.get(sid, []))
47
- if cnt >= MAX_FAILED_IN_WINDOW:
48
- locked[sid] = _now() + LOCK_DURATION_SECONDS
49
- return True, f"🔒 嘗試過多,已鎖定 24 小時。"
50
- return False, ""
51
-
52
- def record_failed_attempt(sid):
53
- now = _now()
54
- attempts.setdefault(sid, []).append(now)
55
- prune_old_attempts(sid)
56
- recent_cutoff = now - SHORT_BURST_SECONDS
57
- recent = [t for t in attempts[sid] if t >= recent_cutoff]
58
- if len(recent) >= SHORT_BURST_LIMIT:
59
- locked[sid] = now + 300
60
- return len(attempts[sid]), "⚠️ 多次快速嘗試,暫時鎖定5分鐘。"
61
- return len(attempts[sid]), ""
62
-
63
- def clear_attempts(sid):
64
- attempts.pop(sid, None)
65
- locked.pop(sid, None)
66
-
67
- # ========================
68
- # 🎧 音訊轉錄
69
- # ========================
70
- def split_audio_if_needed(path):
71
  size = os.path.getsize(path)
72
  if size <= MAX_SIZE:
73
  return [path]
74
  audio = AudioSegment.from_file(path)
75
- num = int(size / MAX_SIZE) + 1
76
- chunk_ms = len(audio) / num
77
- files = []
78
- for i in range(num):
79
- start, end = int(i * chunk_ms), int((i + 1) * chunk_ms)
80
- chunk = audio[start:end]
81
  fn = f"chunk_{i+1}.wav"
82
- chunk.export(fn, format="wav")
83
- files.append(fn)
84
- return files
85
 
86
- def transcribe_core(path, model):
87
- # ✅ iPhone LINE 語音(mp4 audio-only)— 不轉檔,只複製改副檔名
88
- if path and path.lower().endswith(".mp4"):
89
- fixed_path = path[:-4] + ".m4a"
90
  try:
91
- shutil.copy(path, fixed_path)
92
- path = fixed_path
93
  print("🔧 已自動修正 mp4 → m4a")
94
  except Exception as e:
95
- print(f"⚠️ mp4→m4a 複製失敗:{e},改用原檔嘗試")
96
 
97
  chunks = split_audio_if_needed(path)
98
  txts = []
99
  for f in chunks:
100
  with open(f, "rb") as af:
101
- res = client.audio.transcriptions.create(
102
- model=model,
103
- file=af,
104
- response_format="text"
105
  )
106
- txts.append(res)
107
  full = "\n".join(txts)
108
- res = client.chat.completions.create(
109
  model="gpt-4o-mini",
110
- messages=[{"role":"user","content":f"請用繁體中文摘要以下內容:\n{full}"}],
111
  temperature=0.4,
112
- )
113
- summ = res.choices[0].message.content.strip()
114
  return full, summ
115
 
116
  # ========================
117
- # 💬 主流程(Gradio)
118
- # ========================
119
- def _normalize_upload_path(file_input):
120
- if not file_input:
121
- return None
122
- if isinstance(file_input, str):
123
- return file_input
124
- if isinstance(file_input, list) and file_input:
125
- return _normalize_upload_path(file_input[0])
126
- path = getattr(file_input, "name", None)
127
- if not path and isinstance(file_input, dict):
128
- path = file_input.get("name") or file_input.get("path")
129
- return path
130
-
131
- def transcribe_with_password(session_id, password, file_input, model_choice):
132
- password = password.strip().replace(" ", "").replace("\u200b", "")
133
- locked_flag, msg = check_lock(session_id)
134
- if locked_flag:
135
- return msg, "", ""
136
- if password != PASSWORD:
137
- cnt, msg2 = record_failed_attempt(session_id)
138
- return msg2 or f"密碼錯誤(第 {cnt} 次)", "", ""
139
- path = _normalize_upload_path(file_input)
140
- if not path or not os.path.exists(path):
141
- return "找不到上傳檔案,請重新選擇。", "", ""
142
- clear_attempts(session_id)
143
- full, summ = transcribe_core(path, model_choice)
144
- return "✅ 轉錄完成", full, summ
145
-
146
- def ask_about_transcript(full_text, q):
147
- if not full_text.strip():
148
- return "⚠️ 尚未有轉錄內容"
149
- if not q.strip():
150
- return "請輸入問題"
151
- prompt = f"以下是轉錄內容:\n{full_text}\n\n問題:{q}\n請用繁體中文回答。"
152
- res = client.chat.completions.create(
153
- model="gpt-4o-mini",
154
- messages=[{"role":"user","content":prompt}],
155
- temperature=0.6,
156
- )
157
- return res.choices[0].message.content.strip()
158
-
159
- # ========================
160
- # 🌐 FastAPI for捷徑 / API
161
  # ========================
162
- api = FastAPI()
163
-
164
- @api.post("/api/transcribe")
165
  async def api_transcribe(file: UploadFile = File(...)):
166
- """供 iPhone 捷徑上傳音"""
167
- temp_path = file.filename
168
- with open(temp_path, "wb") as f:
169
  f.write(await file.read())
170
- text, summary = transcribe_core(temp_path, "whisper-1")
171
- os.remove(temp_path)
172
  return {"text": text, "summary": summary}
173
 
 
 
 
 
174
  # ========================
175
- # 🌐 Gradio介面
176
  # ========================
177
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
178
- gr.Markdown("## 🎧 語音轉錄與摘要工具(私人API勿轉傳|支援 iPhone LINE .mp4)")
179
-
180
- session_state = gr.State(value=None)
181
- with gr.Row():
182
- password_input = gr.Textbox(
183
- label="輸入密碼",
184
- placeholder="請輸入英文與數字(請切換成英文輸入法)",
185
- type="password",
186
- max_lines=1
187
- )
188
- model_choice = gr.Dropdown(
189
- ["whisper-1", "gpt-4o-mini-transcribe"],
190
- value="whisper-1",
191
- label="選擇模型"
192
- )
193
-
194
- file_input = gr.File(
195
- label="上傳音訊 / LINE 語音檔(支援 .m4a, .aac, .wav, .mp4)",
196
- file_count="single",
197
- file_types=["audio", ".mp4", ".m4a", ".aac", ".wav"]
198
- )
199
-
200
- transcribe_btn = gr.Button("開始轉錄與摘要 🚀")
201
- status_box = gr.Textbox(label="狀態", interactive=False)
202
- transcript_box = gr.Textbox(label="完整轉錄文字", lines=10)
203
- copy_transcript = gr.Button("📋 複製轉錄文字")
204
- summary_box = gr.Textbox(label="摘要結果", lines=10)
205
- copy_summary = gr.Button("📋 複製摘要結果")
206
 
207
- with gr.Accordion("💬 進一步問 AI", open=False):
208
- user_q = gr.Textbox(label="輸入問題", lines=2)
209
- ask_btn = gr.Button("詢問 AI 🤔")
210
- ai_reply = gr.Textbox(label="AI 回覆", lines=6)
211
- copy_reply = gr.Button("📋 複製 AI 回覆")
212
-
213
- def init_session():
214
- import uuid
215
- return str(uuid.uuid4())
216
- demo.load(init_session, None, session_state)
217
-
218
- transcribe_btn.click(
219
- transcribe_with_password,
220
- [session_state, password_input, file_input, model_choice],
221
- [status_box, transcript_box, summary_box],
222
- )
223
- ask_btn.click(ask_about_transcript, [transcript_box, user_q], [ai_reply])
224
-
225
- copy_js = """
226
- async (text) => {
227
- try {
228
- await navigator.clipboard.writeText(text);
229
- alert("✅ 已複製到剪貼簿!");
230
- } catch (e) {
231
- alert("❌ 複製失敗:" + e);
232
- }
233
- }
234
- """
235
- copy_transcript.click(fn=None, inputs=transcript_box, outputs=None, js=copy_js)
236
- copy_summary.click(fn=None, inputs=summary_box, outputs=None, js=copy_js)
237
- copy_reply.click(fn=None, inputs=ai_reply, outputs=None, js=copy_js)
238
-
239
- # ✅ 同時啟動 Gradio 與 FastAPI
240
- import threading
241
- import uvicorn
242
 
243
- def run_api():
244
- uvicorn.run(api, host="0.0.0.0", port=7861)
245
 
246
- threading.Thread(target=run_api, daemon=True).start()
247
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
1
import contextlib
import hmac
import os
import shutil
import tempfile
import time

import gradio as gr
from fastapi import FastAPI, File, UploadFile
from openai import OpenAI
from pydub import AudioSegment
8
 
9
  # ========================
10
+ # 🔐 基本設定
11
  # ========================
12
# Shared secret checked by the Gradio form. The built-in fallback is publicly
# known, so make it obvious when a deployment forgot to set APP_PASSWORD.
PASSWORD = os.getenv("APP_PASSWORD", "defaultpass")
if "APP_PASSWORD" not in os.environ:
    print("⚠️ APP_PASSWORD is not set; falling back to the insecure default password")
MAX_SIZE = 25 * 1024 * 1024  # 25 MB; larger files are split before transcription
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
app = FastAPI()  # FastAPI instance that the Gradio UI is mounted on
16
 
17
  # ========================
18
+ # 🎧 音訊轉錄核心
19
  # ========================
20
def split_audio_if_needed(path: str):
    """Split an audio file into chunks small enough for one transcription call.

    Args:
        path: Location of the audio file on disk.

    Returns:
        ``[path]`` when the file is at most ``MAX_SIZE`` bytes. Otherwise a
        list of paths to WAV chunks, in playback order, each written to its
        own uniquely named temporary file. The caller owns the chunk files
        and is responsible for deleting them.
    """
    size = os.path.getsize(path)
    if size <= MAX_SIZE:
        return [path]

    audio = AudioSegment.from_file(path)
    total_ms = len(audio)

    # Chunks are exported as uncompressed WAV, which is usually much larger
    # than the compressed source. Size them from the decoded PCM length, not
    # from the on-disk size of the input, so every chunk fits under MAX_SIZE.
    header_allowance = 1024  # WAV header plus millisecond-rounding slack
    pcm_budget = MAX_SIZE - header_allowance
    pcm_bytes = len(audio.raw_data)
    n = max(1, -(-pcm_bytes // pcm_budget))  # ceiling division
    chunk_ms = total_ms / n

    parts = []
    for i in range(n):
        # Unique temp names keep concurrent requests from overwriting each
        # other's chunks (the old fixed "chunk_N.wav" names collided).
        fd, fn = tempfile.mkstemp(prefix=f"chunk_{i+1}_", suffix=".wav")
        os.close(fd)
        audio[int(i * chunk_ms):int((i + 1) * chunk_ms)].export(fn, format="wav")
        parts.append(fn)
    return parts
33
 
34
def transcribe_core(path: str, model: str = "whisper-1"):
    """Transcribe an audio file and summarise it in Traditional Chinese.

    Args:
        path: Audio file on disk. A ``.mp4`` file is first copied to a
            temporary ``.m4a`` file so it is uploaded under an audio
            extension; if the copy fails the original file is used.
        model: Transcription model name passed to the OpenAI API.

    Returns:
        A ``(full, summ)`` tuple: the per-chunk transcripts joined with
        newlines, and the stripped ``gpt-4o-mini`` summary of that text.
    """
    scratch = []  # temporary files created for this call, removed afterwards
    try:
        if path.lower().endswith(".mp4"):
            try:
                # Copy into a private temp file rather than next to the
                # source, so an existing sibling ".m4a" is never overwritten.
                fd, fixed = tempfile.mkstemp(suffix=".m4a")
                os.close(fd)
                scratch.append(fixed)
                shutil.copy(path, fixed)
                path = fixed
                print("🔧 已自動修正 mp4 → m4a")
            except OSError as e:
                # Best effort: fall back to uploading the original file.
                print(f"⚠️ mp4→m4a 轉檔失敗:{e}")

        chunks = split_audio_if_needed(path)
        # Anything other than the input itself was generated by the splitter.
        scratch.extend(c for c in chunks if c != path)

        txts = []
        for chunk in chunks:
            with open(chunk, "rb") as af:
                piece = client.audio.transcriptions.create(
                    model=model, file=af, response_format="text"
                )
            txts.append(piece)
    finally:
        # Clean up even when the API call fails; ignore already-missing files.
        for tmp in scratch:
            with contextlib.suppress(OSError):
                os.remove(tmp)

    full = "\n".join(txts)
    summ = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": f"請用繁體中文摘要以下內容:\n{full}"}],
        temperature=0.4,
    ).choices[0].message.content.strip()
    return full, summ
59
 
60
  # ========================
61
+ # 🌐 API for 捷徑
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  # ========================
63
@app.post("/api/transcribe")
async def api_transcribe(file: UploadFile = File(...)):
    """Accept an audio upload (e.g. from an iPhone Shortcut) and return JSON.

    Args:
        file: The uploaded audio file from the multipart form.

    Returns:
        ``{"text": <transcript>, "summary": <summary>}``.
    """
    # NOTE(review): unlike the Gradio form, this route does not check
    # APP_PASSWORD — confirm that unauthenticated access is intended.

    # The client-supplied filename is untrusted. Keep only its extension and
    # write to a server-chosen temporary path, so an upload cannot overwrite
    # arbitrary files (e.g. "app.py" or "../something").
    suffix = os.path.splitext(os.path.basename(file.filename or ""))[1]
    fd, temp = tempfile.mkstemp(suffix=suffix)
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(await file.read())
        text, summary = transcribe_core(temp)
    finally:
        # Remove the upload even when transcription raises.
        with contextlib.suppress(OSError):
            os.remove(temp)
    return {"text": text, "summary": summary}
72
 
73
@app.get("/health")
def health():
    """Liveness probe: report an ``ok`` status and the current Unix time."""
    now = int(time.time())  # whole seconds since the epoch
    payload = {"status": "ok", "time": now}
    return payload
76
+
77
  # ========================
78
+ # 💬 Gradio 前端
79
  # ========================
80
def transcribe_with_pw(password, file):
    """Gradio click handler: check the password, then transcribe the upload.

    Args:
        password: Text typed into the password box (``None`` is treated as
            an empty password).
        file: Value of the file component: either a path string or an object
            whose ``name`` attribute is the path. Falsy when nothing was
            uploaded.

    Returns:
        ``(status, transcript, summary)``; the last two are empty strings
        unless transcription succeeded.
    """
    supplied = (password or "").strip()
    # Constant-time comparison so response timing does not reveal how much of
    # the password matched. Compare bytes because compare_digest rejects
    # non-ASCII str arguments.
    if not hmac.compare_digest(supplied.encode("utf-8"), PASSWORD.encode("utf-8")):
        return "❌ 密碼錯誤", "", ""
    if not file:
        return "⚠️ 未選擇檔案", "", ""
    # Accept both a plain path string and a file wrapper exposing `.name`.
    path = file if isinstance(file, str) else file.name
    text, summary = transcribe_core(path)
    return "✅ 完成", text, summary
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
# Gradio front end: password box, file picker, run button and three result
# boxes. Clicking the button feeds (password, file) to transcribe_with_pw and
# shows its (status, transcript, summary) result.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🎧 LINE 語音轉錄與摘要工具(支援 .m4a / .mp4)")

    # Inputs
    password_box = gr.Textbox(label="輸入密碼", type="password")
    upload_box = gr.File(label="上傳音訊檔 (.m4a/.mp3/.wav/.mp4)")
    run_button = gr.Button("開始轉錄 🚀")

    # Outputs
    status_box = gr.Textbox(label="狀態", interactive=False)
    transcript_box = gr.Textbox(label="逐字稿", lines=10)
    summary_box = gr.Textbox(label="摘要", lines=8)

    run_button.click(
        fn=transcribe_with_pw,
        inputs=[password_box, upload_box],
        outputs=[status_box, transcript_box, summary_box],
    )
 
98
 
99
+ # ========================
100
+ # 🚀 啟動(單一 port)
101
+ # ========================
102
# Blocks.launch() has no `app` parameter, so passing one raises TypeError.
# Mount the UI onto the FastAPI instance instead, so /api/transcribe, /health
# and the Gradio front end are all served from one port. The API routes were
# registered before the mount, so they take precedence over the "/" mount.
demo.queue()
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    import uvicorn  # ASGI server; local import because it is only needed here

    # NOTE(review): share links are a launch() feature and are not available
    # when serving through uvicorn — confirm that share=True was not required.
    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "7860")))