MichaelChou0806 committed on
Commit
65c4624
·
verified ·
1 Parent(s): 7f10151

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -0
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # file: app.py
2
+ import os
3
+ from pydub import AudioSegment
4
+ from openai import OpenAI
5
+ import gradio as gr
6
+
7
# Shared OpenAI client; the API key is read from the OPENAI_API_KEY
# environment variable (None when the variable is unset).
client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
)

# Size threshold in bytes (25 MiB) above which an audio file is split
# into smaller pieces before being sent for transcription.
MAX_SIZE = 25 * 1024 * 1024
9
+
10
def split_audio_if_needed(input_path):
    """Split an audio file into WAV chunks that each fit within ``MAX_SIZE``.

    Files already at or below ``MAX_SIZE`` bytes are returned untouched.
    Larger files are decoded with pydub and cut into equal-length pieces.

    The number of pieces is derived from the size of the *exported* audio
    (uncompressed PCM), not from the size of the input file: a compressed
    input such as .m4a or .aac grows considerably when re-encoded as WAV,
    so sizing by the input's byte count would produce chunks well above
    the limit.

    Args:
        input_path: Path to the audio file to inspect.

    Returns:
        A list of file paths in playback order: either ``[input_path]`` or
        the paths of newly created temporary ``.wav`` files. The caller is
        responsible for deleting any temporary files it receives.

    Raises:
        OSError: If ``input_path`` cannot be read or a chunk cannot be
            written. Errors raised by pydub while decoding or exporting
            propagate unchanged.
    """
    import tempfile  # local import: only needed on the splitting path

    if os.path.getsize(input_path) <= MAX_SIZE:
        return [input_path]

    audio = AudioSegment.from_file(input_path)
    total_ms = len(audio)  # pydub reports segment length in milliseconds

    # Bytes of PCM written per millisecond of audio once exported as WAV
    # (frame_width already accounts for channel count and sample width).
    bytes_per_ms = audio.frame_rate * audio.frame_width / 1000
    # Keep a little headroom for the WAV header and frame-boundary rounding.
    payload_budget = MAX_SIZE - 4096
    max_chunk_ms = max(1, int(payload_budget / bytes_per_ms))
    # Ceiling division; at least one chunk even for zero-length audio.
    num_chunks = max(1, -(-total_ms // max_chunk_ms))

    chunk_files = []
    try:
        for i in range(num_chunks):
            # Integer boundaries: contiguous, cover the whole clip, and
            # never longer than max_chunk_ms.
            start = i * total_ms // num_chunks
            end = (i + 1) * total_ms // num_chunks
            # Unique temp file per chunk so concurrent requests cannot
            # overwrite each other's data.
            fd, chunk_filename = tempfile.mkstemp(
                prefix=f"chunk_{i + 1}_", suffix=".wav"
            )
            os.close(fd)
            chunk_files.append(chunk_filename)
            audio[start:end].export(chunk_filename, format="wav")
    except Exception:
        # Do not leave partial output behind when splitting fails.
        for leftover in chunk_files:
            try:
                os.remove(leftover)
            except OSError:
                pass
        raise
    return chunk_files
28
+
29
def transcribe_and_summarize(file):
    """Transcribe an audio file and summarize the transcript.

    The file is split into upload-sized pieces when necessary, each piece
    is transcribed with the ``whisper-1`` model, and the joined transcript
    is summarized in Traditional Chinese by ``gpt-4o-mini``.

    Args:
        file: Path to the uploaded audio file, or a falsy value when
            nothing was uploaded.

    Returns:
        A ``(transcript, summary)`` tuple of strings. When ``file`` is
        falsy, the first element is a message asking the user to upload
        audio and the second is an empty string.
    """
    if not file:
        return "請上傳音訊檔。", ""

    chunks = split_audio_if_needed(file)
    transcripts = []
    try:
        for chunk_path in chunks:
            with open(chunk_path, "rb") as audio_file:
                text = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="text",
                )
            transcripts.append(text)
    finally:
        # Remove the generated chunk files (never the user's upload) so
        # repeated requests do not fill the disk. Cleanup is best-effort:
        # a failure to delete must not mask the real result or error.
        for chunk_path in chunks:
            if chunk_path != file:
                try:
                    os.remove(chunk_path)
                except OSError:
                    pass

    full_text = "\n".join(transcripts)

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "你是一位精準且擅長摘要的助手。"},
            {"role": "user", "content": "請用繁體中文摘要以下內容:\n" + full_text},
        ],
        temperature=0.4,
    )
    # The message content can be None; treat that as an empty summary
    # instead of raising AttributeError on .strip().
    summary = (response.choices[0].message.content or "").strip()
    return full_text, summary
54
+
55
# Gradio UI: one audio upload (passed to the handler as a file path) and
# two text boxes showing the full transcript and its summary.
_audio_input = gr.Audio(type="filepath", label="上傳音檔 (.m4a, .aac, .wav)")
_output_boxes = [
    gr.Textbox(label="完整轉錄文字", lines=10),
    gr.Textbox(label="摘要結果", lines=10),
]

iface = gr.Interface(
    fn=transcribe_and_summarize,
    inputs=_audio_input,
    outputs=_output_boxes,
    title="語音轉錄與摘要工具",
    description="上傳音檔後,自動轉錄成文字並生成摘要(繁體中文)。",
)

# Start the web server as soon as the module is executed.
iface.launch()