File size: 15,540 Bytes
06047fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c6c7e29
06047fb
 
ab8c665
 
 
 
06047fb
073e826
06047fb
c6c7e29
 
06047fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c6c7e29
06047fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab8c665
06047fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b0ec3d3
06047fb
 
 
 
073e826
 
06047fb
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
# -*- coding: utf-8 -*-
# 財政部財政資訊中心 江信宗

import gradio as gr
import tempfile
import subprocess
from groq import Groq
from zhconv_rs import zhconv
from datetime import timedelta
import os
from pathlib import Path

def check_ffmpeg():
    """Report on stdout whether the ``ffmpeg`` executable can be run.

    Runs ``ffmpeg -version`` and prints exactly one message: success, a
    non-zero exit status, or the executable not being found. Those outcomes
    are only reported, never raised, and nothing is returned.
    """
    probe_command = ["ffmpeg", "-version"]
    try:
        subprocess.run(probe_command, check=True, capture_output=True, text=True)
    except FileNotFoundError:
        # No executable named ``ffmpeg`` could be started.
        print("Error: FFmpeg is not installed or not in the system PATH.")
    except subprocess.CalledProcessError:
        # ``check=True`` converts a non-zero exit status into this exception.
        print("Error: FFmpeg is not installed or not working properly.")
    else:
        print("FFmpeg is installed and working.")

def format_time(seconds):
    """Format an offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        seconds: Offset in seconds (int or float).

    Returns:
        The timestamp rounded to the nearest millisecond. Hours are not
        wrapped at 24, and negative inputs are clamped to zero.
    """
    # Round once on the *total* so that e.g. 0.9996 s carries into the seconds
    # field instead of yielding an out-of-range "1000" millisecond part.
    total_ms = max(0, round(timedelta(seconds=seconds) / timedelta(milliseconds=1)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

def json_to_srt(segments):
    """Render transcription segments as the text of an SRT subtitle file.

    Args:
        segments: Iterable of mappings, each read through its ``'start'``,
            ``'end'`` and ``'text'`` keys.

    Returns:
        One cue per segment, numbered from 1, with the times rendered by
        ``format_time`` and the text passed through ``zhconv(..., "zh-tw")``.
        Consecutive cues are separated by a blank line.
    """
    def render_cue(index, segment):
        begin = format_time(segment['start'])
        finish = format_time(segment['end'])
        caption = zhconv(segment['text'], "zh-tw")
        return f"{index}\n{begin} --> {finish}\n{caption}\n"

    return "\n".join(
        render_cue(number, segment) for number, segment in enumerate(segments, 1)
    )

def _discard_upload(file):
    """Best-effort removal of the uploaded temp file; never raises."""
    try:
        os.remove(file.name)
    except (OSError, AttributeError, TypeError):
        # Deliberately ignored: the file may already be gone, and failing to
        # clean up must not mask the validation message shown to the user.
        pass


def validate_and_convert(file, Language, api_key):
    """Validate an upload and the entered key, then start the conversion.

    Args:
        file: The uploaded file object (its ``.name`` is the path on disk),
            or ``None`` when nothing was selected.
        Language: Display name of the audio language, forwarded unchanged.
        api_key: Text typed into the key box.

    Returns:
        A 5-tuple matching the UI outputs
        ``(file, mp3_path, status_text, transcript_text, srt_path)``.
        Every validation failure yields ``(None, None, message, None, None)``.
    """
    if file is None:
        return None, None, "沒有選擇文件", None, None

    # The typed value is accepted only when it equals the SPACE_ID environment
    # variable; the real service key is then read from YOUR_API_KEY. An empty
    # value or any other value is rejected and the upload is discarded.
    if not api_key or api_key != os.getenv("SPACE_ID"):
        _discard_upload(file)
        gr.Warning("請輸入正確的 API Key!!")
        return None, None, "請輸入正確的 API Key!!", None, None
    api_key = os.getenv("YOUR_API_KEY")

    gr.Info("檔案上傳完成,開始轉換......")
    MAX_FILE_SIZE = 200 * 1024 * 1024  # 200MB
    ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'm4a', 'wav', 'ogg', 'flac', 'webm', 'mpga']
    try:
        file_path = Path(file.name)
        if not file_path.exists():
            return None, None, f"找不到上傳的檔案:{file.name}", None, None
        file_extension = file_path.suffix[1:].lower()
        if file_extension not in ALLOWED_EXTENSIONS:
            return None, None, f"不支援的檔案類型!請上傳以下格式之一的檔案:{', '.join(ALLOWED_EXTENSIONS)}", None, None
        file_size = file_path.stat().st_size
        if file_size > MAX_FILE_SIZE:
            return None, None, "檔案已超過200MB限制,請上傳較小的檔案。", None, None
        # Progress toasts are only shown for uploads larger than 50 MB.
        show_info = file_size > 50 * 1024 * 1024
        return convert_to_mp3(file, Language, api_key, show_info)
    except Exception as e:
        # UI boundary: report any unexpected failure as status text.
        return None, None, f"檔案處理錯誤:{str(e)}", None, None

def convert_to_mp3(file, Language, api_key, show_info):
    """Transcode the upload to MP3 with ffmpeg, then transcribe it.

    Args:
        file: Uploaded file object; ``file.name`` is the input path.
        Language: Display name of the audio language, forwarded to
            ``transcribe_audio``.
        api_key: Service key forwarded to ``transcribe_audio``.
        show_info: Whether to show progress toasts; forced on once the MP3
            exceeds 5 MB.

    Returns:
        A 5-tuple ``(file, mp3_path, status_text, transcript_text, srt_path)``.
        Conversion failures return ``(None, None, message, None, None)``;
        a transcription failure returns the message in the transcript slot
        with ``srt_path`` set to ``None``.
    """
    input_path = file.name
    # A fresh directory per call keeps concurrent requests from overwriting
    # each other's "output.mp3".
    output_path = os.path.join(tempfile.mkdtemp(), "output.mp3")
    # Argument list without a shell: the upload's path is passed verbatim and
    # is never interpreted as shell syntax.
    command = [
        "ffmpeg", "-i", input_path,
        "-acodec", "libmp3lame", "-b:a", "48k",
        "-y", output_path,
    ]
    try:
        if show_info:
            gr.Info("開始轉換為音檔,請稍候......")
        subprocess.run(command, check=True, capture_output=True, text=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers the ffmpeg executable being missing, which a shell
        # would previously have reported as a non-zero exit status.
        return None, None, f"轉換失敗:{str(e)}", None, None

    if not os.path.exists(output_path):
        return None, None, "轉換失敗:輸出文件不存在", None, None

    file_size = os.path.getsize(output_path)
    if file_size > 25 * 1024 * 1024:
        return None, None, "轉譯限制:MP3檔案大小不得超過25MB!", None, None
    if file_size > 5 * 1024 * 1024:
        show_info = True
    size_info = f"MP3 檔案大小: {file_size / 1024:.2f} KB"

    result = transcribe_audio(output_path, Language, api_key, show_info)
    if isinstance(result, str):
        # A bare string is an error message; unpacking it would raise.
        return file, output_path, size_info, result, None
    transcription, transcription_text = result
    if not isinstance(transcription, list):
        return file, output_path, size_info, transcription, None

    if show_info:
        gr.Info("開始產製SRT字幕檔......")
    srt_content = json_to_srt(transcription)
    srt_file = create_srt_file(srt_content)
    return file, output_path, size_info, transcription_text, srt_file

# System prompt for the transcript-editing chat request. The blank-line layout
# is part of the text sent to the model.
SUMMARY_SYSTEM_PROMPT = """

你是一個精通繁體中文和臺灣用語的中文編輯。

使用者會提供給你一段逐字稿,請協助我檢查錯字、修正標點符號的使用,盡可能保持原意不變、保留內容細節(例如故事、提到的數字和案例)的情況下,提供優化中文排版後所有的逐字稿內容。



## 限制

提供時請分成數個段落,並替這段落下個合適的標題,並依照以下限制輸出。而且無論如何,都要提供所有的逐字稿內容,請不要擅自刪減或總結成段落!



### 中文錯字修正範例

1. 「罰還」改成「罰緩」

2. 「巧巧」改成「悄悄」

3. 「辯試」改成「辨識」

4. 「規護」改成「歸戶」

5. 「披頭」改成「劈頭」

6. 「查器」改成「查緝」



### 中文排版&標點符號的使用原則

1. 一律使用全形符號,例如用「」做引號,而不是 “”;用 "," 做為逗號,而不是 ","。

2. 省略號是……(兩個英文省略號),不是。。。,也不是......(六個點)

3. 中文與英文或數字之間需要增加半形空格。正確用法:「Apple 課程人數已經超過 2000 人了。」;錯誤用法:「Apple課程人數已經超過2000人了。」

4. 遇到完整的英文整句、特殊名詞,其內容使用半形標點。正確用法:「賈伯斯說過:"Stay hungry, stay foolish."」;錯誤用法:「賈伯斯說過:"Stay hungry,stay foolish。”」

5. 一律在中英文之間增加空格

6. 在中文與數字之間增加空格

7. 在數字與單位之間增加空格

8. 全形標點與其他字符之間不加空格。



### 其他限制

- 請注意,直接輸出結果給我,不需要有開頭招呼。

- 無論如何,都要提供所有的逐字稿內容,請不要擅自刪減或總結成段落!



## 輸出格式

**{總結後的段落重點}**



{文字段落內容}



**{總結後的段落重點}**



{文字段落內容}

"""


def summarize_article(trans_text, api_key):
    """Ask the Groq chat model to proofread and re-paragraph a transcript.

    Args:
        trans_text: Transcript text sent as the user message.
        api_key: Groq API key; when falsy, the ``YOUR_API_KEY`` environment
            variable is used instead.

    Returns:
        The model's reply as a string (empty if the reply has no content).
        Any failure is returned, not raised, as text prefixed with
        ``總結失敗:``.
    """
    try:
        if not api_key:
            api_key = os.getenv("YOUR_API_KEY")
        client = Groq(api_key=api_key)
        # NOTE(review): confirm this model id is still served by Groq.
        response = client.chat.completions.create(
            model="llama-3.1-70b-versatile",
            messages=[
                {"role": "system", "content": SUMMARY_SYSTEM_PROMPT},
                {"role": "user", "content": trans_text},
            ],
            temperature=0.2,
        )
        # Callers invoke .strip() on the result, so never hand back None.
        return response.choices[0].message.content or ""
    except Exception as e:
        return f"總結失敗:{str(e)}"

# Display name -> language code passed to the transcription request.
# Built once at import time instead of on every call.
_LANGUAGE_CODES = {
    "繁體中文": "zh", "English": "en", "German": "de", "Spanish": "es",
    "Russian": "ru", "Korean": "ko", "French": "fr", "Japanese": "ja",
    "Portuguese": "pt", "Turkish": "tr", "Polish": "pl", "Catalan": "ca",
    "Dutch": "nl", "Arabic": "ar", "Swedish": "sv", "Italian": "it",
    "Indonesian": "id", "Hindi": "hi", "Finnish": "fi", "Vietnamese": "vi",
    "Hebrew": "he", "Ukrainian": "uk", "Greek": "el", "Malay": "ms",
    "Czech": "cs", "Romanian": "ro", "Danish": "da", "Hungarian": "hu",
    "Tamil": "ta", "Norwegian": "no", "Thai": "th", "Urdu": "ur",
    "Croatian": "hr", "Bulgarian": "bg", "Lithuanian": "lt", "Latin": "la",
    "Maori": "mi", "Malayalam": "ml", "Welsh": "cy", "Slovak": "sk",
    "Telugu": "te", "Persian": "fa", "Latvian": "lv", "Bengali": "bn",
    "Serbian": "sr", "Azerbaijani": "az", "Slovenian": "sl", "Kannada": "kn",
    "Estonian": "et", "Macedonian": "mk", "Breton": "br", "Basque": "eu",
    "Icelandic": "is", "Armenian": "hy", "Nepali": "ne", "Mongolian": "mn",
    "Bosnian": "bs", "Kazakh": "kk", "Albanian": "sq", "Swahili": "sw",
    "Galician": "gl", "Marathi": "mr", "Punjabi": "pa", "Sindhi": "sd",
    "Gujarati": "gu", "Amharic": "am", "Yiddish": "yi", "Lao": "lo",
    "Uzbek": "uz", "Faroese": "fo", "Haitian creole": "ht", "Pashto": "ps",
    "Turkmen": "tk", "Nynorsk": "nn", "Maltese": "mt", "Sanskrit": "sa",
    "Luxembourgish": "lb", "Myanmar": "my", "Tibetan": "bo", "Tagalog": "tl",
    "Malagasy": "mg", "Assamese": "as", "Tatar": "tt", "Hawaiian": "haw",
    "Lingala": "ln", "Hausa": "ha", "Bashkir": "ba", "Javanese": "jw",
    "Sundanese": "su",
}


def transcribe_audio(filename, Language, api_key, show_info):
    """Transcribe an audio file through Groq and post-process the text.

    Args:
        filename: Path of the audio file to upload.
        Language: Display name of the spoken language; names missing from the
            language table are sent as ``language=None``.
        api_key: Groq API key; when falsy, the ``YOUR_API_KEY`` environment
            variable is used instead.
        show_info: When true, progress toasts are shown and non-English text
            is sent to ``summarize_article`` in 1000-character chunks rather
            than in one request.

    Returns:
        Always a 2-tuple. On success ``(segments, text)``: the segments of the
        transcription and the stripped display text. On any failure
        ``(message, None)`` with a message prefixed by ``轉譯失敗:``.
    """
    try:
        if not api_key:
            api_key = os.getenv("YOUR_API_KEY")
        if show_info:
            gr.Info("開始轉譯,請稍候......")
        client = Groq(api_key=api_key)
        selected_language = _LANGUAGE_CODES.get(Language)
        selected_model = "distil-whisper-large-v3-en" if Language == "English" else "whisper-large-v3"
        with open(filename, "rb") as audio:
            transcription = client.audio.transcriptions.create(
                file=(filename, audio.read()),
                model=selected_model,
                response_format="verbose_json",
                language=selected_language,
                temperature=0.0,
            )
        if Language == "English":
            summary = transcription.text
        else:
            # NOTE(review): every non-English language goes through the zh-tw
            # conversion and the Chinese-editor prompt — confirm that is
            # intended for languages other than Chinese.
            full_text = zhconv(transcription.text, "zh-tw")
            if show_info:
                chunks = [full_text[i:i+1000] for i in range(0, len(full_text), 1000)]
                summaries = []
                for i, chunk in enumerate(chunks):
                    gr.Info(f"正在處理第 {i+1}/{len(chunks)} 部分...")
                    chunk_summary = summarize_article(chunk, api_key)
                    summaries.append(chunk_summary.strip())
                summary = "\n\n".join(summaries)
            else:
                summary = summarize_article(full_text, api_key)
        return transcription.segments, summary.strip()
    except Exception as e:
        # Keep the 2-tuple shape so the caller's unpacking cannot fail. The
        # message goes first because convert_to_mp3 shows a non-list first
        # element as the transcript text.
        return f"轉譯失敗:{str(e)}", None

def create_srt_file(srt_content):
    """Write *srt_content* to a new ``.srt`` temporary file and return its path.

    The file is written as UTF-8 and is left on disk for the caller.
    """
    descriptor, srt_path = tempfile.mkstemp(suffix=".srt")
    with os.fdopen(descriptor, "w", encoding="utf-8") as srt_stream:
        srt_stream.write(srt_content)
    return srt_path

def clear_inputs():
    """Return five ``None`` values, one for each output bound to the clear button."""
    return (None,) * 5

# Stylesheet handed to gr.Blocks(css=...). Each rule targets a class name that
# the components below attach through their elem_classes argument
# (center-aligned, file-background, api-background, script-background,
# srt-background, text-background, clear-button), plus the .gr-input / .gr-box /
# .gr-dropdown selectors.
custom_css = """

.center-aligned {

    text-align: center !important;

    color: #ff4081;

    text-shadow: 2px 2px 4px rgba(0,0,0,0.1);

    margin-bottom: 0 !important;

}

.gr-input, .gr-box, .gr-dropdown {

    border-radius: 10px !important;

    border: 2px solid #ff4081 !important;

    margin: 0 !important;

}

.gr-input:focus, .gr-box:focus, .gr-dropdown:focus {

    border-color: #f50057 !important;

    box-shadow: 0 0 0 2px rgba(245,0,87,0.2) !important;

}

.file-background {

    background-color: #B7E0FF !important;

    padding: 15px !important;

    border-radius: 10px !important;

    margin: 0 !important;

    height: auto;

}

.api-background {

    background-color: #FFCFB3 !important;

    padding: 15px !important;

    border-radius: 10px !important;

    margin: 0 !important;

}

.script-background {

    background-color: #FEF9D9 !important;

    padding: 15px !important;

    border-radius: 10px !important;

    margin: 0 !important;

}

.script-background textarea {

    font-size: 18px !important;

    background-color: #ffffff;

    border: 1px solid #f0f8ff;

    border-radius: 8px;

}

.srt-background {

    background-color: #FFF4B5 !important;

    padding: 5px !important;

    border-radius: 10px !important;

    margin: 0 !important;

}

.text-background {

    padding: 5px !important;

    border-radius: 10px !important;

    border: 2px solid #B7E0FF !important;

    margin: 0 !important;

}

.clear-button {

    border-radius: 10px !important;

    background-color: #333333 !important;

    color: white !important;

    font-weight: bold !important;

    transition: all 0.3s ease !important;

}

.clear-button:hover {

    background-color: #000000 !important;

    transform: scale(1.05);

}

"""

# Page layout and event wiring. Components are created in display order; the
# literals and the shared output list are bound to names first for readability.
with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as demo:
    banner_markdown = """

    # 📝 凊彩歐北寫 - 財政部財政資訊中心 🎵

    > ### **※ 玩轉聲音魅力,開拓更多可能性,自動生成 Note-taking Record,系統布署:江信宗,LLM:Whisper large-v3。**<br>依據 <a href="https://www.youtube.com/static?template=terms&hl=zh-Hant" style="color: black;">YouTube 的服務條款(ToS)</a>,請自行明確取得 YouTube 著作權人授權後再上傳影片進行轉譯!

    """
    gr.Markdown(banner_markdown, elem_classes="center-aligned")

    # Languages offered in the dropdown.
    language_choices = ["繁體中文","English","Japanese","Korean","German","French","Spanish","Arabic","Italian","Portuguese","Thai","Vietnamese","Malay","Indonesian","Hindi","Bengali","Russian"]

    # Top row: upload widget on the left, key and language inputs on the right.
    with gr.Row():
        file_input = gr.File(label="上傳影片或音訊檔", file_count="single", elem_classes="file-background")
        with gr.Column():
            api_key_input = gr.Textbox(
                label="輸入您的 API Key",
                type="password",
                placeholder="API authentication key",
                elem_classes="api-background",
            )
            Language = gr.Dropdown(
                choices=language_choices,
                value="繁體中文",
                label="媒體檔之音訊語言",
                interactive=True,
                elem_classes="api-background",
            )

    output_audio = gr.Audio(label="轉換後的 MP3", type="filepath", elem_classes="script-background")

    # Second row: SRT download, status text and the clear button.
    with gr.Row():
        srt_file_output = gr.File(label="下載 SRT 字幕檔", elem_classes="srt-background")
        output_text = gr.Textbox(label="音訊檔案大小", elem_classes="script-background")
        clear_button = gr.Button("清除", elem_classes="clear-button")

    footer_html = """

        <span style="font-size: 20px; color: black; font-weight:bold;">歡迎將轉譯結果製作為</span><a href="https://podcast.fiai.us.kg/" style="font-size: 20px; color: red; font-weight:bold;">財資歐北共 Podcast</a><span style="font-size: 20px; color: black;"> ,</span><span style="font-size: 20px; color: black;">重點摘要及RAG知識問答建議使用 </span><a href="https://notebooklm.google.com" style="font-size: 20px; color: red;">Google NotebookLM</a><span style="font-size: 20px; color: black;"> 更佳。</span>

        """
    gr.HTML(footer_html)

    transcription_text = gr.Markdown(label="語音轉譯結果", elem_classes="text-background")

    # Both handlers fill the same five components, in this order.
    result_components = [file_input, output_audio, output_text, transcription_text, srt_file_output]

    # Uploading a file runs validation, conversion and transcription.
    file_input.upload(
        fn=validate_and_convert,
        inputs=[file_input, Language, api_key_input],
        outputs=result_components,
    )
    # The clear button resets every result component.
    clear_button.click(fn=clear_inputs, inputs=[], outputs=result_components)

if __name__ == "__main__":
    # Script entry point: report FFmpeg availability, then serve the queued app.
    check_ffmpeg()
    # With SPACE_ID set, launch with defaults; otherwise pass share=True and
    # show_api=False.
    launch_options = {} if "SPACE_ID" in os.environ else {"share": True, "show_api": False}
    demo.queue().launch(**launch_options)