# NOTE: This listing was captured from a Hugging Face Space file viewer.
# Space status at capture time: Running. File size: 15,540 bytes.
# Revisions shown in the viewer's blame gutter: 06047fb, c6c7e29, ab8c665, 073e826, b0ec3d3.
# (The viewer's line-number gutter, 1-316, was page residue and is omitted here.)
# -*- coding: utf-8 -*-
# 財政部財政資訊中心 江信宗
import gradio as gr
import tempfile
import subprocess
from groq import Groq
from zhconv_rs import zhconv
from datetime import timedelta
import os
from pathlib import Path
def check_ffmpeg():
    """Probe for a working ``ffmpeg`` executable and print the outcome.

    Runs ``ffmpeg -version`` with captured output. Nothing is returned and
    nothing is raised for the two handled failure modes; the result is only
    reported on stdout.
    """
    probe_command = ["ffmpeg", "-version"]
    try:
        subprocess.run(probe_command, check=True, capture_output=True, text=True)
    except FileNotFoundError:
        # The executable could not be located at all.
        status = "Error: FFmpeg is not installed or not in the system PATH."
    except subprocess.CalledProcessError:
        # The executable ran but exited with a non-zero status.
        status = "Error: FFmpeg is not installed or not working properly."
    else:
        status = "FFmpeg is installed and working."
    print(status)
def format_time(seconds):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds *before* being split into
    fields, so a rounding carry propagates into the seconds/minutes/hours
    fields instead of producing a four-digit millisecond field. Hours are not
    wrapped at 24, so durations of a day or more stay monotonic.

    Args:
        seconds: Non-negative offset in seconds (int or float). Negative
            values are clamped to zero because SRT has no negative timestamps.

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"``.
    """
    total_milliseconds = max(0, round(seconds * 1000))
    total_seconds, milliseconds = divmod(total_milliseconds, 1000)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
def json_to_srt(segments):
    """Render transcription segments as the text of an SRT subtitle file.

    Each segment is read through its ``'start'``, ``'end'`` and ``'text'``
    keys. Cues are numbered from 1, the text is passed through ``zhconv`` with
    the ``"zh-tw"`` target, and cues are separated by a blank line.

    Args:
        segments: Iterable of mappings with ``start``, ``end`` and ``text``.

    Returns:
        The SRT document as one string (empty when there are no segments).
    """
    cues = [
        "{}\n{} --> {}\n{}\n".format(
            index,
            format_time(segment['start']),
            format_time(segment['end']),
            zhconv(segment['text'], "zh-tw"),
        )
        for index, segment in enumerate(segments, 1)
    ]
    return "\n".join(cues)
def _discard_upload(file):
    """Best-effort removal of the uploaded temp file; never raises."""
    try:
        os.remove(file.name)
    except (AttributeError, TypeError, OSError):
        # Cleanup is a courtesy only: a missing or already-removed upload must
        # not mask the validation message returned to the user.
        pass


def validate_and_convert(file, Language, api_key):
    """Validate the access key and the uploaded file, then start conversion.

    The value typed into the "API Key" box is compared against the
    ``SPACE_ID`` environment variable; on a match the real key is read from
    ``YOUR_API_KEY``. On any other value the upload is deleted and a warning
    is shown.

    Args:
        file: Upload object exposing the path as ``file.name``, or ``None``.
        Language: Display name of the audio language chosen in the UI.
        api_key: Access key typed by the user.

    Returns:
        A 5-tuple ``(file, mp3_path, message, transcript, srt_path)`` matching
        the outputs wired to the upload event. On every rejection path all
        items except the message are ``None``.
    """
    MAX_FILE_SIZE = 200 * 1024 * 1024  # 200MB
    ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'm4a', 'wav', 'ogg', 'flac', 'webm', 'mpga']

    # Check for a missing upload first: every later step dereferences
    # ``file.name`` and would raise on ``None``.
    if file is None:
        return None, None, "沒有選擇文件", None, None

    if not api_key or api_key != os.getenv("SPACE_ID"):
        _discard_upload(file)
        gr.Warning("請輸入正確的 API Key!!")
        return None, None, "請輸入正確的 API Key!!", None, None
    api_key = os.getenv("YOUR_API_KEY")

    gr.Info("檔案上傳完成,開始轉換......")
    try:
        file_path = Path(file.name)
        if not file_path.exists():
            return None, None, f"找不到上傳的檔案:{file.name}", None, None
        file_extension = file_path.suffix[1:].lower()
        if file_extension not in ALLOWED_EXTENSIONS:
            return None, None, f"不支援的檔案類型!請上傳以下格式之一的檔案:{', '.join(ALLOWED_EXTENSIONS)}", None, None
        file_size = file_path.stat().st_size
        if file_size > MAX_FILE_SIZE:
            return None, None, "檔案已超過200MB限制,請上傳較小的檔案。", None, None
        # Uploads over 50 MB get progress toasts during the later stages.
        show_info = file_size > 50 * 1024 * 1024
        return convert_to_mp3(file, Language, api_key, show_info)
    except Exception as e:
        # UI boundary: report the failure in the status box instead of
        # letting the event handler crash.
        return None, None, f"檔案處理錯誤:{str(e)}", None, None
def convert_to_mp3(file, Language, api_key, show_info):
    """Transcode the upload to a 48 kbit/s MP3, transcribe it and build an SRT.

    ffmpeg is invoked with an argument list (no shell), so characters in the
    uploaded file's path cannot be interpreted as shell syntax.

    Args:
        file: Upload object exposing the source path as ``file.name``.
        Language: Display name of the audio language, forwarded to
            ``transcribe_audio``.
        api_key: Key forwarded to ``transcribe_audio``.
        show_info: When true, progress toasts are shown via ``gr.Info``.
            Forced on once the MP3 exceeds 5 MB.

    Returns:
        A 5-tuple ``(file, mp3_path, message, transcript, srt_path)``. On a
        conversion failure or an oversized MP3, everything but the message is
        ``None``. On a transcription failure the error text is placed in the
        transcript slot and ``srt_path`` is ``None``.
    """
    input_path = file.name
    # NOTE(review): the output name is fixed, so overlapping requests would
    # overwrite each other's MP3 — confirm the queue serialises this handler.
    output_path = os.path.join(tempfile.gettempdir(), "output.mp3")
    command = [
        "ffmpeg", "-i", input_path,
        "-acodec", "libmp3lame", "-b:a", "48k",
        "-y", output_path,
    ]
    if show_info:
        gr.Info("開始轉換為音檔,請稍候......")
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing ffmpeg binary, which without a shell
        # surfaces as FileNotFoundError instead of a non-zero exit status.
        return None, None, f"轉換失敗:{str(e)}", None, None

    if not os.path.exists(output_path):
        return None, None, "轉換失敗:輸出文件不存在", None, None

    file_size = os.path.getsize(output_path)
    if file_size > 25 * 1024 * 1024:
        return None, None, "轉譯限制:MP3檔案大小不得超過25MB!", None, None
    if file_size > 5 * 1024 * 1024:
        show_info = True
    size_info = f"MP3 檔案大小: {file_size / 1024:.2f} KB"

    result = transcribe_audio(output_path, Language, api_key, show_info)
    if isinstance(result, str):
        # A bare string is an error message; unpacking it into two names
        # would raise ValueError and hide the real cause.
        return file, output_path, size_info, result, None
    transcription, transcription_text = result
    if not isinstance(transcription, list):
        # Not a segment list: pass through whatever was reported as the text.
        return file, output_path, size_info, transcription, None

    if show_info:
        gr.Info("開始產製SRT字幕檔......")
    srt_file = create_srt_file(json_to_srt(transcription))
    return file, output_path, size_info, transcription_text, srt_file
# System prompt (Traditional Chinese) telling the model to proofread and
# re-typeset a transcript without dropping or summarising content.
# Two fixes relative to the previous text: the first typo example now teaches
# the correct term 罰鍰, and the comma rule contrasts a full-width comma with
# a half-width one instead of showing the same glyph twice.
_SUMMARY_SYSTEM_PROMPT = """
你是一個精通繁體中文和臺灣用語的中文編輯。
使用者會提供給你一段逐字稿,請協助我檢查錯字、修正標點符號的使用,盡可能保持原意不變、保留內容細節(例如故事、提到的數字和案例)的情況下,提供優化中文排版後所有的逐字稿內容。
## 限制
提供時請分成數個段落,並替這段落下個合適的標題,並依照以下限制輸出。而且無論如何,都要提供所有的逐字稿內容,請不要擅自刪減或總結成段落!
### 中文錯字修正範例
1. 「罰還」改成「罰鍰」
2. 「巧巧」改成「悄悄」
3. 「辯試」改成「辨識」
4. 「規護」改成「歸戶」
5. 「披頭」改成「劈頭」
6. 「查器」改成「查緝」
### 中文排版&標點符號的使用原則
1. 一律使用全形符號,例如用「」做引號,而不是 “”;用 "," 做為逗號,而不是 ","。
2. 省略號是……(兩個英文省略號),不是。。。,也不是......(六個點)
3. 中文與英文或數字之間需要增加半形空格。正確用法:「Apple 課程人數已經超過 2000 人了。」;錯誤用法:「Apple課程人數已經超過2000人了。」
4. 遇到完整的英文整句、特殊名詞,其內容使用半形標點。正確用法:「賈伯斯說過:"Stay hungry, stay foolish."」;錯誤用法:「賈伯斯說過:"Stay hungry,stay foolish。”」
5. 一律在中英文之間增加空格
6. 在中文與數字之間增加空格
7. 在數字與單位之間增加空格
8. 全形標點與其他字符之間不加空格。
### 其他限制
- 請注意,直接輸出結果給我,不需要有開頭招呼。
- 無論如何,都要提供所有的逐字稿內容,請不要擅自刪減或總結成段落!
## 輸出格式
**{總結後的段落重點}**
{文字段落內容}
**{總結後的段落重點}**
{文字段落內容}
"""


def summarize_article(trans_text, api_key):
    """Ask the chat model to proofread and re-typeset a transcript chunk.

    Args:
        trans_text: Transcript text to clean up.
        api_key: Groq API key; when falsy the ``YOUR_API_KEY`` environment
            variable is used instead.

    Returns:
        The model's reply text. An empty or whitespace-only transcript
        returns ``""`` without calling the API. On any failure a string
        starting with ``"總結失敗:"`` is returned instead of raising, so the
        caller can embed it in the output.
    """
    if not trans_text or not trans_text.strip():
        # Nothing to edit; avoid a pointless (and billable) request.
        return ""
    try:
        if not api_key:
            api_key = os.getenv("YOUR_API_KEY")
        client = Groq(api_key=api_key)
        response = client.chat.completions.create(
            # NOTE(review): confirm this model id is still served by the
            # provider; hosted model ids get retired over time.
            model="llama-3.1-70b-versatile",
            messages=[
                {"role": "system", "content": _SUMMARY_SYSTEM_PROMPT},
                {"role": "user", "content": trans_text}
            ],
            temperature=0.2
        )
        return response.choices[0].message.content
    except Exception as e:
        # Deliberate best-effort: report the failure as text.
        return f"總結失敗:{str(e)}"
def transcribe_audio(filename, Language, api_key, show_info):
    """Transcribe an audio file with Groq Whisper and tidy the transcript.

    Args:
        filename: Path of the audio file to upload.
        Language: Display name of the audio language; mapped to a language
            code below. ``"English"`` selects the English-only model and
            skips the clean-up pass.
        api_key: Groq API key; when falsy the ``YOUR_API_KEY`` environment
            variable is used instead.
        show_info: When true, progress toasts are shown and the transcript is
            cleaned in 1000-character chunks instead of in one request.

    Returns:
        Always a 2-tuple, so callers can unpack it unconditionally:
        ``(segments, text)`` on success, where ``segments`` is the
        ``segments`` attribute of the API response and ``text`` is the
        stripped transcript; ``(error_message, None)`` on failure, where the
        message starts with ``"轉譯失敗:"``.
    """
    try:
        if not api_key:
            api_key = os.getenv("YOUR_API_KEY")
        if show_info:
            gr.Info("開始轉譯,請稍候......")
        client = Groq(api_key=api_key)
        language_dict = {"繁體中文": "zh", "English": "en", "German": "de", "Spanish": "es", "Russian": "ru", "Korean": "ko", "French": "fr", "Japanese": "ja", "Portuguese": "pt", "Turkish": "tr", "Polish": "pl", "Catalan": "ca", "Dutch": "nl", "Arabic": "ar", "Swedish": "sv", "Italian": "it", "Indonesian": "id", "Hindi": "hi", "Finnish": "fi", "Vietnamese": "vi", "Hebrew": "he", "Ukrainian": "uk", "Greek": "el", "Malay": "ms", "Czech": "cs", "Romanian": "ro", "Danish": "da", "Hungarian": "hu", "Tamil": "ta", "Norwegian": "no", "Thai": "th", "Urdu": "ur", "Croatian": "hr", "Bulgarian": "bg", "Lithuanian": "lt", "Latin": "la", "Maori": "mi", "Malayalam": "ml", "Welsh": "cy", "Slovak": "sk", "Telugu": "te", "Persian": "fa", "Latvian": "lv", "Bengali": "bn", "Serbian": "sr", "Azerbaijani": "az", "Slovenian": "sl", "Kannada": "kn", "Estonian": "et", "Macedonian": "mk", "Breton": "br", "Basque": "eu", "Icelandic": "is", "Armenian": "hy", "Nepali": "ne", "Mongolian": "mn", "Bosnian": "bs", "Kazakh": "kk", "Albanian": "sq", "Swahili": "sw", "Galician": "gl", "Marathi": "mr", "Punjabi": "pa", "Sindhi": "sd", "Gujarati": "gu", "Amharic": "am", "Yiddish": "yi", "Lao": "lo", "Uzbek": "uz", "Faroese": "fo", "Haitian creole": "ht", "Pashto": "ps", "Turkmen": "tk", "Nynorsk": "nn", "Maltese": "mt", "Sanskrit": "sa", "Luxembourgish": "lb", "Myanmar": "my", "Tibetan": "bo", "Tagalog": "tl", "Malagasy": "mg", "Assamese": "as", "Tatar": "tt", "Hawaiian": "haw", "Lingala": "ln", "Hausa": "ha", "Bashkir": "ba", "Javanese": "jw", "Sundanese": "su"}
        # Unknown display names map to None.
        selected_language = language_dict.get(Language)
        # NOTE(review): confirm both model ids are still served by the provider.
        selected_model = "distil-whisper-large-v3-en" if Language == "English" else "whisper-large-v3"
        with open(filename, "rb") as audio_file:
            transcription = client.audio.transcriptions.create(
                file=(filename, audio_file.read()),
                model=selected_model,
                response_format="verbose_json",
                language=selected_language,
                temperature=0.0
            )
        if Language == "English":
            summary = transcription.text
        else:
            full_text = zhconv(transcription.text, "zh-tw")
            if show_info:
                # Clean up in fixed-size slices and report progress per slice.
                chunks = [full_text[i:i+1000] for i in range(0, len(full_text), 1000)]
                summaries = []
                for i, chunk in enumerate(chunks):
                    gr.Info(f"正在處理第 {i+1}/{len(chunks)} 部分...")
                    chunk_summary = summarize_article(chunk, api_key)
                    summaries.append(chunk_summary.strip())
                summary = "\n\n".join(summaries)
            else:
                summary = summarize_article(full_text, api_key)
        return transcription.segments, summary.strip()
    except Exception as e:
        # Same arity as the success path. The message goes first because the
        # caller shows the first item as text whenever it is not a list.
        return f"轉譯失敗:{str(e)}", None
def create_srt_file(srt_content):
    """Write SRT text to a new ``.srt`` temporary file and return its path.

    The file is created with ``delete=False``, so it remains on disk after
    this function returns; removing it is left to the caller.

    Args:
        srt_content: Full text of the subtitle file.

    Returns:
        Filesystem path of the UTF-8 encoded file that was written.
    """
    srt_handle = tempfile.NamedTemporaryFile(
        mode="w",
        delete=False,
        suffix=".srt",
        encoding="utf-8",
    )
    with srt_handle:
        srt_handle.write(srt_content)
    return srt_handle.name
def clear_inputs():
    """Return five ``None`` values, one for each output wired to the clear button."""
    cleared_outputs = (None,) * 5
    return cleared_outputs
# Stylesheet passed to gr.Blocks(css=...). Each selector below is attached to
# a component through its `elem_classes` argument, except the `.gr-*`
# selectors, which are not referenced by name in this file.
custom_css = """
.center-aligned {
text-align: center !important;
color: #ff4081;
text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
margin-bottom: 0 !important;
}
.gr-input, .gr-box, .gr-dropdown {
border-radius: 10px !important;
border: 2px solid #ff4081 !important;
margin: 0 !important;
}
.gr-input:focus, .gr-box:focus, .gr-dropdown:focus {
border-color: #f50057 !important;
box-shadow: 0 0 0 2px rgba(245,0,87,0.2) !important;
}
.file-background {
background-color: #B7E0FF !important;
padding: 15px !important;
border-radius: 10px !important;
margin: 0 !important;
height: auto;
}
.api-background {
background-color: #FFCFB3 !important;
padding: 15px !important;
border-radius: 10px !important;
margin: 0 !important;
}
.script-background {
background-color: #FEF9D9 !important;
padding: 15px !important;
border-radius: 10px !important;
margin: 0 !important;
}
.script-background textarea {
font-size: 18px !important;
background-color: #ffffff;
border: 1px solid #f0f8ff;
border-radius: 8px;
}
.srt-background {
background-color: #FFF4B5 !important;
padding: 5px !important;
border-radius: 10px !important;
margin: 0 !important;
}
.text-background {
padding: 5px !important;
border-radius: 10px !important;
border: 2px solid #B7E0FF !important;
margin: 0 !important;
}
.clear-button {
border-radius: 10px !important;
background-color: #333333 !important;
color: white !important;
font-weight: bold !important;
transition: all 0.3s ease !important;
}
.clear-button:hover {
background-color: #000000 !important;
transform: scale(1.05);
}
"""
# Build the Gradio UI and wire its two events.
# NOTE(review): the nesting of the Row/Column containers below was
# reconstructed from a listing that had lost its indentation — confirm it
# against the deployed layout before relying on it.
with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as demo:
    # Page header: title, credits and a terms-of-service reminder.
    gr.Markdown("""
# 📝 凊彩歐北寫 - 財政部財政資訊中心 🎵
> ### **※ 玩轉聲音魅力,開拓更多可能性,自動生成 Note-taking Record,系統布署:江信宗,LLM:Whisper large-v3。**<br>依據 <a href="https://www.youtube.com/static?template=terms&hl=zh-Hant" style="color: black;">YouTube 的服務條款(ToS)</a>,請自行明確取得 YouTube 著作權人授權後再上傳影片進行轉譯!
""", elem_classes="center-aligned")
    with gr.Row():
        # Single-file upload; its `upload` event starts the whole pipeline.
        file_input = gr.File(
            label="上傳影片或音訊檔",
            file_count="single",
            elem_classes="file-background"
        )
        with gr.Column():
            # Masked text box for the access key checked by validate_and_convert.
            api_key_input = gr.Textbox(label="輸入您的 API Key", type="password", placeholder="API authentication key", elem_classes="api-background")
            # Audio language selector; the chosen display name is passed to the handler.
            Language = gr.Dropdown(
                choices = ["繁體中文","English","Japanese","Korean","German","French","Spanish","Arabic","Italian","Portuguese","Thai","Vietnamese","Malay","Indonesian","Hindi","Bengali","Russian"],
                value="繁體中文",
                label="媒體檔之音訊語言",
                interactive=True,
                elem_classes="api-background"
            )
        # Player for the converted MP3 (receives a file path).
        output_audio = gr.Audio(label="轉換後的 MP3", type="filepath", elem_classes="script-background")
    with gr.Row():
        # Download slot for the generated SRT file.
        srt_file_output = gr.File(label="下載 SRT 字幕檔", elem_classes="srt-background")
        # Status box: receives the MP3 size line or an error message.
        output_text = gr.Textbox(label="音訊檔案大小", elem_classes="script-background")
        clear_button = gr.Button("清除", elem_classes="clear-button")
    # Static footer with links to related tools.
    gr.HTML(
"""
<span style="font-size: 20px; color: black; font-weight:bold;">歡迎將轉譯結果製作為</span><a href="https://podcast.fiai.us.kg/" style="font-size: 20px; color: red; font-weight:bold;">財資歐北共 Podcast</a><span style="font-size: 20px; color: black;"> ,</span><span style="font-size: 20px; color: black;">重點摘要及RAG知識問答建議使用 </span><a href="https://notebooklm.google.com" style="font-size: 20px; color: red;">Google NotebookLM</a><span style="font-size: 20px; color: black;"> 更佳。</span>
"""
    )
    # Transcript display, rendered as Markdown.
    transcription_text = gr.Markdown(label="語音轉譯結果", elem_classes="text-background")
    # Uploading a file runs validation, conversion and transcription; the
    # handler's 5-tuple maps positionally onto the five outputs below.
    file_input.upload(
        fn=validate_and_convert,
        inputs=[file_input, Language, api_key_input],
        outputs=[file_input, output_audio, output_text, transcription_text, srt_file_output]
    )
    # The clear button resets the same five components.
    clear_button.click(
        fn=clear_inputs,
        inputs=[],
        outputs=[file_input, output_audio, output_text, transcription_text, srt_file_output]
    )
if __name__ == "__main__":
    # Report ffmpeg availability on stdout before serving requests.
    check_ffmpeg()
    # When SPACE_ID is set, launch with defaults; otherwise request a share
    # link and pass show_api=False.
    launch_options = {} if "SPACE_ID" in os.environ else {"share": True, "show_api": False}
    demo.queue().launch(**launch_options)