Spaces:
Running
Running
File size: 13,003 Bytes
ab4dc27 ac8a8b5 80b6b66 ab4dc27 80b6b66 4791a24 ac8a8b5 80b6b66 dcae969 80b6b66 8abe8f3 80b6b66 ab4dc27 5f665d3 ab4dc27 80b6b66 ab4dc27 80b6b66 ab4dc27 dcae969 4791a24 80b6b66 ac8a8b5 80b6b66 ab4dc27 80b6b66 ab4dc27 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 |
# -*- coding: utf-8 -*-
# 財政部財政資訊中心 江信宗
import gradio as gr
import os
from openai import OpenAI
import resend
import time
import html
import tempfile
import re
# Stylesheet handed to gr.Blocks(css=...) when the UI is built.
# The selectors are the class names the components attach through
# elem_classes (center-aligned, input-background, script-background,
# api-background, text-background, submit-btn, clear-button) plus the
# translation-header / translation-content classes used in the result
# HTML generated by on_submit.
custom_css = """
.center-aligned {
text-align: center !important;
color: #ff4081;
text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
margin-bottom: 0px !important;
}
.input-background {
background-color: #B7E0FF !important;
padding: 15px !important;
border-radius: 10px !important;
margin: 0 !important;
height: auto;
}
.input-background textarea {
font-size: 18px !important;
background-color: #ffffff;
border: 1px solid #f0f8ff;
border-radius: 8px !important;
}
.script-background {
background-color: #FEF9D9 !important;
padding: 15px !important;
border-radius: 10px !important;
margin: 0 !important;
}
.api-background {
background-color: #FFCFB3 !important;
padding: 15px !important;
border-radius: 10px !important;
}
.text-background {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Helvetica', 'Arial', sans-serif;
font-size: 18px !important;
line-height: 1.6 !important;
padding: 30px !important;
border-radius: 20px !important;
background-color: #FFFED3 !important;
margin: 0 !important;
transition: all 0.3s ease;
position: relative;
z-index: 1;
overflow: hidden;
}
.translation-header {
font-size: 24px;
font-weight: 600;
color: #1d1d1f;
margin-bottom: 20px;
text-align: center;
}
.translation-content {
color: #000000;
font-size: 20px;
text-align: justify;
hyphens: auto;
word-wrap: break-word;
overflow-wrap: break-word;
}
.translation-content p {
margin-bottom: 15px;
}
@media (max-width: 768px) {
.text-background {
font-size: 16px !important;
padding: 0px !important;
}
.translation-header {
font-size: 20px;
}
}
.submit-btn {
border-radius: 10px !important;
border: none !important;
background-color: #ff4081 !important;
color: white !important;
font-weight: bold !important;
transition: all 0.3s ease !important;
margin: 0 !important;
}
.submit-btn:hover {
background-color: #f50057 !important;
transform: scale(1.05);
}
.clear-button {
border-radius: 10px !important;
border: none !important;
background-color: #333333 !important;
color: white !important;
font-weight: bold !important;
transition: all 0.3s ease !important;
}
.clear-button:hover {
background-color: #000000 !important;
transform: scale(1.05);
}
"""
def split_text(text, min_words=400, max_words=800):
    """Split ``text`` into chunks of roughly ``min_words``..``max_words`` words.

    Paragraphs (separated by ``"\\n"``) are accumulated into the current chunk
    while the running word count stays within ``max_words``.  When a paragraph
    would overflow the chunk:

    * if the chunk already holds more than ``min_words`` words it is emitted
      and the paragraph starts a new chunk;
    * otherwise the paragraph is split into sentences (after ``.``, ``!`` or
      ``?`` followed by whitespace) and the sentences are packed one by one,
      emitting the chunk as soon as it holds at least ``min_words`` words and
      the next sentence would overflow it.

    A single sentence that overflows ``max_words`` while the chunk is still
    below ``min_words`` is appended anyway, so a chunk may exceed
    ``max_words``.

    NOTE: words are whitespace-delimited tokens (``str.split()``), so text
    written without spaces, such as Chinese, counts as very few words and is
    effectively not split.

    Args:
        text: The text to split.
        min_words: Minimum size a chunk should reach before it is emitted.
        max_words: Size a chunk should not exceed when adding more text.

    Returns:
        A list of stripped, non-empty chunk strings; empty when ``text``
        contains no non-whitespace characters.
    """
    chunks = []
    current_chunk = ""
    current_words = 0
    for paragraph in text.split('\n'):
        paragraph_words = len(paragraph.split())
        if current_words + paragraph_words <= max_words:
            current_chunk += paragraph + "\n"
            current_words += paragraph_words
            continue
        if current_words > min_words:
            # The chunk is big enough: close it at the paragraph boundary.
            chunks.append(current_chunk.strip())
            current_chunk = paragraph + '\n'
            current_words = paragraph_words
            continue
        # The chunk is still too small: top it up sentence by sentence.
        for sentence in re.split(r'(?<=[.!?])\s+', paragraph):
            sentence_words = len(sentence.split())
            if current_words + sentence_words <= max_words:
                current_chunk += sentence + ' '
                current_words += sentence_words
            elif current_words >= min_words:
                chunks.append(current_chunk.strip())
                current_chunk = sentence + ' '
                current_words = sentence_words
            else:
                # Oversized sentence with an undersized chunk: take it anyway,
                # but count its words so the chunk is closed before the next
                # sentence instead of growing against a stale counter.
                current_chunk += sentence + ' '
                current_words += sentence_words
    # Test the stripped text so whitespace-only leftovers (e.g. empty input)
    # do not become an empty chunk.
    last_chunk = current_chunk.strip()
    if last_chunk:
        chunks.append(last_chunk)
    return chunks
def translate(content_text, language, api_key):
    """Translate ``content_text`` into ``language`` via an OpenAI-compatible chat API.

    The text is cut into chunks with ``split_text``.  Each chunk is sent to
    the ``gpt-4o-mini`` model together with a five-stage translation prompt,
    and only the part of each reply after the final-draft heading is kept.
    The joined result is also written, together with the source text, to a
    temporary UTF-8 ``.txt`` file for download.

    Args:
        content_text: Source article to translate.
        language: Target language, ``"繁體中文"`` or ``"English"``; the source
            language is taken to be the other one.
        api_key: Caller's API key for ``https://api.openai.com/v1``.  When it
            is empty, the chunk is e-mailed through Resend and the key from
            the ``YOUR_API_KEY`` environment variable is used against
            ``https://free.gpt.ge/v1`` instead.

    Returns:
        A ``(text, file_path)`` tuple on every path.  ``text`` is the joined
        translation, or a warning/error message when the request could not be
        completed.  ``file_path`` is the path of the temporary file, or
        ``None`` when no file was produced.

    Raises:
        KeyError: If ``language`` is not one of the two supported values.
    """
    start_time = time.time()
    language_dict = {"繁體中文": "English", "English": "繁體中文"}
    corr_language = language_dict[language]
    # Nothing to translate: do not spend an API call (or an e-mail) on blank input.
    if not content_text or not content_text.strip():
        gr.Warning("請輸入要翻譯的文章!!")
        return "請輸入要翻譯的文章!!", None
    text_chunks = split_text(content_text)
    final_translations = []
    gpt_url = "https://api.openai.com/v1"
    # The prompt depends only on the language pair, so build it once.
    # Quadruple braces render as literal "{{...}}" placeholders in the prompt.
    system_prompt = f"""你是一位精通繁體中文與英文的專業翻譯,具有40年翻譯經驗且擁有豐富的跨學術專業知識,深度參與《The New York Times》及《Bloomberg》的中文版翻譯工作,對於時事新聞和論文的翻譯有深入的理解。我希望你能幫我將以下{corr_language}全文內容翻譯成{language},風格與上述雜誌的中文版本相似。
Remember: 翻譯規則:
# 翻譯時要準確傳達{corr_language}原文內容的事實和背景。
# 保留特定的英文術語、數字或名字,並在其前後加上空格,例如:"中 UN 文","不超過 10 秒"。
# 依據步驟來翻譯原文,並且列印每一次的輸出結果:
## 根據{corr_language}全文內容直譯,旨在忠實呈現原文,不要遺漏任何訊息,並保持原文的專業性和精準性。
## 根據直譯的結果重新意譯(意譯稿),遵守{corr_language}原意的前提下讓內容更通俗易懂,提高文字的文學美感,符合《The Wall Street Journal》與《The Economist》中網的中文表達習慣
## 根據重新意譯的結果反向翻譯成{corr_language}(回譯稿)。
## 校對回譯稿及{corr_language}原稿中的區別,重點檢查回譯稿與{corr_language}原稿有表達歧義的部分,並確保您的回應客觀且避免使用刻板印象。
## 根據上一步校對意見,修改意譯稿產生翻譯終稿。
# 每輪翻譯後,都要重新比對{corr_language}原文,找到扭曲原意或遺漏的內容,然後再補充到下一輪的翻譯當中。
# 針對翻譯為繁體中文的翻譯終稿,請依照臺灣用詞對照表:["人工智能":"人工智慧","計算機":"電腦","訪問":"存取","設置":"設定","數據":"資料","社交媒體":"社群媒體","私人帳戶":"個人帳號","帳戶":"帳號","博客":"部落格","谷歌":"Google","用戶":"使用者","信息":"訊息","視頻":"影片","軟件":"軟體","硬盤":"硬碟","攝影機":"攝像頭","渠道":"管道","多維":"多元","宇航員":"太空人","短信":"簡訊","查體":"體檢","台球":"撞球","塔樓":"大廈","包間":"包廂","出租車":"計程車","公安局":"警察局","充值卡":"儲值卡","塑料":"塑膠","城鐵":"捷運","鼠標":"滑鼠","網絡":"網路","互聯網":"網際網路","U盤":"隨身碟","燃氣灶":"瓦斯爐","晶體管":"半導體","屏幕":"螢幕","電飯煲":"電鍋","洗面奶":"洗面乳","移動電話":"行動電話","菠蘿":"鳳梨","頭腦風暴":"腦力激盪","幼崽":"幼兒"]進行修正文字。
你理解翻譯規則後,user將會給你發送完整{corr_language}內容,收到後請按照上面的翻譯規則和下面的格式輸出翻譯結果及摘要,回傳格式如下,"{{{{xxx}}}}"表示預留位置:
### 第一階段:直譯
{{{{直譯結果}}}}
### 第二階段:意譯初稿
{{{{意譯初稿}}}}
### 第三階段:回譯稿
{{{{回譯稿}}}}
### 第四階段:校對意見
以下是在{language}翻譯中缺失的部分:
{{{{重複以下列表,直到列出所有缺失的內容}}}}
- 對比原文缺失或表達歧義部分{{1...n}}:
- 原文:"{corr_language}"
- 譯文:"{language}"
- 建議:{{{{新增翻譯 or 修改翻譯}}}}
### 第五階段:翻譯終稿
{{{{翻譯終稿}}}}
"""
    gr.Info("文章越長,翻譯時間越久,請耐心等候。")
    client = None
    for chunk_number, chunk in enumerate(text_chunks, start=1):
        print(chunk)
        if not api_key:
            # NOTE(review): this path sends the user's text to a hard-coded
            # mailbox before using the shared key; confirm this is intended
            # and disclosed to users.
            try:
                # Read the token inside the try so a missing variable becomes
                # the same warning as a failed send instead of a crash.
                resend.api_key = os.environ["YOUR_API_TOKEN"]
                params: resend.Emails.SendParams = {
                    "from": "Trans_API <onboarding@resend.dev>",
                    "to": ["antivir7@gmail.com"],
                    "subject": "精緻翻譯",
                    # Escape the chunk: it is untrusted text placed into HTML.
                    "html": f"""
<strong>翻譯文章</strong><br>
文章:{html.escape(chunk)}
""",
                }
                email_response = resend.Emails.send(params)
                print(f"Email sent successfully. Response:{email_response}")
                api_key = os.getenv("YOUR_API_KEY")
                gpt_url = "https://free.gpt.ge/v1"
            except Exception as e:
                print(f"Fallback API key unavailable: {e}")
                gr.Warning("請輸入正確的API Key!!")
                # Same (text, file_path) shape as every other exit, so the
                # caller's tuple unpacking keeps working.
                return "請輸入正確的API Key!!", None
        try:
            gr.Info(f"正在翻譯第 {chunk_number}/{len(text_chunks)} 段...")
            if client is None:
                # Created lazily (after the key/endpoint are settled) and
                # inside the try so a constructor error is reported, not raised.
                client = OpenAI(
                    api_key=api_key,
                    base_url=gpt_url,
                )
            response = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": chunk}
                ],
                temperature=0.7
            )
            result = response.choices[0].message.content.strip()
            # Keep only the final draft; if the heading is missing the whole
            # reply is kept.
            chunk_translation = result.split("### 第五階段:翻譯終稿")[-1].strip()
            final_translations.append(chunk_translation)
        except Exception as e:
            return f"Error in chunk {chunk_number}:{e}", None
    final_translation = "\n\n".join(final_translations)
    print(final_translation)
    try:
        # delete=False: the file must outlive this function so it can be downloaded.
        with tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8', suffix='.txt', delete=False) as temp_file:
            temp_file.write(f"【原稿({language_dict[language]})】\n\n{content_text}\n\n")
            temp_file.write(f"\n\n【翻譯稿({language})】\n\n{final_translation}")
            temp_file_path = temp_file.name
    except Exception as e:
        # Best effort: the translation is still returned without a download.
        print(f"Could not write the download file: {e}")
        gr.Info(f"翻譯完成,執行時間: {(time.time() - start_time):.2f} 秒。")
        return final_translation, None
    gr.Info(f"翻譯完成並提供翻譯結果下載,執行時間: {(time.time() - start_time):.2f} 秒。")
    return final_translation, temp_file_path
# Build the Gradio UI: article input, target-language / download / API-key
# row, submit and clear buttons, and the HTML panel showing the result.
# NOTE(review): the nesting of components inside the two gr.Row() blocks is
# reconstructed (api_key_input carries scale=1, which suggests it sits in a
# row) — confirm against the deployed layout.
with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as iface:
    gr.Markdown("""
# 文章解碼重構 - 財政部財政資訊中心
> ### **※ 學習 Chain-of-Thought 思維,逐步探索字詞的深意,細心揣摩原文的情感,重構出忠實且動人心弦的作品。系統部署:江信宗,LLM:GPT-4o-mini。**
""", elem_classes="center-aligned")
    content = gr.Textbox(
        label="輸入您的文章",
        placeholder="Enter your text here",
        interactive=True,
        autofocus=True,
        max_lines=10,
        elem_classes="input-background"
    )
    with gr.Row():
        Language = gr.Dropdown(
            choices=["繁體中文", "English"],
            value="繁體中文",
            label="翻譯成...語言",
            interactive=True,
            elem_classes="script-background"
        )
        file_output = gr.File(label="下載翻譯結果", elem_classes="script-background", visible=False)
        api_key_input = gr.Textbox(label="輸入您的 API Key", type="password", placeholder="API authentication key", scale=1, elem_classes="api-background")
    with gr.Row():
        submit_btn = gr.Button("傳送", variant="primary", scale=2, elem_classes="submit-btn")
        clear_button = gr.Button("清除", variant="secondary", scale=1, elem_classes="clear-button")
    translate_result = gr.HTML(elem_classes="text-background", visible=False)

    def on_submit(content_text, language, api_key):
        """Run the translation and return updates for the result panel and download widget."""
        outcome = translate(content_text, language, api_key)
        # translate() can hand back a bare message instead of a
        # (text, file_path) pair; accept both shapes.
        if isinstance(outcome, str):
            result, file_path = outcome, None
        else:
            result, file_path = outcome
        # Escape the model output, then wrap every line in a complete <p>
        # element so the markup is balanced and the paragraph CSS applies to
        # the first and last line too.
        paragraphs = "<p>" + html.escape(result).replace('\n', '</p><p>') + "</p>"
        formatted_result = (
            '<div class="translation-header">※ 解碼重構結果 ※</div>'
            '<div class="translation-content">'
            '{}'
            '</div>'
        ).format(paragraphs)
        return gr.update(
            value=formatted_result,
            visible=True
        ), gr.update(
            value=file_path,
            # Only offer the download widget when there is a file to download.
            visible=file_path is not None
        )

    submit_btn.click(
        fn=on_submit,
        inputs=[content, Language, api_key_input],
        outputs=[translate_result, file_output]
    )

    def clear_inputs():
        """Reset the text, language and API-key inputs and hide both outputs."""
        return "", "繁體中文", "", gr.update(value="", visible=False), gr.update(value=None, visible=False)

    clear_button.click(
        fn=clear_inputs,
        inputs=[],
        outputs=[content, Language, api_key_input, translate_result, file_output]
    )
if __name__ == "__main__":
    # When the SPACE_ID environment variable is present (presumably set by the
    # hosting platform — confirm) launch with defaults; otherwise create a
    # public share link and hide the API page.
    launch_options = {} if "SPACE_ID" in os.environ else {"share": True, "show_api": False}
    iface.launch(**launch_options)
|