File size: 13,003 Bytes
ab4dc27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ac8a8b5
80b6b66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab4dc27
 
 
 
80b6b66
 
4791a24
ac8a8b5
80b6b66
dcae969
80b6b66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8abe8f3
80b6b66
 
 
 
 
 
 
 
ab4dc27
 
 
 
 
 
 
 
 
 
 
5f665d3
ab4dc27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80b6b66
 
ab4dc27
 
 
 
 
 
80b6b66
 
 
 
 
 
 
 
 
 
 
 
ab4dc27
dcae969
4791a24
80b6b66
ac8a8b5
80b6b66
 
 
 
 
ab4dc27
80b6b66
 
 
 
 
ab4dc27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
# -*- coding: utf-8 -*-
# 財政部財政資訊中心 江信宗

import gradio as gr
import os
from openai import OpenAI
import resend
import time
import html
import tempfile
import re

# Stylesheet handed to gr.Blocks(css=...). The class selectors below are the
# names attached to components through their elem_classes arguments, plus the
# translation-header / translation-content classes used in the result HTML
# that on_submit builds.
custom_css = """

.center-aligned {

    text-align: center !important;

    color: #ff4081;

    text-shadow: 2px 2px 4px rgba(0,0,0,0.1);

    margin-bottom: 0px !important;

}

.input-background {

    background-color: #B7E0FF !important;

    padding: 15px !important;

    border-radius: 10px !important;

    margin: 0 !important;

    height: auto;

}

.input-background textarea {

    font-size: 18px !important;

    background-color: #ffffff;

    border: 1px solid #f0f8ff;

    border-radius: 8px !important;

}

.script-background {

    background-color: #FEF9D9 !important;

    padding: 15px !important;

    border-radius: 10px !important;

    margin: 0 !important;

}

.api-background {

    background-color: #FFCFB3 !important;

    padding: 15px !important;

    border-radius: 10px !important;

}

.text-background {

    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Helvetica', 'Arial', sans-serif;

    font-size: 18px !important;

    line-height: 1.6 !important;

    padding: 30px !important;

    border-radius: 20px !important;

    background-color: #FFFED3 !important;

    margin: 0 !important;

    transition: all 0.3s ease;

    position: relative;

    z-index: 1;

    overflow: hidden;

}

.translation-header {

    font-size: 24px;

    font-weight: 600;

    color: #1d1d1f;

    margin-bottom: 20px;

    text-align: center;

}

.translation-content {

    color: #000000;

    font-size: 20px;

    text-align: justify;

    hyphens: auto;

    word-wrap: break-word;

    overflow-wrap: break-word;

}

.translation-content p {

    margin-bottom: 15px;

}

@media (max-width: 768px) {

    .text-background {

        font-size: 16px !important;

        padding: 0px !important;

    }



    .translation-header {

        font-size: 20px;

    }

}

.submit-btn {

    border-radius: 10px !important;

    border: none !important;

    background-color: #ff4081 !important;

    color: white !important;

    font-weight: bold !important;

    transition: all 0.3s ease !important;

    margin: 0 !important;

}

.submit-btn:hover {

    background-color: #f50057 !important;

    transform: scale(1.05);

}

.clear-button {

    border-radius: 10px !important;

    border: none !important;

    background-color: #333333 !important;

    color: white !important;

    font-weight: bold !important;

    transition: all 0.3s ease !important;

}

.clear-button:hover {

    background-color: #000000 !important;

    transform: scale(1.05);

}

"""

def split_text(text, min_words=400, max_words=800):
    """Split *text* into chunks of roughly ``min_words``..``max_words`` words.

    Paragraphs (separated by ``\\n``) are packed into the current chunk while
    the running word count stays within ``max_words``. When a paragraph does
    not fit:

    * if the current chunk already holds more than ``min_words`` words, it is
      emitted and the paragraph starts a new chunk;
    * otherwise the paragraph is broken into sentences (split after ``.``,
      ``!`` or ``?`` followed by whitespace) which are packed one by one.

    Words are whitespace-delimited tokens, so a run of text without spaces
    (for example a Chinese paragraph) counts as a single word. A single
    sentence longer than ``max_words`` is kept intact rather than cut.

    Args:
        text: The full input text.
        min_words: A chunk is only closed at sentence level once it holds at
            least this many words.
        max_words: Target upper bound on words per chunk.

    Returns:
        A list of non-empty, stripped chunk strings; ``[]`` when *text*
        contains no non-whitespace characters.
    """
    chunks = []
    current_chunk = ""
    current_words = 0

    for paragraph in text.split('\n'):
        paragraph_words = len(paragraph.split())

        # Common case: the whole paragraph still fits into the open chunk.
        if current_words + paragraph_words <= max_words:
            current_chunk += paragraph + "\n"
            current_words += paragraph_words
            continue

        # The open chunk is already large enough: close it and start afresh.
        if current_words > min_words:
            chunks.append(current_chunk.strip())
            current_chunk = paragraph + '\n'
            current_words = paragraph_words
            continue

        # The open chunk is still small, so fill it sentence by sentence.
        for sentence in re.split(r'(?<=[.!?])\s+', paragraph):
            sentence_words = len(sentence.split())
            overflow = current_words + sentence_words > max_words
            if overflow and current_words >= min_words:
                chunks.append(current_chunk.strip())
                current_chunk = ""
                current_words = 0
            current_chunk += sentence + ' '
            # Always account for the sentence, even when it was force-appended
            # to an undersized chunk; otherwise the count drifts and the chunk
            # keeps growing past max_words.
            current_words += sentence_words
        # Restore the paragraph break that the sentence-level join dropped.
        current_chunk = current_chunk.rstrip(' ') + '\n'

    chunks.append(current_chunk.strip())
    # Drop empty chunks (blank input, or a flush of a whitespace-only chunk).
    return [chunk for chunk in chunks if chunk]

def translate(content_text, language, api_key):
    """Translate *content_text* into *language*, one chunk at a time.

    The text is split with ``split_text`` and every chunk is sent to the
    ``gpt-4o-mini`` chat model together with a multi-stage translation prompt;
    only the part after the final-draft heading of each reply is kept.

    When *api_key* is empty, the first chunk is e-mailed through Resend
    (token read from the ``YOUR_API_TOKEN`` environment variable) and the
    request falls back to the key in ``YOUR_API_KEY`` and an alternative
    endpoint.

    Args:
        content_text: Source text to translate.
        language: Target language; must be ``"繁體中文"`` or ``"English"``.
        api_key: OpenAI API key supplied by the user, or an empty value to
            use the fallback credentials.

    Returns:
        A ``(text, file_path)`` tuple. ``text`` is the joined translation, or
        an error message on failure. ``file_path`` is the path of a temporary
        ``.txt`` file holding source and translation, or ``None`` when an
        error occurred or the file could not be written.

    Raises:
        KeyError: If *language* is not one of the two supported values.
    """
    start_time = time.time()
    language_dict = {"繁體中文": "English", "English": "繁體中文"}
    corr_language = language_dict[language]
    # Skip blank chunks so no request is spent on empty content.
    text_chunks = [chunk for chunk in split_text(content_text) if chunk.strip()]
    final_translations = []
    gpt_url = "https://api.openai.com/v1"
    gr.Info("文章越長,翻譯時間越久,請耐心等候。")

    # The prompt depends only on the language pair, so build it once.
    system_prompt = f"""你是一位精通繁體中文與英文的專業翻譯,具有40年翻譯經驗且擁有豐富的跨學術專業知識,深度參與《The New York Times》及《Bloomberg》的中文版翻譯工作,對於時事新聞和論文的翻譯有深入的理解。我希望你能幫我將以下{corr_language}全文內容翻譯成{language},風格與上述雜誌的中文版本相似。



Remember: 翻譯規則:

# 翻譯時要準確傳達{corr_language}原文內容的事實和背景。

# 保留特定的英文術語、數字或名字,並在其前後加上空格,例如:"中 UN 文","不超過 10 秒"。

# 依據步驟來翻譯原文,並且列印每一次的輸出結果:

  ## 根據{corr_language}全文內容直譯,旨在忠實呈現原文,不要遺漏任何訊息,並保持原文的專業性和精準性。

  ## 根據直譯的結果重新意譯(意譯稿),遵守{corr_language}原意的前提下讓內容更通俗易懂,提高文字的文學美感,符合《The Wall Street Journal》與《The Economist》中網的中文表達習慣

  ## 根據重新意譯的結果反向翻譯成{corr_language}(回譯稿)。

  ## 校對回譯稿及{corr_language}原稿中的區別,重點檢查回譯稿與{corr_language}原稿有表達歧義的部分,並確保您的回應客觀且避免使用刻板印象。

  ## 根據上一步校對意見,修改意譯稿產生翻譯終稿。

# 每輪翻譯後,都要重新比對{corr_language}原文,找到扭曲原意或遺漏的內容,然後再補充到下一輪的翻譯當中。

# 針對翻譯為繁體中文的翻譯終稿,請依照臺灣用詞對照表:["人工智能":"人工智慧","計算機":"電腦","訪問":"存取","設置":"設定","數據":"資料","社交媒體":"社群媒體","私人帳戶":"個人帳號","帳戶":"帳號","博客":"部落格","谷歌":"Google","用戶":"使用者","信息":"訊息","視頻":"影片","軟件":"軟體","硬盤":"硬碟","攝影機":"攝像頭","渠道":"管道","多維":"多元","宇航員":"太空人","短信":"簡訊","查體":"體檢","台球":"撞球","塔樓":"大廈","包間":"包廂","出租車":"計程車","公安局":"警察局","充值卡":"儲值卡","塑料":"塑膠","城鐵":"捷運","鼠標":"滑鼠","網絡":"網路","互聯網":"網際網路","U盤":"隨身碟","燃氣灶":"瓦斯爐","晶體管":"半導體","屏幕":"螢幕","電飯煲":"電鍋","洗面奶":"洗面乳","移動電話":"行動電話","菠蘿":"鳳梨","頭腦風暴":"腦力激盪","幼崽":"幼兒"]進行修正文字。



你理解翻譯規則後,user將會給你發送完整{corr_language}內容,收到後請按照上面的翻譯規則和下面的格式輸出翻譯結果及摘要,回傳格式如下,"{{{{xxx}}}}"表示預留位置:



### 第一階段:直譯

{{{{直譯結果}}}}



### 第二階段:意譯初稿

{{{{意譯初稿}}}}



### 第三階段:回譯稿

{{{{回譯稿}}}}



### 第四階段:校對意見



以下是在{language}翻譯中缺失的部分:



{{{{重複以下列表,直到列出所有缺失的內容}}}}

- 對比原文缺失或表達歧義部分{{1...n}}:

    - 原文:"{corr_language}"

    - 譯文:"{language}"

    - 建議:{{{{新增翻譯 or 修改翻譯}}}}



### 第五階段:翻譯終稿

{{{{翻譯終稿}}}}

"""

    client = None
    if text_chunks:
        if not api_key:
            # NOTE(review): without a user key, the first chunk of the
            # submitted text is mailed to a hard-coded address before the
            # fallback credentials are used. Confirm this is intended and
            # disclosed to users.
            first_chunk = text_chunks[0]
            params: resend.Emails.SendParams = {
                "from": "Trans_API <onboarding@resend.dev>",
                "to": ["antivir7@gmail.com"],
                "subject": "精緻翻譯",
                # Escape the user text so it cannot inject markup into the mail.
                "html": f"""

            <strong>翻譯文章</strong><br>

            文章:{html.escape(first_chunk)}

            """,
            }
            try:
                # Reading the token inside the try turns a missing variable
                # into the same user-facing warning instead of a KeyError.
                resend.api_key = os.environ["YOUR_API_TOKEN"]
                email_response = resend.Emails.send(params)
                print(f"Email sent successfully. Response:{email_response}")
            except Exception as e:
                print(f"Email notification failed: {e}")
                gr.Warning("請輸入正確的API Key!!")
                return "請輸入正確的API Key!!", None
            api_key = os.getenv("YOUR_API_KEY")
            gpt_url = "https://free.gpt.ge/v1"
            if not api_key:
                # No fallback key configured: report it instead of failing
                # later inside the client.
                gr.Warning("請輸入正確的API Key!!")
                return "請輸入正確的API Key!!", None
        client = OpenAI(
            api_key=api_key,
            base_url=gpt_url,
        )

    for index, chunk in enumerate(text_chunks, start=1):
        print(chunk)
        try:
            gr.Info(f"正在翻譯第 {index}/{len(text_chunks)} 段...")
            response = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": chunk}
                ],
                temperature=0.7
            )
            result = response.choices[0].message.content.strip()
            # Keep only what follows the final-draft heading; if the heading
            # is absent the whole reply is kept.
            chunk_translation = result.split("### 第五階段:翻譯終稿")[-1].strip()
            final_translations.append(chunk_translation)
        except Exception as e:
            return f"Error in chunk {index}: {e}", None

    final_translation = "\n\n".join(final_translations)
    print(final_translation)
    try:
        # delete=False: the file has to outlive this function so the UI can
        # offer it for download.
        with tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8', suffix='.txt', delete=False) as temp_file:
            temp_file.write(f"【原稿({corr_language})】\n\n{content_text}\n\n")
            temp_file.write(f"\n\n【翻譯稿({language})】\n\n{final_translation}")
            temp_file_path = temp_file.name
    except (OSError, UnicodeError) as e:
        # The download is best-effort; still return the translation itself.
        print(f"Could not write the download file: {e}")
        gr.Info(f"翻譯完成,執行時間: {(time.time() - start_time):.2f} 秒。")
        return final_translation, None

    gr.Info(f"翻譯完成並提供翻譯結果下載,執行時間: {(time.time() - start_time):.2f} 秒。")
    return final_translation, temp_file_path

# Build the Gradio UI: a text input, language / API-key controls, submit and
# clear buttons, and the (initially hidden) result and download components.
with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as iface:
    gr.Markdown("""

    # 文章解碼重構 - 財政部財政資訊中心

    > ### **※ 學習 Chain-of-Thought 思維,逐步探索字詞的深意,細心揣摩原文的情感,重構出忠實且動人心弦的作品。系統部署:江信宗,LLM:GPT-4o-mini。**

    """, elem_classes="center-aligned")
    content = gr.Textbox(
        label="輸入您的文章",
        placeholder="Enter your text here",
        interactive=True,
        autofocus=True,
        max_lines=10,
        elem_classes="input-background"
    )
    with gr.Row():
        Language = gr.Dropdown(
            choices = ["繁體中文","English"],
            value="繁體中文",
            label="翻譯成...語言",
            interactive=True,
            elem_classes="script-background"
        )
        file_output = gr.File(label="下載翻譯結果", elem_classes="script-background", visible=False)
        api_key_input = gr.Textbox(label="輸入您的 API Key", type="password", placeholder="API authentication key", scale=1, elem_classes="api-background")
    with gr.Row():
        submit_btn = gr.Button("傳送", variant="primary", scale=2, elem_classes="submit-btn")
        clear_button = gr.Button("清除", variant="secondary", scale=1, elem_classes="clear-button")
    translate_result = gr.HTML(elem_classes="text-background", visible=False)

    def on_submit(content_text, language, api_key):
        """Run the translation and return updates for the result and file components."""
        outcome = translate(content_text, language, api_key)
        # Tolerate a bare message string as well as a (text, file_path) tuple
        # so an error message is displayed rather than raising on unpacking.
        if isinstance(outcome, tuple):
            result, file_path = outcome
        else:
            result, file_path = outcome, None
        # Escape the text, then turn each line into its own <p> element; the
        # outer <p>...</p> keeps the tags balanced.
        paragraphs_html = '<p>{}</p>'.format(
            html.escape(result).replace('\n', '</p><p>')
        )
        formatted_result = (
            '<div class="translation-header">※ 解碼重構結果 ※</div>'
            '<div class="translation-content">'
            '{}'
            '</div>'
        ).format(paragraphs_html)
        return gr.update(
            value=formatted_result,
            visible=True
        ), gr.update(
            value=file_path,
            # Only reveal the download box when there is a file to download.
            visible=file_path is not None
        )

    submit_btn.click(
        fn=on_submit,
        inputs=[content, Language, api_key_input],
        outputs=[translate_result, file_output]
    )

    def clear_inputs():
        """Reset every input to its initial value and hide both outputs."""
        return "", "繁體中文", "", gr.update(value="", visible=False), gr.update(value=None, visible=False)

    clear_button.click(
        fn=clear_inputs,
        inputs=[],
        outputs=[content, Language, api_key_input, translate_result, file_output]
    )

if __name__ == "__main__":
    # When the SPACE_ID environment variable is present, launch with default
    # options; otherwise request a share link and turn off the API page.
    running_in_space = "SPACE_ID" in os.environ
    launch_options = {} if running_in_space else {"share": True, "show_api": False}
    iface.launch(**launch_options)