huangjunxin commited on
Commit
7d526e6
·
1 Parent(s): 74066db

Refactored project structure; updated the HKBU ChatGPT API call

Browse files
app/main.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from app.routes.text_translator import text_translator
from app.routes.document_translator import document_translator

# Expose both translators as tabs of a single Gradio app.
tabs = [text_translator, document_translator]
tab_labels = ["Text", "Document"]

demo = gr.TabbedInterface(
    interface_list=tabs,
    tab_names=tab_labels,
    title="FloweryTranslator",
)

if __name__ == "__main__":
    # Hide the auto-generated API docs page when serving the UI.
    demo.launch(show_api=False)
app/routes/document_translator.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from docx import Document
3
+ import fitz # PyMuPDF
4
+ from datetime import datetime
5
+ import os
6
+ from dotenv import load_dotenv
7
+
8
+ from app.routes.text_translator import translate_text
9
+
10
+ load_dotenv()
11
+ passcode_key = os.getenv("PASSCODE_KEY")
12
+
13
+
14
def process_uploaded_file(file):
    """Extract the plain-text content of an uploaded .docx or .pdf file.

    Parameters
    ----------
    file : uploaded-file object exposing a ``name`` attribute
        (as supplied by the Gradio ``File`` component).

    Returns
    -------
    str
        The document text with paragraphs/pages joined by newlines.

    Raises
    ------
    ValueError
        If the extension is neither ``docx`` nor ``pdf``.
    """
    # Dispatch on the lower-cased extension taken from the file name.
    extension = file.name.rsplit('.', 1)[-1].lower()

    if extension == 'docx':
        document = Document(file)
        return '\n'.join(paragraph.text for paragraph in document.paragraphs)

    if extension == 'pdf':
        # PyMuPDF handle is closed automatically by the context manager.
        with fitz.open(file) as pdf:
            return '\n'.join(page.get_text("text") for page in pdf)

    raise ValueError(f"Unsupported file type: {extension}")
35
+
36
+
37
def _save_text_as_docx(text):
    """Write *text* into a freshly created .docx file and return its path.

    The filename embeds a month/day/hour/minute timestamp so successive
    outputs usually get distinct names (same-minute runs collide — an
    accepted limitation of the original scheme).
    """
    doc = Document()
    # The f-string coerces non-string results (e.g. error values) to text,
    # matching the original behavior.
    doc.add_paragraph(f"{text}")
    timestamp = datetime.now().strftime("%m%d%H%M")
    output_filename = f"translated_text_{timestamp}.docx"
    doc.save(output_filename)
    return output_filename


def translate_document(source_language, target_language, original_doc, tone_of_voice, industry, model, passcode):
    """Translate the text of an uploaded document and save it as .docx.

    Parameters mirror the Gradio inputs: source/target language names, the
    uploaded file (or None), tone of voice, industry sector, model choice
    and the access passcode.

    Returns
    -------
    list[str]
        A single-element list with the path of the generated .docx file
        (Gradio's File output expects a list).
    """
    # NOTE(review): the stored key apparently equals the passcode plus a
    # trailing "1"; this mirrors the original check — confirm the intent.
    if passcode + "1" != passcode_key:
        return [_save_text_as_docx("The passcode is incorrect. Please try again.")]

    # Extract the raw text from the uploaded file, if any.
    original_doc_content = ""
    if original_doc is not None:
        original_doc_content = process_uploaded_file(original_doc)

    # Delegate the actual translation to the shared text pipeline.
    translated_text = translate_text(source_language, target_language, original_doc_content,
                                     tone_of_voice, industry, model, passcode)

    return [_save_text_as_docx(translated_text)]
78
+
79
+
80
# Interface for Document Translator
# Gradio form wiring translate_document to language pickers, a single
# .docx/.pdf upload, tone/industry/model selectors and a passcode box.
document_translator = gr.Interface(
    fn=translate_document,
    inputs=[
        gr.Dropdown(
            label="Source Language",
            choices=["Chinese", "English (UK)", "English (US)", "Vietnamese", "Japanese", "Korean", "French", "German",
                     "Spanish", "Portuguese (Brazilian)", "Portuguese (European)", "Italian", "Dutch", "Polish",
                     "Russian"],
            value="Chinese"
        ),
        gr.Dropdown(
            label="Target Language",
            choices=["Chinese", "English (UK)", "English (US)", "Vietnamese", "Japanese", "Korean", "French", "German",
                     "Spanish", "Portuguese (Brazilian)", "Portuguese (European)", "Italian", "Dutch", "Polish",
                     "Russian"],
            value="Vietnamese"
        ),
        # Only formats process_uploaded_file can parse.
        gr.File(
            label="Original Text Document",
            file_types=[".docx", ".pdf"],
            file_count="single"
        ),
        gr.Radio(
            label="Tone of Voice",
            choices=["Standard", "Formal", "Informal"],
            value="Standard"
        ),
        gr.Dropdown(
            label="Industry Sector",
            choices=["General Fields", "Academic Papers", "Biomedicine", "Information Technology",
                     "Finance and Economics", "News and Information", "Aerospace", "Mechanical Manufacturing",
                     "Laws and Regulations", "Humanities and Social Sciences"],
            value="General Fields"
        ),
        # Labels must match the model names dispatched on in translate_text.
        gr.Dropdown(
            label="Model Provider (Model Name)",
            choices=["DeepL", "Volcengine", "HKBU ChatGPT (gpt-35-turbo-16k)", "HKBU ChatGPT (gpt-4-turbo)",
                     "OpenAI (gpt-3.5-turbo-1106)", "OpenAI (gpt-4-0125-preview)", "Google Gemini (gemini-pro)",
                     "Baichuan AI (Baichuan2)", "Zhipu AI (glm-3-turbo)", "Zhipu AI (glm-4)"],
            value="OpenAI (gpt-3.5-turbo-1106)"
        ),
        gr.Textbox(
            label="Passcode",
            placeholder="Enter the passcode here",
            type="password",
            lines=1,
            max_lines=1
        )
    ],
    outputs=[
        gr.File(label="Translated Text Document", file_count="single", type="filepath")
    ],
    title="FloweryTranslator - Document Translator"
)
main.py → app/routes/text_translator.py RENAMED
@@ -1,17 +1,12 @@
1
- import gradio as gr
2
- from docx import Document
3
- import fitz # PyMuPDF
4
- from datetime import datetime
5
  import os
6
- import tempfile
7
- from zipfile import ZipFile
8
- from utils import translate_by_deepl_api
9
- from utils import translate_by_volcengine_api
10
- from utils import translate_by_openai_api
11
- from utils import translate_by_baichuan_api
12
- from utils import translate_by_hkbu_chatgpt_api
13
- from utils import translate_by_google_api
14
- from utils import translate_by_zhipuai_api
15
  from dotenv import load_dotenv
16
 
17
  load_dotenv()
@@ -42,9 +37,9 @@ def translate_text(source_language, target_language, original_text, tone_of_voic
42
  translation_sample, translated_text = translate_by_hkbu_chatgpt_api(
43
  source_language, target_language, original_text, tone_of_voice, industry, "gpt-35-turbo-16k"
44
  )
45
- elif model == "HKBU ChatGPT (gpt-4)":
46
  translation_sample, translated_text = translate_by_hkbu_chatgpt_api(
47
- source_language, target_language, original_text, tone_of_voice, industry, "gpt-4"
48
  )
49
  elif model == "OpenAI (gpt-3.5-turbo-1106)":
50
  translation_sample, translated_text = translate_by_openai_api(
@@ -74,72 +69,6 @@ def translate_text(source_language, target_language, original_text, tone_of_voic
74
  return translated_text
75
 
76
 
77
- def process_uploaded_file(file):
78
- content = ""
79
- # Detect the file type based on its extension
80
- file_extension = file.name.split('.')[-1].lower()
81
-
82
- # Process each file according to its type
83
- if file_extension == 'docx':
84
- doc = Document(file)
85
- full_text = [para.text for para in doc.paragraphs]
86
- content = '\n'.join(full_text)
87
- elif file_extension == 'pdf':
88
- # Open the PDF file
89
- with fitz.open(file) as pdf:
90
- full_text = []
91
- for page in pdf:
92
- full_text.append(page.get_text("text"))
93
- content = '\n'.join(full_text)
94
- else:
95
- raise ValueError(f"Unsupported file type: {file_extension}")
96
-
97
- return content
98
-
99
-
100
- def translate_document(source_language, target_language, original_doc, tone_of_voice, industry, model, passcode):
101
- output_files = []
102
-
103
- # Check if the passcode is correct
104
- if passcode + "1" != passcode_key:
105
- # Create a new .docx document
106
- doc = Document()
107
- doc.add_paragraph("The passcode is incorrect. Please try again.")
108
-
109
- # Save the document
110
- timestamp = datetime.now().strftime("%m%d%H%M")
111
- output_filename = f"translated_text_{timestamp}.docx"
112
- doc.save(output_filename)
113
- output_files.append(output_filename)
114
-
115
- # Return the list of output files
116
- return output_files
117
-
118
- # Generate translated text
119
- # Process the uploaded files
120
- original_doc_content = ""
121
-
122
- if original_doc is not None:
123
- original_doc_content = process_uploaded_file(original_doc)
124
-
125
- # Translate the uploaded files
126
- translated_text = translate_text(source_language, target_language, original_doc_content, tone_of_voice, industry,
127
- model, passcode)
128
-
129
- # Create a new .docx document
130
- doc = Document()
131
- doc.add_paragraph(f"{translated_text}")
132
-
133
- # Save the document
134
- timestamp = datetime.now().strftime("%m%d%H%M")
135
- output_filename = f"translated_text_{timestamp}.docx"
136
- doc.save(output_filename)
137
- output_files.append(output_filename)
138
-
139
- # Return the list of output files
140
- return output_files
141
-
142
-
143
  # Interface for Text Translator
144
  text_translator = gr.Interface(
145
  fn=translate_text,
@@ -178,7 +107,7 @@ text_translator = gr.Interface(
178
  ),
179
  gr.Dropdown(
180
  label="Model Provider (Model Name)",
181
- choices=["DeepL", "Volcengine", "HKBU ChatGPT (gpt-35-turbo-16k)", "HKBU ChatGPT (gpt-4)",
182
  "OpenAI (gpt-3.5-turbo-1106)", "OpenAI (gpt-4-0125-preview)", "Google Gemini (gemini-pro)",
183
  "Baichuan AI (Baichuan2)", "Zhipu AI (glm-3-turbo)", "Zhipu AI (glm-4)"],
184
  value="OpenAI (gpt-3.5-turbo-1106)"
@@ -196,69 +125,3 @@ text_translator = gr.Interface(
196
  ],
197
  title="FloweryTranslator - Text Translator"
198
  )
199
-
200
- # Interface for Document Translator
201
- document_translator = gr.Interface(
202
- fn=translate_document,
203
- inputs=[
204
- gr.Dropdown(
205
- label="Source Language",
206
- choices=["Chinese", "English (UK)", "English (US)", "Vietnamese", "Japanese", "Korean", "French", "German",
207
- "Spanish", "Portuguese (Brazilian)", "Portuguese (European)", "Italian", "Dutch", "Polish",
208
- "Russian"],
209
- value="Chinese"
210
- ),
211
- gr.Dropdown(
212
- label="Target Language",
213
- choices=["Chinese", "English (UK)", "English (US)", "Vietnamese", "Japanese", "Korean", "French", "German",
214
- "Spanish", "Portuguese (Brazilian)", "Portuguese (European)", "Italian", "Dutch", "Polish",
215
- "Russian"],
216
- value="Vietnamese"
217
- ),
218
- gr.File(
219
- label="Original Text Document",
220
- file_types=[".docx", ".pdf"],
221
- file_count="single"
222
- ),
223
- gr.Radio(
224
- label="Tone of Voice",
225
- choices=["Standard", "Formal", "Informal"],
226
- value="Standard"
227
- ),
228
- gr.Dropdown(
229
- label="Industry Sector",
230
- choices=["General Fields", "Academic Papers", "Biomedicine", "Information Technology",
231
- "Finance and Economics", "News and Information", "Aerospace", "Mechanical Manufacturing",
232
- "Laws and Regulations", "Humanities and Social Sciences"],
233
- value="General Fields"
234
- ),
235
- gr.Dropdown(
236
- label="Model Provider (Model Name)",
237
- choices=["DeepL", "Volcengine", "HKBU ChatGPT (gpt-35-turbo-16k)", "HKBU ChatGPT (gpt-4)",
238
- "OpenAI (gpt-3.5-turbo-1106)", "OpenAI (gpt-4-0125-preview)", "Google Gemini (gemini-pro)",
239
- "Baichuan AI (Baichuan2)", "Zhipu AI (glm-3-turbo)", "Zhipu AI (glm-4)"],
240
- value="OpenAI (gpt-3.5-turbo-1106)"
241
- ),
242
- gr.Textbox(
243
- label="Passcode",
244
- placeholder="Enter the passcode here",
245
- type="password",
246
- lines=1,
247
- max_lines=1
248
- )
249
- ],
250
- outputs=[
251
- gr.File(label="Translated Text Document", file_count="single", type="filepath")
252
- ],
253
- title="FloweryTranslator - Document Translator"
254
- )
255
-
256
- # Combine both interfaces into tabs
257
- demo = gr.TabbedInterface(
258
- title="FloweryTranslator",
259
- interface_list=[text_translator, document_translator],
260
- tab_names=["Text", "Document"]
261
- )
262
-
263
- if __name__ == "__main__":
264
- demo.launch(show_api=False)
 
 
 
 
 
1
  import os
2
+ import gradio as gr
3
+ from utils.translate.translate_deepl import translate_by_deepl_api
4
+ from utils.translate.translate_volcengine import translate_by_volcengine_api
5
+ from utils.translate.translate_hkbu_chatgpt import translate_by_hkbu_chatgpt_api
6
+ from utils.translate.translate_openai import translate_by_openai_api
7
+ from utils.translate.translate_google import translate_by_google_api
8
+ from utils.translate.translate_baichuan import translate_by_baichuan_api
9
+ from utils.translate.translate_zhipuai import translate_by_zhipuai_api
 
10
  from dotenv import load_dotenv
11
 
12
  load_dotenv()
 
37
  translation_sample, translated_text = translate_by_hkbu_chatgpt_api(
38
  source_language, target_language, original_text, tone_of_voice, industry, "gpt-35-turbo-16k"
39
  )
40
+ elif model == "HKBU ChatGPT (gpt-4-turbo)":
41
  translation_sample, translated_text = translate_by_hkbu_chatgpt_api(
42
+ source_language, target_language, original_text, tone_of_voice, industry, "gpt-4-turbo"
43
  )
44
  elif model == "OpenAI (gpt-3.5-turbo-1106)":
45
  translation_sample, translated_text = translate_by_openai_api(
 
69
  return translated_text
70
 
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  # Interface for Text Translator
73
  text_translator = gr.Interface(
74
  fn=translate_text,
 
107
  ),
108
  gr.Dropdown(
109
  label="Model Provider (Model Name)",
110
+ choices=["DeepL", "Volcengine", "HKBU ChatGPT (gpt-35-turbo-16k)", "HKBU ChatGPT (gpt-4-turbo)",
111
  "OpenAI (gpt-3.5-turbo-1106)", "OpenAI (gpt-4-0125-preview)", "Google Gemini (gemini-pro)",
112
  "Baichuan AI (Baichuan2)", "Zhipu AI (glm-3-turbo)", "Zhipu AI (glm-4)"],
113
  value="OpenAI (gpt-3.5-turbo-1106)"
 
125
  ],
126
  title="FloweryTranslator - Text Translator"
127
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/prompts/translation_prompt.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from utils.translate.translate_deepl import translate_by_deepl_api
2
+ from utils.translate.translate_volcengine import translate_by_volcengine_api
3
+ from utils.utils.other_utils import get_language_code
4
+
5
+
6
# Translation prompt
def generate_translation_prompt(source_language, target_language, original_text, tone_of_voice, industry):
    """Build the proofreading prompt shared by all LLM back-ends.

    A first-pass machine translation ("translation sample") is produced
    with DeepL when both languages are DeepL-supported, otherwise with
    Volcengine; the prompt then asks the model to proofread that sample
    in the requested tone and industry register.

    Returns
    -------
    tuple
        (translation_sample, translation_prompt)
    """
    languages_should_use_deepl = ["Chinese", "English (UK)", "English (US)", "French", "German", "Spanish",
                                  "Portuguese (Brazilian)", "Portuguese (European)", "Italian", "Dutch", "Polish",
                                  "Russian"]
    # Print the source language, target language (debug logging)
    print(f"Source language: {source_language}, Language code: {get_language_code(source_language)}, Should use DeepL: {source_language in languages_should_use_deepl}")
    print(f"Target language: {target_language}, Language code: {get_language_code(target_language)}, Should use DeepL: {target_language in languages_should_use_deepl}")
    # Generate the translation sample
    if source_language in languages_should_use_deepl and target_language in languages_should_use_deepl:
        print("Using DeepL API")
        translation_sample = translate_by_deepl_api(source_language, target_language, original_text)
    else:
        print("Using Volcengine API")
        translation_sample = translate_by_volcengine_api(source_language, target_language, original_text)

    # Generate the translation prompt
    # NOTE(review): the fenced layout below is relied on by
    # extract_content_from_response when parsing the model reply.
    translation_prompt = f"""{source_language}:
```
{original_text}
```

{target_language} translation sample:
```
{translation_sample}
```

As a bilingual {source_language}-{target_language} native speaker and seasoned translator, your task is to proofread the {target_language} translation sample for errors based on the {source_language} text above. The translated text should be in the tone of voice of {tone_of_voice.lower()}, and should be suitable for the {industry.lower()} industry. Before providing a proofread version, please provide suggestions for corrections (if any) to the above translation sample.

Your response should be formatted as follows:
```
Rationale:
{target_language} translation (proofread):
```"""

    print(translation_prompt)

    return translation_sample, translation_prompt
utils/translate/translate_baichuan.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain_community.chat_models import ChatBaichuan
3
+ from langchain.schema import HumanMessage
4
+ from dotenv import load_dotenv
5
+
6
+ from utils.prompts.translation_prompt import generate_translation_prompt
7
+ from utils.utils.other_utils import extract_content_from_response
8
+
9
+ load_dotenv()
10
+ baichuan_api_key = os.environ.get("BAICHUAN_API_KEY")
11
+ baichuan_secret_key = os.environ.get("BAICHUAN_SECRET_KEY")
12
+
13
+
14
def translate_by_baichuan_api(source_language, target_language, original_text, tone_of_voice, industry):
    """Proofread-translate via the Baichuan chat API.

    Returns
    -------
    tuple
        (translation_sample, translated_text)
    """
    # Prompt to provide translation (also yields the MT sample).
    translation_sample, translation_prompt = generate_translation_prompt(source_language, target_language, original_text, tone_of_voice, industry)
    # Translate by accessing Baichuan API.
    # Use .invoke() instead of the deprecated __call__ style, consistent
    # with translate_by_google_api in this codebase.
    chat = ChatBaichuan(temperature=0.7, baichuan_api_key=baichuan_api_key, baichuan_secret_key=baichuan_secret_key, model='Baichuan2')
    res = chat.invoke(
        [
            HumanMessage(content=translation_prompt)
        ]
    )
    res_content = res.content
    print(res_content)
    # Split the model reply into rationale + proofread translation.
    rationale, translated_text = extract_content_from_response(target_language, res_content)

    return translation_sample, translated_text
utils/translate/translate_deepl.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import deepl
3
+ from dotenv import load_dotenv
4
+
5
+ from utils.utils.other_utils import get_language_code
6
+
7
+ load_dotenv()
8
+ deepl_api_key = os.environ.get("DEEPL_API_KEY")
9
+
10
+
11
def translate_by_deepl_api(source_language, target_language, original_text):
    """Translate *original_text* into *target_language* with DeepL.

    DeepL auto-detects the source language; *source_language* is accepted
    only for signature parity with the other back-ends and not forwarded.
    """
    # DeepL expects upper-case target codes (e.g. "ZH", "EN-US").
    target_code = get_language_code(target_language).upper()
    translator = deepl.Translator(deepl_api_key)
    result = translator.translate_text(original_text, target_lang=target_code)
    return result.text
utils/translate/translate_google.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain_google_genai import ChatGoogleGenerativeAI
3
+ from dotenv import load_dotenv
4
+
5
+ from utils.prompts.translation_prompt import generate_translation_prompt
6
+ from utils.utils.other_utils import extract_content_from_response
7
+
8
+ load_dotenv()
9
+ google_api_key = os.environ.get("GOOGLE_API_KEY")
10
+
11
+
12
def translate_by_google_api(source_language, target_language, original_text, tone_of_voice, industry):
    """Proofread-translate via the Google Gemini chat API.

    Returns (translation_sample, translated_text).
    """
    # Build the shared proofreading prompt (also yields the MT sample).
    translation_sample, prompt = generate_translation_prompt(
        source_language, target_language, original_text, tone_of_voice, industry)

    # Query Gemini with the prompt as a single turn.
    llm = ChatGoogleGenerativeAI(temperature=0.7, model="gemini-pro")
    reply = llm.invoke(prompt)
    reply_text = reply.content
    print(reply_text)

    # Split the model reply into rationale + proofread translation.
    _, translated_text = extract_content_from_response(target_language, reply_text)
    return translation_sample, translated_text
utils/translate/translate_hkbu_chatgpt.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ import requests
4
+
5
+ from utils.prompts.translation_prompt import generate_translation_prompt
6
+ from utils.utils.other_utils import extract_content_from_response
7
+
8
+ load_dotenv()
9
+ hkbu_chatgpt_api_key = os.environ.get("HKBU_CHATGPT_API_KEY")
10
+
11
+
12
def call_hkbu_chatgpt_api(conversation_list, model_name="gpt-35-turbo-16k", temperature=0.7):
    """POST a chat-completion request to the HKBU ChatGPT REST endpoint.

    Parameters
    ----------
    conversation_list : list[dict]
        OpenAI-style messages, e.g. [{"role": "user", "content": ...}].
    model_name : str
        Deployment name (e.g. "gpt-35-turbo-16k", "gpt-4-turbo").
    temperature : float
        Sampling temperature.

    Returns
    -------
    dict
        The parsed JSON response body.

    Raises
    ------
    RuntimeError
        On a non-200 HTTP status or a transport-level failure.  (The
        original returned ad-hoc tuples here, making success and failure
        types inconsistent; the caller already treats failures as
        exceptions, so raising keeps the contract uniform.)
    """
    basic_url = "https://chatgpt.hkbu.edu.hk/general/rest"
    api_version = "2024-02-15-preview"
    url = basic_url + "/deployments/" + model_name + "/chat/completions/?api-version=" + api_version
    headers = {'Content-Type': 'application/json', 'api-key': hkbu_chatgpt_api_key}
    payload = {'messages': conversation_list, 'temperature': temperature}

    try:
        # Bound the wait so a stuck request cannot hang the UI forever.
        response = requests.post(url, json=payload, headers=headers, timeout=60)
    except requests.RequestException as e:
        raise RuntimeError(f"HKBU ChatGPT API request failed: {e}") from e

    if response.status_code != 200:
        raise RuntimeError(
            f"HKBU ChatGPT API error {response.status_code}: {response.text}")

    return response.json()
29
+
30
+
31
def translate_by_hkbu_chatgpt_api(source_language, target_language, original_text, tone_of_voice, industry, model_name="gpt-35-turbo-16k"):
    """Proofread-translate via the HKBU ChatGPT API.

    Returns
    -------
    tuple
        (translation_sample, translated_text).  On failure the second
        element carries an error description instead of a translation;
        the pair shape is preserved so callers can always unpack two
        values.
    """
    # Prompt to provide translation (also yields the MT sample).
    translation_sample, translation_prompt = generate_translation_prompt(
        source_language, target_language, original_text, tone_of_voice, industry)
    try:
        # Translate by accessing HKBU ChatGPT API
        res = call_hkbu_chatgpt_api(
            conversation_list=[
                {"role": "user", "content": translation_prompt}
            ],
            model_name=model_name,
            temperature=0.7
        )
        res_content = res["choices"][0]["message"]["content"]
    except Exception as e:
        # BUG FIX: the original returned a bare 3-tuple here, which broke
        # every caller that unpacks two values (ValueError on unpacking).
        return translation_sample, f"Error: {e}"

    print(res_content)
    # Split the model reply into rationale + proofread translation.
    rationale, translated_text = extract_content_from_response(target_language, res_content)

    return translation_sample, translated_text
utils/translate/translate_openai.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain_community.chat_models import ChatOpenAI
3
+ from langchain.schema import HumanMessage
4
+ from dotenv import load_dotenv
5
+
6
+ from utils.prompts.translation_prompt import generate_translation_prompt
7
+ from utils.utils.other_utils import extract_content_from_response
8
+
9
+ load_dotenv()
10
+ openai_api_key = os.environ.get("OPENAI_API_KEY")
11
+
12
+
13
def translate_by_openai_api(source_language, target_language, original_text, tone_of_voice, industry, model_name="gpt-3.5-turbo-1106"):
    """Proofread-translate via the OpenAI chat API.

    Returns (translation_sample, translated_text).
    """
    # Build the shared proofreading prompt (also yields the MT sample).
    translation_sample, prompt = generate_translation_prompt(
        source_language, target_language, original_text, tone_of_voice, industry)

    # Query OpenAI with the prompt as a single user message.
    llm = ChatOpenAI(temperature=0.7, openai_api_key=openai_api_key, model_name=model_name)
    reply = llm([HumanMessage(content=prompt)])
    reply_text = reply.content
    print(reply_text)

    # Split the model reply into rationale + proofread translation.
    _, translated_text = extract_content_from_response(target_language, reply_text)
    return translation_sample, translated_text
utils/translate/translate_volcengine.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from volcengine.ApiInfo import ApiInfo
4
+ from volcengine.Credentials import Credentials
5
+ from volcengine.ServiceInfo import ServiceInfo
6
+ from volcengine.base.Service import Service
7
+ from dotenv import load_dotenv
8
+
9
+ from utils.utils.other_utils import get_language_code
10
+
11
+ load_dotenv()
12
+ k_access_key = os.environ.get("K_ACCESS_KEY")
13
+ k_secret_key = os.environ.get("K_SECRET_KEY")
14
+
15
+
16
def translate_by_volcengine_api(source_language, target_language, original_text):
    """Translate *original_text* with the Volcengine translation API.

    Only the target language is sent in the request body, so Volcengine
    auto-detects the source; *source_language* is kept for signature
    parity with the other back-ends.  (The original computed an unused
    source-language code — removed.)

    Returns the translated string.
    """
    target_language_code = get_language_code(target_language)

    k_service_info = ServiceInfo(
        'translate.volcengineapi.com',
        {'Content-Type': 'application/json'},
        Credentials(k_access_key, k_secret_key, 'translate', 'cn-north-1'),
        5,  # request timeouts (seconds), per the volcengine SDK signature
        5
    )
    k_query = {
        'Action': 'TranslateText',
        'Version': '2020-06-01'
    }
    k_api_info = {
        'translate': ApiInfo('POST', '/', k_query, {}, {})
    }
    service = Service(k_service_info, k_api_info)
    body = {
        'TargetLanguage': target_language_code,
        'TextList': [original_text],
    }
    res = service.json('translate', {}, json.dumps(body))
    print(res)
    translated_dict = json.loads(res)
    # First (and only) entry corresponds to the single submitted text.
    translated_text = translated_dict["TranslationList"][0]["Translation"]

    return translated_text
utils/translate/translate_zhipuai.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from zhipuai import ZhipuAI
3
+ from dotenv import load_dotenv
4
+
5
+ from utils.prompts.translation_prompt import generate_translation_prompt
6
+ from utils.utils.other_utils import extract_content_from_response
7
+
8
+ load_dotenv()
9
+ openai_api_key = os.environ.get("OPENAI_API_KEY")
10
+ baichuan_api_key = os.environ.get("BAICHUAN_API_KEY")
11
+ baichuan_secret_key = os.environ.get("BAICHUAN_SECRET_KEY")
12
+ hkbu_chatgpt_api_key = os.environ.get("HKBU_CHATGPT_API_KEY")
13
+ google_api_key = os.environ.get("GOOGLE_API_KEY")
14
+ zhipuai_api_key = os.environ.get("ZHIPUAI_API_KEY")
15
+
16
+
17
def translate_by_zhipuai_api(source_language, target_language, original_text, tone_of_voice, industry, model_name="glm-3-turbo"):
    """Proofread-translate via the ZhipuAI chat-completions API.

    Returns (translation_sample, translated_text).
    """
    # Build the shared proofreading prompt (also yields the MT sample).
    translation_sample, prompt = generate_translation_prompt(
        source_language, target_language, original_text, tone_of_voice, industry)

    # Query ZhipuAI with the prompt as a single user message.
    client = ZhipuAI(api_key=zhipuai_api_key)
    completion = client.chat.completions.create(
        model=model_name,
        messages=[
            {"role": "user", "content": prompt}
        ],
        temperature=0.7
    )
    reply_text = completion.choices[0].message.content
    print(reply_text)

    # Split the model reply into rationale + proofread translation.
    _, translated_text = extract_content_from_response(target_language, reply_text)
    return translation_sample, translated_text
utils.py → utils/utils/other_utils.py RENAMED
@@ -1,31 +1,7 @@
1
- # utils.py
2
- import os
3
- import json
4
  import re
5
- import deepl
6
- from volcengine.ApiInfo import ApiInfo
7
- from volcengine.Credentials import Credentials
8
- from volcengine.ServiceInfo import ServiceInfo
9
- from volcengine.base.Service import Service
10
- from langchain_community.chat_models import ChatOpenAI, ChatBaichuan
11
- from langchain.schema import HumanMessage, SystemMessage
12
- from langchain_google_genai import ChatGoogleGenerativeAI
13
- from zhipuai import ZhipuAI
14
- from dotenv import load_dotenv
15
- import requests
16
-
17
- load_dotenv()
18
- deepl_api_key = os.environ.get("DEEPL_API_KEY")
19
- k_access_key = os.environ.get("K_ACCESS_KEY")
20
- k_secret_key = os.environ.get("K_SECRET_KEY")
21
- openai_api_key = os.environ.get("OPENAI_API_KEY")
22
- baichuan_api_key = os.environ.get("BAICHUAN_API_KEY")
23
- baichuan_secret_key = os.environ.get("BAICHUAN_SECRET_KEY")
24
- hkbu_chatgpt_api_key = os.environ.get("HKBU_CHATGPT_API_KEY")
25
- google_api_key = os.environ.get("GOOGLE_API_KEY")
26
- zhipuai_api_key = os.environ.get("ZHIPUAI_API_KEY")
27
 
28
 
 
29
  def get_language_code(language_name):
30
  language_mapping = {
31
  "Chinese": "zh",
@@ -167,85 +143,7 @@ def get_language_code(language_name):
167
  return language_mapping.get(language_name, "Unknown Language Code")
168
 
169
 
170
- def translate_by_deepl_api(source_language, target_language, original_text):
171
- target_language_code = get_language_code(target_language).upper()
172
- deepl_client = deepl.Translator(deepl_api_key)
173
- translated_text = deepl_client.translate_text(original_text, target_lang=target_language_code)
174
-
175
- return translated_text.text
176
-
177
-
178
- def translate_by_volcengine_api(source_language, target_language, original_text):
179
- source_language_code = get_language_code(source_language)
180
- target_language_code = get_language_code(target_language)
181
-
182
- k_service_info = ServiceInfo(
183
- 'translate.volcengineapi.com',
184
- {'Content-Type': 'application/json'},
185
- Credentials(k_access_key, k_secret_key, 'translate', 'cn-north-1'),
186
- 5,
187
- 5
188
- )
189
- k_query = {
190
- 'Action': 'TranslateText',
191
- 'Version': '2020-06-01'
192
- }
193
- k_api_info = {
194
- 'translate': ApiInfo('POST', '/', k_query, {}, {})
195
- }
196
- service = Service(k_service_info, k_api_info)
197
- body = {
198
- 'TargetLanguage': target_language_code,
199
- 'TextList': [original_text],
200
- }
201
- res = service.json('translate', {}, json.dumps(body))
202
- print(res)
203
- translated_dict = json.loads(res)
204
- translated_text = translated_dict["TranslationList"][0]["Translation"]
205
-
206
- return translated_text
207
-
208
-
209
- # Translation prompt
210
- def generate_translation_prompt(source_language, target_language, original_text, tone_of_voice, industry):
211
- languages_should_use_deepl = ["Chinese", "English (UK)", "English (US)", "French", "German", "Spanish",
212
- "Portuguese (Brazilian)", "Portuguese (European)", "Italian", "Dutch", "Polish",
213
- "Russian"]
214
- # Print the source language, target language
215
- print(f"Source language: {source_language}, Language code: {get_language_code(source_language)}, Should use DeepL: {source_language in languages_should_use_deepl}")
216
- print(f"Target language: {target_language}, Language code: {get_language_code(target_language)}, Should use DeepL: {target_language in languages_should_use_deepl}")
217
- # Generate the translation sample
218
- if source_language in languages_should_use_deepl and target_language in languages_should_use_deepl:
219
- print("Using DeepL API")
220
- translation_sample = translate_by_deepl_api(source_language, target_language, original_text)
221
- else:
222
- print("Using Volcengine API")
223
- translation_sample = translate_by_volcengine_api(source_language, target_language, original_text)
224
-
225
- # Generate the translation prompt
226
- translation_prompt = f"""{source_language}:
227
- ```
228
- {original_text}
229
- ```
230
-
231
- {target_language} translation sample:
232
- ```
233
- {translation_sample}
234
- ```
235
-
236
- As a bilingual {source_language}-{target_language} native speaker and seasoned translator, your task is to proofread the {target_language} translation sample for errors based on the {source_language} text above. The translated text should be in the tone of voice of {tone_of_voice.lower()}, and should be suitable for the {industry.lower()} industry. Before providing a proofread version, please provide suggestions for corrections (if any) to the above translation sample.
237
-
238
- Your response should be formatted as follows:
239
- ```
240
- Rationale:
241
- {target_language} translation (proofread):
242
- ```"""
243
-
244
- print(translation_prompt)
245
-
246
- return translation_sample, translation_prompt
247
-
248
-
249
  def extract_content_from_response(target_language, response):
250
  if "(" in target_language and ")" in target_language:
251
  target_language = target_language.replace("(", "\(")
@@ -263,116 +161,3 @@ def extract_content_from_response(target_language, response):
263
  translation = translation_match.group(1).strip().strip("```").strip() if translation_match else None
264
 
265
  return rationale, translation
266
-
267
-
268
- def translate_by_openai_api(source_language, target_language, original_text, tone_of_voice, industry, model_name="gpt-3.5-turbo-1106"):
269
- # Prompt to provide translation
270
- translation_sample, translation_prompt = generate_translation_prompt(source_language, target_language, original_text, tone_of_voice, industry)
271
- # Translate by accessing OpenAI API
272
- chat = ChatOpenAI(temperature=0.7, openai_api_key=openai_api_key, model_name=model_name)
273
- res = chat(
274
- [
275
- HumanMessage(content=translation_prompt)
276
- ]
277
- )
278
- res_content = res.content
279
- print(res_content)
280
- rationale, translated_text = extract_content_from_response(target_language, res_content)
281
-
282
- return translation_sample, translated_text
283
-
284
-
285
- def translate_by_baichuan_api(source_language, target_language, original_text, tone_of_voice, industry):
286
- # Prompt to provide translation
287
- translation_sample, translation_prompt = generate_translation_prompt(source_language, target_language, original_text, tone_of_voice, industry)
288
- # Translate by accessing Baichuan API
289
- chat = ChatBaichuan(temperature=0.7, baichuan_api_key=baichuan_api_key, baichuan_secret_key=baichuan_secret_key, model='Baichuan2')
290
- res = chat(
291
- [
292
- HumanMessage(content=translation_prompt)
293
- ]
294
- )
295
- res_content = res.content
296
- print(res_content)
297
- rationale, translated_text = extract_content_from_response(target_language, res_content)
298
-
299
- return translation_sample, translated_text
300
-
301
-
302
- def call_hkbu_chatgpt_api(conversation_list, model_name="gpt-35-turbo-16k", temperature=0.7):
303
- basic_url = "https://chatgpt.hkbu.edu.hk/general/rest"
304
- api_version = "2023-08-01-preview"
305
- if model_name == "gpt-35-turbo-16k":
306
- api_version = "2023-08-01-preview"
307
- elif model_name == "gpt-4-turbo":
308
- api_version = "2023-12-01-preview"
309
- url = basic_url + "/deployments/" + model_name + "/chat/completions/?api-version=" + api_version
310
- headers = {'Content-Type': 'application/json', 'api-key': hkbu_chatgpt_api_key}
311
- payload = {'messages': conversation_list, 'temperature': temperature}
312
-
313
- try:
314
- response = requests.post(url, json=payload, headers=headers)
315
-
316
- if response.status_code == 200:
317
- data = response.json()
318
- return data
319
- else:
320
- return 'Error', response.status_code, response.text
321
- except requests.RequestException as e:
322
- return 'Error:', e
323
-
324
-
325
- def translate_by_hkbu_chatgpt_api(source_language, target_language, original_text, tone_of_voice, industry, model_name="gpt-35-turbo-16k"):
326
- # Prompt to provide translation
327
- translation_sample, translation_prompt = generate_translation_prompt(source_language, target_language, original_text, tone_of_voice, industry)
328
- res = ""
329
- try:
330
- # Translate by accessing HKBU ChatGPT API
331
- res = call_hkbu_chatgpt_api(
332
- conversation_list=[
333
- {"role": "user", "content": translation_prompt}
334
- ],
335
- model_name=model_name,
336
- temperature=0.7
337
- )
338
- res_content = res["choices"][0]["message"]["content"]
339
- except Exception as e:
340
- res_content = 'Error:', e, res
341
- return res_content
342
-
343
- print(res_content)
344
- rationale, translated_text = extract_content_from_response(target_language, res_content)
345
-
346
- return translation_sample, translated_text
347
-
348
-
349
- def translate_by_google_api(source_language, target_language, original_text, tone_of_voice, industry):
350
- # Prompt to provide translation
351
- translation_sample, translation_prompt = generate_translation_prompt(source_language, target_language, original_text, tone_of_voice, industry)
352
- # Translate by accessing Google API
353
- chat = ChatGoogleGenerativeAI(temperature=0.7, model="gemini-pro")
354
- res = chat.invoke(translation_prompt)
355
- res_content = res.content
356
- print(res_content)
357
- rationale, translated_text = extract_content_from_response(target_language, res_content)
358
-
359
- return translation_sample, translated_text
360
-
361
-
362
- def translate_by_zhipuai_api(source_language, target_language, original_text, tone_of_voice, industry, model_name="glm-3-turbo"):
363
- # Prompt to provide translation
364
- translation_sample, translation_prompt = generate_translation_prompt(source_language, target_language, original_text, tone_of_voice, industry)
365
- # Translate by accessing ZhipuAI API
366
- chat = ZhipuAI(api_key=zhipuai_api_key)
367
- res = chat.chat.completions.create(
368
- model=model_name,
369
- messages=[
370
- {"role": "user", "content": translation_prompt}
371
- ],
372
- temperature=0.7
373
- )
374
- res_content = res.choices[0].message.content
375
- print(res_content)
376
- rationale, translated_text = extract_content_from_response(target_language, res_content)
377
-
378
- return translation_sample, translated_text
 
 
 
 
1
  import re
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
 
4
+ # Get the language code from the language name
5
  def get_language_code(language_name):
6
  language_mapping = {
7
  "Chinese": "zh",
 
143
  return language_mapping.get(language_name, "Unknown Language Code")
144
 
145
 
146
+ # Extract the rationale and translation from the response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  def extract_content_from_response(target_language, response):
148
  if "(" in target_language and ")" in target_language:
149
  target_language = target_language.replace("(", "\(")
 
161
  translation = translation_match.group(1).strip().strip("```").strip() if translation_match else None
162
 
163
  return rationale, translation