Amamiyaren committed on
Commit
272deef
·
verified ·
1 Parent(s): 18b13c0

Upload 2 files

Browse files
Files changed (2) hide show
  1. requirements.txt +0 -0
  2. server.py +497 -495
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
server.py CHANGED
@@ -1,495 +1,497 @@
1
- from flask import Flask, request, jsonify
2
- import os
3
- import base64
4
- import subprocess
5
- import copy
6
- from flask import Flask, send_file, abort
7
- from pypdf import PdfWriter, PdfReader
8
- from pypdf.generic import RectangleObject
9
- import sys
10
- import shutil
11
- import string
12
-
13
-
14
- ######################################## 默认配置 ########################################
15
- port_num = int(os.environ.get("PORT", 8888)) # 设置端口号: 默认为8888
16
- pdf2zh = "babeldoc" # 设置pdf2zh指令: 默认为'pdf2zh'
17
-
18
- ######### 可以在Zotero偏好设置中配置以下参数, Zotero配置会覆盖本文件中的配置参数 #########
19
- thread_num = 4 # 设置线程: 默认为4
20
- service = "bing" # 设置翻译服务: 默认为bing
21
- translated_dir = "./translated/" # 设置翻译文件的输出路径(临时路径, 可以在翻译后删除)
22
- config_path = "./config.toml" # 设置PDF2zh配置文件路径
23
- source_languages = "en" # 设置源语言
24
- target_languages = "zh" # 设置目标语言
25
- global_translated_dir = translated_dir
26
-
27
- # 从环境变量读取OpenAI配置
28
- openai_base_url = os.environ.get("OPENAI_BASE_URL", "")
29
- openai_model = os.environ.get("OPENAI_MODEL", "gpt-4o")
30
- openai_api_key = os.environ.get("OPENAI_API_KEY", "")
31
- model_type = openai_model # 用于判断模型类型
32
- claude_api_key = os.environ.get("CLAUDE_API_KEY", "")
33
- ##########################################################################################
34
-
35
-
36
- class Config:
37
- def __init__(self, request):
38
- self.thread_num = request.get_json().get("threadNum")
39
- if self.thread_num == None or self.thread_num == "":
40
- self.thread_num = thread_num
41
-
42
- self.service = request.get_json().get("engine")
43
- if self.service == None or self.service == "":
44
- self.service = service
45
-
46
- self.source_languages = request.get_json().get("sourceLanguages")
47
- if self.source_languages == None or self.source_languages == "":
48
- self.source_languages = source_languages
49
-
50
- self.target_languages = request.get_json().get("targetLanguages")
51
- if self.target_languages == None or self.target_languages == "":
52
- self.target_languages = target_languages
53
-
54
- self.translated_dir = request.get_json().get("outputPath")
55
- if self.translated_dir == None or self.translated_dir == "":
56
- self.translated_dir = translated_dir
57
- self.translated_dir = get_absolute_path(self.translated_dir)
58
- os.makedirs(self.translated_dir, exist_ok=True)
59
-
60
- self.config_path = request.get_json().get("configPath")
61
- if self.config_path == None or self.config_path == "":
62
- self.config_path = config_path
63
- self.config_path = get_absolute_path(self.config_path)
64
-
65
- self.mono_cut = request.get_json().get("mono_cut")
66
- self.dual_cut = request.get_json().get("dual_cut")
67
- self.compare = request.get_json().get("compare")
68
-
69
- print("outputPath: ", self.translated_dir)
70
- print("configPath: ", self.config_path)
71
-
72
- global global_translated_dir
73
- global_translated_dir = self.translated_dir
74
-
75
-
76
- def get_absolute_path(path):
77
- if os.path.isabs(path):
78
- return path
79
- else:
80
- return os.path.abspath(path)
81
-
82
-
83
- def get_file_from_request(request):
84
- config = Config(request)
85
- data = request.get_json()
86
- path = data.get("filePath")
87
- print("filePath: ", path)
88
- path = path.replace('\\', '/') # 把所有反斜杠\替换为正斜杠/ (Windows->Linux/MacOS)
89
- file_content = data.get("fileContent")
90
- input_path = os.path.join(config.translated_dir, os.path.basename(path))
91
- input_path = get_absolute_path(input_path)
92
- print("input path: ", input_path)
93
- if file_content:
94
- if file_content.startswith(
95
- "data:application/pdf;base64,"
96
- ): # 移除 Base64 编码中的前缀(如果有)
97
- file_content = file_content[len("data:application/pdf;base64,") :]
98
- file_data = base64.b64decode(file_content) # 解码 Base64 内容
99
- with open(input_path, "wb") as f:
100
- f.write(file_data)
101
- return input_path, config
102
-
103
-
104
- def translate_pdf(input_path, config):
105
- print("\n############# Translating #############")
106
- print("## translate file path ## : ", input_path)
107
- print("## translated_dir ## : ", config.translated_dir)
108
- print("## config_path ## : ", config.config_path)
109
-
110
- try:
111
- # 检查是否存在本地配置文件
112
- local_config_exists = os.path.exists(config.config_path)
113
- print("## local_config_exists ## : ", local_config_exists)
114
-
115
- # 判断是否在部署环境中运行(通过检查环境变量)
116
- is_deployed_env = os.environ.get("OPENAI_BASE_URL") or os.environ.get("OPENAI_API_KEY")
117
- print("## is_deployed_env ## : ", is_deployed_env)
118
-
119
- # 如果在部署环境中运行且有环境变量配置,则使用环境变量生成config.toml
120
- if is_deployed_env and not local_config_exists:
121
- try:
122
- # 如果环境变量中有配置,则使用环境变量中的配置生成config.toml
123
- template_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "config.toml.template")
124
- print("## template_path ## : ", template_path)
125
-
126
- with open(template_path, "r") as template_file:
127
- template_content = template_file.read()
128
-
129
- # 替换模板中的环境变量
130
- template = string.Template(template_content)
131
- config_content = template.substitute({
132
- "OPENAI_BASE_URL": openai_base_url,
133
- "OPENAI_MODEL": openai_model,
134
- "OPENAI_API_KEY": openai_api_key
135
- })
136
-
137
- # 写入配置文件
138
- config_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), config_path)
139
- print("## config_file_path ## : ", config_file_path)
140
-
141
- with open(config_file_path, "w") as config_file:
142
- config_file.write(config_content)
143
-
144
- # 确保使用配置文件
145
- config.config_path = config_file_path
146
- print("## 配置文件已生成 ## : ", config.config_path)
147
- except Exception as e:
148
- print(f"## 生成配置文件时出错 ## : {str(e)}")
149
- raise
150
-
151
- # 确保翻译目录存在
152
- os.makedirs(config.translated_dir, exist_ok=True)
153
- print(f"## 确保翻译目录存在 ## : {config.translated_dir}")
154
-
155
- # 执行pdf2zh翻译, 用户可以自定义命令内容:
156
- if not os.path.exists(config.config_path):
157
- command = [
158
- pdf2zh,
159
- input_path,
160
- "--t",
161
- str(config.thread_num),
162
- "--output",
163
- config.translated_dir,
164
- "--service",
165
- config.service,
166
- "--lang-in",
167
- config.source_languages,
168
- "--lang-out",
169
- config.target_languages,
170
- ]
171
-
172
- # 如果设置了API密钥,添加到命令中
173
- if model_type.lower().startswith("gpt") and openai_api_key:
174
- command.extend(["--openai-api-key", openai_api_key, "--openai-model", model_type])
175
- elif model_type.lower().startswith("claude") and claude_api_key:
176
- command.extend(["--claude-api-key", claude_api_key, "--claude-model", model_type])
177
- else:
178
- command = [pdf2zh, "-c", config.config_path, "--files", input_path]
179
-
180
- print("## 执行命令 ## : ", " ".join(command))
181
-
182
- # 使用Popen而不是run,以便实时显示输出(包括进度条)
183
- process = subprocess.Popen(
184
- command,
185
- stdout=subprocess.PIPE,
186
- stderr=subprocess.PIPE,
187
- text=True,
188
- bufsize=1,
189
- universal_newlines=True
190
- )
191
-
192
- # 实时读取并显示输出
193
- stdout_lines = []
194
- stderr_lines = []
195
-
196
- while True:
197
- stdout_line = process.stdout.readline()
198
- stderr_line = process.stderr.readline()
199
-
200
- if stdout_line:
201
- print(stdout_line.strip())
202
- stdout_lines.append(stdout_line)
203
- if stderr_line:
204
- print(stderr_line.strip())
205
- stderr_lines.append(stderr_line)
206
-
207
- # 检查进程是否结束
208
- if process.poll() is not None:
209
- # 读取剩余输出
210
- for line in process.stdout:
211
- print(line.strip())
212
- stdout_lines.append(line)
213
- for line in process.stderr:
214
- print(line.strip())
215
- stderr_lines.append(line)
216
- break
217
-
218
- # 获取返回码
219
- returncode = process.returncode
220
- stdout = ''.join(stdout_lines)
221
- stderr = ''.join(stderr_lines)
222
-
223
- print(f"## 命令执行结果 ## : 返回码={returncode}")
224
-
225
- if returncode != 0:
226
- error_msg = f"命令执行失败,返回码: {returncode}, 错误: {stderr}"
227
- print(f"## 详细错误信息 ## : {error_msg}")
228
- raise Exception(error_msg)
229
-
230
- # 检查输出文件
231
- expected_mono = os.path.join(
232
- config.translated_dir,
233
- os.path.basename(input_path).replace(".pdf", ".zh.mono.pdf"),
234
- )
235
- expected_dual = os.path.join(
236
- config.translated_dir,
237
- os.path.basename(input_path).replace(".pdf", ".zh.dual.pdf"),
238
- )
239
-
240
- print(f"## 检查输出文件 ## : mono={expected_mono}, dual={expected_dual}")
241
- print(f"## 文件存在检查 ## : mono存在={os.path.exists(expected_mono)}, dual存在={os.path.exists(expected_dual)}")
242
-
243
- # 执行 mv 命令
244
- mono = os.path.join(
245
- config.translated_dir, os.path.basename(input_path).replace(".pdf", "-mono.pdf")
246
- )
247
- dual = os.path.join(
248
- config.translated_dir, os.path.basename(input_path).replace(".pdf", "-dual.pdf")
249
- )
250
-
251
- try:
252
- if os.path.exists(expected_mono):
253
- shutil.move(expected_mono, mono)
254
- print(f"## 移动文件成功 ## : {expected_mono} -> {mono}")
255
- else:
256
- raise Exception(f"源文件不存在: {expected_mono}")
257
-
258
- if os.path.exists(expected_dual):
259
- shutil.move(expected_dual, dual)
260
- print(f"## 移动文件成功 ## : {expected_dual} -> {dual}")
261
- else:
262
- raise Exception(f"源文件不存在: {expected_dual}")
263
- except Exception as e:
264
- print(f"## 移动文件时出错 ## : {str(e)}")
265
- raise
266
-
267
- if not os.path.exists(mono) or not os.path.exists(dual):
268
- raise Exception("[Failed to generate translated files]: " + mono + ", " + dual)
269
-
270
- print("[mono file generated]: ", mono)
271
- print("[dual file generated]: ", dual)
272
- return mono, dual
273
- except Exception as e:
274
- print(f"## translate_pdf函数出错 ## : {str(e)}")
275
- # 重新抛出异常,以便上层函数可以捕获
276
- raise
277
-
278
-
279
- app = Flask(__name__)
280
-
281
-
282
- @app.route("/translate", methods=["POST"])
283
- def translate():
284
- try:
285
- print("\n############# 开始翻译请求处理 #############")
286
- input_path, config = get_file_from_request(request)
287
- print(f"## 获取到输入文件 ## : {input_path}")
288
-
289
- mono, dual = translate_pdf(input_path, config)
290
- print(f"## 翻译完成 ## : mono={mono}, dual={dual}")
291
-
292
- if config.mono_cut and config.mono_cut == "true":
293
- try:
294
- path = mono.replace("-mono.pdf", "-mono-cut.pdf")
295
- print(f"## 开始切割mono文件 ## : {mono} -> {path}")
296
- split_and_merge_pdf(mono, path, compare=False)
297
- if not os.path.exists(path):
298
- raise Exception("[Failed to generate cutted files]: " + path)
299
- print("[mono-cut file generated]: ", path)
300
- except Exception as e:
301
- print(f"## 切割mono文件出错 ## : {str(e)}")
302
- raise
303
-
304
- if config.dual_cut and config.dual_cut == "true":
305
- try:
306
- path = dual.replace("-dual.pdf", "-dual-cut.pdf")
307
- print(f"## 开始切割dual文件 ## : {dual} -> {path}")
308
- split_and_merge_pdf(dual, path, compare=False)
309
- if not os.path.exists(path):
310
- raise Exception("[Failed to generate cutted files]: " + path)
311
- print("[dual-cut file generated]: ", path)
312
- except Exception as e:
313
- print(f"## 切割dual文件出错 ## : {str(e)}")
314
- raise
315
-
316
- if config.compare and config.compare == "true":
317
- try:
318
- path = dual.replace("-dual.pdf", "-compare.pdf")
319
- print(f"## 开始生成对比文件 ## : {dual} -> {path}")
320
- split_and_merge_pdf(dual, path, compare=True)
321
- if not os.path.exists(path):
322
- raise Exception("[Failed to generate compare files]: " + path)
323
- print("[compare file generated]: ", path)
324
- except Exception as e:
325
- print(f"## 生成对比文件出错 ## : {str(e)}")
326
- raise
327
-
328
- return jsonify({"status": "success"}), 200
329
- except Exception as e:
330
- print(f"[Translate Error]: {e}")
331
- # 返回更详细的错误信息
332
- error_message = str(e)
333
- traceback_info = sys.exc_info()
334
- if traceback_info[2]:
335
- import traceback
336
- traceback_str = "".join(traceback.format_tb(traceback_info[2]))
337
- print(f"## 错误堆栈 ## : {traceback_str}")
338
- error_message = f"{error_message}\n{traceback_str}"
339
- return jsonify({"status": "error", "message": error_message}), 500
340
-
341
-
342
- @app.route("/translatedFile/<filename>")
343
- def download(filename):
344
- print("\n############# Downloading #############")
345
- file_path = os.path.join(get_absolute_path(global_translated_dir), filename)
346
- if not os.path.isfile(file_path):
347
- print("[Download File not found]: ", file_path)
348
- return "[Download File not found]: " + file_path, 404
349
- print("[Download file]: ", file_path)
350
- return send_file(file_path, as_attachment=True, download_name=filename)
351
-
352
-
353
- # 工具函数, 用于切割双栏pdf文件
354
- def split_and_merge_pdf(input_pdf, output_pdf, compare=False):
355
- writer = PdfWriter()
356
- if "dual" in input_pdf:
357
- readers = [PdfReader(input_pdf) for _ in range(4)]
358
- for i in range(0, len(readers[0].pages), 2):
359
- original_media_box = readers[0].pages[i].mediabox
360
- width = original_media_box.width
361
- height = original_media_box.height
362
-
363
- left_page_1 = readers[0].pages[i]
364
- for box in ["mediabox", "cropbox", "trimbox", "bleedbox", "artbox"]:
365
- setattr(left_page_1, box, RectangleObject((0, 0, width / 2, height)))
366
-
367
- left_page_2 = readers[1].pages[i + 1]
368
- for box in ["mediabox", "cropbox", "trimbox", "bleedbox", "artbox"]:
369
- setattr(left_page_2, box, RectangleObject((0, 0, width / 2, height)))
370
-
371
- right_page_1 = readers[2].pages[i]
372
- for box in ["mediabox", "cropbox", "trimbox", "bleedbox", "artbox"]:
373
- setattr(
374
- right_page_1, box, RectangleObject((width / 2, 0, width, height))
375
- )
376
-
377
- right_page_2 = readers[3].pages[i + 1]
378
- for box in ["mediabox", "cropbox", "trimbox", "bleedbox", "artbox"]:
379
- setattr(
380
- right_page_2, box, RectangleObject((width / 2, 0, width, height))
381
- )
382
-
383
- if compare == True:
384
- blank_page_1 = writer.add_blank_page(width, height)
385
- blank_page_1.merge_transformed_page(left_page_1, (1, 0, 0, 1, 0, 0))
386
- blank_page_1.merge_transformed_page(
387
- left_page_2, (1, 0, 0, 1, width / 2, 0)
388
- )
389
- blank_page_2 = writer.add_blank_page(width, height)
390
- blank_page_2.merge_transformed_page(
391
- right_page_1, (1, 0, 0, 1, -width / 2, 0)
392
- )
393
- blank_page_2.merge_transformed_page(right_page_2, (1, 0, 0, 1, 0, 0))
394
- else:
395
- writer.add_page(left_page_1)
396
- writer.add_page(left_page_2)
397
- writer.add_page(right_page_1)
398
- writer.add_page(right_page_2)
399
- else:
400
- readers = [PdfReader(input_pdf) for _ in range(2)]
401
- for i in range(len(readers[0].pages)):
402
- page = readers[0].pages[i]
403
-
404
- original_media_box = page.mediabox
405
- width = original_media_box.width
406
- height = original_media_box.height
407
-
408
- left_page = readers[0].pages[i]
409
- left_page.mediabox = RectangleObject((0, 0, width / 2, height))
410
- right_page = readers[1].pages[i]
411
- right_page.mediabox = RectangleObject((width / 2, 0, width, height))
412
-
413
- writer.add_page(left_page)
414
- writer.add_page(right_page)
415
-
416
- with open(output_pdf, "wb") as output_file:
417
- writer.write(output_file)
418
-
419
-
420
- # 用于切割双栏pdf文件
421
- @app.route("/cut", methods=["POST"])
422
- def cut():
423
- print("\n############# Cutting #############")
424
- input_path, config = get_file_from_request(request)
425
- try:
426
- translated_path = os.path.join(
427
- config.translated_dir,
428
- os.path.basename(input_path).replace(".pdf", "-cut.pdf"),
429
- )
430
- split_and_merge_pdf(input_path, translated_path)
431
-
432
- if not os.path.exists(translated_path):
433
- raise Exception("[Failed to generate cut files]: ", translated_path)
434
- print("[Cut file generated]: ", translated_path)
435
- return jsonify({"status": "success"}), 200
436
- except Exception as e:
437
- print(f"[Cut File Error]: {e}")
438
- # 返回更详细的错误信息
439
- error_message = str(e)
440
- traceback_info = sys.exc_info()
441
- if traceback_info[2]:
442
- import traceback
443
- traceback_str = "".join(traceback.format_tb(traceback_info[2]))
444
- print(f"## 错误堆栈 ## : {traceback_str}")
445
- error_message = f"{error_message}\n{traceback_str}"
446
- return jsonify({"status": "error", "message": error_message}), 500
447
-
448
-
449
- # 用于生成中英对照文件
450
- @app.route("/cut-compare", methods=["POST"])
451
- def cut_compare():
452
- print("\n############# Comparing #############")
453
- try:
454
- input_path, config = get_file_from_request(request)
455
- print(f"## 获取到输入文件 ## : {input_path}")
456
-
457
- if "dual" in input_path:
458
- translated_path = os.path.join(
459
- config.translated_dir,
460
- os.path.basename(input_path).replace(".pdf", "-compare.pdf"),
461
- )
462
- print(f"## 直接生成对比文件 ## : {input_path} -> {translated_path}")
463
-
464
- # 确保翻译目录存在
465
- os.makedirs(os.path.dirname(translated_path), exist_ok=True)
466
-
467
- split_and_merge_pdf(input_path, translated_path, compare=True)
468
- else:
469
- print(f"## 需要先翻译再生成对比文件 ## : {input_path}")
470
- _, dual = translate_pdf(input_path, config)
471
- translated_path = dual.replace("-dual.pdf", "-compare.pdf")
472
- print(f"## 生成对比文件 ## : {dual} -> {translated_path}")
473
- split_and_merge_pdf(dual, translated_path, compare=True)
474
-
475
- if not os.path.exists(translated_path):
476
- raise Exception("[Failed to generate cutted file]: " + translated_path)
477
- print("[Compare file generated]: ", translated_path)
478
- return jsonify({"status": "success"}), 200
479
- except Exception as e:
480
- print(f"[cut_compare() Error]: {e}")
481
- # 返回更详细的错误信息
482
- error_message = str(e)
483
- traceback_info = sys.exc_info()
484
- if traceback_info[2]:
485
- import traceback
486
- traceback_str = "".join(traceback.format_tb(traceback_info[2]))
487
- print(f"## 错误堆栈 ## : {traceback_str}")
488
- error_message = f"{error_message}\n{traceback_str}"
489
- return jsonify({"status": "error", "message": error_message}), 500
490
-
491
-
492
- if __name__ == "__main__":
493
- if len(sys.argv) > 1:
494
- port_num = int(sys.argv[1])
495
- app.run(host="0.0.0.0", port=port_num)
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ import os
3
+ import base64
4
+ import subprocess
5
+ import copy
6
+ from flask import Flask, send_file, abort
7
+ from pypdf import PdfWriter, PdfReader
8
+ from pypdf.generic import RectangleObject
9
+ import sys
10
+ import shutil
11
+ import string
12
+ from flask_cors import CORS
13
+
14
+
15
+ ######################################## 默认配置 ########################################
16
+ port_num = int(os.environ.get("PORT", 8888)) # 设置端口号: 默认为8888
17
+ pdf2zh = "babeldoc" # 设置pdf2zh指令: 此处使用'babeldoc'
18
+
19
+ ######### 可以在Zotero偏好设置中配置以下参数, Zotero配置会覆盖本文件中的配置参数 #########
20
+ thread_num = 4 # 设置线程数: 默认为4
21
+ service = "bing" # 设置翻译服务: 默认为bing
22
+ translated_dir = "./translated/" # 设置翻译文件的输出路径(临时路径, 可以在翻译后删除)
23
+ config_path = "./config.toml" # 设置PDF2zh配置文件路径
24
+ source_languages = "en" # 设置源语言
25
+ target_languages = "zh" # 设置目标语言
26
+ global_translated_dir = translated_dir
27
+
28
+ # 从环境变量读取OpenAI配置
29
+ openai_base_url = os.environ.get("OPENAI_BASE_URL", "")
30
+ openai_model = os.environ.get("OPENAI_MODEL", "gpt-4o")
31
+ openai_api_key = os.environ.get("OPENAI_API_KEY", "")
32
+ model_type = openai_model # 用于判断模型类型
33
+ claude_api_key = os.environ.get("CLAUDE_API_KEY", "")
34
+ ##########################################################################################
35
+
36
+
37
class Config:
    """Per-request settings read from the JSON body of a Flask request.

    Every option falls back to the module-level default of the same name
    when the request omits it or sends ``None`` / ``""``.

    Attributes set on the instance:
        thread_num, service, source_languages, target_languages:
            values from ``threadNum``, ``engine``, ``sourceLanguages`` and
            ``targetLanguages``.
        translated_dir: absolute output directory (from ``outputPath``).
        config_path: absolute configuration file path (from ``configPath``).
        mono_cut, dual_cut, compare: raw values of the same-named JSON keys
            (``None`` when absent).

    Side effects of construction: the output directory is created if it
    does not exist, and the module-level ``global_translated_dir`` is set
    to it (the download route reads that global).

    Raises:
        ValueError: if the request body is not a JSON object.
    """

    def __init__(self, request):
        global global_translated_dir

        # Parse the body once instead of once per option.
        payload = request.get_json()
        if not isinstance(payload, dict):
            raise ValueError("request body must be a JSON object")

        def option(key, default):
            # Both a missing key and an empty string mean "use the default".
            value = payload.get(key)
            return default if value is None or value == "" else value

        self.thread_num = option("threadNum", thread_num)
        self.service = option("engine", service)
        self.source_languages = option("sourceLanguages", source_languages)
        self.target_languages = option("targetLanguages", target_languages)

        self.translated_dir = get_absolute_path(option("outputPath", translated_dir))
        os.makedirs(self.translated_dir, exist_ok=True)

        self.config_path = get_absolute_path(option("configPath", config_path))

        # Post-processing switches are kept exactly as sent by the client.
        self.mono_cut = payload.get("mono_cut")
        self.dual_cut = payload.get("dual_cut")
        self.compare = payload.get("compare")

        print("outputPath: ", self.translated_dir)
        print("configPath: ", self.config_path)

        global_translated_dir = self.translated_dir
75
+
76
+
77
def get_absolute_path(path):
    """Return *path* as a normalised absolute path.

    Relative paths are resolved against the current working directory.
    Absolute paths are normalised as well (``..`` segments and redundant
    separators are collapsed), so both kinds of input yield the same
    canonical form.
    """
    # os.path.abspath already handles absolute input, so no isabs() branch
    # is needed.
    return os.path.abspath(path)
82
+
83
+
84
def get_file_from_request(request):
    """Build the request's ``Config`` and materialise the uploaded PDF.

    The JSON body must contain ``filePath``; only its final component is
    used, and the file is placed inside ``config.translated_dir``. When
    ``fileContent`` is present it is decoded from Base64 (an optional
    ``data:application/pdf;base64,`` prefix is stripped) and written to
    that location.

    Returns:
        tuple: ``(input_path, config)`` where ``input_path`` is absolute.

    Raises:
        ValueError: if ``filePath`` is missing or has no usable file name.
    """
    config = Config(request)
    data = request.get_json()
    path = data.get("filePath")
    print("filePath: ", path)
    if not path:
        raise ValueError("request is missing 'filePath'")

    # Clients on Windows send backslash-separated paths; normalise them so
    # basename() works on any host.
    file_name = os.path.basename(path.replace("\\", "/"))
    if file_name in ("", ".", ".."):
        # Without this the target would be the output directory itself (or
        # its parent) and the write below would fail with an unrelated error.
        raise ValueError(f"'filePath' does not name a file: {path}")

    input_path = get_absolute_path(os.path.join(config.translated_dir, file_name))
    print("input path: ", input_path)

    file_content = data.get("fileContent")
    if file_content:
        data_url_prefix = "data:application/pdf;base64,"
        if file_content.startswith(data_url_prefix):
            file_content = file_content[len(data_url_prefix):]
        with open(input_path, "wb") as f:
            f.write(base64.b64decode(file_content))
    return input_path, config
103
+
104
+
105
def _pump_stream(stream, sink=None):
    """Echo each line of *stream* to the console, optionally collecting it in *sink*."""
    for line in stream:
        print(line.strip())
        if sink is not None:
            sink.append(line)


def translate_pdf(input_path, config):
    """Translate one PDF by running the external ``pdf2zh`` command.

    Steps:
      1. If OpenAI environment variables are set and ``config.config_path``
         does not exist, render ``config.toml.template`` (next to this file)
         into a config file and point ``config.config_path`` at it.
      2. Run the translator: with ``-c <config>`` when a config file exists,
         otherwise with explicit command-line options.
      3. Rename ``<stem>.zh.mono.pdf`` / ``<stem>.zh.dual.pdf`` in the output
         directory to ``<stem>-mono.pdf`` / ``<stem>-dual.pdf``.

    Args:
        input_path: path of the PDF to translate.
        config: a ``Config`` instance; ``config_path`` may be updated.

    Returns:
        tuple: ``(mono_path, dual_path)`` of the renamed output files.

    Raises:
        Exception: if config generation fails, the command exits non-zero,
            or an expected output file is missing. Every failure is logged
            and re-raised for the caller to handle.
    """
    import threading  # needed only here; keeps the file's import block untouched

    print("\n############# Translating #############")
    print("## translate file path ## : ", input_path)
    print("## translated_dir ## : ", config.translated_dir)
    print("## config_path ## : ", config.config_path)

    try:
        local_config_exists = os.path.exists(config.config_path)
        print("## local_config_exists ## : ", local_config_exists)

        # Reduce to a bool: the raw value is the base URL or the API key
        # itself and must not end up in the log.
        is_deployed_env = bool(
            os.environ.get("OPENAI_BASE_URL") or os.environ.get("OPENAI_API_KEY")
        )
        print("## is_deployed_env ## : ", is_deployed_env)

        if is_deployed_env and not local_config_exists:
            try:
                base_dir = os.path.dirname(os.path.abspath(__file__))
                template_path = os.path.join(base_dir, "config.toml.template")
                print("## template_path ## : ", template_path)

                # TOML is UTF-8 by specification; do not rely on the locale.
                with open(template_path, "r", encoding="utf-8") as template_file:
                    template_content = template_file.read()

                config_content = string.Template(template_content).substitute({
                    "OPENAI_BASE_URL": openai_base_url,
                    "OPENAI_MODEL": openai_model,
                    "OPENAI_API_KEY": openai_api_key,
                })

                # Written next to this file, using the module-level default name.
                config_file_path = os.path.join(base_dir, config_path)
                print("## config_file_path ## : ", config_file_path)

                with open(config_file_path, "w", encoding="utf-8") as config_file:
                    config_file.write(config_content)

                config.config_path = config_file_path
                print("## 配置文件已生成 ## : ", config.config_path)
            except Exception as e:
                print(f"## 生成配置文件时出错 ## : {str(e)}")
                raise

        os.makedirs(config.translated_dir, exist_ok=True)
        print(f"## 确保翻译目录存在 ## : {config.translated_dir}")

        # Build the translator command; users may customise it here.
        if not os.path.exists(config.config_path):
            command = [
                pdf2zh,
                input_path,
                "--t",
                str(config.thread_num),
                "--output",
                config.translated_dir,
                "--service",
                config.service,
                "--lang-in",
                config.source_languages,
                "--lang-out",
                config.target_languages,
            ]

            # Pass the API key that matches the configured model family.
            if model_type.lower().startswith("gpt") and openai_api_key:
                command.extend(["--openai-api-key", openai_api_key, "--openai-model", model_type])
            elif model_type.lower().startswith("claude") and claude_api_key:
                command.extend(["--claude-api-key", claude_api_key, "--claude-model", model_type])
        else:
            command = [pdf2zh, "-c", config.config_path, "--files", input_path]

        # Log the command with secret values masked.
        secret_flags = ("--openai-api-key", "--claude-api-key")
        printable = [
            "***" if idx > 0 and command[idx - 1] in secret_flags else part
            for idx, part in enumerate(command)
        ]
        print("## 执行命令 ## : ", " ".join(printable))

        # Drain stdout and stderr concurrently. Reading them alternately with
        # blocking readline() stalls as soon as the child writes to only one
        # of them, and deadlocks once that pipe's buffer is full.
        stderr_lines = []
        with subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            bufsize=1,
        ) as process:
            pumps = [
                threading.Thread(target=_pump_stream, args=(process.stdout,), daemon=True),
                threading.Thread(
                    target=_pump_stream, args=(process.stderr, stderr_lines), daemon=True
                ),
            ]
            for pump in pumps:
                pump.start()
            for pump in pumps:
                pump.join()  # returns at EOF, i.e. when the child closed the pipe

        # Leaving the ``with`` block waits for the child, so returncode is set.
        returncode = process.returncode
        stderr = "".join(stderr_lines)

        print(f"## 命令执行结果 ## : 返回码={returncode}")

        if returncode != 0:
            error_msg = f"命令执行失败,返回码: {returncode}, 错误: {stderr}"
            print(f"## 详细错误信息 ## : {error_msg}")
            raise Exception(error_msg)

        # Derive names from the stem so that only the extension is replaced;
        # str.replace(".pdf", ...) would also rewrite ".pdf" inside the name.
        # NOTE(review): the ".zh." infix is hard-coded; confirm what the
        # translator emits when the target language is not "zh".
        stem = os.path.splitext(os.path.basename(input_path))[0]
        expected_mono = os.path.join(config.translated_dir, stem + ".zh.mono.pdf")
        expected_dual = os.path.join(config.translated_dir, stem + ".zh.dual.pdf")

        print(f"## 检查输出文件 ## : mono={expected_mono}, dual={expected_dual}")
        print(f"## 文件存在检查 ## : mono存在={os.path.exists(expected_mono)}, dual存在={os.path.exists(expected_dual)}")

        mono = os.path.join(config.translated_dir, stem + "-mono.pdf")
        dual = os.path.join(config.translated_dir, stem + "-dual.pdf")

        try:
            for produced, final in ((expected_mono, mono), (expected_dual, dual)):
                if not os.path.exists(produced):
                    raise Exception(f"源文件不存在: {produced}")
                shutil.move(produced, final)
                print(f"## 移动文件成功 ## : {produced} -> {final}")
        except Exception as e:
            print(f"## 移动文件时出错 ## : {str(e)}")
            raise

        if not os.path.exists(mono) or not os.path.exists(dual):
            raise Exception("[Failed to generate translated files]: " + mono + ", " + dual)

        print("[mono file generated]: ", mono)
        print("[dual file generated]: ", dual)
        return mono, dual
    except Exception as e:
        print(f"## translate_pdf函数出错 ## : {str(e)}")
        # Re-raise so the calling route can build the error response.
        raise
278
+
279
+
280
+ app = Flask(__name__)
281
+ CORS(app, resources={r"/*": {"origins": "*"}}) # 允许所有来源的跨域请求
282
+
283
+
284
@app.route("/translate", methods=["POST"])
def translate():
    """Handle ``POST /translate``: translate the uploaded PDF and post-process it.

    After translation, each optional step runs when its request flag equals
    the string ``"true"``: ``mono_cut`` and ``dual_cut`` split the mono/dual
    file, ``compare`` builds the comparison file from the dual file.

    Returns a JSON ``{"status": "success"}`` with HTTP 200, or a JSON error
    (message plus traceback text) with HTTP 500 when anything fails.
    """
    try:
        print("\n############# 开始翻译请求处理 #############")
        input_path, config = get_file_from_request(request)
        print(f"## 获取到输入文件 ## : {input_path}")

        mono, dual = translate_pdf(input_path, config)
        print(f"## 翻译完成 ## : mono={mono}, dual={dual}")

        # One row per optional step:
        # (flag, source, old suffix, new suffix, compare mode,
        #  start label, missing-output prefix, done label, error label)
        post_steps = (
            (config.mono_cut, mono, "-mono.pdf", "-mono-cut.pdf", False,
             "## 开始切割mono文件 ##", "[Failed to generate cutted files]: ",
             "[mono-cut file generated]: ", "## 切割mono文件出错 ##"),
            (config.dual_cut, dual, "-dual.pdf", "-dual-cut.pdf", False,
             "## 开始切割dual文件 ##", "[Failed to generate cutted files]: ",
             "[dual-cut file generated]: ", "## 切割dual文件出错 ##"),
            (config.compare, dual, "-dual.pdf", "-compare.pdf", True,
             "## 开始生成对比文件 ##", "[Failed to generate compare files]: ",
             "[compare file generated]: ", "## 生成对比文件出错 ##"),
        )
        for (flag, source, old_suffix, new_suffix, compare_mode,
             start_label, missing_prefix, done_label, error_label) in post_steps:
            if flag != "true":
                continue
            try:
                target = source.replace(old_suffix, new_suffix)
                print(f"{start_label} : {source} -> {target}")
                split_and_merge_pdf(source, target, compare=compare_mode)
                if not os.path.exists(target):
                    raise Exception(missing_prefix + target)
                print(done_label, target)
            except Exception as step_error:
                print(f"{error_label} : {str(step_error)}")
                raise

        return jsonify({"status": "success"}), 200
    except Exception as e:
        print(f"[Translate Error]: {e}")
        # Send the traceback along so the client can show a detailed error.
        error_message = str(e)
        tb = e.__traceback__
        if tb:
            import traceback
            traceback_str = "".join(traceback.format_tb(tb))
            print(f"## 错误堆栈 ## : {traceback_str}")
            error_message = f"{error_message}\n{traceback_str}"
        return jsonify({"status": "error", "message": error_message}), 500
342
+
343
+
344
@app.route("/translatedFile/<filename>")
def download(filename):
    """Handle ``GET /translatedFile/<filename>``: send a generated file as an attachment.

    The file is looked up in the output directory of the most recent request
    (module-level ``global_translated_dir``). Responds with HTTP 404 and a
    plain-text message when the name is not a bare file name or the file
    does not exist.
    """
    from flask import send_from_directory  # local import; flask is already a dependency

    print("\n############# Downloading #############")
    directory = get_absolute_path(global_translated_dir)
    file_path = os.path.join(directory, filename)

    # The name comes straight from the URL: accept only a bare file name so
    # a request cannot reach outside the output directory.
    is_plain_name = os.path.basename(filename.replace("\\", "/")) == filename
    if not is_plain_name or not os.path.isfile(file_path):
        print("[Download File not found]: ", file_path)
        return "[Download File not found]: " + file_path, 404

    print("[Download file]: ", file_path)
    # send_from_directory re-validates that the path stays inside `directory`.
    return send_from_directory(
        directory, filename, as_attachment=True, download_name=filename
    )
353
+
354
+
355
def _crop_all_boxes(page, left, right, height):
    """Set every page box of *page* to the strip ``(left, 0, right, height)`` and return it."""
    for box in ("mediabox", "cropbox", "trimbox", "bleedbox", "artbox"):
        setattr(page, box, RectangleObject((left, 0, right, height)))
    return page


# Utility for cutting two-column PDF pages into halves.
def split_and_merge_pdf(input_pdf, output_pdf, compare=False):
    """Split each page of *input_pdf* into left/right halves and write *output_pdf*.

    Two modes, chosen by whether ``"dual"`` occurs in the *input_pdf* path:

    * dual: pages are processed in pairs ``(i, i + 1)``. Without *compare*
      the output order per pair is left(i), left(i+1), right(i), right(i+1).
      With *compare* each pair yields two full-size pages: the two left
      halves side by side, then the two right halves side by side.
      An unpaired last page (odd page count) contributes only its own halves.
    * otherwise: every page becomes a left-half page followed by a
      right-half page; *compare* is ignored.

    NOTE(review): the mode test looks at the whole path, so a directory
    whose name contains "dual" also selects dual mode.

    Args:
        input_pdf: path of the PDF to read.
        output_pdf: path of the PDF to write.
        compare: build side-by-side comparison pages (dual mode only).
    """
    writer = PdfWriter()
    if "dual" in input_pdf:
        # One reader per half: each yields its own page objects, so the
        # halves of one page can be given different boxes.
        readers = [PdfReader(input_pdf) for _ in range(4)]
        page_count = len(readers[0].pages)
        for i in range(0, page_count, 2):
            # Read the size before the page's boxes are overwritten below.
            original_media_box = readers[0].pages[i].mediabox
            width = original_media_box.width
            height = original_media_box.height
            half = width / 2

            left_page_1 = _crop_all_boxes(readers[0].pages[i], 0, half, height)
            right_page_1 = _crop_all_boxes(readers[2].pages[i], half, width, height)

            # With an odd page count the last page has no partner; indexing
            # i + 1 unconditionally raised IndexError before anything was written.
            left_page_2 = right_page_2 = None
            if i + 1 < page_count:
                left_page_2 = _crop_all_boxes(readers[1].pages[i + 1], 0, half, height)
                right_page_2 = _crop_all_boxes(
                    readers[3].pages[i + 1], half, width, height
                )

            if compare:
                # Left halves: page i stays in place, page i + 1 moves right.
                blank_page_1 = writer.add_blank_page(width, height)
                blank_page_1.merge_transformed_page(left_page_1, (1, 0, 0, 1, 0, 0))
                if left_page_2 is not None:
                    blank_page_1.merge_transformed_page(
                        left_page_2, (1, 0, 0, 1, half, 0)
                    )
                # Right halves: page i moves left, page i + 1 stays in place.
                blank_page_2 = writer.add_blank_page(width, height)
                blank_page_2.merge_transformed_page(
                    right_page_1, (1, 0, 0, 1, -half, 0)
                )
                if right_page_2 is not None:
                    blank_page_2.merge_transformed_page(
                        right_page_2, (1, 0, 0, 1, 0, 0)
                    )
            else:
                for half_page in (left_page_1, left_page_2, right_page_1, right_page_2):
                    if half_page is not None:
                        writer.add_page(half_page)
    else:
        # Two readers so the left and right halves are separate page objects.
        readers = [PdfReader(input_pdf) for _ in range(2)]
        for i in range(len(readers[0].pages)):
            left_page = readers[0].pages[i]
            right_page = readers[1].pages[i]

            original_media_box = left_page.mediabox
            width = original_media_box.width
            height = original_media_box.height

            left_page.mediabox = RectangleObject((0, 0, width / 2, height))
            right_page.mediabox = RectangleObject((width / 2, 0, width, height))

            writer.add_page(left_page)
            writer.add_page(right_page)

    with open(output_pdf, "wb") as output_file:
        writer.write(output_file)
420
+
421
+
422
# Split a two-column PDF into single-column pages.
@app.route("/cut", methods=["POST"])
def cut():
    """Handle ``POST /cut``: write a ``-cut.pdf`` variant of the uploaded file.

    The output is placed in ``config.translated_dir`` under the input's base
    name with ``.pdf`` replaced by ``-cut.pdf``.

    Returns:
        ``({"status": "success"}, 200)`` as JSON on success, otherwise
        ``({"status": "error", "message": ...}, 500)`` where the message is
        the error text followed by the formatted traceback.
    """
    print("\n############# Cutting #############")
    try:
        # Read the request inside the try block so a malformed request is
        # reported through the JSON error response below, as /cut-compare does.
        input_path, config = get_file_from_request(request)
        translated_path = os.path.join(
            config.translated_dir,
            os.path.basename(input_path).replace(".pdf", "-cut.pdf"),
        )

        # Make sure the output directory exists before writing into it.
        output_dir = os.path.dirname(translated_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        split_and_merge_pdf(input_path, translated_path)

        if not os.path.exists(translated_path):
            # Concatenate: passing the path as a second argument made str(e)
            # render as a tuple repr instead of a readable message.
            raise Exception("[Failed to generate cut files]: " + translated_path)
        print("[Cut file generated]: ", translated_path)
        return jsonify({"status": "success"}), 200
    except Exception as e:
        print(f"[Cut File Error]: {e}")
        # Return more detailed error information to the client.
        error_message = str(e)
        traceback_info = sys.exc_info()
        if traceback_info[2]:
            import traceback

            traceback_str = "".join(traceback.format_tb(traceback_info[2]))
            print(f"## 错误堆栈 ## : {traceback_str}")
            error_message = f"{error_message}\n{traceback_str}"
        return jsonify({"status": "error", "message": error_message}), 500
449
+
450
+
451
# Generate the side-by-side comparison file.
@app.route("/cut-compare", methods=["POST"])
def cut_compare():
    """Handle ``POST /cut-compare``: build a ``-compare.pdf`` file.

    If ``"dual"`` occurs in the uploaded file's path, the comparison file is
    built from it directly; otherwise the file is translated first and the
    comparison is built from the resulting dual file.

    Returns:
        ``({"status": "success"}, 200)`` as JSON on success, otherwise
        ``({"status": "error", "message": ...}, 500)`` where the message is
        the error text followed by the formatted traceback.
    """
    print("\n############# Comparing #############")
    try:
        input_path, config = get_file_from_request(request)
        print(f"## 获取到输入文件 ## : {input_path}")

        if "dual" not in input_path:
            # Not a dual file yet: translate, then compare the dual output.
            print(f"## 需要先翻译再生成对比文件 ## : {input_path}")
            dual_path = translate_pdf(input_path, config)[1]
            translated_path = dual_path.replace("-dual.pdf", "-compare.pdf")
            print(f"## 生成对比文件 ## : {dual_path} -> {translated_path}")
            split_and_merge_pdf(dual_path, translated_path, compare=True)
        else:
            # Already a dual file: derive the output name and compare directly.
            translated_path = os.path.join(
                config.translated_dir,
                os.path.basename(input_path).replace(".pdf", "-compare.pdf"),
            )
            print(f"## 直接生成对比文件 ## : {input_path} -> {translated_path}")

            # Make sure the output directory exists.
            target_dir = os.path.dirname(translated_path)
            os.makedirs(target_dir, exist_ok=True)

            split_and_merge_pdf(input_path, translated_path, compare=True)

        if os.path.exists(translated_path):
            print("[Compare file generated]: ", translated_path)
            return jsonify({"status": "success"}), 200
        raise Exception("[Failed to generate cutted file]: " + translated_path)
    except Exception as e:
        print(f"[cut_compare() Error]: {e}")
        # Return more detailed error information to the client.
        details = str(e)
        tb = sys.exc_info()[2]
        if tb:
            import traceback

            stack_text = "".join(traceback.format_tb(tb))
            print(f"## 错误堆栈 ## : {stack_text}")
            details = f"{details}\n{stack_text}"
        payload = {"status": "error", "message": details}
        return jsonify(payload), 500
492
+
493
+
494
if __name__ == "__main__":
    # An optional first command-line argument overrides the configured port.
    cli_args = sys.argv[1:]
    if cli_args:
        port_num = int(cli_args[0])
    # Listen on all interfaces.
    app.run(host="0.0.0.0", port=port_num)