letterm committed on
Commit
9875b72
·
verified ·
1 Parent(s): 8acb2f9

Upload 8 files

Browse files
Files changed (8) hide show
  1. .py +70 -0
  2. Dockerfile +12 -0
  3. app.py +120 -0
  4. config.yaml +9 -0
  5. requirements.txt +6 -0
  6. search.py +109 -0
  7. tag_extractor.py +272 -0
  8. translations_converted.json +0 -0
.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import argparse
4
+
5
def extract_pairs(obj):
    """
    Recursively collect every string-to-string key/value pair found in obj.

    Dicts and lists are walked depth-first in document order; any other
    value is ignored. When the same key shows up more than once with a
    different value, the first value encountered is kept and a warning is
    printed.

    Args:
        obj: Any JSON-like value (dict, list, or scalar).

    Returns:
        A flat dict of the extracted pairs, in order of first appearance.
    """
    pairs = {}

    def _record(key, value):
        # setdefault stores the value only on first sight, so "first wins".
        kept = pairs.setdefault(key, value)
        if kept != value:
            print(f"警告:键 '{key}' 重复,但值不同:'{kept}' 与 '{value}'。保留首次出现的值。")

    def _walk(node):
        if isinstance(node, dict):
            for key, value in node.items():
                if isinstance(key, str) and isinstance(value, str):
                    _record(key, value)
                else:
                    # Non-string values may hold nested containers.
                    _walk(value)
        elif isinstance(node, list):
            for item in node:
                _walk(item)

    # A single shared accumulator replaces the per-level dict that the
    # previous version built and re-merged at every nesting depth.
    _walk(obj)
    return pairs
35
+
36
def merge_json_pairs(directory, output_file):
    """
    Merge the string pairs of every JSON file in a directory into one file.

    Each ``*.json`` file directly inside ``directory`` is parsed, its
    string-to-string pairs are extracted at every nesting level with
    ``extract_pairs``, and the pairs are merged into one flat dict that is
    written to ``output_file`` as UTF-8 JSON. On a key conflict the value
    seen first is kept and a warning is printed.

    Args:
        directory: Directory to scan (not recursive).
        output_file: Path of the JSON file to write.
    """
    merged_pairs = {}

    # os.listdir returns entries in arbitrary order; sorting makes the
    # "first occurrence wins" rule reproducible between runs and platforms.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".json"):
            continue
        file_path = os.path.join(directory, filename)
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            file_pairs = extract_pairs(data)
            for key, value in file_pairs.items():
                if key in merged_pairs and merged_pairs[key] != value:
                    print(f"警告:文件 '{filename}' 中键 '{key}' 的值 '{value}' 与之前值 '{merged_pairs[key]}' 不同,保留首次出现的值。")
                else:
                    merged_pairs[key] = value
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole merge.
            print(f"读取文件 '{filename}' 时发生错误:{e}")

    try:
        with open(output_file, "w", encoding="utf-8") as out_f:
            json.dump(merged_pairs, out_f, ensure_ascii=False, indent=4)
        print(f"合并后的键值对已写入:{output_file}")
    except Exception as e:
        print(f"写入输出文件 '{output_file}' 时发生错误:{e}")
65
+
66
if __name__ == '__main__':
    # Build the path with os.path.join: the old literal 'public\TagJson'
    # relied on the invalid escape sequence '\T' and only resolved on Windows.
    directory = os.path.join('public', 'TagJson')
    output_file = 'TagJson.json'

    merge_json_pairs(directory, output_file)
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

WORKDIR /app

# Flask-Cors is required because app.py imports flask_cors; without it the
# container exits with ImportError on start. --no-cache-dir keeps the image small.
RUN pip install --no-cache-dir Flask==2.3.3 Flask-Cors==4.0.0 requests==2.31.0 beautifulsoup4==4.13.0 PyYAML==6.0.1 jieba==0.42.1 python-Levenshtein==0.21.1

COPY . .

ENV PORT=3000
EXPOSE 3000

CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import requests
4
+ from flask import Flask, jsonify, send_from_directory, request, abort
5
+ from flask_cors import CORS
6
+
7
+ from tag_extractor import tag_extractorbp
8
+ from search import search_blueprint
9
+
10
app = Flask(__name__)
CORS(app)  # Allow cross-origin requests from any origin
# Directory holding the tag JSON files, resolved against the working directory
TAG_JSON_DIR = os.path.join(os.getcwd(), 'public', 'TagJson')

# Cloudflare Turnstile secret. Read from the environment so the real key
# never has to be committed; the previous placeholder stays as the fallback.
TURNSTILE_SECRET_KEY = os.environ.get("TURNSTILE_SECRET_KEY", "xxx")

app.register_blueprint(search_blueprint, url_prefix='/search')
app.register_blueprint(tag_extractorbp, url_prefix='/api')
18
+
19
@app.route('/')
def index():
    """Serve the front-end entry page."""
    frontend_dir, entry_page = 'static/frontend', 'index.html'
    return send_from_directory(frontend_dir, entry_page)
22
+
23
# Endpoint that verifies a Cloudflare Turnstile token
@app.route('/api/verify-turnstile', methods=['POST'])
def verify_turnstile():
    """
    Validate a Turnstile token against Cloudflare's siteverify endpoint.

    Expects a JSON object body with a ``token`` field.

    Returns:
        200 with ``success: True`` when Cloudflare accepts the token,
        400 when the token is missing or rejected (with Cloudflare's
        ``error-codes`` in the latter case), 500 when the verification
        service cannot be reached.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body; the isinstance guard covers bodies such as a list or string.
    data = request.get_json(silent=True)
    token = data.get('token') if isinstance(data, dict) else None

    if not token:
        return jsonify({"success": False, "message": "缺少Token。"}), 400

    # Ask Cloudflare to verify the token
    try:
        response = requests.post(
            'https://challenges.cloudflare.com/turnstile/v0/siteverify',
            data={
                'secret': TURNSTILE_SECRET_KEY,
                'response': token,
            },
            # Without a timeout a stalled upstream would block this worker forever.
            timeout=10,
        )
        response.raise_for_status()  # Raises on 4xx / 5xx responses
        result = response.json()

        if result.get('success'):
            return jsonify({"success": True, "message": "验证成功。"}), 200
        else:
            error_codes = result.get('error-codes', [])
            return jsonify({"success": False, "message": "验证失败。", "error-codes": error_codes}), 400

    except requests.exceptions.RequestException as e:
        return jsonify({"success": False, "message": f"连接验证服务器时出错: {e}"}), 500
52
+
53
+
54
# List every JSON file in the TagJson directory
@app.route('/api/json-files', methods=['GET'])
def get_json_files():
    """Return the names of all ``.json`` files in TAG_JSON_DIR as a JSON array."""
    try:
        json_names = []
        for entry in os.listdir(TAG_JSON_DIR):
            if entry.endswith('.json'):
                json_names.append(entry)
        return jsonify(json_names), 200
    except Exception as exc:
        return jsonify({"error": str(exc)}), 500
63
+
64
+
65
# Return the top-level keys of one JSON file
@app.route('/api/json-files/<filename>/keys', methods=['GET'])
def get_json_file_keys(filename):
    """
    List the keys of the object stored in ``filename``.

    Aborts with 400 when the name does not end in ``.json`` and with 404
    when the file does not exist; any failure while reading or parsing is
    reported as a 500 JSON error.
    """
    if not filename.endswith('.json'):
        abort(400, description="Invalid file extension")

    target = os.path.join(TAG_JSON_DIR, filename)
    if not os.path.exists(target):
        abort(404, description="File not found")

    try:
        with open(target, 'r', encoding='utf-8') as handle:
            payload = json.load(handle)
        return jsonify(list(payload.keys())), 200
    except Exception as exc:
        return jsonify({"error": str(exc)}), 500
85
+
86
+
87
# Return the value stored under one key of one JSON file
@app.route('/api/json-files/<filename>/keys/<key>', methods=['GET'])
def get_json_key_content(filename, key):
    """
    Return ``data[key]`` from the JSON file ``filename``.

    Aborts with 400 for a non-``.json`` name, 404 when the file or the key
    does not exist; read or parse failures are reported as a 500 JSON error.
    """
    if not filename.endswith('.json'):
        abort(400, description="Invalid file extension")

    file_path = os.path.join(TAG_JSON_DIR, filename)

    # Make sure the file exists
    if not os.path.exists(file_path):
        abort(404, description="File not found")

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        if key in data:
            return jsonify(data[key]), 200
    except Exception as e:
        return jsonify({"error": str(e)}), 500

    # abort() raises an HTTPException, which is itself an Exception: calling
    # it inside the try block above turned every "key not found" into a 500.
    abort(404, description="Key not found in JSON file")
110
+
111
+
112
# Serve raw files straight from the TagJson directory
@app.route('/public/TagJson/<filename>')
def serve_json_file(filename):
    """Send ``filename`` from TAG_JSON_DIR."""
    served = send_from_directory(TAG_JSON_DIR, filename)
    return served
116
+
117
+
118
if __name__ == '__main__':
    # Start the Flask application.
    # Listen on all interfaces and honour PORT so the server is reachable
    # when run in a container; the default port is still Flask's 5000.
    port = int(os.environ.get('PORT', 5000))
    # The Werkzeug debugger allows arbitrary code execution, so it is now
    # opt-in through FLASK_DEBUG instead of always on.
    debug = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true')
    app.run(host='0.0.0.0', port=port, debug=debug)
config.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ baidu_translate_url: 'https://fanyi-api.baidu.com/api/trans/vip/translate'
2
+ tencent_translate_url: "https://tmt.tencentcloudapi.com"
3
+ tencent_secret_id: "tencent_secret_id"
4
+ tencent_secret_key: "tencent_secret_key"
5
+ baidu_translate_credentials:
6
+ - app_id: 'app_id'
7
+ secret_key: 'secret_key'
8
+ - app_id: 'app_id'
9
+ secret_key: 'secret_key'
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
Flask==2.3.3
# Required by app.py (from flask_cors import CORS)
Flask-Cors==4.0.0
requests==2.31.0
beautifulsoup4==4.13.0
PyYAML==6.0.1
jieba==0.42.1
python-Levenshtein==0.21.1
search.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # search.py
2
+ # -*- coding: utf-8 -*-
3
+ from flask import Flask, jsonify, request, Blueprint
4
+ import json
5
+ import re
6
+ import jieba
7
+ import Levenshtein as lev
8
+
9
# Blueprint that carries the search routes defined in this module
search_blueprint = Blueprint(name='search', import_name=__name__)
10
+
11
# Load JSON data from disk
def load_json(file_path):
    """
    Parse the UTF-8 JSON file at ``file_path``.

    Returns the decoded object, or None (after printing the error) when the
    file cannot be opened or parsed.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            parsed = json.load(handle)
    except Exception as exc:
        print(f"Error loading JSON file: {exc}")
        return None
    return parsed
19
+
20
data = load_json('translations_converted.json')
if data is None:
    raise Exception("Failed to load JSON data. Please check the file path and format.")

# Tokenise every key and value once at import time so fuzzy search does not
# have to re-segment the whole dataset on each request.
segmented_data = {
    entry_key: {
        "key_words": list(jieba.cut(str(entry_key))),
        "value_words": list(jieba.cut(str(entry_value))),
    }
    for entry_key, entry_value in data.items()
}
31
+
32
# Subsequence search implemented with a regular expression
def search_keywords(data, query, max_results):
    """
    Find entries whose key or value contains the characters of ``query`` in order.

    Each character of ``query`` is escaped and the characters are joined
    with ``.*``, so other characters may sit between them; matching ignores
    case. Scanning stops once ``max_results`` entries have been collected.

    Returns:
        A list of single-item dicts ``{key: value}`` in ``data`` order.
    """
    matcher = re.compile('.*'.join(re.escape(ch) for ch in query), re.IGNORECASE)
    hits = []
    for k, v in data.items():
        # Keys and values are matched on their string form
        if not (matcher.search(str(k)) or matcher.search(str(v))):
            continue
        hits.append({k: v})
        if len(hits) >= max_results:
            return hits
    return hits
45
+
46
# Exact-match search
def exact_search(data, query, max_results):
    """
    Find entries whose key or value equals ``query``, ignoring case.

    Keys and values are compared on their lower-cased string form.
    Scanning stops once ``max_results`` entries have been collected.

    Returns:
        A list of single-item dicts ``{key: value}`` in ``data`` order.
    """
    needle = query.lower()
    matches = []
    for k, v in data.items():
        if needle in (str(k).lower(), str(v).lower()):
            matches.append({k: v})
            if len(matches) >= max_results:
                return matches
    return matches
56
+
57
# Fuzzy-match search
def fuzzy_search(data, query, max_distance, max_results):
    """
    Find entries whose key or value approximately contains every query word.

    ``query`` is segmented with jieba. An entry matches when each query word
    is within ``max_distance`` Levenshtein edits of some word of the key, or
    each query word is within that distance of some word of the value.
    Scanning stops once ``max_results`` entries have been collected.

    Returns:
        A list of single-item dicts ``{key: value}`` in ``data`` order.
    """
    results = []
    query_words = list(jieba.cut(query))

    def _all_words_close(candidates):
        # True when every query word has a near neighbour among candidates.
        return all(
            any(lev.distance(qw, cw) <= max_distance for cw in candidates)
            for qw in query_words
        )

    for key, value in data.items():
        seg = segmented_data.get(key)
        if seg is None:
            # Key absent from the precomputed cache (data is not the module
            # dataset): segment on the fly instead of raising KeyError.
            seg = {
                "key_words": list(jieba.cut(str(key))),
                "value_words": list(jieba.cut(str(value))),
            }
        if _all_words_close(seg["key_words"]) or _all_words_close(seg["value_words"]):
            results.append({key: value})
            if len(results) >= max_results:
                break
    return results
73
+
74
# Cap the number of returned results at 300
def limit_max_results(max_results):
    """Return 300 when ``max_results`` is None or above 300, else ``max_results`` unchanged."""
    return 300 if (max_results is None or max_results > 300) else max_results
79
+
80
@search_blueprint.route('/regular_expression', methods=['GET'])
def regular_expression_api():
    """
    Handle GET /regular_expression.

    Reads ``query`` and the optional integer ``max_results`` from the query
    string, responds 400 when ``query`` is missing or empty, and otherwise
    returns the ``search_keywords`` matches as JSON.
    """
    params = request.args
    query = params.get('query')
    requested = params.get('max_results', type=int)
    if not query:
        return jsonify({"error": "No query provided"}), 400
    return jsonify(search_keywords(data, query, limit_max_results(requested)))
89
+
90
@search_blueprint.route('/fuzzy_search', methods=['GET'])
def fuzzy_search_api():
    """
    Handle GET /fuzzy_search.

    Reads ``query`` and the optional integer ``max_results`` from the query
    string, responds 400 when ``query`` is missing or empty, and otherwise
    returns the ``fuzzy_search`` matches as JSON.
    """
    params = request.args
    query = params.get('query')
    requested = params.get('max_results', type=int)
    if not query:
        return jsonify({"error": "No query provided"}), 400
    capped = limit_max_results(requested)
    # The edit-distance tolerance is fixed at 1; adjust here if needed
    matches = fuzzy_search(data, query, max_distance=1, max_results=capped)
    return jsonify(matches)
100
+
101
@search_blueprint.route('/exact_search', methods=['GET'])
def exact_search_api():
    """
    Handle GET /exact_search.

    Reads ``query`` and the optional integer ``max_results`` from the query
    string, responds 400 when ``query`` is missing or empty, and otherwise
    returns the ``exact_search`` matches as JSON.
    """
    params = request.args
    query = params.get('query')
    requested = params.get('max_results', type=int)
    if not query:
        return jsonify({"error": "No query provided"}), 400
    return jsonify(exact_search(data, query, limit_max_results(requested)))
tag_extractor.py ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import hashlib
2
+ import hmac
3
+ import json
4
+ import random
5
+ import time
6
+ from datetime import datetime
7
+ import requests
8
+ from flask import Blueprint, request, jsonify
9
+ import yaml
10
+
11
# Load configuration from a YAML file
def load_config(yaml_file):
    """
    Parse ``yaml_file`` with ``yaml.safe_load`` and return the result.

    The file is opened as UTF-8 explicitly so parsing does not depend on
    the platform's default encoding.
    """
    with open(yaml_file, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)
15
+
16
config = load_config('config.yaml')

# Baidu Translate API settings
BAIDU_TRANSLATE_URL = config['baidu_translate_url']
BAIDU_TRANSLATE_CREDENTIALS = config['baidu_translate_credentials']

# Tencent Translate API settings
TENCENT_SECRET_ID = config['tencent_secret_id']
TENCENT_SECRET_KEY = config['tencent_secret_key']
TENCENT_TRANSLATE_URL = config['tencent_translate_url']

# Position of the next Baidu credential to hand out (round-robin cursor)
current_index = 0
29
+
30
def get_next_credentials():
    """
    Return the next Baidu credential entry, rotating round-robin.

    Reads the entry at the module-level ``current_index`` and then advances
    that index, wrapping to 0 after the last entry.
    """
    global current_index
    pool = BAIDU_TRANSLATE_CREDENTIALS
    chosen = pool[current_index]
    current_index = (current_index + 1) % len(pool)
    return chosen
38
+
39
def sign(key, msg):
    """
    Return the raw HMAC-SHA256 digest of ``msg`` under ``key``.

    ``key`` is bytes; ``msg`` is a str that is UTF-8 encoded before signing.
    """
    mac = hmac.new(key, digestmod=hashlib.sha256)
    mac.update(msg.encode("utf-8"))
    return mac.digest()
44
+
45
def generate_tc3_signature(secret_key, date, service, string_to_sign):
    """
    Compute the Tencent Cloud TC3-HMAC-SHA256 signature as a hex string.

    The signing key is derived by chaining HMAC-SHA256 over the date, the
    service name and the literal "tc3_request", seeded with
    "TC3" + secret_key; that key then signs ``string_to_sign``.
    """
    derived = ("TC3" + secret_key).encode("utf-8")
    for component in (date, service, "tc3_request"):
        derived = sign(derived, component)
    final_mac = hmac.new(derived, string_to_sign.encode("utf-8"), hashlib.sha256)
    return final_mac.hexdigest()
53
+
54
def translate_with_tencent(texts, from_lang='auto', to_lang='zh'):
    """
    Translate a list of texts with the Tencent TextTranslate API.

    The texts are joined with newlines into one request that is signed with
    TC3-HMAC-SHA256; the translated text is split on newlines again.

    Args:
        texts: List of strings to translate.
        from_lang: Source language code sent as ``Source``.
        to_lang: Target language code sent as ``Target``.

    Returns:
        The list of translated lines, or None when the request fails or the
        response carries no ``TargetText``.
    """
    service = "tmt"
    # NOTE(review): the signed Host header is fixed here while the request URL
    # comes from config; confirm both refer to the same endpoint.
    host = "tmt.tencentcloudapi.com"
    action = "TextTranslate"
    version = "2018-03-21"
    region = "ap-beijing"
    algorithm = "TC3-HMAC-SHA256"
    timestamp = int(time.time())
    # UTC date of the request; time.gmtime replaces the
    # datetime.utcfromtimestamp call that is deprecated since Python 3.12.
    date = time.strftime("%Y-%m-%d", time.gmtime(timestamp))

    # Request payload
    payload = {
        "SourceText": "\n".join(texts),
        "Source": from_lang,
        "Target": to_lang,
        "ProjectId": 0
    }
    payload_str = json.dumps(payload)

    # ---- Step 1: build the canonical request ----
    http_request_method = "POST"
    canonical_uri = "/"
    canonical_querystring = ""
    ct = "application/json; charset=utf-8"
    canonical_headers = f"content-type:{ct}\nhost:{host}\nx-tc-action:{action.lower()}\n"
    signed_headers = "content-type;host;x-tc-action"
    hashed_request_payload = hashlib.sha256(payload_str.encode("utf-8")).hexdigest()
    canonical_request = "\n".join([
        http_request_method,
        canonical_uri,
        canonical_querystring,
        canonical_headers,
        signed_headers,
        hashed_request_payload,
    ])

    # ---- Step 2: build the string to sign ----
    credential_scope = date + "/" + service + "/" + "tc3_request"
    hashed_canonical_request = hashlib.sha256(canonical_request.encode("utf-8")).hexdigest()
    string_to_sign = "\n".join([
        algorithm,
        str(timestamp),
        credential_scope,
        hashed_canonical_request,
    ])

    # ---- Step 3: compute the signature ----
    signature = generate_tc3_signature(TENCENT_SECRET_KEY, date, service, string_to_sign)

    # ---- Step 4: assemble the Authorization header ----
    authorization = (algorithm + " " +
                     "Credential=" + TENCENT_SECRET_ID + "/" + credential_scope + ", " +
                     "SignedHeaders=" + signed_headers + ", " +
                     "Signature=" + signature)

    # ---- Step 5: send the request ----
    headers = {
        "Authorization": authorization,
        "Content-Type": ct,
        "Host": host,
        "X-TC-Action": action,
        "X-TC-Timestamp": str(timestamp),
        "X-TC-Version": version,
        "X-TC-Region": region
    }

    try:
        # The timeout keeps a stalled upstream from hanging the worker and
        # lets the caller fall back to the other provider.
        response = requests.post(TENCENT_TRANSLATE_URL, headers=headers, data=payload_str, timeout=10)
        response.raise_for_status()
        result = response.json()
        if "Response" in result and "TargetText" in result["Response"]:
            return result["Response"]["TargetText"].split("\n")
        else:
            return None
    except Exception as e:
        print(f"腾讯翻译API请求失败: {e}")
        return None
130
+
131
def translate_with_baidu(texts, from_lang='auto', to_lang='zh'):
    """
    Translate a list of texts with the Baidu Translate API.

    Uses the next credential pair from the round-robin pool, joins the texts
    with newlines into one query, and signs the request with the MD5 of
    app_id + query + salt + secret_key.

    Args:
        texts: List of strings to translate.
        from_lang: Source language code sent as ``from``.
        to_lang: Target language code sent as ``to``.

    Returns:
        The list of translated strings, or None when the request fails or
        the response has no ``trans_result``.
    """
    credentials = get_next_credentials()
    app_id = credentials['app_id']
    secret_key = credentials['secret_key']

    salt = random.randint(32768, 65536)
    query = '\n'.join(texts)
    sign_str = app_id + query + str(salt) + secret_key
    # Named request_sign so it no longer shadows the module-level sign().
    request_sign = hashlib.md5(sign_str.encode('utf-8')).hexdigest()

    params = {
        'q': query,
        'from': from_lang,
        'to': to_lang,
        'appid': app_id,
        'salt': salt,
        'sign': request_sign
    }

    try:
        # The timeout keeps a stalled upstream from hanging the worker.
        response = requests.get(BAIDU_TRANSLATE_URL, params=params, timeout=10)
        response.raise_for_status()
        result = response.json()
        if 'trans_result' in result:
            return [item['dst'] for item in result['trans_result']]
        else:
            return None
    except Exception as e:
        print(f"百度翻译API请求失败: {e}")
        return None
164
+
165
def translate_texts(texts, from_lang='auto', to_lang='zh'):
    """
    Translate ``texts``, trying Tencent first and Baidu second.

    Returns the first provider result that is not None; when both providers
    return None, the original ``texts`` are returned untranslated.
    """
    providers = (translate_with_tencent, translate_with_baidu)
    for provider in providers:
        outcome = provider(texts, from_lang, to_lang)
        if outcome is not None:
            return outcome

    # Neither provider produced a result: hand back the input unchanged
    return texts
182
+
183
# Blueprint that carries the translation routes defined in this module
tag_extractorbp = Blueprint(name='tag_extractor', import_name=__name__)

# The former extract_tags endpoint was removed: the front end now fetches
# and parses the HTML itself.
187
+
188
@tag_extractorbp.route('/Tagtranslate', methods=['POST'])
def translate():
    """
    Translate a list of texts.

    Request body:  {"texts": ["text1", "text2", ...]}
    Response body: {"translated_texts": ["...", "...", ...]}

    Blank entries and non-string entries are dropped before translation, so
    the result may be shorter than the input. Responds 400 for a missing or
    malformed body and 500 for unexpected failures.
    """
    try:
        # silent=True returns None for a missing or malformed JSON body, so
        # client errors are answered with 400 below instead of falling into
        # the generic 500 handler.
        data = request.get_json(silent=True)
        if not data:
            return jsonify({"error": "请求体为空"}), 400

        texts = data.get('texts') if isinstance(data, dict) else None
        # Test for None rather than falsiness: an empty list is a valid
        # request and must reach the empty-result branch below.
        if texts is None:
            return jsonify({"error": "缺少texts参数"}), 400

        if not isinstance(texts, list):
            return jsonify({"error": "texts参数必须是数组"}), 400

        # Drop blank strings; non-string items are skipped too instead of
        # crashing on .strip().
        valid_texts = [text.strip() for text in texts if isinstance(text, str) and text.strip()]
        if not valid_texts:
            return jsonify({"translated_texts": []}), 200

        print(f"开始翻译 {len(valid_texts)} 个文本...")
        translated_texts = translate_texts(valid_texts)
        print("翻译完成")

        return jsonify({"translated_texts": translated_texts})

    except Exception as e:
        print(f"翻译接口错误: {e}")
        return jsonify({"error": f"服务器内部错误: {str(e)}"}), 500
224
+
225
@tag_extractorbp.route('/translate_batch', methods=['POST'])
def translate_batch():
    """
    Translate a list of texts with selectable languages.

    Request body: {
        "texts": ["text1", "text2", ...],
        "from_lang": "auto",   (optional, default "auto")
        "to_lang": "zh"        (optional, default "zh")
    }
    Response body: {"translated_texts": [...], "from_lang": ..., "to_lang": ..., "count": N}
    (an empty input yields just {"translated_texts": []}).

    Blank entries and non-string entries are dropped before translation, so
    the result may be shorter than the input. Responds 400 for a missing or
    malformed body and 500 for unexpected failures.
    """
    try:
        # silent=True returns None for a missing or malformed JSON body, so
        # client errors are answered with 400 below instead of falling into
        # the generic 500 handler.
        data = request.get_json(silent=True)
        if not data:
            return jsonify({"error": "请求体为空"}), 400

        texts = data.get('texts') if isinstance(data, dict) else None
        # Test for None rather than falsiness: an empty list is a valid
        # request and must reach the empty-result branch below.
        if texts is None:
            return jsonify({"error": "缺少texts参数"}), 400

        if not isinstance(texts, list):
            return jsonify({"error": "texts参数必须是数组"}), 400

        from_lang = data.get('from_lang', 'auto')
        to_lang = data.get('to_lang', 'zh')

        # Drop blank strings; non-string items are skipped too instead of
        # crashing on .strip().
        valid_texts = [text.strip() for text in texts if isinstance(text, str) and text.strip()]
        if not valid_texts:
            return jsonify({"translated_texts": []}), 200

        print(f"开始批量翻译 {len(valid_texts)} 个文本 ({from_lang} -> {to_lang})...")
        translated_texts = translate_texts(valid_texts, from_lang, to_lang)
        print("批量翻译完成")

        return jsonify({
            "translated_texts": translated_texts,
            "from_lang": from_lang,
            "to_lang": to_lang,
            "count": len(translated_texts)
        })

    except Exception as e:
        print(f"批量翻译接口错误: {e}")
        return jsonify({"error": f"服务器内部错误: {str(e)}"}), 500
translations_converted.json ADDED
The diff for this file is too large to render. See raw diff