Spaces:
Running
Running
Upload 8 files
Browse files- .py +70 -0
- Dockerfile +12 -0
- app.py +120 -0
- config.yaml +9 -0
- requirements.txt +6 -0
- search.py +109 -0
- tag_extractor.py +272 -0
- translations_converted.json +0 -0
.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import argparse
|
| 4 |
+
|
| 5 |
+
def _merge_first_wins(target, incoming):
    """Merge ``incoming`` into ``target`` in place; on a conflicting key keep the existing value and warn."""
    for key, value in incoming.items():
        if key in target and target[key] != value:
            print(f"警告:键 '{key}' 重复,但值不同:'{target[key]}' 与 '{value}'。保留首次出现的值。")
        else:
            target[key] = value


def extract_pairs(obj):
    """Recursively collect every string-to-string key/value pair found in ``obj``.

    Dicts and lists are walked at any nesting depth; any other type
    contributes nothing. When the same key is met again with a different
    value, the first value is kept and a warning is printed.

    Args:
        obj: A decoded JSON value (dict, list, or scalar).

    Returns:
        A flat dict of all string-to-string pairs found.
    """
    pairs = {}
    if isinstance(obj, dict):
        for key, value in obj.items():
            if isinstance(key, str) and isinstance(value, str):
                _merge_first_wins(pairs, {key: value})
            else:
                # Only containers can hold further pairs; a string value has
                # nothing to descend into, so recursion is limited to this branch.
                _merge_first_wins(pairs, extract_pairs(value))
    elif isinstance(obj, list):
        for item in obj:
            _merge_first_wins(pairs, extract_pairs(item))
    return pairs
|
| 35 |
+
|
| 36 |
+
def merge_json_pairs(directory, output_file):
    """Merge the string key/value pairs of every ``.json`` file in a directory.

    Each file is parsed, flattened with ``extract_pairs`` and folded into a
    single flat dict, which is then written to ``output_file`` as UTF-8 JSON.
    When a key appears again with a different value, the first value is kept
    and a warning is printed. A file that cannot be read or parsed is
    reported and skipped.

    Args:
        directory: Directory whose ``*.json`` files are merged.
        output_file: Path of the JSON file to write.
    """
    merged_pairs = {}

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # "first occurrence wins" rule reproducible across runs and platforms.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".json"):
            continue
        file_path = os.path.join(directory, filename)
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            file_pairs = extract_pairs(data)
        except (OSError, ValueError, RecursionError) as e:
            # ValueError covers JSON syntax errors and bad UTF-8;
            # RecursionError covers pathologically deep nesting.
            print(f"读取文件 '{filename}' 时发生错误:{e}")
            continue

        for key, value in file_pairs.items():
            if key in merged_pairs and merged_pairs[key] != value:
                print(f"警告:文件 '{filename}' 中键 '{key}' 的值 '{value}' 与之前值 '{merged_pairs[key]}' 不同,保留首次出现的值。")
            else:
                merged_pairs[key] = value

    try:
        with open(output_file, "w", encoding="utf-8") as out_f:
            json.dump(merged_pairs, out_f, ensure_ascii=False, indent=4)
    except (OSError, ValueError) as e:
        print(f"写入输出文件 '{output_file}' 时发生错误:{e}")
    else:
        print(f"合并后的键值对已写入:{output_file}")
|
| 65 |
+
|
| 66 |
+
if __name__ == '__main__':
    # The defaults reproduce the previously hard-coded locations. The input
    # directory is built with os.path.join because the old literal
    # 'public\TagJson' contained an invalid escape sequence and only named a
    # real directory on Windows.
    parser = argparse.ArgumentParser(
        description="Merge the string key/value pairs of all JSON files in a directory."
    )
    parser.add_argument(
        'directory',
        nargs='?',
        default=os.path.join('public', 'TagJson'),
        help="directory containing the *.json files to merge",
    )
    parser.add_argument(
        'output_file',
        nargs='?',
        default='TagJson.json',
        help="path of the merged JSON file to write",
    )
    args = parser.parse_args()

    merge_json_pairs(args.directory, args.output_file)
|
Dockerfile
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim

WORKDIR /app

# Flask-Cors must be installed because app.py imports flask_cors at startup.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir Flask==2.3.3 Flask-Cors==4.0.0 requests==2.31.0 beautifulsoup4==4.13.0 PyYAML==6.0.1 jieba==0.42.1 python-Levenshtein==0.21.1

COPY . .

ENV PORT=3000
EXPOSE 3000

CMD ["python", "app.py"]
|
app.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import requests
|
| 4 |
+
from flask import Flask, jsonify, send_from_directory, request, abort
|
| 5 |
+
from flask_cors import CORS
|
| 6 |
+
|
| 7 |
+
from tag_extractor import tag_extractorbp
|
| 8 |
+
from search import search_blueprint
|
| 9 |
+
|
| 10 |
+
app = Flask(__name__)
CORS(app)  # allow cross-origin requests from any origin
TAG_JSON_DIR = os.path.join(os.getcwd(), 'public', 'TagJson')

# Read the Turnstile secret from the environment so the real key never has to
# be committed; "xxx" stays as the placeholder fallback used before.
TURNSTILE_SECRET_KEY = os.environ.get("TURNSTILE_SECRET_KEY", "xxx")

app.register_blueprint(search_blueprint, url_prefix='/search')
app.register_blueprint(tag_extractorbp, url_prefix='/api')
|
| 18 |
+
|
| 19 |
+
@app.route('/')
def index():
    """Serve the front-end entry page, ``static/frontend/index.html``."""
    frontend_dir = 'static/frontend'
    return send_from_directory(frontend_dir, 'index.html')
|
| 22 |
+
|
| 23 |
+
# 用于Turnstile验证的新接口
|
| 24 |
+
@app.route('/api/verify-turnstile', methods=['POST'])
def verify_turnstile():
    """Validate a Cloudflare Turnstile token posted as ``{"token": "..."}``.

    Returns:
        200 with ``success: True`` when Cloudflare accepts the token;
        400 when the token is missing or rejected (with Cloudflare's
        ``error-codes``); 500 when the verification service cannot be
        reached or returns an unusable response.
    """
    # silent=True yields None instead of raising for a missing or malformed
    # JSON body, so such requests fall through to the "missing token" 400.
    data = request.get_json(silent=True)
    token = data.get('token') if isinstance(data, dict) else None

    if not token:
        return jsonify({"success": False, "message": "缺少Token。"}), 400

    try:
        response = requests.post(
            'https://challenges.cloudflare.com/turnstile/v0/siteverify',
            data={
                'secret': TURNSTILE_SECRET_KEY,
                'response': token,
            },
            # Without a timeout a stalled upstream would block this worker forever.
            timeout=10,
        )
        response.raise_for_status()  # raise on 4xx / 5xx status codes
        result = response.json()
    except requests.exceptions.RequestException as e:
        return jsonify({"success": False, "message": f"连接验证服务器时出错: {e}"}), 500

    if result.get('success'):
        return jsonify({"success": True, "message": "验证成功。"}), 200

    error_codes = result.get('error-codes', [])
    return jsonify({"success": False, "message": "验证失败。", "error-codes": error_codes}), 400
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
# 列出TagJson目录中的所有JSON文件
|
| 55 |
+
@app.route('/api/json-files', methods=['GET'])
def get_json_files():
    """Return the names of all ``.json`` files in ``TAG_JSON_DIR`` as a JSON array.

    Names are sorted so the response is stable; ``os.listdir`` alone gives no
    ordering guarantee. A directory that cannot be listed yields a 500 with
    the error text.
    """
    try:
        files = sorted(f for f in os.listdir(TAG_JSON_DIR) if f.endswith('.json'))
    except OSError as e:
        return jsonify({"error": str(e)}), 500
    return jsonify(files), 200
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
# 获取指定JSON文件中的字典键
|
| 66 |
+
@app.route('/api/json-files/<filename>/keys', methods=['GET'])
def get_json_file_keys(filename):
    """Return the top-level keys of a JSON file in ``TAG_JSON_DIR``.

    Responds 400 for a name that does not end in ``.json`` or that contains a
    path separator, 404 when no such regular file exists, and 500 when the
    file cannot be read, is not valid JSON, or its root is not an object.
    """
    if not filename.endswith('.json'):
        abort(400, description="Invalid file extension")

    # The route converter already excludes "/", but on Windows a backslash
    # would still let the name escape TAG_JSON_DIR.
    if os.path.basename(filename) != filename:
        abort(400, description="Invalid file name")

    file_path = os.path.join(TAG_JSON_DIR, filename)

    # isfile() rather than exists(): a directory named "*.json" is "not found".
    if not os.path.isfile(file_path):
        abort(404, description="File not found")

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        return jsonify({"error": str(e)}), 500

    if not isinstance(data, dict):
        return jsonify({"error": "JSON root is not an object"}), 500

    return jsonify(list(data)), 200
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
# 获取指定JSON文件和字典键的内容
|
| 88 |
+
@app.route('/api/json-files/<filename>/keys/<key>', methods=['GET'])
def get_json_key_content(filename, key):
    """Return the value stored under ``key`` at the top level of a JSON file.

    Responds 400 for a name that does not end in ``.json`` or that contains a
    path separator, 404 when the file or the key does not exist, and 500 when
    the file cannot be read or is not valid JSON.
    """
    if not filename.endswith('.json'):
        abort(400, description="Invalid file extension")

    # The route converter already excludes "/", but on Windows a backslash
    # would still let the name escape TAG_JSON_DIR.
    if os.path.basename(filename) != filename:
        abort(400, description="Invalid file name")

    file_path = os.path.join(TAG_JSON_DIR, filename)

    if not os.path.isfile(file_path):
        abort(404, description="File not found")

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        return jsonify({"error": str(e)}), 500

    # abort() works by raising an HTTPException. It must stay outside the try
    # block above, otherwise a broad handler would turn the 404 into a 500.
    if not isinstance(data, dict) or key not in data:
        abort(404, description="Key not found in JSON file")

    return jsonify(data[key]), 200
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
# 设置Flask的静态文件目录
|
| 113 |
+
@app.route('/public/TagJson/<filename>')
def serve_json_file(filename):
    """Serve ``filename`` from ``TAG_JSON_DIR`` as a static file."""
    file_response = send_from_directory(TAG_JSON_DIR, filename)
    return file_response
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
if __name__ == '__main__':
    # Start the development server.
    # - Debug mode exposes an interactive debugger, so it is opt-in through
    #   FLASK_DEBUG instead of always on.
    # - HOST / PORT come from the environment so the server is reachable from
    #   outside a container and can honour a PORT the image sets.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(
        host=os.environ.get('HOST', '0.0.0.0'),
        port=int(os.environ.get('PORT', '5000')),
        debug=debug_enabled,
    )
|
config.yaml
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
baidu_translate_url: 'https://fanyi-api.baidu.com/api/trans/vip/translate'
|
| 2 |
+
tencent_translate_url: "https://tmt.tencentcloudapi.com"
|
| 3 |
+
tencent_secret_id: "tencent_secret_id"
|
| 4 |
+
tencent_secret_key: "tencent_secret_key"
|
| 5 |
+
baidu_translate_credentials:
|
| 6 |
+
- app_id: 'app_id'
|
| 7 |
+
secret_key: 'secret_key'
|
| 8 |
+
- app_id: 'app_id'
|
| 9 |
+
secret_key: 'secret_key'
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Flask==2.3.3
# Required by app.py (from flask_cors import CORS).
Flask-Cors==4.0.0
requests==2.31.0
beautifulsoup4==4.13.0
PyYAML==6.0.1
jieba==0.42.1
python-Levenshtein==0.21.1
|
search.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# search.py
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
from flask import Flask, jsonify, request, Blueprint
|
| 4 |
+
import json
|
| 5 |
+
import re
|
| 6 |
+
import jieba
|
| 7 |
+
import Levenshtein as lev
|
| 8 |
+
|
| 9 |
+
search_blueprint = Blueprint('search', __name__)
|
| 10 |
+
|
| 11 |
+
# 加载 JSON 数据
|
| 12 |
+
def load_json(file_path):
    """Parse the UTF-8 JSON file at ``file_path``.

    Returns the decoded value, or ``None`` (after printing the error) when
    the file cannot be opened or parsed.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as source:
            parsed = json.load(source)
    except Exception as err:
        print(f"Error loading JSON file: {err}")
        return None
    else:
        return parsed
|
| 19 |
+
|
| 20 |
+
data = load_json('translations_converted.json')
if data is None:
    raise Exception("Failed to load JSON data. Please check the file path and format.")

# Tokenise every key and value once at import time so that fuzzy search does
# not have to re-segment the whole data set on each request.
segmented_data = {}
for key, value in data.items():
    segmented_data[key] = dict(
        key_words=list(jieba.cut(str(key))),
        value_words=list(jieba.cut(str(value))),
    )
|
| 31 |
+
|
| 32 |
+
# 正则匹配搜索函数
|
| 33 |
+
def search_keywords(data, query, max_results):
    """Return entries whose key or value contains the characters of ``query`` in order.

    The query is turned into a case-insensitive regular expression in which
    each (escaped) character is separated from the next by ``.*``, so the
    characters must appear in order but need not be adjacent.

    Args:
        data: Mapping to search; keys and values are compared as strings.
        query: Characters to look for.
        max_results: Maximum number of entries to return.

    Returns:
        A list of single-entry ``{key: value}`` dicts, in ``data`` order.
    """
    # The limit used to be checked only after appending, so a limit of 0
    # (or below) still returned one entry.
    if max_results is not None and max_results <= 0:
        return []

    pattern = '.*'.join(map(re.escape, query))
    regex = re.compile(pattern, re.IGNORECASE)

    results = []
    for key, value in data.items():
        if regex.search(str(key)) or regex.search(str(value)):
            results.append({key: value})
            if len(results) >= max_results:
                break
    return results
|
| 45 |
+
|
| 46 |
+
# 精确匹配搜索函数
|
| 47 |
+
def exact_search(data, query, max_results):
    """Return entries whose key or value equals ``query``, ignoring case.

    Args:
        data: Mapping to search; keys and values are compared as strings.
        query: Text that must equal the whole key or the whole value.
        max_results: Maximum number of entries to return.

    Returns:
        A list of single-entry ``{key: value}`` dicts, in ``data`` order.
    """
    # The limit used to be checked only after appending, so a limit of 0
    # (or below) still returned one entry.
    if max_results is not None and max_results <= 0:
        return []

    query_lower = query.lower()
    results = []
    for key, value in data.items():
        if query_lower in (str(key).lower(), str(value).lower()):
            results.append({key: value})
            if len(results) >= max_results:
                break
    return results
|
| 56 |
+
|
| 57 |
+
# 模糊匹配搜索函数
|
| 58 |
+
def fuzzy_search(data, query, max_distance, max_results):
    """Return entries whose key or value fuzzily matches every word of ``query``.

    The query is segmented with jieba. An entry matches when, for the key
    words or for the value words, every query word is within ``max_distance``
    Levenshtein edits of at least one of them.

    Args:
        data: Mapping to search.
        query: Text to segment and match.
        max_distance: Largest edit distance still counted as a match.
        max_results: Maximum number of entries to return.

    Returns:
        A list of single-entry ``{key: value}`` dicts, in ``data`` order.
    """
    # The limit used to be checked only after appending, so a limit of 0
    # (or below) still returned one entry.
    if max_results is not None and max_results <= 0:
        return []

    def covers(words):
        """True when every query word is close enough to some word in ``words``."""
        return all(
            any(lev.distance(qw, w) <= max_distance for w in words)
            for qw in query_words
        )

    results = []
    query_words = list(jieba.cut(query))
    for key, value in data.items():
        seg = segmented_data.get(key)
        if seg is None:
            # ``data`` may contain keys that were not pre-segmented at import
            # time; segment those on the fly instead of raising KeyError.
            seg = {
                "key_words": list(jieba.cut(str(key))),
                "value_words": list(jieba.cut(str(value))),
            }
        if covers(seg["key_words"]) or covers(seg["value_words"]):
            results.append({key: value})
            if len(results) >= max_results:
                break
    return results
|
| 73 |
+
|
| 74 |
+
# 限制返回最大数量不超过300
|
| 75 |
+
def limit_max_results(max_results, cap=300):
    """Clamp a requested result count to the range ``0..cap``.

    Args:
        max_results: Requested count, or ``None`` when the caller gave none.
        cap: Upper bound, also used when ``max_results`` is ``None``.

    Returns:
        ``cap`` when ``max_results`` is ``None`` or larger than ``cap``,
        ``0`` when it is negative, otherwise ``max_results`` unchanged.
    """
    if max_results is None:
        return cap
    return max(0, min(max_results, cap))
|
| 79 |
+
|
| 80 |
+
@search_blueprint.route('/regular_expression', methods=['GET'])
def regular_expression_api():
    """Handle ``GET /regular_expression?query=...&max_results=...``.

    Responds 400 when ``query`` is missing or empty; otherwise returns the
    ``search_keywords`` matches as a JSON array.
    """
    query = request.args.get('query')
    if not query:
        return jsonify({"error": "No query provided"}), 400

    limit = limit_max_results(request.args.get('max_results', type=int))
    return jsonify(search_keywords(data, query, limit))
|
| 89 |
+
|
| 90 |
+
@search_blueprint.route('/fuzzy_search', methods=['GET'])
def fuzzy_search_api():
    """Handle ``GET /fuzzy_search?query=...&max_results=...``.

    Responds 400 when ``query`` is missing or empty; otherwise returns the
    ``fuzzy_search`` matches as a JSON array.
    """
    query = request.args.get('query')
    if not query:
        return jsonify({"error": "No query provided"}), 400

    limit = limit_max_results(request.args.get('max_results', type=int))
    # An edit distance of 1 per word; adjust max_distance as needed.
    matches = fuzzy_search(data, query, max_distance=1, max_results=limit)
    return jsonify(matches)
|
| 100 |
+
|
| 101 |
+
@search_blueprint.route('/exact_search', methods=['GET'])
def exact_search_api():
    """Handle ``GET /exact_search?query=...&max_results=...``.

    Responds 400 when ``query`` is missing or empty; otherwise returns the
    ``exact_search`` matches as a JSON array.
    """
    query = request.args.get('query')
    if not query:
        return jsonify({"error": "No query provided"}), 400

    limit = limit_max_results(request.args.get('max_results', type=int))
    return jsonify(exact_search(data, query, limit))
|
tag_extractor.py
ADDED
|
@@ -0,0 +1,272 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import hashlib
|
| 2 |
+
import hmac
|
| 3 |
+
import json
|
| 4 |
+
import random
|
| 5 |
+
import time
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
import requests
|
| 8 |
+
from flask import Blueprint, request, jsonify
|
| 9 |
+
import yaml
|
| 10 |
+
|
| 11 |
+
# 从yaml文件加载配置
|
| 12 |
+
def load_config(yaml_file):
    """Load and return the settings stored in the YAML file ``yaml_file``.

    The file is read as UTF-8 explicitly; without an encoding argument
    ``open`` falls back to the platform default, which is not UTF-8
    everywhere.
    """
    with open(yaml_file, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)
|
| 15 |
+
|
| 16 |
+
config = load_config('config.yaml')

# Baidu Translate API settings
BAIDU_TRANSLATE_URL, BAIDU_TRANSLATE_CREDENTIALS = (
    config['baidu_translate_url'],
    config['baidu_translate_credentials'],
)

# Tencent Translate API settings
TENCENT_SECRET_ID, TENCENT_SECRET_KEY, TENCENT_TRANSLATE_URL = (
    config['tencent_secret_id'],
    config['tencent_secret_key'],
    config['tencent_translate_url'],
)

# Position of the next Baidu credential pair used by the round-robin rotation
current_index = 0
|
| 29 |
+
|
| 30 |
+
def get_next_credentials():
    """Return the next Baidu credential entry, rotating through the list round-robin."""
    global current_index
    slot = current_index
    selected = BAIDU_TRANSLATE_CREDENTIALS[slot]
    # Advance, wrapping back to the first entry after the last one.
    current_index = (slot + 1) % len(BAIDU_TRANSLATE_CREDENTIALS)
    return selected
|
| 38 |
+
|
| 39 |
+
def sign(key, msg):
    """Return the raw HMAC-SHA256 digest of ``msg`` (UTF-8 encoded) under the bytes key ``key``."""
    mac = hmac.new(key, digestmod=hashlib.sha256)
    mac.update(msg.encode("utf-8"))
    return mac.digest()
|
| 44 |
+
|
| 45 |
+
def generate_tc3_signature(secret_key, date, service, string_to_sign):
    """Compute the hex TC3-HMAC-SHA256 signature of ``string_to_sign``.

    The signing key is derived by chaining HMAC-SHA256 over the date, the
    service name and the literal ``"tc3_request"``, starting from
    ``"TC3" + secret_key``.
    """
    derived_key = ("TC3" + secret_key).encode("utf-8")
    for component in (date, service, "tc3_request"):
        derived_key = sign(derived_key, component)
    final_mac = hmac.new(derived_key, string_to_sign.encode("utf-8"), hashlib.sha256)
    return final_mac.hexdigest()
|
| 53 |
+
|
| 54 |
+
def translate_with_tencent(texts, from_lang='auto', to_lang='zh'):
    """Translate a list of texts with the Tencent ``TextTranslate`` action.

    The texts are joined with newlines into one ``SourceText``, the request
    is signed with TC3-HMAC-SHA256, and the returned ``TargetText`` is split
    on newlines again.

    Args:
        texts: Strings to translate.
        from_lang: Source language code sent as ``Source``.
        to_lang: Target language code sent as ``Target``.

    Returns:
        The list of translated lines, or ``None`` when the request fails or
        the response carries no ``TargetText``.
    """
    service = "tmt"
    host = "tmt.tencentcloudapi.com"
    action = "TextTranslate"
    version = "2018-03-21"
    region = "ap-beijing"
    algorithm = "TC3-HMAC-SHA256"
    timestamp = int(time.time())
    # UTC date of the timestamp; time.gmtime replaces the deprecated
    # datetime.utcfromtimestamp and yields the same calendar date.
    date = time.strftime("%Y-%m-%d", time.gmtime(timestamp))

    payload = {
        "SourceText": "\n".join(texts),
        "Source": from_lang,
        "Target": to_lang,
        "ProjectId": 0
    }
    # The exact string that is hashed below must also be the request body.
    payload_str = json.dumps(payload)

    # ---- Step 1: build the canonical request ----
    http_request_method = "POST"
    canonical_uri = "/"
    canonical_querystring = ""
    ct = "application/json; charset=utf-8"
    canonical_headers = f"content-type:{ct}\nhost:{host}\nx-tc-action:{action.lower()}\n"
    signed_headers = "content-type;host;x-tc-action"
    hashed_request_payload = hashlib.sha256(payload_str.encode("utf-8")).hexdigest()
    canonical_request = "\n".join([
        http_request_method,
        canonical_uri,
        canonical_querystring,
        canonical_headers,
        signed_headers,
        hashed_request_payload,
    ])

    # ---- Step 2: build the string to sign ----
    credential_scope = date + "/" + service + "/" + "tc3_request"
    hashed_canonical_request = hashlib.sha256(canonical_request.encode("utf-8")).hexdigest()
    string_to_sign = "\n".join([
        algorithm,
        str(timestamp),
        credential_scope,
        hashed_canonical_request,
    ])

    # ---- Step 3: compute the signature ----
    signature = generate_tc3_signature(TENCENT_SECRET_KEY, date, service, string_to_sign)

    # ---- Step 4: assemble the Authorization header ----
    authorization = (algorithm + " " +
                     "Credential=" + TENCENT_SECRET_ID + "/" + credential_scope + ", " +
                     "SignedHeaders=" + signed_headers + ", " +
                     "Signature=" + signature)

    # ---- Step 5: send the request ----
    headers = {
        "Authorization": authorization,
        "Content-Type": ct,
        "Host": host,
        "X-TC-Action": action,
        "X-TC-Timestamp": str(timestamp),
        "X-TC-Version": version,
        "X-TC-Region": region
    }

    try:
        # Without a timeout a stalled upstream would block the caller forever
        # and the Baidu fallback would never be tried.
        response = requests.post(TENCENT_TRANSLATE_URL, headers=headers, data=payload_str, timeout=10)
        response.raise_for_status()
        result = response.json()
        if "Response" in result and "TargetText" in result["Response"]:
            return result["Response"]["TargetText"].split("\n")
        return None
    except Exception as e:
        # Deliberately broad: any failure here means "fall back to the next translator".
        print(f"腾讯翻译API请求失败: {e}")
        return None
|
| 130 |
+
|
| 131 |
+
def translate_with_baidu(texts, from_lang='auto', to_lang='zh'):
    """Translate a list of texts with the Baidu translate API.

    The texts are joined with newlines into one query, signed with the MD5 of
    ``app_id + query + salt + secret_key`` using the next credential pair
    from the rotation, and sent as a GET request.

    Args:
        texts: Strings to translate.
        from_lang: Source language code sent as ``from``.
        to_lang: Target language code sent as ``to``.

    Returns:
        The ``dst`` field of each ``trans_result`` item, or ``None`` when the
        request fails or the response carries no ``trans_result``.
    """
    credentials = get_next_credentials()
    app_id = credentials['app_id']
    secret_key = credentials['secret_key']

    salt = random.randint(32768, 65536)
    query = '\n'.join(texts)
    sign_str = app_id + query + str(salt) + secret_key
    # Named ``signature`` so it does not shadow the module-level sign() helper.
    signature = hashlib.md5(sign_str.encode('utf-8')).hexdigest()

    params = {
        'q': query,
        'from': from_lang,
        'to': to_lang,
        'appid': app_id,
        'salt': salt,
        'sign': signature
    }

    try:
        # Without a timeout a stalled upstream would block the caller forever.
        response = requests.get(BAIDU_TRANSLATE_URL, params=params, timeout=10)
        response.raise_for_status()
        result = response.json()
        if 'trans_result' in result:
            return [item['dst'] for item in result['trans_result']]
        return None
    except Exception as e:
        # Deliberately broad: any failure here means "translation unavailable".
        print(f"百度翻译API请求失败: {e}")
        return None
|
| 164 |
+
|
| 165 |
+
def translate_texts(texts, from_lang='auto', to_lang='zh'):
    """Translate ``texts``, trying Tencent first and Baidu second.

    Returns the first non-``None`` result; when both translators fail, the
    original ``texts`` are returned untranslated.
    """
    for translator in (translate_with_tencent, translate_with_baidu):
        translated = translator(texts, from_lang, to_lang)
        if translated is not None:
            return translated

    # Both translators failed: hand back the input unchanged.
    return texts
|
| 182 |
+
|
| 183 |
+
# 创建蓝图
|
| 184 |
+
tag_extractorbp = Blueprint('tag_extractor', __name__)
|
| 185 |
+
|
| 186 |
+
# 移除原来的 extract_tags 接口,因为现在前端直接获取和解析HTML
|
| 187 |
+
|
| 188 |
+
@tag_extractorbp.route('/Tagtranslate', methods=['POST'])
def translate():
    """Translate a list of texts.

    Request body:  ``{"texts": ["text1", "text2", ...]}``
    Response body: ``{"translated_texts": ["...", "...", ...]}``

    Blank entries and entries that are not strings are dropped before
    translation. Responds 400 for a missing or invalid body or ``texts``
    field, and 500 for an unexpected server error.
    """
    try:
        # silent=True yields None for a missing or malformed JSON body, so
        # client mistakes produce a 400 below instead of a 500.
        data = request.get_json(silent=True)
        if not data:
            return jsonify({"error": "请求体为空"}), 400
        if not isinstance(data, dict):
            return jsonify({"error": "请求体必须是JSON对象"}), 400

        # Test for None rather than falsiness: an empty list is a valid
        # request and must reach the empty-result branch below.
        texts = data.get('texts')
        if texts is None:
            return jsonify({"error": "缺少texts参数"}), 400

        if not isinstance(texts, list):
            return jsonify({"error": "texts参数必须是数组"}), 400

        # Keep only non-blank strings.
        valid_texts = [
            text.strip()
            for text in texts
            if isinstance(text, str) and text.strip()
        ]
        if not valid_texts:
            return jsonify({"translated_texts": []}), 200

        print(f"开始翻译 {len(valid_texts)} 个文本...")
        translated_texts = translate_texts(valid_texts)
        print("翻译完成")

        return jsonify({"translated_texts": translated_texts})

    except Exception as e:
        print(f"翻译接口错误: {e}")
        return jsonify({"error": f"服务器内部错误: {str(e)}"}), 500
|
| 224 |
+
|
| 225 |
+
@tag_extractorbp.route('/translate_batch', methods=['POST'])
def translate_batch():
    """Translate a list of texts with optional language parameters.

    Request body::

        {
            "texts": ["text1", "text2", ...],
            "from_lang": "auto",   (optional, default "auto")
            "to_lang": "zh"        (optional, default "zh")
        }

    Response body: ``translated_texts``, ``from_lang``, ``to_lang`` and
    ``count``, except when nothing remains to translate, in which case only
    ``translated_texts`` (an empty list) is returned.

    Blank entries and entries that are not strings are dropped before
    translation. Responds 400 for a missing or invalid body or ``texts``
    field, and 500 for an unexpected server error.
    """
    try:
        # silent=True yields None for a missing or malformed JSON body, so
        # client mistakes produce a 400 below instead of a 500.
        data = request.get_json(silent=True)
        if not data:
            return jsonify({"error": "请求体为空"}), 400
        if not isinstance(data, dict):
            return jsonify({"error": "请求体必须是JSON对象"}), 400

        # Test for None rather than falsiness: an empty list is a valid
        # request and must reach the empty-result branch below.
        texts = data.get('texts')
        if texts is None:
            return jsonify({"error": "缺少texts参数"}), 400

        if not isinstance(texts, list):
            return jsonify({"error": "texts参数必须是数组"}), 400

        from_lang = data.get('from_lang', 'auto')
        to_lang = data.get('to_lang', 'zh')

        # Keep only non-blank strings.
        valid_texts = [
            text.strip()
            for text in texts
            if isinstance(text, str) and text.strip()
        ]
        if not valid_texts:
            return jsonify({"translated_texts": []}), 200

        print(f"开始批量翻译 {len(valid_texts)} 个文本 ({from_lang} -> {to_lang})...")
        translated_texts = translate_texts(valid_texts, from_lang, to_lang)
        print("批量翻译完成")

        return jsonify({
            "translated_texts": translated_texts,
            "from_lang": from_lang,
            "to_lang": to_lang,
            "count": len(translated_texts)
        })

    except Exception as e:
        print(f"批量翻译接口错误: {e}")
        return jsonify({"error": f"服务器内部错误: {str(e)}"}), 500
|
translations_converted.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|