Spaces:
Paused
Paused
feat: 添加PDF翻译Web工具完整项目结构
Browse files实现PDF翻译Web工具的核心功能,包括:
- FastAPI REST API服务器
- Gradio Web界面
- Python客户端SDK
- Docker部署支持
- 完整文档和测试用例
新增配置文件和环境变量支持,优化项目结构便于维护和扩展
- Dockerfile +110 -0
- README.md +8 -4
- login.html +68 -0
- on_startup.sh +5 -0
- packages.txt +1 -0
- pdftranslate_web/.dockerignore +97 -0
- pdftranslate_web/Dockerfile +47 -0
- pdftranslate_web/README.md +385 -0
- pdftranslate_web/docker-compose.yml +38 -0
- pdftranslate_web/docker/.ipynb_checkpoints/start-checkpoint.sh +25 -0
- pdftranslate_web/docker/start.sh +25 -0
- pdftranslate_web/docs/API_USAGE.md +208 -0
- pdftranslate_web/docs/GRADIO_USAGE.md +205 -0
- pdftranslate_web/pdftranslate-mcp-server/.dockerignore +85 -0
- pdftranslate_web/pdftranslate-mcp-server/.env.docker +38 -0
- pdftranslate_web/pdftranslate-mcp-server/Dockerfile +74 -0
- pdftranslate_web/pdftranslate-mcp-server/PRD.MD +255 -0
- pdftranslate_web/pdftranslate-mcp-server/README.md +771 -0
- pdftranslate_web/pdftranslate-mcp-server/config.ini +5 -0
- pdftranslate_web/pdftranslate-mcp-server/docker-compose.yml +63 -0
- pdftranslate_web/pdftranslate-mcp-server/main.py +1100 -0
- pdftranslate_web/pdftranslate-mcp-server/pyproject.toml +141 -0
- pdftranslate_web/pdftranslate-mcp-server/pyproject_scnet.toml +141 -0
- pdftranslate_web/pdftranslate-mcp-server/uv.lock +0 -0
- pdftranslate_web/pyproject.toml +174 -0
- pdftranslate_web/pyproject_scnet.toml +176 -0
- pdftranslate_web/scripts/run_gradio.py +17 -0
- pdftranslate_web/scripts/run_server.py +29 -0
- pdftranslate_web/src/pdftranslate_web/__init__.py +23 -0
- pdftranslate_web/src/pdftranslate_web/api_client.py +273 -0
- pdftranslate_web/src/pdftranslate_web/api_server.py +298 -0
- pdftranslate_web/src/pdftranslate_web/gradio_client.py +749 -0
- pdftranslate_web/tests/test2.py +4 -0
- pdftranslate_web/tests/test_structure.py +30 -0
- pdftranslate_web/uv.lock +0 -0
- requirements.txt +4 -0
- start_server.sh +19 -0
Dockerfile
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-py311-torch2.3.1-1.28.0
|
| 2 |
+
|
| 3 |
+
ENV DEBIAN_FRONTEND=noninteractive \
|
| 4 |
+
TZ=Asia/Shanghai
|
| 5 |
+
|
| 6 |
+
# Remove any third-party apt sources and install basic utilities
|
| 7 |
+
RUN rm -f /etc/apt/sources.list.d/*.list && \
|
| 8 |
+
apt-get update && apt-get install -y --no-install-recommends \
|
| 9 |
+
curl \
|
| 10 |
+
ca-certificates \
|
| 11 |
+
sudo \
|
| 12 |
+
git \
|
| 13 |
+
wget \
|
| 14 |
+
procps \
|
| 15 |
+
git-lfs \
|
| 16 |
+
zip \
|
| 17 |
+
unzip \
|
| 18 |
+
htop \
|
| 19 |
+
vim \
|
| 20 |
+
nano \
|
| 21 |
+
bzip2 \
|
| 22 |
+
libx11-6 \
|
| 23 |
+
build-essential \
|
| 24 |
+
libsndfile-dev \
|
| 25 |
+
software-properties-common \
|
| 26 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 27 |
+
|
| 28 |
+
# Add cloudflare gpg key
|
| 29 |
+
RUN mkdir -p --mode=0755 /usr/share/keyrings && \
|
| 30 |
+
curl -fsSL https://pkg.cloudflare.com/cloudflare-main.gpg | tee /usr/share/keyrings/cloudflare-main.gpg >/dev/null
|
| 31 |
+
|
| 32 |
+
# Add this repo to your apt repositories
|
| 33 |
+
RUN echo 'deb [signed-by=/usr/share/keyrings/cloudflare-main.gpg] https://pkg.cloudflare.com/cloudflared any main' | tee /etc/apt/sources.list.d/cloudflared.list
|
| 34 |
+
|
| 35 |
+
# install cloudflared
|
| 36 |
+
RUN apt-get update && apt-get install -y cloudflared && rm -rf /var/lib/apt/lists/*
|
| 37 |
+
|
| 38 |
+
# Install Node.js for Jupyter extensions
|
| 39 |
+
RUN curl -sL https://deb.nodesource.com/setup_21.x | bash - && \
|
| 40 |
+
apt-get install -y nodejs && \
|
| 41 |
+
npm install -g configurable-http-proxy
|
| 42 |
+
|
| 43 |
+
# Define error handling function and install Claude Code CLI
|
| 44 |
+
RUN handle_error() { echo "错误: $1" >&2; exit 1; } && \
|
| 45 |
+
npm install -g @anthropic-ai/claude-code || handle_error "安装Claude Code CLI失败"
|
| 46 |
+
|
| 47 |
+
# Create a working directory
|
| 48 |
+
WORKDIR /app
|
| 49 |
+
|
| 50 |
+
# Create a non-root user and switch to it
|
| 51 |
+
RUN adduser --disabled-password --gecos '' --shell /bin/bash user \
|
| 52 |
+
&& chown -R user:user /app
|
| 53 |
+
RUN echo "user ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-user
|
| 54 |
+
USER user
|
| 55 |
+
|
| 56 |
+
# All users can use /home/user as their home directory
|
| 57 |
+
ENV HOME=/home/user
|
| 58 |
+
RUN mkdir $HOME/.cache $HOME/.config \
|
| 59 |
+
&& chmod -R 777 $HOME
|
| 60 |
+
|
| 61 |
+
# Set up the Conda environment with Python 3.11
|
| 62 |
+
ENV CONDA_AUTO_UPDATE_CONDA=false \
|
| 63 |
+
PATH=$HOME/miniconda/bin:$PATH
|
| 64 |
+
RUN curl -sLo ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-py311_23.5.2-0-Linux-x86_64.sh \
|
| 65 |
+
&& chmod +x ~/miniconda.sh \
|
| 66 |
+
&& ~/miniconda.sh -b -p ~/miniconda \
|
| 67 |
+
&& rm ~/miniconda.sh \
|
| 68 |
+
&& conda clean -ya
|
| 69 |
+
|
| 70 |
+
WORKDIR $HOME/app
|
| 71 |
+
|
| 72 |
+
#######################################
|
| 73 |
+
# Start root user section
|
| 74 |
+
#######################################
|
| 75 |
+
|
| 76 |
+
USER root
|
| 77 |
+
|
| 78 |
+
# Create data directory
|
| 79 |
+
RUN mkdir /data && chown user:user /data
|
| 80 |
+
|
| 81 |
+
#######################################
|
| 82 |
+
# End root user section
|
| 83 |
+
#######################################
|
| 84 |
+
|
| 85 |
+
USER user
|
| 86 |
+
|
| 87 |
+
# Install Python packages using conda and pip
|
| 88 |
+
RUN conda install -c conda-forge python=3.11 && \
|
| 89 |
+
pip install --no-cache-dir --upgrade pip
|
| 90 |
+
|
| 91 |
+
# Copy requirements and install Python packages
|
| 92 |
+
COPY --chown=user requirements.txt $HOME/app/
|
| 93 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 94 |
+
|
| 95 |
+
# Copy the current directory contents into the container
|
| 96 |
+
COPY --chown=user . $HOME/app
|
| 97 |
+
|
| 98 |
+
RUN chmod +x start_server.sh
|
| 99 |
+
|
| 100 |
+
# Set environment variables
|
| 101 |
+
ENV PYTHONUNBUFFERED=1 \
|
| 102 |
+
GRADIO_ALLOW_FLAGGING=never \
|
| 103 |
+
GRADIO_NUM_PORTS=1 \
|
| 104 |
+
GRADIO_SERVER_NAME=0.0.0.0 \
|
| 105 |
+
GRADIO_THEME=huggingface \
|
| 106 |
+
SYSTEM=spaces \
|
| 107 |
+
SHELL=/bin/bash \
|
| 108 |
+
JUPYTER_TOKEN=huggingface
|
| 109 |
+
|
| 110 |
+
CMD ["./start_server.sh"]
|
README.md
CHANGED
|
@@ -1,11 +1,15 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
|
|
|
|
|
|
|
|
|
| 8 |
license: apache-2.0
|
| 9 |
---
|
| 10 |
|
|
|
|
| 11 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: 20250801test
|
| 3 |
+
emoji: 💻🐳
|
| 4 |
+
colorFrom: gray
|
| 5 |
+
colorTo: green
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
+
tags:
|
| 9 |
+
- jupyterlab
|
| 10 |
+
suggested_storage: small
|
| 11 |
license: apache-2.0
|
| 12 |
---
|
| 13 |
|
| 14 |
+
docker build -t jupyterlab_modelscope .
|
| 15 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
login.html
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "page.html" %}
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
{% block stylesheet %}
|
| 5 |
+
{% endblock %}
|
| 6 |
+
|
| 7 |
+
{% block site %}
|
| 8 |
+
|
| 9 |
+
<div id="jupyter-main-app" class="container">
|
| 10 |
+
|
| 11 |
+
<img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" alt="Hugging Face Logo">
|
| 12 |
+
<h4>Welcome to JupyterLab</h4>
|
| 13 |
+
|
| 14 |
+
<h5>The default token is <span style="color:orange;">huggingface</span></h5>
|
| 15 |
+
|
| 16 |
+
{% if login_available %}
|
| 17 |
+
{# login_available means password-login is allowed. Show the form. #}
|
| 18 |
+
<div class="row">
|
| 19 |
+
<div class="navbar col-sm-8">
|
| 20 |
+
<div class="navbar-inner">
|
| 21 |
+
<div class="container">
|
| 22 |
+
<div class="center-nav">
|
| 23 |
+
<form action="{{base_url}}login?next={{next}}" method="post" class="navbar-form pull-left">
|
| 24 |
+
{{ xsrf_form_html() | safe }}
|
| 25 |
+
{% if token_available %}
|
| 26 |
+
<label for="password_input"><strong>{% trans %}Jupyter token <span title="This is the secret you set up when deploying your JupyterLab space">ⓘ</span> {% endtrans
|
| 27 |
+
%}</strong></label>
|
| 28 |
+
{% else %}
|
| 29 |
+
<label for="password_input"><strong>{% trans %}Jupyter password:{% endtrans %}</strong></label>
|
| 30 |
+
{% endif %}
|
| 31 |
+
<input type="password" name="password" id="password_input" class="form-control">
|
| 32 |
+
<button type="submit" class="btn btn-default" id="login_submit">{% trans %}Log in{% endtrans
|
| 33 |
+
%}</button>
|
| 34 |
+
</form>
|
| 35 |
+
</div>
|
| 36 |
+
</div>
|
| 37 |
+
</div>
|
| 38 |
+
</div>
|
| 39 |
+
</div>
|
| 40 |
+
{% else %}
|
| 41 |
+
<p>{% trans %}No login available, you shouldn't be seeing this page.{% endtrans %}</p>
|
| 42 |
+
{% endif %}
|
| 43 |
+
|
| 44 |
+
<h5>If you don't have the credentials for this Jupyter space, <a target="_blank" href="https://huggingface.co/spaces/SpacesExamples/jupyterlab?duplicate=true">create your own.</a></h5>
|
| 45 |
+
<br>
|
| 46 |
+
|
| 47 |
+
<p>This template was created by <a href="https://twitter.com/camenduru" target="_blank" >camenduru</a> and <a href="https://huggingface.co/nateraw" target="_blank" >nateraw</a>, with contributions of <a href="https://huggingface.co/osanseviero" target="_blank" >osanseviero</a> and <a href="https://huggingface.co/azzr" target="_blank" >azzr</a> </p>
|
| 48 |
+
{% if message %}
|
| 49 |
+
<div class="row">
|
| 50 |
+
{% for key in message %}
|
| 51 |
+
<div class="message {{key}}">
|
| 52 |
+
{{message[key]}}
|
| 53 |
+
</div>
|
| 54 |
+
{% endfor %}
|
| 55 |
+
</div>
|
| 56 |
+
{% endif %}
|
| 57 |
+
{% if token_available %}
|
| 58 |
+
{% block token_message %}
|
| 59 |
+
|
| 60 |
+
{% endblock token_message %}
|
| 61 |
+
{% endif %}
|
| 62 |
+
</div>
|
| 63 |
+
|
| 64 |
+
{% endblock %}
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
{% block script %}
|
| 68 |
+
{% endblock %}
|
on_startup.sh
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# Write some commands here that will run on root user before startup.
|
| 3 |
+
# For example, to clone transformers and install it in dev mode:
|
| 4 |
+
# git clone https://github.com/huggingface/transformers.git
|
| 5 |
+
# cd transformers && pip install -e ".[dev]"
|
packages.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
tree
|
pdftranslate_web/.dockerignore
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 开发相关文件
|
| 2 |
+
.git
|
| 3 |
+
.gitignore
|
| 4 |
+
.env
|
| 5 |
+
*.env
|
| 6 |
+
.env.*
|
| 7 |
+
|
| 8 |
+
# Python相关
|
| 9 |
+
__pycache__
|
| 10 |
+
*.pyc
|
| 11 |
+
*.pyo
|
| 12 |
+
*.pyd
|
| 13 |
+
.Python
|
| 14 |
+
*.so
|
| 15 |
+
.coverage
|
| 16 |
+
.pytest_cache
|
| 17 |
+
.mypy_cache
|
| 18 |
+
.tox
|
| 19 |
+
venv/
|
| 20 |
+
env/
|
| 21 |
+
.venv/
|
| 22 |
+
|
| 23 |
+
# 编辑器相关
|
| 24 |
+
.vscode/
|
| 25 |
+
.idea/
|
| 26 |
+
*.swp
|
| 27 |
+
*.swo
|
| 28 |
+
*~
|
| 29 |
+
|
| 30 |
+
# 系统文件
|
| 31 |
+
.DS_Store
|
| 32 |
+
Thumbs.db
|
| 33 |
+
*.log
|
| 34 |
+
|
| 35 |
+
# 构建相关
|
| 36 |
+
build/
|
| 37 |
+
dist/
|
| 38 |
+
*.egg-info/
|
| 39 |
+
.wheel
|
| 40 |
+
|
| 41 |
+
# 测试相关
|
| 42 |
+
.coverage
|
| 43 |
+
htmlcov/
|
| 44 |
+
.pytest_cache/
|
| 45 |
+
.tox/
|
| 46 |
+
|
| 47 |
+
# 文档相关
|
| 48 |
+
docs/_build/
|
| 49 |
+
site/
|
| 50 |
+
|
| 51 |
+
# 临时文件
|
| 52 |
+
temp/
|
| 53 |
+
tmp/
|
| 54 |
+
*.tmp
|
| 55 |
+
*.temp
|
| 56 |
+
|
| 57 |
+
# 日志文件
|
| 58 |
+
logs/
|
| 59 |
+
*.log
|
| 60 |
+
|
| 61 |
+
# 数据库文件
|
| 62 |
+
*.db
|
| 63 |
+
*.sqlite
|
| 64 |
+
|
| 65 |
+
# 缓存文件
|
| 66 |
+
*.cache
|
| 67 |
+
|
| 68 |
+
# 备份文件
|
| 69 |
+
*.bak
|
| 70 |
+
*.backup
|
| 71 |
+
|
| 72 |
+
# 压缩文件
|
| 73 |
+
*.tar.gz
|
| 74 |
+
*.zip
|
| 75 |
+
*.rar
|
| 76 |
+
|
| 77 |
+
# 媒体文件 (示例,根据需要调整)
|
| 78 |
+
*.pdf
|
| 79 |
+
*.doc
|
| 80 |
+
*.docx
|
| 81 |
+
*.png
|
| 82 |
+
*.jpg
|
| 83 |
+
*.jpeg
|
| 84 |
+
*.gif
|
| 85 |
+
|
| 86 |
+
# 配置文件 (包含敏感信息)
|
| 87 |
+
config/config.toml
|
| 88 |
+
|
| 89 |
+
# Docker相关
|
| 90 |
+
.dockerignore
|
| 91 |
+
Dockerfile
|
| 92 |
+
docker-compose*.yml
|
| 93 |
+
|
| 94 |
+
# CI/CD相关
|
| 95 |
+
.github/
|
| 96 |
+
.gitlab-ci.yml
|
| 97 |
+
Jenkinsfile
|
pdftranslate_web/Dockerfile
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 使用官方Python运行时作为基础镜像
|
| 2 |
+
FROM python:3.11-slim
|
| 3 |
+
|
| 4 |
+
# 设置工作目录
|
| 5 |
+
WORKDIR /app
|
| 6 |
+
|
| 7 |
+
# 设置环境变量
|
| 8 |
+
ENV PYTHONPATH=/app/src \
|
| 9 |
+
PYTHONUNBUFFERED=1 \
|
| 10 |
+
PIP_NO_CACHE_DIR=1
|
| 11 |
+
|
| 12 |
+
# 安装系统依赖
|
| 13 |
+
RUN apt-get update && apt-get install -y \
|
| 14 |
+
curl \
|
| 15 |
+
git \
|
| 16 |
+
libgl1-mesa-glx \
|
| 17 |
+
libglib2.0-0 \
|
| 18 |
+
libsm6 \
|
| 19 |
+
libxext6 \
|
| 20 |
+
libxrender-dev \
|
| 21 |
+
libgomp1 \
|
| 22 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 23 |
+
|
| 24 |
+
# 复制项目文件
|
| 25 |
+
COPY pyproject.toml /app/
|
| 26 |
+
COPY src/ /app/src/
|
| 27 |
+
COPY scripts/ /app/scripts/
|
| 28 |
+
COPY simaple/ /app/simaple/
|
| 29 |
+
COPY README.md /app/
|
| 30 |
+
|
| 31 |
+
# 安装Python依赖
|
| 32 |
+
RUN pip install --upgrade pip && \
|
| 33 |
+
pip install -e .
|
| 34 |
+
|
| 35 |
+
# 复制启动脚本
|
| 36 |
+
COPY docker/start.sh /app/start.sh
|
| 37 |
+
RUN chmod +x /app/start.sh
|
| 38 |
+
|
| 39 |
+
# 暴露端口
|
| 40 |
+
EXPOSE 8000 7860
|
| 41 |
+
|
| 42 |
+
# 健康检查
|
| 43 |
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
|
| 44 |
+
CMD curl -f http://localhost:8000/health || exit 1
|
| 45 |
+
|
| 46 |
+
# 启动命令
|
| 47 |
+
CMD ["/app/start.sh"]
|
pdftranslate_web/README.md
ADDED
|
@@ -0,0 +1,385 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# PDF翻译Web工具
|
| 2 |
+
|
| 3 |
+
🌍 基于AI的智能PDF文档翻译工具,支持Web界面和API接口,保持文档结构不变。
|
| 4 |
+
|
| 5 |
+
## 特性
|
| 6 |
+
|
| 7 |
+
- 🚀 **智能翻译**: 基于OpenAI/DeepSeek等大语言模型
|
| 8 |
+
- 📄 **保持格式**: 翻译后保持原始PDF文档结构和布局
|
| 9 |
+
- 🌐 **双重接口**: FastAPI REST API + Gradio Web界面
|
| 10 |
+
- ⚙️ **环境配置**: 通过环境变量灵活配置
|
| 11 |
+
- 🔧 **容器部署**: Docker一键部署
|
| 12 |
+
- 📊 **实时监控**: 翻译进度实时跟踪
|
| 13 |
+
|
| 14 |
+
## 项目结构
|
| 15 |
+
|
| 16 |
+
```
|
| 17 |
+
pdftranslate_web/
|
| 18 |
+
├── src/pdftranslate_web/ # 核心模块
|
| 19 |
+
│ ├── __init__.py
|
| 20 |
+
│ ├── api_server.py # FastAPI API服务器
|
| 21 |
+
│ ├── api_client.py # Python客户端SDK
|
| 22 |
+
│ └── gradio_client.py # Gradio Web界面
|
| 23 |
+
├── scripts/ # 启动脚本
|
| 24 |
+
│ ├── run_server.py # 启动API服务器
|
| 25 |
+
│ └── run_gradio.py # 启动Web界面
|
| 26 |
+
├── docker/ # Docker配置
|
| 27 |
+
│ └── start.sh # 容器启动脚本
|
| 28 |
+
├── docs/ # 文档
|
| 29 |
+
│ ├── API_USAGE.md # API使用说明
|
| 30 |
+
│ └── GRADIO_USAGE.md # Web界面使用说明
|
| 31 |
+
├── tests/ # 测试文件
|
| 32 |
+
├── .env.example # 环境变量配置模板
|
| 33 |
+
├── docker-compose.yml # Docker Compose配置
|
| 34 |
+
├── Dockerfile # Docker镜像配置
|
| 35 |
+
├── pyproject.toml # 项目配置和依赖
|
| 36 |
+
└── README.md # 项目说明
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
## 快速开始
|
| 40 |
+
|
| 41 |
+
### 1. 环境准备
|
| 42 |
+
|
| 43 |
+
> **推荐使用 uv 包管理器**:uv 是一个超快的 Python 包安装器和解析器,比 pip 快 10-100 倍,支持自动依赖解析和虚拟环境管理。
|
| 44 |
+
|
| 45 |
+
```bash
|
| 46 |
+
# 克隆项目
|
| 47 |
+
git clone https://github.com/wwwzhouhui/pdftranslate_web
|
| 48 |
+
cd pdftranslate_web
|
| 49 |
+
|
| 50 |
+
# 方式一:使用 uv 包管理器 (推荐)
|
| 51 |
+
# 安装 uv (如果没有安装)
|
| 52 |
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
| 53 |
+
# 或者使用 pip 安装
|
| 54 |
+
pip install uv
|
| 55 |
+
|
| 56 |
+
# 使用 uv 安装依赖 (自动创建虚拟环境)
|
| 57 |
+
uv sync
|
| 58 |
+
|
| 59 |
+
# 方式二:使用传统 pip
|
| 60 |
+
# 安装依赖
|
| 61 |
+
pip install -e .
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
### 2. 配置设置
|
| 65 |
+
|
| 66 |
+
复制环境变量模板并编辑:
|
| 67 |
+
|
| 68 |
+
```bash
|
| 69 |
+
cp .env.example .env
|
| 70 |
+
```
|
| 71 |
+
|
| 72 |
+
编辑 `.env` 文件,设置API密钥:
|
| 73 |
+
|
| 74 |
+
```bash
|
| 75 |
+
# OpenAI配置 (必填)
|
| 76 |
+
OPENAI_API_KEY=your-api-key-here
|
| 77 |
+
OPENAI_MODEL=deepseek-ai/DeepSeek-V3
|
| 78 |
+
OPENAI_BASE_URL=https://api.siliconflow.cn/v1
|
| 79 |
+
|
| 80 |
+
# 翻译配置
|
| 81 |
+
QPS=4
|
| 82 |
+
DEFAULT_LANG_IN=en
|
| 83 |
+
DEFAULT_LANG_OUT=zh
|
| 84 |
+
WATERMARK_OUTPUT_MODE=no_watermark
|
| 85 |
+
```
|
| 86 |
+
|
| 87 |
+
### 3. 启动服务
|
| 88 |
+
|
| 89 |
+
#### 方式一:启动API服务器
|
| 90 |
+
|
| 91 |
+
```bash
|
| 92 |
+
# 使用 uv (推荐)
|
| 93 |
+
uv run python scripts/run_server.py --host 0.0.0.0 --port 8000
|
| 94 |
+
|
| 95 |
+
# 或使用传统方式
|
| 96 |
+
python scripts/run_server.py --host 0.0.0.0 --port 8000
|
| 97 |
+
```
|
| 98 |
+
|
| 99 |
+
API服务将在 `http://localhost:8000` 启动,文档地址:`http://localhost:8000/docs`
|
| 100 |
+
|
| 101 |
+
#### 方式二:启动Web界面
|
| 102 |
+
|
| 103 |
+
```bash
|
| 104 |
+
# 使用 uv (推荐)
|
| 105 |
+
uv run python scripts/run_gradio.py --server-url http://localhost:8000 --port 7860
|
| 106 |
+
|
| 107 |
+
# 或使用传统方式
|
| 108 |
+
python scripts/run_gradio.py --server-url http://localhost:8000 --port 7860
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
Web界面将在 `http://localhost:7860` 启动
|
| 112 |
+
|
| 113 |
+
## 使用方法
|
| 114 |
+
|
| 115 |
+
### API使用
|
| 116 |
+
|
| 117 |
+
```python
|
| 118 |
+
from pdftranslate_web.api_client import BabelDOCClient
|
| 119 |
+
|
| 120 |
+
# 创建客户端
|
| 121 |
+
client = BabelDOCClient("http://localhost:8000")
|
| 122 |
+
|
| 123 |
+
# 翻译PDF文件
|
| 124 |
+
downloaded_files = client.translate_and_download(
|
| 125 |
+
pdf_path="document.pdf",
|
| 126 |
+
output_dir="./output",
|
| 127 |
+
lang_in="en",
|
| 128 |
+
lang_out="zh"
|
| 129 |
+
)
|
| 130 |
+
|
| 131 |
+
print(f"翻译完成:{downloaded_files}")
|
| 132 |
+
```
|
| 133 |
+
|
| 134 |
+
### 命令行使用
|
| 135 |
+
|
| 136 |
+
```bash
|
| 137 |
+
# 使用API客户端
|
| 138 |
+
# uv方式 (推荐)
|
| 139 |
+
uv run python src/pdftranslate_web/api_client.py document.pdf --output-dir ./output --lang-out zh
|
| 140 |
+
|
| 141 |
+
# 传统方式
|
| 142 |
+
python src/pdftranslate_web/api_client.py document.pdf --output-dir ./output --lang-out zh
|
| 143 |
+
|
| 144 |
+
# 检查服务器状态
|
| 145 |
+
curl http://localhost:8000/health
|
| 146 |
+
```
|
| 147 |
+
|
| 148 |
+
### Web界面使用
|
| 149 |
+
|
| 150 |
+
1. 打开浏览器访问 `http://localhost:7860`
|
| 151 |
+
2. 上传PDF文件
|
| 152 |
+
3. 选择翻译选项(源语言、目标语言、输出类型)
|
| 153 |
+
4. 点击"开始翻译"
|
| 154 |
+
5. 等待翻译完成并下载结果
|
| 155 |
+
|
| 156 |
+
## API接口
|
| 157 |
+
|
| 158 |
+
### 核心接口
|
| 159 |
+
|
| 160 |
+
- `POST /translate` - 提交翻译任务
|
| 161 |
+
- `GET /status/{task_id}` - 查询翻译状态
|
| 162 |
+
- `GET /download/{task_id}/{file_type}` - 下载翻译结果
|
| 163 |
+
- `GET /health` - 健康检查
|
| 164 |
+
|
| 165 |
+
详细API文档请查看:`docs/API_USAGE.md`
|
| 166 |
+
|
| 167 |
+
## 配置说明
|
| 168 |
+
|
| 169 |
+
### 环境变量配置
|
| 170 |
+
|
| 171 |
+
所有配置通过环境变量进行设置。可以通过 `.env` 文件或直接设置环境变量:
|
| 172 |
+
|
| 173 |
+
```bash
|
| 174 |
+
# OpenAI配置 (必填)
|
| 175 |
+
OPENAI_API_KEY=your-api-key-here # API密钥
|
| 176 |
+
OPENAI_MODEL=deepseek-ai/DeepSeek-V3 # 模型名称
|
| 177 |
+
OPENAI_BASE_URL=https://api.siliconflow.cn/v1 # API基础URL
|
| 178 |
+
|
| 179 |
+
# 服务器配置
|
| 180 |
+
SERVER_HOST=0.0.0.0 # 绑定地址
|
| 181 |
+
SERVER_PORT=8000 # 端口号
|
| 182 |
+
QPS=4 # 每秒请求数限制
|
| 183 |
+
|
| 184 |
+
# 翻译配置
|
| 185 |
+
DEFAULT_LANG_IN=en # 默认源语言
|
| 186 |
+
DEFAULT_LANG_OUT=zh # 默认目标语言
|
| 187 |
+
WATERMARK_OUTPUT_MODE=no_watermark # 水印模式
|
| 188 |
+
NO_DUAL=false # 是否禁用双语PDF输出 (true 表示不生成双语PDF)
|
| 189 |
+
NO_MONO=false # 是否禁用单语PDF输出 (true 表示不生成单语PDF)
|
| 190 |
+
```
|
| 191 |
+
|
| 192 |
+
### 支持的环境变量
|
| 193 |
+
|
| 194 |
+
| 环境变量 | 默认值 | 说明 |
|
| 195 |
+
|---------|--------|------|
|
| 196 |
+
| `OPENAI_API_KEY` | - | OpenAI API密钥 (必填) |
|
| 197 |
+
| `OPENAI_MODEL` | `deepseek-ai/DeepSeek-V3` | 使用的模型 |
|
| 198 |
+
| `OPENAI_BASE_URL` | `https://api.siliconflow.cn/v1` | API端点 |
|
| 199 |
+
| `SERVER_HOST` | `0.0.0.0` | 服务器地址 |
|
| 200 |
+
| `SERVER_PORT` | `8000` | 服务器端口 |
|
| 201 |
+
| `QPS` | `4` | 请求频率限制 |
|
| 202 |
+
| `DEFAULT_LANG_IN` | `en` | 默认源语言 |
|
| 203 |
+
| `DEFAULT_LANG_OUT` | `zh` | 默认目标语言 |
|
| 204 |
+
| `WATERMARK_OUTPUT_MODE` | `no_watermark` | 水印模式 |
|
| 205 |
+
|
| 206 |
+
## 开发指南
|
| 207 |
+
|
| 208 |
+
### 开发环境设置
|
| 209 |
+
|
| 210 |
+
```bash
|
| 211 |
+
# 克隆项目
|
| 212 |
+
git clone https://github.com/wwwzhouhui/pdftranslate_web
|
| 213 |
+
cd pdftranslate_web
|
| 214 |
+
|
| 215 |
+
# 方式一:使用 uv (推荐)
|
| 216 |
+
# 安装 uv (如果没有安装)
|
| 217 |
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
| 218 |
+
|
| 219 |
+
# 安装开发依赖
|
| 220 |
+
uv sync --dev
|
| 221 |
+
|
| 222 |
+
# 方式二:使用传统 pip + 虚拟环境
|
| 223 |
+
# 创建虚拟环境
|
| 224 |
+
python -m venv venv
|
| 225 |
+
source venv/bin/activate # Linux/Mac
|
| 226 |
+
# venv\Scripts\activate # Windows
|
| 227 |
+
|
| 228 |
+
# 安装开发依赖
|
| 229 |
+
pip install -e ".[dev]"
|
| 230 |
+
```
|
| 231 |
+
|
| 232 |
+
### 运行测试
|
| 233 |
+
|
| 234 |
+
```bash
|
| 235 |
+
pytest tests/
|
| 236 |
+
```
|
| 237 |
+
|
| 238 |
+
### 代码格式化
|
| 239 |
+
|
| 240 |
+
```bash
|
| 241 |
+
black src/
|
| 242 |
+
isort src/
|
| 243 |
+
```
|
| 244 |
+
|
| 245 |
+
## Docker部署
|
| 246 |
+
|
| 247 |
+
### 快速开始
|
| 248 |
+
|
| 249 |
+
1. **准备环境变量文件**
|
| 250 |
+
|
| 251 |
+
```bash
|
| 252 |
+
# 复制环境变量模板
|
| 253 |
+
cp .env.example .env
|
| 254 |
+
|
| 255 |
+
# 编辑环境变量 (必须设置API密钥)
|
| 256 |
+
nano .env
|
| 257 |
+
```
|
| 258 |
+
|
| 259 |
+
2. **启动服务**
|
| 260 |
+
|
| 261 |
+
```bash
|
| 262 |
+
# 构建并启动服务
|
| 263 |
+
docker-compose up -d
|
| 264 |
+
|
| 265 |
+
# 查看服务状态
|
| 266 |
+
docker-compose ps
|
| 267 |
+
|
| 268 |
+
# 查看日志
|
| 269 |
+
docker-compose logs -f pdftranslate
|
| 270 |
+
```
|
| 271 |
+
|
| 272 |
+
服务启动后访问:
|
| 273 |
+
- API服务:http://localhost:8000
|
| 274 |
+
- Web界面:http://localhost:7860
|
| 275 |
+
- API文档:http://localhost:8000/docs
|
| 276 |
+
|
| 277 |
+
### 环境变量配置
|
| 278 |
+
|
| 279 |
+
在 `.env` 文件中设置必需的配置:
|
| 280 |
+
|
| 281 |
+
```bash
|
| 282 |
+
# 必需配置
|
| 283 |
+
OPENAI_API_KEY=your-api-key-here
|
| 284 |
+
OPENAI_MODEL=deepseek-ai/DeepSeek-V3
|
| 285 |
+
OPENAI_BASE_URL=https://api.siliconflow.cn/v1
|
| 286 |
+
|
| 287 |
+
# 可选配置
|
| 288 |
+
QPS=4
|
| 289 |
+
DEFAULT_LANG_IN=en
|
| 290 |
+
DEFAULT_LANG_OUT=zh
|
| 291 |
+
```
|
| 292 |
+
|
| 293 |
+
### Docker命令
|
| 294 |
+
|
| 295 |
+
```bash
|
| 296 |
+
# 构建镜像
|
| 297 |
+
docker-compose build
|
| 298 |
+
|
| 299 |
+
# 启动服务
|
| 300 |
+
docker-compose up -d
|
| 301 |
+
|
| 302 |
+
# 停止服务
|
| 303 |
+
docker-compose down
|
| 304 |
+
|
| 305 |
+
# 查看日志
|
| 306 |
+
docker-compose logs pdftranslate
|
| 307 |
+
|
| 308 |
+
# 重启服务
|
| 309 |
+
docker-compose restart pdftranslate
|
| 310 |
+
```
|
| 311 |
+
|
| 312 |
+
### 直接使用Docker
|
| 313 |
+
|
| 314 |
+
```bash
|
| 315 |
+
# 构建镜像
|
| 316 |
+
docker build -t pdftranslate_web .
|
| 317 |
+
|
| 318 |
+
# 运行容器
|
| 319 |
+
docker run -d \
|
| 320 |
+
--name pdftranslate \
|
| 321 |
+
-p 8000:8000 \
|
| 322 |
+
-p 7860:7860 \
|
| 323 |
+
-e OPENAI_API_KEY="your-api-key" \
|
| 324 |
+
pdftranslate_web
|
| 325 |
+
```
|
| 326 |
+
|
| 327 |
+
## 故障排除
|
| 328 |
+
|
| 329 |
+
### 常见问题
|
| 330 |
+
|
| 331 |
+
1. **API密钥错误**
|
| 332 |
+
- 检查 `.env` 文件中的 `OPENAI_API_KEY` 设置
|
| 333 |
+
- 确认API密钥有效且有足够配额
|
| 334 |
+
|
| 335 |
+
2. **模块导入错误**
|
| 336 |
+
- 确保已正确安装项目依赖
|
| 337 |
+
- 检查Python路径设置
|
| 338 |
+
|
| 339 |
+
3. **端口占用**
|
| 340 |
+
- 修改 `.env` 文件中的端口号
|
| 341 |
+
- 或使用环境变量指定其他端口
|
| 342 |
+
|
| 343 |
+
4. **翻译失败**
|
| 344 |
+
- 检查网络连接
|
| 345 |
+
- 确认API服务可用性
|
| 346 |
+
- 查看日志文件获取详细错误信息
|
| 347 |
+
|
| 348 |
+
### 日志查看
|
| 349 |
+
|
| 350 |
+
```bash
|
| 351 |
+
# 查看API服务器日志
|
| 352 |
+
python scripts/run_server.py --log-level DEBUG
|
| 353 |
+
|
| 354 |
+
# 查看特定任务日志
|
| 355 |
+
curl http://localhost:8000/status/{task_id}
|
| 356 |
+
```
|
| 357 |
+
|
| 358 |
+
## 贡献指南
|
| 359 |
+
|
| 360 |
+
1. Fork项目
|
| 361 |
+
2. 创建特性分支 (`git checkout -b feature/AmazingFeature`)
|
| 362 |
+
3. 提交更改 (`git commit -m 'Add some AmazingFeature'`)
|
| 363 |
+
4. 推送到分支 (`git push origin feature/AmazingFeature`)
|
| 364 |
+
5. 开启Pull Request
|
| 365 |
+
|
| 366 |
+
## 许可证
|
| 367 |
+
|
| 368 |
+
本项目采用 AGPL-3.0 许可证。详见 [LICENSE](LICENSE) 文件。
|
| 369 |
+
|
| 370 |
+
## 支持
|
| 371 |
+
|
| 372 |
+
- 📧 Email: 75271002@qq.com
|
| 373 |
+
- 🐛 问题反馈: [GitHub Issues](https://github.com/wwwzhouhui/pdftranslate_web/issues)
|
| 374 |
+
|
| 375 |
+
## 更新日志
|
| 376 |
+
|
| 377 |
+
### v0.0.1
|
| 378 |
+
- 重新整理项目目录结构
|
| 379 |
+
- 完善文档和配置文件
|
| 380 |
+
- 添加多种部署方式支持
|
| 381 |
+
- 优化API接口设计
|
| 382 |
+
|
| 383 |
+
---
|
| 384 |
+
|
| 385 |
+
⭐ 如果这个项目对您有帮助,请给我们一个star!
|
pdftranslate_web/docker-compose.yml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version: '3.8'
|
| 2 |
+
|
| 3 |
+
services:
|
| 4 |
+
pdftranslate:
|
| 5 |
+
build:
|
| 6 |
+
context: .
|
| 7 |
+
dockerfile: Dockerfile
|
| 8 |
+
container_name: pdftranslate-web
|
| 9 |
+
restart: unless-stopped
|
| 10 |
+
ports:
|
| 11 |
+
- "8000:8000" # API服务端口
|
| 12 |
+
- "7860:7860" # Gradio Web界面端口
|
| 13 |
+
environment:
|
| 14 |
+
# OpenAI配置 (必填)
|
| 15 |
+
- OPENAI_API_KEY=${OPENAI_API_KEY}
|
| 16 |
+
- OPENAI_MODEL=${OPENAI_MODEL:-deepseek-ai/DeepSeek-V3}
|
| 17 |
+
- OPENAI_BASE_URL=${OPENAI_BASE_URL:-https://api.siliconflow.cn/v1}
|
| 18 |
+
|
| 19 |
+
# 服务器配置
|
| 20 |
+
- SERVER_HOST=${SERVER_HOST:-0.0.0.0}
|
| 21 |
+
- SERVER_PORT=${SERVER_PORT:-8000}
|
| 22 |
+
- QPS=${QPS:-4}
|
| 23 |
+
|
| 24 |
+
# 翻译配置
|
| 25 |
+
- DEFAULT_LANG_IN=${DEFAULT_LANG_IN:-en}
|
| 26 |
+
- DEFAULT_LANG_OUT=${DEFAULT_LANG_OUT:-zh}
|
| 27 |
+
- WATERMARK_OUTPUT_MODE=${WATERMARK_OUTPUT_MODE:-no_watermark}
|
| 28 |
+
- NO_DUAL=${NO_DUAL:-false}
|
| 29 |
+
- NO_MONO=${NO_MONO:-false}
|
| 30 |
+
volumes:
|
| 31 |
+
# 日志文件挂载 (可选)
|
| 32 |
+
- ./logs:/app/logs
|
| 33 |
+
healthcheck:
|
| 34 |
+
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
|
| 35 |
+
interval: 30s
|
| 36 |
+
timeout: 10s
|
| 37 |
+
retries: 3
|
| 38 |
+
start_period: 30s
|
pdftranslate_web/docker/.ipynb_checkpoints/start-checkpoint.sh
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
set -e
|
| 3 |
+
|
| 4 |
+
# 检查必需的环境变量
|
| 5 |
+
if [ -z "$OPENAI_API_KEY" ]; then
|
| 6 |
+
echo "错误: 必须设置 OPENAI_API_KEY 环境变量"
|
| 7 |
+
exit 1
|
| 8 |
+
fi
|
| 9 |
+
|
| 10 |
+
echo "=== BabelDOC PDF翻译服务 ==="
|
| 11 |
+
echo "OpenAI模型: ${OPENAI_MODEL:-deepseek-ai/DeepSeek-V3}"
|
| 12 |
+
echo "API服务: http://localhost:8000"
|
| 13 |
+
echo "Web界面: http://localhost:7860"
|
| 14 |
+
echo "========================="
|
| 15 |
+
|
| 16 |
+
# 启动API服务器(后台)
|
| 17 |
+
echo "启动API服务器..."
|
| 18 |
+
python3 /app/scripts/run_server.py &
|
| 19 |
+
|
| 20 |
+
# 等待API服务器启动
|
| 21 |
+
sleep 10
|
| 22 |
+
|
| 23 |
+
# 启动Gradio客户端
|
| 24 |
+
echo "启动Web界面..."
|
| 25 |
+
python3 /app/scripts/run_gradio.py --server-url http://localhost:8000 --host 0.0.0.0 --port 7860
|
pdftranslate_web/docker/start.sh
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
set -e
|
| 3 |
+
|
| 4 |
+
# 检查必需的环境变量
|
| 5 |
+
if [ -z "$OPENAI_API_KEY" ]; then
|
| 6 |
+
echo "错误: 必须设置 OPENAI_API_KEY 环境变量"
|
| 7 |
+
exit 1
|
| 8 |
+
fi
|
| 9 |
+
|
| 10 |
+
echo "=== BabelDOC PDF翻译服务 ==="
|
| 11 |
+
echo "OpenAI模型: ${OPENAI_MODEL:-deepseek-ai/DeepSeek-V3}"
|
| 12 |
+
echo "API服务: http://localhost:8000"
|
| 13 |
+
echo "Web界面: http://localhost:7860"
|
| 14 |
+
echo "========================="
|
| 15 |
+
|
| 16 |
+
# 启动API服务器(后台)
|
| 17 |
+
echo "启动API服务器..."
|
| 18 |
+
python3 /app/scripts/run_server.py &
|
| 19 |
+
|
| 20 |
+
# 等待API服务器启动
|
| 21 |
+
sleep 10
|
| 22 |
+
|
| 23 |
+
# 启动Gradio客户端
|
| 24 |
+
echo "启动Web界面..."
|
| 25 |
+
python3 /app/scripts/run_gradio.py --server-url http://localhost:8000 --host 0.0.0.0 --port 7860
|
pdftranslate_web/docs/API_USAGE.md
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# BabelDOC API 使用说明
|
| 2 |
+
|
| 3 |
+
## 配置说明
|
| 4 |
+
|
| 5 |
+
### 环境变量配置
|
| 6 |
+
|
| 7 |
+
服务器通过环境变量进行配置。创建 `.env` 文件或直接设置环境变量:
|
| 8 |
+
|
| 9 |
+
```bash
|
| 10 |
+
# OpenAI配置 (必填)
|
| 11 |
+
OPENAI_API_KEY=your-openai-api-key-here
|
| 12 |
+
OPENAI_MODEL=deepseek-ai/DeepSeek-V3
|
| 13 |
+
OPENAI_BASE_URL=https://api.siliconflow.cn/v1
|
| 14 |
+
|
| 15 |
+
# 服务器配置
|
| 16 |
+
SERVER_HOST=0.0.0.0
|
| 17 |
+
SERVER_PORT=8000
|
| 18 |
+
QPS=4
|
| 19 |
+
|
| 20 |
+
# 翻译配置
|
| 21 |
+
DEFAULT_LANG_IN=en
|
| 22 |
+
DEFAULT_LANG_OUT=zh
|
| 23 |
+
WATERMARK_OUTPUT_MODE=no_watermark
|
| 24 |
+
NO_DUAL=false
|
| 25 |
+
NO_MONO=false
|
| 26 |
+
```
|
| 27 |
+
- `OPENAI_API_KEY`: OpenAI API密钥 (必需)
|
| 28 |
+
- `OPENAI_MODEL`: OpenAI模型名称
|
| 29 |
+
- `OPENAI_BASE_URL`: OpenAI API基础URL
|
| 30 |
+
- `SERVER_HOST`: 服务器主机地址
|
| 31 |
+
- `SERVER_PORT`: 服务器端口
|
| 32 |
+
- `QPS`: 每秒请求数限制
|
| 33 |
+
- `DEFAULT_LANG_IN`: 默认源语言
|
| 34 |
+
- `DEFAULT_LANG_OUT`: 默认目标语言
|
| 35 |
+
- `WATERMARK_OUTPUT_MODE`: 水印模式
|
| 36 |
+
- `NO_DUAL`: 不生成双语PDF
|
| 37 |
+
- `NO_MONO`: 不生成单语PDF
|
| 38 |
+
|
| 39 |
+
## 服务端部署
|
| 40 |
+
|
| 41 |
+
### 安装依赖
|
| 42 |
+
```bash
|
| 43 |
+
# 安装必要的依赖包
|
| 44 |
+
uv sync
|
| 45 |
+
# 或者 pip install fastapi uvicorn python-multipart toml
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
### 启动服务器
|
| 49 |
+
```bash
|
| 50 |
+
# 方法1:直接运行启动脚本
|
| 51 |
+
uv run python scripts/run_server.py
|
| 52 |
+
|
| 53 |
+
# 方法2:使用API服务器模块
|
| 54 |
+
uv run python -m pdftranslate_web.api_server
|
| 55 |
+
|
| 56 |
+
# 方法3:自定义主机和端口
|
| 57 |
+
uv run python scripts/run_server.py --host 0.0.0.0 --port 8000
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
服务器启动后会监听在 `http://0.0.0.0:8000`,可以通过浏览器访问 `http://localhost:8000/docs` 查看API文档。
|
| 61 |
+
|
| 62 |
+
## API接口说明
|
| 63 |
+
|
| 64 |
+
### 1. 翻译PDF文档
|
| 65 |
+
- **接口**: `POST /translate`
|
| 66 |
+
- **功能**: 上传PDF文件进行翻译
|
| 67 |
+
- **参数**:
|
| 68 |
+
- `file`: PDF文件 (必需)
|
| 69 |
+
- `lang_in`: 源语言代码 (可选,使用服务器默认配置)
|
| 70 |
+
- `lang_out`: 目标语言代码 (可选,使用服务器默认配置)
|
| 71 |
+
- `qps`: 每秒请求数限制 (可选,使用服务器默认配置)
|
| 72 |
+
- `no_dual`: 不生成双语PDF (可选,使用服务器默认配置)
|
| 73 |
+
- `no_mono`: 不生成单语PDF (可选,使用服务器默认配置)
|
| 74 |
+
- `watermark_output_mode`: 水印模式 (可选,使用服务器默认配置)
|
| 75 |
+
|
| 76 |
+
### 2. 查询翻译状态
|
| 77 |
+
- **接口**: `GET /status/{task_id}`
|
| 78 |
+
- **功能**: 查询翻译任务的当前状态和进度
|
| 79 |
+
|
| 80 |
+
### 3. 下载翻译结果
|
| 81 |
+
- **接口**: `GET /download/{task_id}/{file_type}`
|
| 82 |
+
- **功能**: 下载翻译完成的PDF文件
|
| 83 |
+
- **参数**:
|
| 84 |
+
- `file_type`: "dual" (双语版本) 或 "mono" (单语版本)
|
| 85 |
+
|
| 86 |
+
### 4. 健康检查
|
| 87 |
+
- **接口**: `GET /health`
|
| 88 |
+
- **功能**: 检查服务是否正常运行
|
| 89 |
+
|
| 90 |
+
### 5. 获取服务器配置
|
| 91 |
+
- **接口**: `GET /`
|
| 92 |
+
- **功能**: 获取服务器当前配置信息
|
| 93 |
+
|
| 94 |
+
## 客户端使用
|
| 95 |
+
|
| 96 |
+
### Python客户端库
|
| 97 |
+
```python
|
| 98 |
+
from pdftranslate_web.api_client import BabelDOCClient
|
| 99 |
+
|
| 100 |
+
# 创建客户端
|
| 101 |
+
client = BabelDOCClient("http://localhost:8000")
|
| 102 |
+
|
| 103 |
+
# 翻译PDF文档 (使用服务器默认配置)
|
| 104 |
+
downloaded_files = client.translate_and_download(
|
| 105 |
+
pdf_path="example.pdf",
|
| 106 |
+
output_dir="./output"
|
| 107 |
+
)
|
| 108 |
+
|
| 109 |
+
# 翻译PDF文档 (自定义参数)
|
| 110 |
+
downloaded_files = client.translate_and_download(
|
| 111 |
+
pdf_path="example.pdf",
|
| 112 |
+
output_dir="./output",
|
| 113 |
+
lang_in="en",
|
| 114 |
+
lang_out="zh"
|
| 115 |
+
)
|
| 116 |
+
|
| 117 |
+
print("翻译完成,文件保存至:", downloaded_files)
|
| 118 |
+
```
|
| 119 |
+
|
| 120 |
+
### 命令行客户端
|
| 121 |
+
```bash
|
| 122 |
+
# 使用服务器默认配置翻译
|
| 123 |
+
uv run python src/pdftranslate_web/api_client.py example.pdf --output-dir ./output
|
| 124 |
+
|
| 125 |
+
# 自定义翻译参数
|
| 126 |
+
uv run python src/pdftranslate_web/api_client.py example.pdf \
|
| 127 |
+
--output-dir ./output \
|
| 128 |
+
--lang-in en \
|
| 129 |
+
--lang-out zh \
|
| 130 |
+
--server-url http://localhost:8000
|
| 131 |
+
```
|
| 132 |
+
|
| 133 |
+
### cURL示例
|
| 134 |
+
```bash
|
| 135 |
+
# 1. 提交翻译任务 (使用服务器默认配置)
|
| 136 |
+
curl -X POST "http://localhost:8000/translate" \
|
| 137 |
+
-F "file=@example.pdf"
|
| 138 |
+
|
| 139 |
+
# 2. 提交翻译任务 (自定义参数)
|
| 140 |
+
curl -X POST "http://localhost:8000/translate" \
|
| 141 |
+
-F "file=@example.pdf" \
|
| 142 |
+
-F "lang_in=en" \
|
| 143 |
+
-F "lang_out=zh"
|
| 144 |
+
|
| 145 |
+
# 3. 查询任务状态
|
| 146 |
+
curl "http://localhost:8000/status/{task_id}"
|
| 147 |
+
|
| 148 |
+
# 4. 下载翻译结果
|
| 149 |
+
curl "http://localhost:8000/download/{task_id}/dual" -o translated.pdf
|
| 150 |
+
|
| 151 |
+
# 5. 获取服务器配置
|
| 152 |
+
curl "http://localhost:8000/"
|
| 153 |
+
```
|
| 154 |
+
|
| 155 |
+
## 部署说明
|
| 156 |
+
|
| 157 |
+
### 生产环境部署
|
| 158 |
+
```bash
|
| 159 |
+
# 使用Gunicorn部署
|
| 160 |
+
pip install gunicorn
|
| 161 |
+
gunicorn pdftranslate_web.api_server:app -w 4 -k uvicorn.workers.UvicornWorker --bind 0.0.0.0:8000
|
| 162 |
+
|
| 163 |
+
# 使用Docker部署
|
| 164 |
+
# Dockerfile示例:
|
| 165 |
+
FROM python:3.11-slim
|
| 166 |
+
WORKDIR /app
|
| 167 |
+
COPY . .
|
| 168 |
+
RUN pip install -e .
|
| 169 |
+
COPY .env /app/
|
| 170 |
+
EXPOSE 8000
|
| 171 |
+
CMD ["python", "scripts/run_server.py"]
|
| 172 |
+
```
|
| 173 |
+
|
| 174 |
+
## 配置优势
|
| 175 |
+
|
| 176 |
+
1. **安全性提升**: OpenAI API密钥只需在服务器端配置一次,客户端无需传递敏感信息
|
| 177 |
+
2. **配置集中**: 所有翻译相关配置在服务器端统一管理
|
| 178 |
+
3. **客户端简化**: 客户端只需上传文件,其他参数可选
|
| 179 |
+
4. **灵活性**: 支持通过 `.env` 文件或直接设置环境变量两种方式进行配置
|
| 180 |
+
5. **默认值**: 客户端可以使用服务器默认配置,也可以覆盖特定参数
|
| 181 |
+
|
| 182 |
+
## 注意事项
|
| 183 |
+
|
| 184 |
+
1. **环境变量配置**: 所有配置均通过环境变量设置(可写入 `.env` 文件,也可直接在环境中设置)
|
| 185 |
+
2. **API密钥安全**: 确保 `.env` 文件权限设置正确,避免泄露API密钥
|
| 186 |
+
3. **文件存储**: 翻译过程中的临时文件会自动清理
|
| 187 |
+
4. **并发限制**: 服务器会根据配置的QPS参数限制API调用频率
|
| 188 |
+
5. **超时设置**: 大文件翻译可能需要较长时间,建议适当调整客户端超时时间
|
| 189 |
+
|
| 190 |
+
## 故障排除
|
| 191 |
+
|
| 192 |
+
### 常见问题
|
| 193 |
+
1. **服务器无法启动**:
|
| 194 |
+
- 检查环境变量是否正确设置
|
| 195 |
+
- 确认 OpenAI API密钥已正确配置
|
| 196 |
+
- 检查端口是否被占用
|
| 197 |
+
2. **翻译失败**:
|
| 198 |
+
- 检查服务器日志中的错误信息
|
| 199 |
+
- 确认网络连接正常
|
| 200 |
+
- 验证OpenAI API配置是否正确
|
| 201 |
+
3. **下载失败**: 确认任务已完成且文件类型正确
|
| 202 |
+
|
| 203 |
+
### 日志查看
|
| 204 |
+
服务器运行时会输出详细日志,包括:
|
| 205 |
+
- 使用的OpenAI模型信息
|
| 206 |
+
- 默认语言配置
|
| 207 |
+
- 任务处理状态
|
| 208 |
+
- 错误详情
|
pdftranslate_web/docs/GRADIO_USAGE.md
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# BabelDOC Gradio客户端使用说明
|
| 2 |
+
|
| 3 |
+
## 概述
|
| 4 |
+
|
| 5 |
+
BabelDOC Gradio客户端提供了一个直观的Web界面,用于与BabelDOC API服务器进行交互,实现PDF文档的在线翻译。
|
| 6 |
+
|
| 7 |
+
## 功能特性
|
| 8 |
+
|
| 9 |
+
### 🎯 核心功能
|
| 10 |
+
- **PDF文件上传**: 支持拖拽或点击上传PDF文件
|
| 11 |
+
- **实时预览**: 左侧显示原始PDF预览,右侧显示翻译结果预览
|
| 12 |
+
- **在线翻译**: 调用API服务器进行文档翻译
|
| 13 |
+
- **进度监控**: 实时显示翻译进度和状态
|
| 14 |
+
- **结果下载**: 翻译完成后可直接下载结果文件
|
| 15 |
+
|
| 16 |
+
### 📋 界面布局
|
| 17 |
+
- **左侧面板**:
|
| 18 |
+
- 文件上传区域
|
| 19 |
+
- 原始PDF预览(显示前5页)
|
| 20 |
+
- 翻译选项设置
|
| 21 |
+
- 翻译按钮
|
| 22 |
+
- **右侧面板**:
|
| 23 |
+
- 翻译状态显示
|
| 24 |
+
- 翻译结果PDF预览
|
| 25 |
+
- 下载按钮
|
| 26 |
+
- **底部区域**:
|
| 27 |
+
- 任务状态查询功能
|
| 28 |
+
|
| 29 |
+
## 启动方式
|
| 30 |
+
|
| 31 |
+
### 前提条件
|
| 32 |
+
确保API服务器已启动:
|
| 33 |
+
```bash
|
| 34 |
+
# 启动API服务器
|
| 35 |
+
uv run python scripts/run_server.py
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
### 启动Gradio客户端
|
| 39 |
+
```bash
|
| 40 |
+
# 方法1:使用启动脚本
|
| 41 |
+
uv run python scripts/run_gradio.py
|
| 42 |
+
|
| 43 |
+
# 方法2:直接运行模块
|
| 44 |
+
uv run python -m pdftranslate_web.gradio_client
|
| 45 |
+
|
| 46 |
+
# 方法3:自定义参数
|
| 47 |
+
uv run python scripts/run_gradio.py \
|
| 48 |
+
--server-url http://localhost:8000 \
|
| 49 |
+
--host 0.0.0.0 \
|
| 50 |
+
--port 7860 \
|
| 51 |
+
--share
|
| 52 |
+
```
|
| 53 |
+
|
| 54 |
+
### 命令行参数
|
| 55 |
+
- `--server-url`: API服务器地址 (默认: http://localhost:8000)
|
| 56 |
+
- `--host`: Gradio服务器主机 (默认: 0.0.0.0)
|
| 57 |
+
- `--port`: Gradio服务器端口 (默认: 7860)
|
| 58 |
+
- `--share`: 创建公共分享链接
|
| 59 |
+
|
| 60 |
+
## 使用流程
|
| 61 |
+
|
| 62 |
+
### 1. 检查服务器状态
|
| 63 |
+
启动后界面顶部会显示API服务器的连接状态和配置信息:
|
| 64 |
+
- ✅ 服务器在线:显示模型、默认语言等配置
|
| 65 |
+
- ❌ 服务器离线:显示连接错误信息
|
| 66 |
+
|
| 67 |
+
### 2. 上传PDF文件
|
| 68 |
+
- 点击"选择PDF文件"区域或拖拽PDF文件
|
| 69 |
+
- 上传后左侧会显示PDF预览(前5页)
|
| 70 |
+
- 支持的文件格式:.pdf
|
| 71 |
+
|
| 72 |
+
### 3. 设置翻译选项
|
| 73 |
+
- **源语言**: 留空使用服务器默认配置,或输入语言代码(如:en)
|
| 74 |
+
- **目标语言**: 留空使用服务器默认配置,或输入语言代码(如:zh)
|
| 75 |
+
- **输出类型**:
|
| 76 |
+
- `dual`: 双语对照版本
|
| 77 |
+
- `mono`: 纯翻译版本
|
| 78 |
+
|
| 79 |
+
### 4. 开始翻译
|
| 80 |
+
- 点击"🚀 开始翻译"按钮
|
| 81 |
+
- 界面会显示实时翻译进度
|
| 82 |
+
- 翻译过程包括:提交任务 → 处理中 → 完成下载
|
| 83 |
+
|
| 84 |
+
### 5. 查看结果
|
| 85 |
+
- 翻译完成后右侧显示结果PDF预览
|
| 86 |
+
- 点击"下载翻译结果"按钮下载完整文件
|
| 87 |
+
- 可以查看任务ID和状态信息
|
| 88 |
+
|
| 89 |
+
### 6. 任务状态查询
|
| 90 |
+
在底部"任务状态查询"区域:
|
| 91 |
+
- 输入任务ID可查询任务状态
|
| 92 |
+
- 显示进度、消息和结果文件信息
|
| 93 |
+
|
| 94 |
+
## 界面截图说明
|
| 95 |
+
|
| 96 |
+
```
|
| 97 |
+
┌─────────────────────────────────────────────────────────────┐
|
| 98 |
+
│ BabelDOC PDF翻译工具 │
|
| 99 |
+
│ ✅ 服务器在线 - 模型: deepseek-ai/DeepSeek-V3 │
|
| 100 |
+
├─────────────────────┬───────────────────────────────────────┤
|
| 101 |
+
│ 📁 文件上传 │ 📊 翻译状态 │
|
| 102 |
+
│ [选择PDF文件] │ 等待上传文件... │
|
| 103 |
+
│ │ │
|
| 104 |
+
│ 📄 原始PDF预览 │ 📑 翻译结果预览 │
|
| 105 |
+
│ ┌─────────────────┐ │ ┌─────────────────────────────────┐ │
|
| 106 |
+
│ │ 页面1 │ │ │ 翻译结果页面 │ │
|
| 107 |
+
│ │ 页面2 │ │ │ (翻译完成后显示) │ │
|
| 108 |
+
│ │ ... │ │ │ │ │
|
| 109 |
+
│ └─────────────────┘ │ └─────────────────────────────────┘ │
|
| 110 |
+
│ │ │
|
| 111 |
+
│ ⚙️ 翻译选项 │ [下载翻译结果] │
|
| 112 |
+
│ 源语言: [ ] │ │
|
| 113 |
+
│ 目标语言: [ ] │ 任务信息: task_12345678 │
|
| 114 |
+
│ 输出类型: dual │ │
|
| 115 |
+
│ │ │
|
| 116 |
+
│ [🚀 开始翻译] │ │
|
| 117 |
+
├─────────────────────┴───────────────────────────────────────┤
|
| 118 |
+
│ 🔍 任务状态查询 │
|
| 119 |
+
│ 任务ID: [ ] [查询] │
|
| 120 |
+
│ 查询结果显示区域 │
|
| 121 |
+
└─────────────────────────────────────────────────────────────┘
|
| 122 |
+
```
|
| 123 |
+
|
| 124 |
+
## 技术特性
|
| 125 |
+
|
| 126 |
+
### PDF预览功能
|
| 127 |
+
- 使用PyMuPDF将PDF页面转换为图片
|
| 128 |
+
- 显示前5页预览,避免加载时间过长
|
| 129 |
+
- 自动调整图片大小适配界面
|
| 130 |
+
|
| 131 |
+
### 实时进度监控
|
| 132 |
+
- 通过API客户端轮询任务状态
|
| 133 |
+
- 显示详细的处理阶段和进度百分比
|
| 134 |
+
- 支持任务超时处理(1小时)
|
| 135 |
+
|
| 136 |
+
### 文件管理
|
| 137 |
+
- 自动创建临时目录存储上传和下载的文件
|
| 138 |
+
- 智能文件命名避免冲突
|
| 139 |
+
- 支持多种文件类型的下载
|
| 140 |
+
|
| 141 |
+
## 常见问题
|
| 142 |
+
|
| 143 |
+
### 1. 界面显示"服务器离线"
|
| 144 |
+
- 确认API服务器已启动并运行在正确端口
|
| 145 |
+
- 检查`--server-url`参数是否正确
|
| 146 |
+
- 确认网络连接正常
|
| 147 |
+
|
| 148 |
+
### 2. PDF预览不显示
|
| 149 |
+
- 确认上传的是有效的PDF文件
|
| 150 |
+
- 检查PDF文件是否已损坏
|
| 151 |
+
- 尝试重新上传文件
|
| 152 |
+
|
| 153 |
+
### 3. 翻译任务失败
|
| 154 |
+
- 查看右侧状态区域的错误信息
|
| 155 |
+
- 确认API服务器的OpenAI配置正确
|
| 156 |
+
- 检查网络连接和API密钥
|
| 157 |
+
|
| 158 |
+
### 4. 下载失败
|
| 159 |
+
- 确认翻译任务已完成
|
| 160 |
+
- 检查浏览器的下载设置
|
| 161 |
+
- 尝试刷新页面重新下载
|
| 162 |
+
|
| 163 |
+
## 部署建议
|
| 164 |
+
|
| 165 |
+
### 开发环境
|
| 166 |
+
```bash
|
| 167 |
+
# 同时启动API服务器和Gradio客户端
|
| 168 |
+
# 终端1
|
| 169 |
+
uv run python scripts/run_server.py
|
| 170 |
+
|
| 171 |
+
# 终端2
|
| 172 |
+
uv run python scripts/run_gradio.py
|
| 173 |
+
```
|
| 174 |
+
|
| 175 |
+
### 生产环境
|
| 176 |
+
```bash
|
| 177 |
+
# 使用Docker Compose部署
|
| 178 |
+
# docker-compose.yml 示例:
|
| 179 |
+
version: '3.8'
|
| 180 |
+
services:
|
| 181 |
+
api-server:
|
| 182 |
+
build: .
|
| 183 |
+
command: python scripts/run_server.py
|
| 184 |
+
ports:
|
| 185 |
+
- "8000:8000"
|
| 186 |
+
volumes:
|
| 187 |
+
- ./.env:/app/.env
|
| 188 |
+
|
| 189 |
+
gradio-client:
|
| 190 |
+
build: .
|
| 191 |
+
command: python scripts/run_gradio.py --server-url http://api-server:8000
|
| 192 |
+
ports:
|
| 193 |
+
- "7860:7860"
|
| 194 |
+
depends_on:
|
| 195 |
+
- api-server
|
| 196 |
+
```
|
| 197 |
+
|
| 198 |
+
## 性能优化
|
| 199 |
+
|
| 200 |
+
1. **预览优化**: 只显示前5页预览,减少加载时间
|
| 201 |
+
2. **异步处理**: 使用Gradio的Progress功能显示实时进度
|
| 202 |
+
3. **缓存机制**: 复用API客户端连接
|
| 203 |
+
4. **文件管理**: 智能清理临时文件
|
| 204 |
+
|
| 205 |
+
通过这个Gradio客户端,用户可以享受到直观、友好的PDF翻译体验!
|
pdftranslate_web/pdftranslate-mcp-server/.dockerignore
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Git相关
|
| 2 |
+
.git
|
| 3 |
+
.gitignore
|
| 4 |
+
.gitattributes
|
| 5 |
+
|
| 6 |
+
# Python相关
|
| 7 |
+
__pycache__/
|
| 8 |
+
*.py[cod]
|
| 9 |
+
*$py.class
|
| 10 |
+
*.so
|
| 11 |
+
.Python
|
| 12 |
+
build/
|
| 13 |
+
develop-eggs/
|
| 14 |
+
dist/
|
| 15 |
+
downloads/
|
| 16 |
+
eggs/
|
| 17 |
+
.eggs/
|
| 18 |
+
lib/
|
| 19 |
+
lib64/
|
| 20 |
+
parts/
|
| 21 |
+
sdist/
|
| 22 |
+
var/
|
| 23 |
+
wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
MANIFEST
|
| 28 |
+
|
| 29 |
+
# 虚拟环境
|
| 30 |
+
venv/
|
| 31 |
+
env/
|
| 32 |
+
ENV/
|
| 33 |
+
.venv/
|
| 34 |
+
.env/
|
| 35 |
+
|
| 36 |
+
# IDE相关
|
| 37 |
+
.vscode/
|
| 38 |
+
.idea/
|
| 39 |
+
*.swp
|
| 40 |
+
*.swo
|
| 41 |
+
*~
|
| 42 |
+
|
| 43 |
+
# 测试相关
|
| 44 |
+
.pytest_cache/
|
| 45 |
+
.coverage
|
| 46 |
+
htmlcov/
|
| 47 |
+
.tox/
|
| 48 |
+
.cache
|
| 49 |
+
nosetests.xml
|
| 50 |
+
coverage.xml
|
| 51 |
+
*.cover
|
| 52 |
+
.hypothesis/
|
| 53 |
+
|
| 54 |
+
# 日志文件
|
| 55 |
+
*.log
|
| 56 |
+
logs/
|
| 57 |
+
|
| 58 |
+
# 临时文件
|
| 59 |
+
temp/
|
| 60 |
+
tmp/
|
| 61 |
+
*.tmp
|
| 62 |
+
*.temp
|
| 63 |
+
|
| 64 |
+
# 操作系统相关
|
| 65 |
+
.DS_Store
|
| 66 |
+
Thumbs.db
|
| 67 |
+
|
| 68 |
+
# Docker相关
|
| 69 |
+
Dockerfile*
|
| 70 |
+
docker-compose*.yml
|
| 71 |
+
.dockerignore
|
| 72 |
+
|
| 73 |
+
# 文档相关
|
| 74 |
+
docs/
|
| 75 |
+
*.md
|
| 76 |
+
!README.md
|
| 77 |
+
|
| 78 |
+
# 配置文件(保留示例文件)
|
| 79 |
+
.env
|
| 80 |
+
!.env.example
|
| 81 |
+
|
| 82 |
+
# 上传和下载目录
|
| 83 |
+
uploads/
|
| 84 |
+
downloads/
|
| 85 |
+
output/
|
pdftranslate_web/pdftranslate-mcp-server/.env.docker
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Docker环境配置文件
|
| 2 |
+
# 复制此文件为 .env 并填入实际的配置值
|
| 3 |
+
|
| 4 |
+
# ================================
|
| 5 |
+
# OpenAI 配置 (必填)
|
| 6 |
+
# ================================
|
| 7 |
+
OPENAI_API_KEY=your-api-key-here
|
| 8 |
+
OPENAI_MODEL=deepseek-ai/DeepSeek-V3
|
| 9 |
+
OPENAI_BASE_URL=https://api.siliconflow.cn/v1
|
| 10 |
+
|
| 11 |
+
# ================================
|
| 12 |
+
# MCP服务器配置
|
| 13 |
+
# ================================
|
| 14 |
+
MCP_HOST=0.0.0.0
|
| 15 |
+
MCP_PORT=8006
|
| 16 |
+
|
| 17 |
+
# ================================
|
| 18 |
+
# 翻译配置
|
| 19 |
+
# ================================
|
| 20 |
+
DEFAULT_LANG_IN=en
|
| 21 |
+
DEFAULT_LANG_OUT=zh
|
| 22 |
+
QPS=4
|
| 23 |
+
WATERMARK_OUTPUT_MODE=no_watermark
|
| 24 |
+
NO_DUAL=false
|
| 25 |
+
NO_MONO=false
|
| 26 |
+
|
| 27 |
+
# ================================
|
| 28 |
+
# 腾讯云COS配置 (可选,用于文件上传)
|
| 29 |
+
# ================================
|
| 30 |
+
COS_REGION=
|
| 31 |
+
COS_SECRET_ID=
|
| 32 |
+
COS_SECRET_KEY=
|
| 33 |
+
COS_BUCKET=
|
| 34 |
+
|
| 35 |
+
# ================================
|
| 36 |
+
# 日志配置
|
| 37 |
+
# ================================
|
| 38 |
+
LOG_LEVEL=INFO
|
pdftranslate_web/pdftranslate-mcp-server/Dockerfile
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 使用官方Python运行时作为基础镜像
|
| 2 |
+
FROM python:3.11-slim
|
| 3 |
+
|
| 4 |
+
# 设置标签信息
|
| 5 |
+
LABEL maintainer="wwwzhouhui <75271002@qq.com>" \
|
| 6 |
+
version="1.0.0" \
|
| 7 |
+
description="PDFTranslate MCP Server - 基于BabelDOC的PDF文档翻译服务" \
|
| 8 |
+
org.opencontainers.image.source="https://github.com/wwwzhouhui/pdftranslate_web"
|
| 9 |
+
|
| 10 |
+
# 设置工作目录
|
| 11 |
+
WORKDIR /app
|
| 12 |
+
|
| 13 |
+
# 设置环境变量
|
| 14 |
+
ENV PYTHONPATH=/app \
|
| 15 |
+
PYTHONUNBUFFERED=1 \
|
| 16 |
+
PIP_NO_CACHE_DIR=1 \
|
| 17 |
+
PYTHONDONTWRITEBYTECODE=1 \
|
| 18 |
+
# MCP服务器配置
|
| 19 |
+
MCP_HOST=0.0.0.0 \
|
| 20 |
+
MCP_PORT=8006 \
|
| 21 |
+
# 翻译默认配置
|
| 22 |
+
DEFAULT_LANG_IN=en \
|
| 23 |
+
DEFAULT_LANG_OUT=zh \
|
| 24 |
+
QPS=4 \
|
| 25 |
+
WATERMARK_OUTPUT_MODE=no_watermark \
|
| 26 |
+
NO_DUAL=false \
|
| 27 |
+
NO_MONO=false \
|
| 28 |
+
# 日志配置
|
| 29 |
+
LOG_LEVEL=INFO
|
| 30 |
+
|
| 31 |
+
# 安装运行时系统依赖
|
| 32 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 33 |
+
curl \
|
| 34 |
+
libgl1-mesa-glx \
|
| 35 |
+
libglib2.0-0 \
|
| 36 |
+
libsm6 \
|
| 37 |
+
libxext6 \
|
| 38 |
+
libxrender-dev \
|
| 39 |
+
libgomp1 \
|
| 40 |
+
&& rm -rf /var/lib/apt/lists/* \
|
| 41 |
+
&& apt-get clean
|
| 42 |
+
|
| 43 |
+
# 复制MCP服务器项目文件
|
| 44 |
+
COPY pyproject.toml /app/
|
| 45 |
+
COPY main.py /app/
|
| 46 |
+
COPY config.ini /app/
|
| 47 |
+
COPY .env.example /app/
|
| 48 |
+
COPY README.md /app/
|
| 49 |
+
|
| 50 |
+
# 安装Python依赖
|
| 51 |
+
RUN pip install --upgrade pip && \
|
| 52 |
+
pip install . && \
|
| 53 |
+
pip install cos-python-sdk-v5
|
| 54 |
+
|
| 55 |
+
# 创建非root用户和home目录
|
| 56 |
+
RUN groupadd -r mcpuser && useradd -r -g mcpuser -m mcpuser
|
| 57 |
+
|
| 58 |
+
# 创建必要的目录并设置权限
|
| 59 |
+
RUN mkdir -p /app/temp /app/uploads /app/downloads /app/logs \
|
| 60 |
+
/home/mcpuser/.cache/babeldoc/tiktoken \
|
| 61 |
+
&& chown -R mcpuser:mcpuser /app /home/mcpuser
|
| 62 |
+
|
| 63 |
+
# 切换到非root用户
|
| 64 |
+
USER mcpuser
|
| 65 |
+
|
| 66 |
+
# 暴露MCP服务器端口
|
| 67 |
+
EXPOSE 8006
|
| 68 |
+
|
| 69 |
+
# 健康检查
|
| 70 |
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
| 71 |
+
CMD curl -f http://localhost:${MCP_PORT:-8006}/sse || exit 1
|
| 72 |
+
|
| 73 |
+
# 启动MCP服务器
|
| 74 |
+
CMD ["python", "main.py"]
|
pdftranslate_web/pdftranslate-mcp-server/PRD.MD
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# PDF翻译MCP服务器产品需求文档 (PRD)
|
| 2 |
+
|
| 3 |
+
## 1. 项目概述
|
| 4 |
+
|
| 5 |
+
### 1.1 项目名称
|
| 6 |
+
PDFTranslate MCP Server - PDF文档翻译模型上下文协议服务器
|
| 7 |
+
|
| 8 |
+
### 1.2 项目背景
|
| 9 |
+
基于现有的PDF翻译Web应用(`src/pdftranslate_web/api_server.py`),开发一个MCP服务器,使AI助手能够通过模型上下文协议直接调用PDF翻译功能,提供标准化的PDF文档翻译服务接口。
|
| 10 |
+
|
| 11 |
+
### 1.3 项目目标
|
| 12 |
+
- 提供基于MCP协议的PDF翻译服务
|
| 13 |
+
- 支持多种语言间的PDF文档翻译
|
| 14 |
+
- 提供异步翻译任务管理
|
| 15 |
+
- 兼容Cherry Studio、Cursor、Cline、Dify、FastGPT、N8N等MCP客户端
|
| 16 |
+
|
| 17 |
+
## 2. 功能需求
|
| 18 |
+
|
| 19 |
+
### 2.1 核心功能
|
| 20 |
+
1. **PDF翻译工具 (translate_pdf)**
|
| 21 |
+
- 上传PDF文件进行翻译
|
| 22 |
+
- 支持指定源语言和目标语言
|
| 23 |
+
- 支持自定义QPS(每秒查询数)
|
| 24 |
+
- 支持双语对照和单语翻译模式选择
|
| 25 |
+
- 支持水印输出模式配置
|
| 26 |
+
|
| 27 |
+
2. **翻译状态查询工具 (get_translation_status)**
|
| 28 |
+
- 查询翻译任务状态
|
| 29 |
+
- 获取翻译进度信息
|
| 30 |
+
- 查看翻译结果文件列表
|
| 31 |
+
|
| 32 |
+
3. **翻译结果下载工具 (download_translation_result)**
|
| 33 |
+
- 下载翻译完成的PDF文件
|
| 34 |
+
- 支持下载双语对照版本
|
| 35 |
+
- 支持下载单语翻译版本
|
| 36 |
+
|
| 37 |
+
### 2.2 资源提供
|
| 38 |
+
1. **配置信息资源 (config://)**
|
| 39 |
+
- 当前服务器配置信息
|
| 40 |
+
- 支持的语言列表
|
| 41 |
+
- 默认翻译参数
|
| 42 |
+
|
| 43 |
+
2. **任务列表资源 (tasks://)**
|
| 44 |
+
- 所有翻译任务列表
|
| 45 |
+
- 任务状态概览
|
| 46 |
+
|
| 47 |
+
## 3. 技术规格
|
| 48 |
+
|
| 49 |
+
### 3.1 技术栈
|
| 50 |
+
- **MCP SDK**: 基于`mcp[cli]` Python SDK
|
| 51 |
+
- **翻译引擎**: BabelDOC翻译库
|
| 52 |
+
- **AI模型**: OpenAI兼容API (支持DeepSeek等)
|
| 53 |
+
- **异步处理**: 基于Python asyncio
|
| 54 |
+
- **文件处理**: 临时文件管理和清理
|
| 55 |
+
|
| 56 |
+
### 3.2 传输方式
|
| 57 |
+
- **主要**: SSE (Server-Sent Events) - 适用于云部署
|
| 58 |
+
- **备选**: STDIO - 适用于本地开发
|
| 59 |
+
|
| 60 |
+
### 3.3 配置管理
|
| 61 |
+
通过环境变量进行配置:
|
| 62 |
+
- `OPENAI_API_KEY`: AI模型API密钥
|
| 63 |
+
- `OPENAI_MODEL`: 使用的AI模型
|
| 64 |
+
- `OPENAI_BASE_URL`: API基础URL
|
| 65 |
+
- `DEFAULT_LANG_IN`: 默认源语言
|
| 66 |
+
- `DEFAULT_LANG_OUT`: 默认目标语言
|
| 67 |
+
- `QPS`: 默认每秒查询数限制
|
| 68 |
+
- `MCP_HOST`: MCP服务器监听地址
|
| 69 |
+
- `MCP_PORT`: MCP服务器监听端口
|
| 70 |
+
|
| 71 |
+
## 4. 接口设计
|
| 72 |
+
|
| 73 |
+
### 4.1 工具接口
|
| 74 |
+
|
| 75 |
+
#### translate_pdf
|
| 76 |
+
```python
|
| 77 |
+
@mcp.tool()
|
| 78 |
+
def translate_pdf(
|
| 79 |
+
file_path: str,
|
| 80 |
+
lang_in: str = "en",
|
| 81 |
+
lang_out: str = "zh",
|
| 82 |
+
qps: int = 4,
|
| 83 |
+
no_dual: bool = False,
|
| 84 |
+
no_mono: bool = False,
|
| 85 |
+
watermark_output_mode: str = "no_watermark"
|
| 86 |
+
) -> dict:
|
| 87 |
+
"""
|
| 88 |
+
翻译PDF文档
|
| 89 |
+
|
| 90 |
+
Args:
|
| 91 |
+
file_path: PDF文件路径
|
| 92 |
+
lang_in: 源语言代码
|
| 93 |
+
lang_out: 目标语言代码
|
| 94 |
+
qps: 每秒查询数限制
|
| 95 |
+
no_dual: 是否禁用双语对照版本
|
| 96 |
+
no_mono: 是否禁用单语翻译版本
|
| 97 |
+
watermark_output_mode: 水印模式 (no_watermark/watermarked/both)
|
| 98 |
+
|
| 99 |
+
Returns:
|
| 100 |
+
dict: {"task_id": str, "message": str}
|
| 101 |
+
"""
|
| 102 |
+
```
|
| 103 |
+
|
| 104 |
+
#### get_translation_status
|
| 105 |
+
```python
|
| 106 |
+
@mcp.tool()
|
| 107 |
+
def get_translation_status(task_id: str) -> dict:
|
| 108 |
+
"""
|
| 109 |
+
查询翻译任务状态
|
| 110 |
+
|
| 111 |
+
Args:
|
| 112 |
+
task_id: 翻译任务ID
|
| 113 |
+
|
| 114 |
+
Returns:
|
| 115 |
+
dict: {
|
| 116 |
+
"task_id": str,
|
| 117 |
+
"status": str, # pending/processing/completed/failed
|
| 118 |
+
"progress": float, # 0.0-100.0
|
| 119 |
+
"message": str,
|
| 120 |
+
"result_files": dict
|
| 121 |
+
}
|
| 122 |
+
"""
|
| 123 |
+
```
|
| 124 |
+
|
| 125 |
+
#### download_translation_result
|
| 126 |
+
```python
|
| 127 |
+
@mcp.tool()
|
| 128 |
+
def download_translation_result(
|
| 129 |
+
task_id: str,
|
| 130 |
+
file_type: str = "dual"
|
| 131 |
+
) -> str:
|
| 132 |
+
"""
|
| 133 |
+
下载翻译结果文件
|
| 134 |
+
|
| 135 |
+
Args:
|
| 136 |
+
task_id: 翻译任务ID
|
| 137 |
+
file_type: 文件类型 (dual/mono)
|
| 138 |
+
|
| 139 |
+
Returns:
|
| 140 |
+
str: 下载链接或文件路径
|
| 141 |
+
"""
|
| 142 |
+
```
|
| 143 |
+
|
| 144 |
+
### 4.2 资源接口
|
| 145 |
+
|
| 146 |
+
#### config://
|
| 147 |
+
```python
|
| 148 |
+
@mcp.resource("config://")
|
| 149 |
+
def get_config() -> str:
|
| 150 |
+
"""返回当前配置信息"""
|
| 151 |
+
```
|
| 152 |
+
|
| 153 |
+
#### tasks://
|
| 154 |
+
```python
|
| 155 |
+
@mcp.resource("tasks://")
|
| 156 |
+
def get_all_tasks() -> str:
|
| 157 |
+
"""返回所有翻译任务状态"""
|
| 158 |
+
```
|
| 159 |
+
|
| 160 |
+
## 5. 部署要求
|
| 161 |
+
|
| 162 |
+
### 5.1 环境要求
|
| 163 |
+
- Python 3.12+
|
| 164 |
+
- UV包管理器
|
| 165 |
+
- 足够的磁盘空间存储临时文件
|
| 166 |
+
- 网络访问权限(调用AI API)
|
| 167 |
+
|
| 168 |
+
### 5.2 部署方式
|
| 169 |
+
1. **本地开发部署**
|
| 170 |
+
- STDIO传输方式
|
| 171 |
+
- 直接通过UV运行
|
| 172 |
+
|
| 173 |
+
2. **云服务器部署**
|
| 174 |
+
- SSE传输方式
|
| 175 |
+
- 配置外部访问端口
|
| 176 |
+
- 建议使用反向代理
|
| 177 |
+
|
| 178 |
+
### 5.3 安全考虑
|
| 179 |
+
- API密钥安全存储
|
| 180 |
+
- 文件上传大小限制
|
| 181 |
+
- 临时文件定期清理
|
| 182 |
+
- 访问频率限制
|
| 183 |
+
|
| 184 |
+
## 6. 客户端兼容性
|
| 185 |
+
|
| 186 |
+
### 6.1 已测试兼容
|
| 187 |
+
- Cherry Studio
|
| 188 |
+
- Dify
|
| 189 |
+
- N8N
|
| 190 |
+
|
| 191 |
+
### 6.2 理论兼容
|
| 192 |
+
- Cursor
|
| 193 |
+
- Cline
|
| 194 |
+
- FastGPT
|
| 195 |
+
- 其他支持MCP协议的客户端
|
| 196 |
+
|
| 197 |
+
## 7. 性能指标
|
| 198 |
+
|
| 199 |
+
### 7.1 翻译性能
|
| 200 |
+
- 支持自定义QPS限制
|
| 201 |
+
- 异步处理避免阻塞
|
| 202 |
+
- 进度实时反馈
|
| 203 |
+
|
| 204 |
+
### 7.2 系统性能
|
| 205 |
+
- 内存使用: 根据PDF大小动态调整
|
| 206 |
+
- 磁盘使用: 临时文件自动清理
|
| 207 |
+
- 网络使用: 依赖AI API调用频率
|
| 208 |
+
|
| 209 |
+
## 8. 错误处理
|
| 210 |
+
|
| 211 |
+
### 8.1 常见错误
|
| 212 |
+
- 文件格式不支持
|
| 213 |
+
- API密钥无效
|
| 214 |
+
- 网络连接失败
|
| 215 |
+
- 磁盘空间不足
|
| 216 |
+
|
| 217 |
+
### 8.2 错误响应
|
| 218 |
+
所有错误都通过标准MCP错误格式返回,包含错误代码和详细信息。
|
| 219 |
+
|
| 220 |
+
## 9. 未来扩展
|
| 221 |
+
|
| 222 |
+
### 9.1 功能扩展
|
| 223 |
+
- 支持更多文档格式
|
| 224 |
+
- 批量翻译功能
|
| 225 |
+
- 翻译记忆库集成
|
| 226 |
+
- 自定义翻译模板
|
| 227 |
+
|
| 228 |
+
### 9.2 性能优化
|
| 229 |
+
- 缓存机制
|
| 230 |
+
- 分布式处理
|
| 231 |
+
- GPU加速支持
|
| 232 |
+
|
| 233 |
+
## 10. 验收标准
|
| 234 |
+
|
| 235 |
+
### 10.1 功能验收
|
| 236 |
+
- [ ] 成功翻译PDF文档
|
| 237 |
+
- [ ] 正确返回任务状态
|
| 238 |
+
- [ ] 文件下载功能正常
|
| 239 |
+
- [ ] 配置信息获取正确
|
| 240 |
+
|
| 241 |
+
### 10.2 兼容性验收
|
| 242 |
+
- [ ] Cherry Studio客户端调用成功
|
| 243 |
+
- [ ] Dify平台集成正常
|
| 244 |
+
- [ ] N8N工作流调用成功
|
| 245 |
+
|
| 246 |
+
### 10.3 性能验收
|
| 247 |
+
- [ ] 单个文档翻译时间在合理范围内
|
| 248 |
+
- [ ] 并发处理不影响系统稳定性
|
| 249 |
+
- [ ] 内存和磁盘使用控制在预期范围内
|
| 250 |
+
|
| 251 |
+
---
|
| 252 |
+
|
| 253 |
+
**文档版本**: v1.0
|
| 254 |
+
**创建日期**: 2025-07-28
|
| 255 |
+
**最后更新**: 2025-07-28
|
pdftranslate_web/pdftranslate-mcp-server/README.md
ADDED
|
@@ -0,0 +1,771 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# PDFTranslate MCP Server
|
| 2 |
+
|
| 3 |
+
基于BabelDOC的PDF文档翻译模型上下文协议(MCP)服务器,为AI助手提供标准化的PDF翻译功能。
|
| 4 |
+
|
| 5 |
+
## 功能特性
|
| 6 |
+
|
| 7 |
+
- 🚀 **多语言PDF翻译**: 支持12+种语言间的PDF文档翻译
|
| 8 |
+
- 🔄 **异步任务处理**: 非阻塞的翻译任务管理
|
| 9 |
+
- 📊 **实时进度反馈**: 翻译进度实时更新
|
| 10 |
+
- 🎯 **双模式输出**: 支持双语对照和单语翻译版本
|
| 11 |
+
- ☁️ **云存储集成**: 自动上传翻译结果到腾讯云COS,返回直接下载链接
|
| 12 |
+
- 🛠️ **MCP协议兼容**: 支持Cherry Studio、Dify、N8N等MCP客户端
|
| 13 |
+
- ⚙️ **灵活配置**: 支持自定义QPS、水印模式、COS配置等参数
|
| 14 |
+
- 🔧 **动态配置**: 支持通过MCP参数动态更新配置,无需重启服务
|
| 15 |
+
|
| 16 |
+
## 安装要求
|
| 17 |
+
|
| 18 |
+
- Python 3.12+
|
| 19 |
+
- UV包管理器 (推荐) 或 pip
|
| 20 |
+
- OpenAI兼容的API密钥
|
| 21 |
+
- 腾讯云COS SDK (可选,用于文件上传功能)
|
| 22 |
+
|
| 23 |
+
## 快速开始
|
| 24 |
+
|
| 25 |
+
### 方式一:Docker部署 (推荐)
|
| 26 |
+
|
| 27 |
+
#### 1. 克隆项目
|
| 28 |
+
|
| 29 |
+
```bash
|
| 30 |
+
git clone <repository-url>
|
| 31 |
+
cd pdftranslate-mcp-server
|
| 32 |
+
```
|
| 33 |
+
|
| 34 |
+
#### 2. 配置环境变量
|
| 35 |
+
|
| 36 |
+
```bash
|
| 37 |
+
# 复制环境变量模板
|
| 38 |
+
cp .env.docker .env
|
| 39 |
+
|
| 40 |
+
# 编辑 .env 文件,填入你的配置
|
| 41 |
+
nano .env
|
| 42 |
+
```
|
| 43 |
+
|
| 44 |
+
必须配置的环境变量:
|
| 45 |
+
```bash
|
| 46 |
+
OPENAI_API_KEY=your-api-key-here
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
#### 3. 使用Docker Compose启动 (推荐)
|
| 50 |
+
|
| 51 |
+
```bash
|
| 52 |
+
# 构建并启动服务
|
| 53 |
+
docker-compose up -d
|
| 54 |
+
|
| 55 |
+
# 查看日志
|
| 56 |
+
docker-compose logs -f
|
| 57 |
+
|
| 58 |
+
# 停止服务
|
| 59 |
+
docker-compose down
|
| 60 |
+
```
|
| 61 |
+
|
| 62 |
+
#### 4. 或者使用Docker命令
|
| 63 |
+
|
| 64 |
+
```bash
|
| 65 |
+
# 构建镜像
|
| 66 |
+
docker build -t pdftranslate-mcp-server .
|
| 67 |
+
|
| 68 |
+
# 运行容器
|
| 69 |
+
docker run -d \
|
| 70 |
+
--name pdftranslate-mcp-server \
|
| 71 |
+
-p 8006:8006 \
|
| 72 |
+
-e OPENAI_API_KEY=your-api-key-here \
|
| 73 |
+
-e OPENAI_MODEL=deepseek-ai/DeepSeek-V3 \
|
| 74 |
+
-e OPENAI_BASE_URL=https://api.siliconflow.cn/v1 \
|
| 75 |
+
-v $(pwd)/logs:/app/logs \
|
| 76 |
+
-v $(pwd)/temp:/app/temp \
|
| 77 |
+
pdftranslate-mcp-server
|
| 78 |
+
|
| 79 |
+
# 查看日志
|
| 80 |
+
docker logs -f pdftranslate-mcp-server
|
| 81 |
+
|
| 82 |
+
# 停止容器
|
| 83 |
+
docker stop pdftranslate-mcp-server
|
| 84 |
+
docker rm pdftranslate-mcp-server
|
| 85 |
+
```
|
| 86 |
+
|
| 87 |
+
### 方式二:本地安装
|
| 88 |
+
|
| 89 |
+
#### 1. 克隆项目
|
| 90 |
+
|
| 91 |
+
```bash
|
| 92 |
+
git clone <repository-url>
|
| 93 |
+
cd pdftranslate-mcp-server
|
| 94 |
+
```
|
| 95 |
+
|
| 96 |
+
#### 2. 安装依赖
|
| 97 |
+
|
| 98 |
+
使用UV (推荐):
|
| 99 |
+
```bash
|
| 100 |
+
uv venv
|
| 101 |
+
uv pip install .
|
| 102 |
+
# 安装COS SDK (可选,用于文件上传功能)
|
| 103 |
+
uv pip install cos-python-sdk-v5
|
| 104 |
+
```
|
| 105 |
+
|
| 106 |
+
或使用pip:
|
| 107 |
+
```bash
|
| 108 |
+
pip install .
|
| 109 |
+
# 安装COS SDK (可选,用于文件上传功能)
|
| 110 |
+
pip install cos-python-sdk-v5
|
| 111 |
+
```
|
| 112 |
+
|
| 113 |
+
#### 3. 安装BabelDOC
|
| 114 |
+
|
| 115 |
+
```bash
|
| 116 |
+
# 使用UV
|
| 117 |
+
uv pip install babeldoc
|
| 118 |
+
|
| 119 |
+
# 或使用pip
|
| 120 |
+
pip install babeldoc
|
| 121 |
+
```
|
| 122 |
+
|
| 123 |
+
#### 4. 配置环境变量
|
| 124 |
+
|
| 125 |
+
复制环境变量模板:
|
| 126 |
+
```bash
|
| 127 |
+
cp .env.docker .env
|
| 128 |
+
```
|
| 129 |
+
|
| 130 |
+
编辑 `.env` 文件,填入你的配置:
|
| 131 |
+
```env
|
| 132 |
+
# OpenAI API配置
|
| 133 |
+
OPENAI_API_KEY=your_api_key_here
|
| 134 |
+
OPENAI_MODEL=deepseek-ai/DeepSeek-V3
|
| 135 |
+
OPENAI_BASE_URL=https://api.siliconflow.cn/v1
|
| 136 |
+
|
| 137 |
+
# 翻译配置
|
| 138 |
+
DEFAULT_LANG_IN=en
|
| 139 |
+
DEFAULT_LANG_OUT=zh
|
| 140 |
+
QPS=4
|
| 141 |
+
WATERMARK_OUTPUT_MODE=no_watermark
|
| 142 |
+
NO_DUAL=false
|
| 143 |
+
NO_MONO=false
|
| 144 |
+
|
| 145 |
+
# 服务器配置
|
| 146 |
+
MCP_HOST=0.0.0.0
|
| 147 |
+
MCP_PORT=8006
|
| 148 |
+
|
| 149 |
+
# 腾讯云COS配置 (可选,用于文件上传功能)
|
| 150 |
+
COS_REGION=ap-nanjing
|
| 151 |
+
COS_SECRET_ID=your_cos_secret_id
|
| 152 |
+
COS_SECRET_KEY=your_cos_secret_key
|
| 153 |
+
COS_BUCKET=your_cos_bucket
|
| 154 |
+
```
|
| 155 |
+
|
| 156 |
+
#### 5. 配置文件设置 (可选)
|
| 157 |
+
|
| 158 |
+
除了环境变量,您也可以通过 `config.ini` 文件配置COS参数:
|
| 159 |
+
|
| 160 |
+
```ini
|
| 161 |
+
[common]
|
| 162 |
+
cos_region = ap-nanjing
|
| 163 |
+
cos_secret_id = your_cos_secret_id
|
| 164 |
+
cos_secret_key = your_cos_secret_key
|
| 165 |
+
cos_bucket = your_cos_bucket
|
| 166 |
+
```
|
| 167 |
+
|
| 168 |
+
> **注意**: 环境变量的优先级高于config.ini配置
|
| 169 |
+
|
| 170 |
+
#### 6. 启动服务器
|
| 171 |
+
|
| 172 |
+
```bash
|
| 173 |
+
# 使用UV
|
| 174 |
+
uv run main.py
|
| 175 |
+
|
| 176 |
+
# 或直接运行
|
| 177 |
+
python main.py
|
| 178 |
+
```
|
| 179 |
+
|
| 180 |
+
服务器将在 `http://0.0.0.0:8006/sse` 启动(SSE模式)。
|
| 181 |
+
|
| 182 |
+
**注意**: 默认使用SSE传输方式,适合云部署和远程客户端访问。如果需要STDIO模式(适合本地开发),请将main.py中的启动调用改为`mcp.run(transport="stdio")`。
|
| 183 |
+
|
| 184 |
+
## Docker镜像构建详解
|
| 185 |
+
|
| 186 |
+
### 镜像特性
|
| 187 |
+
|
| 188 |
+
- **多阶段构建**:优化镜像大小,减少最终镜像体积
|
| 189 |
+
- **非root用户**:使用专用的`mcpuser`用户运行服务,提高安全性
|
| 190 |
+
- **环境变量配置**:支持通过环境变量动态配置所有参数
|
| 191 |
+
- **健康检查**:内置健康检查机制,确保服务正常运行
|
| 192 |
+
- **数据持久化**:支持挂载卷保存日志、临时文件等
|
| 193 |
+
|
| 194 |
+
### 环境变量配置
|
| 195 |
+
|
| 196 |
+
| 环境变量 | 默认值 | 说明 |
|
| 197 |
+
|---------|--------|------|
|
| 198 |
+
| `OPENAI_API_KEY` | - | OpenAI API密钥 (必填) |
|
| 199 |
+
| `OPENAI_MODEL` | `deepseek-ai/DeepSeek-V3` | 使用的AI模型 |
|
| 200 |
+
| `OPENAI_BASE_URL` | `https://api.siliconflow.cn/v1` | API基础URL |
|
| 201 |
+
| `MCP_HOST` | `0.0.0.0` | MCP服务器监听地址 |
|
| 202 |
+
| `MCP_PORT` | `8006` | MCP服务器端口 |
|
| 203 |
+
| `DEFAULT_LANG_IN` | `en` | 默认源语言 |
|
| 204 |
+
| `DEFAULT_LANG_OUT` | `zh` | 默认目标语言 |
|
| 205 |
+
| `QPS` | `4` | 每秒查询数限制 |
|
| 206 |
+
| `WATERMARK_OUTPUT_MODE` | `no_watermark` | 水印模式 |
|
| 207 |
+
| `NO_DUAL` | `false` | 是否禁用双语版本 |
|
| 208 |
+
| `NO_MONO` | `false` | 是否禁用单语版本 |
|
| 209 |
+
| `COS_REGION` | - | 腾讯云COS地域 |
|
| 210 |
+
| `COS_SECRET_ID` | - | 腾讯云COS密钥ID |
|
| 211 |
+
| `COS_SECRET_KEY` | - | 腾讯云COS密钥Key |
|
| 212 |
+
| `COS_BUCKET` | - | 腾讯云COS存储桶 |
|
| 213 |
+
| `LOG_LEVEL` | `INFO` | 日志级别 |
|
| 214 |
+
|
| 215 |
+
### 数据卷挂载
|
| 216 |
+
|
| 217 |
+
推荐挂载以下目录:
|
| 218 |
+
|
| 219 |
+
```bash
|
| 220 |
+
# 日志目录
|
| 221 |
+
-v ./logs:/app/logs
|
| 222 |
+
|
| 223 |
+
# 临时文件目录
|
| 224 |
+
-v ./temp:/app/temp
|
| 225 |
+
|
| 226 |
+
# 上传文件目录
|
| 227 |
+
-v ./uploads:/app/uploads
|
| 228 |
+
|
| 229 |
+
# 下载文件目录
|
| 230 |
+
-v ./downloads:/app/downloads
|
| 231 |
+
|
| 232 |
+
# 配置文件 (可选)
|
| 233 |
+
-v ./config.ini:/app/config.ini:ro
|
| 234 |
+
```
|
| 235 |
+
|
| 236 |
+
### 生产环境部署
|
| 237 |
+
|
| 238 |
+
#### 使用Docker Compose (推荐)
|
| 239 |
+
|
| 240 |
+
```yaml
|
| 241 |
+
version: '3.8'
|
| 242 |
+
|
| 243 |
+
services:
|
| 244 |
+
pdftranslate-mcp:
|
| 245 |
+
image: pdftranslate-mcp-server:latest
|
| 246 |
+
container_name: pdftranslate-mcp-server
|
| 247 |
+
restart: unless-stopped
|
| 248 |
+
ports:
|
| 249 |
+
- "8006:8006"
|
| 250 |
+
environment:
|
| 251 |
+
- OPENAI_API_KEY=${OPENAI_API_KEY}
|
| 252 |
+
- OPENAI_MODEL=${OPENAI_MODEL}
|
| 253 |
+
- OPENAI_BASE_URL=${OPENAI_BASE_URL}
|
| 254 |
+
volumes:
|
| 255 |
+
- ./logs:/app/logs
|
| 256 |
+
- ./temp:/app/temp
|
| 257 |
+
networks:
|
| 258 |
+
- pdftranslate-network
|
| 259 |
+
|
| 260 |
+
networks:
|
| 261 |
+
pdftranslate-network:
|
| 262 |
+
driver: bridge
|
| 263 |
+
```
|
| 264 |
+
|
| 265 |
+
#### 使用Docker Swarm
|
| 266 |
+
|
| 267 |
+
```bash
|
| 268 |
+
# 创建Docker Secret存储API密钥
|
| 269 |
+
echo "your-api-key" | docker secret create openai_api_key -
|
| 270 |
+
|
| 271 |
+
# 部署服务
|
| 272 |
+
docker service create \
|
| 273 |
+
--name pdftranslate-mcp \
|
| 274 |
+
--publish 8006:8006 \
|
| 275 |
+
--secret openai_api_key \
|
| 276 |
+
--env OPENAI_API_KEY_FILE=/run/secrets/openai_api_key \
|
| 277 |
+
--mount type=volume,source=pdftranslate-logs,target=/app/logs \
|
| 278 |
+
--mount type=volume,source=pdftranslate-temp,target=/app/temp \
|
| 279 |
+
pdftranslate-mcp-server:latest
|
| 280 |
+
```
|
| 281 |
+
|
| 282 |
+
### 镜像管理
|
| 283 |
+
|
| 284 |
+
```bash
|
| 285 |
+
# 构建镜像
|
| 286 |
+
docker build -t pdftranslate-mcp-server:latest .
|
| 287 |
+
|
| 288 |
+
# 构建指定版本
|
| 289 |
+
docker build -t pdftranslate-mcp-server:1.0.0 .
|
| 290 |
+
|
| 291 |
+
# 推送到镜像仓库
|
| 292 |
+
docker tag pdftranslate-mcp-server:latest your-registry/pdftranslate-mcp-server:latest
|
| 293 |
+
docker push your-registry/pdftranslate-mcp-server:latest
|
| 294 |
+
|
| 295 |
+
# 清理未使用的镜像
|
| 296 |
+
docker image prune -f
|
| 297 |
+
```
|
| 298 |
+
|
| 299 |
+
### 容器监控和维护
|
| 300 |
+
|
| 301 |
+
```bash
|
| 302 |
+
# 查看容器状态
|
| 303 |
+
docker ps
|
| 304 |
+
docker stats pdftranslate-mcp-server
|
| 305 |
+
|
| 306 |
+
# 查看容器日志
|
| 307 |
+
docker logs -f pdftranslate-mcp-server
|
| 308 |
+
docker logs --tail 100 pdftranslate-mcp-server
|
| 309 |
+
|
| 310 |
+
# 进入容器调试
|
| 311 |
+
docker exec -it pdftranslate-mcp-server /bin/bash
|
| 312 |
+
|
| 313 |
+
# 重启容器
|
| 314 |
+
docker restart pdftranslate-mcp-server
|
| 315 |
+
|
| 316 |
+
# 更新容器
|
| 317 |
+
docker pull pdftranslate-mcp-server:latest
|
| 318 |
+
docker stop pdftranslate-mcp-server
|
| 319 |
+
docker rm pdftranslate-mcp-server
|
| 320 |
+
docker run -d --name pdftranslate-mcp-server ...
|
| 321 |
+
```
|
| 322 |
+
|
| 323 |
+
## MCP工具说明
|
| 324 |
+
|
| 325 |
+
### translate_pdf
|
| 326 |
+
翻译PDF文档 - 支持多种文件输入方式
|
| 327 |
+
|
| 328 |
+
**参数:**
|
| 329 |
+
- `file_input` (str): 文件输入内容
|
| 330 |
+
- 当`input_type="base64"`时,为base64编码的文件内容
|
| 331 |
+
- 当`input_type="url"`时,为文件下载URL
|
| 332 |
+
- 当`input_type="path"`时,为本地文件路径(仅限本地开发)
|
| 333 |
+
- `input_type` (str, 可选): 输入类型,可选值: "base64", "url", "path",默认为 "base64"
|
| 334 |
+
- `filename` (str, 可选): 文件名称(用于识别和存储),默认为 "document.pdf"
|
| 335 |
+
- `lang_in` (str, 可选): 源语言代码,默认为 "en"
|
| 336 |
+
- `lang_out` (str, 可选): 目标语言代码,默认为 "zh"
|
| 337 |
+
- `qps` (int, 可选): 每秒查询数限制,默认为 4
|
| 338 |
+
- `no_dual` (bool, 可选): 是否禁用双语对照版本,默认为 False
|
| 339 |
+
- `no_mono` (bool, 可选): 是否禁用单语翻译版本,默认为 False
|
| 340 |
+
- `watermark_output_mode` (str, 可选): 水印模式,可选值: "no_watermark", "watermarked", "both"
|
| 341 |
+
|
| 342 |
+
**使用示例:**
|
| 343 |
+
|
| 344 |
+
1. **Base64文件上传(推荐用于SSE方式):**
|
| 345 |
+
```json
|
| 346 |
+
{
|
| 347 |
+
"file_input": "JVBERi0xLjQKJcOkw7zDtsOfCjIgMCBvYmoKPDwKL...",
|
| 348 |
+
"input_type": "base64",
|
| 349 |
+
"filename": "my_document.pdf",
|
| 350 |
+
"lang_in": "en",
|
| 351 |
+
"lang_out": "zh"
|
| 352 |
+
}
|
| 353 |
+
```
|
| 354 |
+
|
| 355 |
+
2. **URL文件下载:**
|
| 356 |
+
```json
|
| 357 |
+
{
|
| 358 |
+
"file_input": "https://example.com/document.pdf",
|
| 359 |
+
"input_type": "url",
|
| 360 |
+
"filename": "remote_document.pdf"
|
| 361 |
+
}
|
| 362 |
+
```
|
| 363 |
+
|
| 364 |
+
3. **本地文件路径(仅限本地开发):**
|
| 365 |
+
```json
|
| 366 |
+
{
|
| 367 |
+
"file_input": "/path/to/local/document.pdf",
|
| 368 |
+
"input_type": "path"
|
| 369 |
+
}
|
| 370 |
+
```
|
| 371 |
+
|
| 372 |
+
**返回:**
|
| 373 |
+
```json
|
| 374 |
+
{
|
| 375 |
+
"task_id": "uuid-string",
|
| 376 |
+
"message": "翻译任务已创建,正在从en翻译到zh",
|
| 377 |
+
"status": "pending",
|
| 378 |
+
"file_name": "document.pdf",
|
| 379 |
+
"file_size_mb": 2.5,
|
| 380 |
+
"input_type": "base64",
|
| 381 |
+
"settings": {
|
| 382 |
+
"source_language": "en",
|
| 383 |
+
"target_language": "zh",
|
| 384 |
+
"qps": 4,
|
| 385 |
+
"dual_output": true,
|
| 386 |
+
"mono_output": true,
|
| 387 |
+
"watermark_mode": "no_watermark"
|
| 388 |
+
}
|
| 389 |
+
}
|
| 390 |
+
```
|
| 391 |
+
|
| 392 |
+
### get_translation_status
|
| 393 |
+
查询翻译任务状态
|
| 394 |
+
|
| 395 |
+
**参数:**
|
| 396 |
+
- `task_id` (str): 翻译任务ID
|
| 397 |
+
|
| 398 |
+
**返回:**
|
| 399 |
+
```json
|
| 400 |
+
{
|
| 401 |
+
"task_id": "uuid-string",
|
| 402 |
+
"status": "processing",
|
| 403 |
+
"progress": 45.5,
|
| 404 |
+
"message": "正在翻译文档...",
|
| 405 |
+
"result_files": {},
|
| 406 |
+
"cos_urls": {},
|
| 407 |
+
"created_at": "2025-07-28T10:00:00",
|
| 408 |
+
"updated_at": "2025-07-28T10:05:00"
|
| 409 |
+
}
|
| 410 |
+
```
|
| 411 |
+
|
| 412 |
+
### get_translation_result_cos_url
|
| 413 |
+
获取翻译结果文件的COS云存储URL(推荐用于文件分发)
|
| 414 |
+
|
| 415 |
+
**参数:**
|
| 416 |
+
- `task_id` (str): 翻译任务ID
|
| 417 |
+
- `file_type` (str, 可选): 文件类型,"dual" 或 "mono",默认为 "dual"
|
| 418 |
+
|
| 419 |
+
**返回:**
|
| 420 |
+
```json
|
| 421 |
+
{
|
| 422 |
+
"success": true,
|
| 423 |
+
"file_type": "dual",
|
| 424 |
+
"cos_url": "https://your-bucket.cos.ap-nanjing.myqcloud.com/dual_document_20250128_143022.pdf",
|
| 425 |
+
"download_url": "https://your-bucket.cos.ap-nanjing.myqcloud.com/dual_document_20250128_143022.pdf",
|
| 426 |
+
"message": "成功获取dual类型的翻译结果文件COS URL"
|
| 427 |
+
}
|
| 428 |
+
```
|
| 429 |
+
|
| 430 |
+
### get_translation_result_base64
|
| 431 |
+
获取翻译结果文件的base64编码内容(推荐用于SSE方式)
|
| 432 |
+
|
| 433 |
+
**参数:**
|
| 434 |
+
- `task_id` (str): 翻译任务ID
|
| 435 |
+
- `file_type` (str, 可选): 文件类型,"dual" 或 "mono",默认为 "dual"
|
| 436 |
+
|
| 437 |
+
**返回:**
|
| 438 |
+
```json
|
| 439 |
+
{
|
| 440 |
+
"success": true,
|
| 441 |
+
"file_name": "translated_document.pdf",
|
| 442 |
+
"file_size": 1024000,
|
| 443 |
+
"file_size_mb": 1.0,
|
| 444 |
+
"file_type": "dual",
|
| 445 |
+
"base64_content": "JVBERi0xLjQKJcOkw7zDtsOfCjIgMCBvYmoKPDwKL...",
|
| 446 |
+
"data_url": "data:application/pdf;base64,JVBERi0xLjQK...",
|
| 447 |
+
"message": "成功获取dual类型的翻译结果文件base64编码"
|
| 448 |
+
}
|
| 449 |
+
```
|
| 450 |
+
|
| 451 |
+
### download_translation_result
|
| 452 |
+
获取翻译结果文件信息
|
| 453 |
+
|
| 454 |
+
**参数:**
|
| 455 |
+
- `task_id` (str): 翻译任务ID
|
| 456 |
+
- `file_type` (str, 可选): 文件类型,"dual" 或 "mono",默认为 "dual"
|
| 457 |
+
|
| 458 |
+
**返回:**
|
| 459 |
+
```json
|
| 460 |
+
{
|
| 461 |
+
"file_path": "/path/to/translated.pdf",
|
| 462 |
+
"file_name": "translated.pdf",
|
| 463 |
+
"file_size": 1024000,
|
| 464 |
+
"file_type": "dual"
|
| 465 |
+
}
|
| 466 |
+
```
|
| 467 |
+
|
| 468 |
+
### list_all_tasks
|
| 469 |
+
列出所有翻译任务
|
| 470 |
+
|
| 471 |
+
**返回:**
|
| 472 |
+
```json
|
| 473 |
+
{
|
| 474 |
+
"total_tasks": 5,
|
| 475 |
+
"tasks": [...]
|
| 476 |
+
}
|
| 477 |
+
```
|
| 478 |
+
|
| 479 |
+
### get_supported_languages
|
| 480 |
+
获取支持的语言列表
|
| 481 |
+
|
| 482 |
+
**返回:**
|
| 483 |
+
```json
|
| 484 |
+
{
|
| 485 |
+
"languages": {
|
| 486 |
+
"zh": "中文",
|
| 487 |
+
"en": "English",
|
| 488 |
+
"ja": "日本語",
|
| 489 |
+
...
|
| 490 |
+
},
|
| 491 |
+
"default_lang_in": "en",
|
| 492 |
+
"default_lang_out": "zh"
|
| 493 |
+
}
|
| 494 |
+
```
|
| 495 |
+
|
| 496 |
+
### update_cos_config
|
| 497 |
+
动态更新COS配置参数(无需重启服务)
|
| 498 |
+
|
| 499 |
+
**参数:**
|
| 500 |
+
- `cos_region` (str, 可选): COS地域,如 "ap-nanjing"
|
| 501 |
+
- `cos_secret_id` (str, 可选): COS密钥ID
|
| 502 |
+
- `cos_secret_key` (str, 可选): COS密钥Key
|
| 503 |
+
- `cos_bucket` (str, 可选): COS存储桶名称
|
| 504 |
+
|
| 505 |
+
**返回:**
|
| 506 |
+
```json
|
| 507 |
+
{
|
| 508 |
+
"success": true,
|
| 509 |
+
"message": "COS配置已更新: region, bucket",
|
| 510 |
+
"updated_fields": ["region", "bucket"],
|
| 511 |
+
"config_complete": true,
|
| 512 |
+
"cos_upload_ready": true,
|
| 513 |
+
"current_config": {
|
| 514 |
+
"region": "ap-nanjing",
|
| 515 |
+
"secret_id": "已配置",
|
| 516 |
+
"secret_key": "已配置",
|
| 517 |
+
"bucket": "my-bucket"
|
| 518 |
+
}
|
| 519 |
+
}
|
| 520 |
+
```
|
| 521 |
+
|
| 522 |
+
### check_system_status
|
| 523 |
+
检查系统状态和依赖(已增强COS支持检查)
|
| 524 |
+
|
| 525 |
+
**返回:**
|
| 526 |
+
```json
|
| 527 |
+
{
|
| 528 |
+
"service_name": "PDFTranslate MCP Server",
|
| 529 |
+
"version": "1.0.0",
|
| 530 |
+
"babeldoc_available": true,
|
| 531 |
+
"cos_available": true,
|
| 532 |
+
"api_key_configured": true,
|
| 533 |
+
"cos_configured": true,
|
| 534 |
+
"dependencies": {
|
| 535 |
+
"babeldoc": "✅ 已安装",
|
| 536 |
+
"openai_api": "✅ 已配置",
|
| 537 |
+
"cos_sdk": "✅ 已安装",
|
| 538 |
+
"cos_config": "✅ 已配置"
|
| 539 |
+
},
|
| 540 |
+
"configuration": {
|
| 541 |
+
"model": "deepseek-ai/DeepSeek-V3",
|
| 542 |
+
"cos_region": "ap-nanjing",
|
| 543 |
+
"cos_bucket": "my-bucket"
|
| 544 |
+
},
|
| 545 |
+
"ready": true,
|
| 546 |
+
"cos_upload_ready": true
|
| 547 |
+
}
|
| 548 |
+
```
|
| 549 |
+
|
| 550 |
+
## MCP资源说明
|
| 551 |
+
|
| 552 |
+
### config://
|
| 553 |
+
获取服务器配置信息
|
| 554 |
+
```json
|
| 555 |
+
{
|
| 556 |
+
"service_name": "PDFTranslate MCP Server",
|
| 557 |
+
"version": "1.0.0",
|
| 558 |
+
"babeldoc_available": true,
|
| 559 |
+
"openai_model": "deepseek-ai/DeepSeek-V3",
|
| 560 |
+
...
|
| 561 |
+
}
|
| 562 |
+
```
|
| 563 |
+
|
| 564 |
+
### tasks://
|
| 565 |
+
获取任务状态统计
|
| 566 |
+
```json
|
| 567 |
+
{
|
| 568 |
+
"total_tasks": 10,
|
| 569 |
+
"tasks_by_status": {
|
| 570 |
+
"completed": 7,
|
| 571 |
+
"processing": 2,
|
| 572 |
+
"failed": 1
|
| 573 |
+
},
|
| 574 |
+
"recent_tasks": [...]
|
| 575 |
+
}
|
| 576 |
+
```
|
| 577 |
+
|
| 578 |
+
## 客户端配置
|
| 579 |
+
|
| 580 |
+
### Cherry Studio
|
| 581 |
+
|
| 582 |
+
1. 打开Cherry Studio设置
|
| 583 |
+
2. 添加MCP服务器
|
| 584 |
+
3. 选择 **"SSE"** 传输方式
|
| 585 |
+
4. 输入服务器地址: `http://your-server:8006/sse`
|
| 586 |
+
- 本地测试: `http://localhost:8006/sse`
|
| 587 |
+
- 云服务器: `http://your-domain:8006/sse`
|
| 588 |
+
5. 保存配置
|
| 589 |
+
|
| 590 |
+
### Dify
|
| 591 |
+
|
| 592 |
+
在Dify工作流中添加MCP节点,配置SSE服务器地址:
|
| 593 |
+
- 本地: `http://localhost:8006/sse`
|
| 594 |
+
- 远程: `http://your-server:8006/sse`
|
| 595 |
+
|
| 596 |
+
### N8N
|
| 597 |
+
|
| 598 |
+
使用HTTP Request节点连接到MCP服务器的SSE端点:
|
| 599 |
+
- URL: `http://your-server:8006/sse`
|
| 600 |
+
- Method: POST
|
| 601 |
+
- Content-Type: application/json
|
| 602 |
+
|
| 603 |
+
**重要提示**:
|
| 604 |
+
- SSE方式支持base64文件上传,推荐使用`input_type="base64"`
|
| 605 |
+
- 文件大小限制为100MB
|
| 606 |
+
- 支持URL下载方式 (`input_type="url"`)
|
| 607 |
+
- 本地文件路径方式仅在服务器本地可用
|
| 608 |
+
|
| 609 |
+
## 云存储功能 (COS集成)
|
| 610 |
+
|
| 611 |
+
### 功能说明
|
| 612 |
+
|
| 613 |
+
本服务集成了腾讯云对象存储(COS),提供以下功能:
|
| 614 |
+
|
| 615 |
+
- ☁️ **自动上传**: 翻译完成后自动上传结果文件到COS
|
| 616 |
+
- 🔗 **直接访问**: 返回可直接下载的COS URL链接
|
| 617 |
+
- 📁 **文件管理**: 自动生成唯一文件名,避免冲突
|
| 618 |
+
- 🔒 **安全配置**: 支持多种配置方式,保护密钥安全
|
| 619 |
+
|
| 620 |
+
### 配置方式优先级
|
| 621 |
+
|
| 622 |
+
1. **环境变量** (最高优先级)
|
| 623 |
+
2. **config.ini文件**
|
| 624 |
+
3. **MCP动态配置**
|
| 625 |
+
|
| 626 |
+
### 使用流程
|
| 627 |
+
|
| 628 |
+
1. **配置COS参数** - 通过环境变量、config.ini或MCP工具配置
|
| 629 |
+
2. **翻译文件** - 正常使用`translate_pdf()`功能
|
| 630 |
+
3. **自动上传** - 翻译完成后自动上传到COS
|
| 631 |
+
4. **获取URL** - 使用`get_translation_result_cos_url()`获取下载链接
|
| 632 |
+
5. **直接下载** - 用户通过URL直接下载文件
|
| 633 |
+
|
| 634 |
+
### 文件命名规则
|
| 635 |
+
|
| 636 |
+
上传到COS的文件会自动添加类型前缀和时间戳后缀:
|
| 637 |
+
- 双语版本: `dual_原文件名_20250128_143022.pdf`
|
| 638 |
+
- 单语版本: `mono_原文件名_20250128_143022.pdf`
|
| 639 |
+
|
| 640 |
+
### 故障处理
|
| 641 |
+
|
| 642 |
+
如果COS上传失败,系统会:
|
| 643 |
+
- 记录详细错误日志
|
| 644 |
+
- 保留本地文件供其他方式获取
|
| 645 |
+
- 在任务状态中标明上传失败原因
|
| 646 |
+
|
| 647 |
+
## 支持的语言
|
| 648 |
+
|
| 649 |
+
- 中文 (zh)
|
| 650 |
+
- English (en)
|
| 651 |
+
- 日本語 (ja)
|
| 652 |
+
- 한국어 (ko)
|
| 653 |
+
- Français (fr)
|
| 654 |
+
- Deutsch (de)
|
| 655 |
+
- Español (es)
|
| 656 |
+
- Русский (ru)
|
| 657 |
+
- Italiano (it)
|
| 658 |
+
- Português (pt)
|
| 659 |
+
- العربية (ar)
|
| 660 |
+
- हिन्दी (hi)
|
| 661 |
+
|
| 662 |
+
## 部署
|
| 663 |
+
|
| 664 |
+
### 本地开发
|
| 665 |
+
|
| 666 |
+
```bash
|
| 667 |
+
uv run main.py
|
| 668 |
+
```
|
| 669 |
+
|
| 670 |
+
### 云服务器部署
|
| 671 |
+
|
| 672 |
+
1. 安装依赖和配置环境变量
|
| 673 |
+
2. 使用systemd或supervisor管理进程
|
| 674 |
+
3. 配置nginx反向代理 (可选)
|
| 675 |
+
|
| 676 |
+
示例systemd服务文件:
|
| 677 |
+
```ini
|
| 678 |
+
[Unit]
|
| 679 |
+
Description=PDFTranslate MCP Server
|
| 680 |
+
After=network.target
|
| 681 |
+
|
| 682 |
+
[Service]
|
| 683 |
+
Type=simple
|
| 684 |
+
User=your-user
|
| 685 |
+
WorkingDirectory=/path/to/pdftranslate-mcp-server
|
| 686 |
+
Environment=PATH=/path/to/venv/bin
|
| 687 |
+
ExecStart=/path/to/venv/bin/python main.py
|
| 688 |
+
Restart=always
|
| 689 |
+
|
| 690 |
+
[Install]
|
| 691 |
+
WantedBy=multi-user.target
|
| 692 |
+
```
|
| 693 |
+
|
| 694 |
+
### Docker部署
|
| 695 |
+
|
| 696 |
+
```dockerfile
|
| 697 |
+
FROM python:3.12-slim
|
| 698 |
+
|
| 699 |
+
WORKDIR /app
|
| 700 |
+
COPY . .
|
| 701 |
+
|
| 702 |
+
RUN pip install uv
|
| 703 |
+
RUN uv pip install --system .
|
| 704 |
+
RUN uv pip install --system babeldoc
|
| 705 |
+
|
| 706 |
+
EXPOSE 8006
|
| 707 |
+
|
| 708 |
+
CMD ["python", "main.py"]
|
| 709 |
+
```
|
| 710 |
+
|
| 711 |
+
## 故障排除
|
| 712 |
+
|
| 713 |
+
### 常见问题
|
| 714 |
+
|
| 715 |
+
1. **BabelDOC未安装**
|
| 716 |
+
```bash
|
| 717 |
+
pip install babeldoc
|
| 718 |
+
# 或
|
| 719 |
+
uv pip install babeldoc
|
| 720 |
+
```
|
| 721 |
+
|
| 722 |
+
2. **COS SDK未安装**
|
| 723 |
+
```bash
|
| 724 |
+
pip install cos-python-sdk-v5
|
| 725 |
+
# 或
|
| 726 |
+
uv pip install cos-python-sdk-v5
|
| 727 |
+
```
|
| 728 |
+
|
| 729 |
+
3. **API密钥错误**
|
| 730 |
+
检查 `.env` 文件中的 `OPENAI_API_KEY` 配置
|
| 731 |
+
|
| 732 |
+
4. **COS配置错误**
|
| 733 |
+
- 检查 `COS_REGION`、`COS_SECRET_ID`、`COS_SECRET_KEY`、`COS_BUCKET` 配置
|
| 734 |
+
- 使用 `check_system_status()` 工具检查COS配置状态
|
| 735 |
+
- 使用 `update_cos_config()` 工具动态更新配置
|
| 736 |
+
|
| 737 |
+
5. **COS上传失败**
|
| 738 |
+
- 检查网络连接
|
| 739 |
+
- 验证COS密钥权限
|
| 740 |
+
- 确认存储桶名称和地域正确
|
| 741 |
+
- 查看服务器日志获取详细错误信息
|
| 742 |
+
|
| 743 |
+
6. **端口占用**
|
| 744 |
+
修改 `.env` 文件中的 `MCP_PORT` 配置
|
| 745 |
+
|
| 746 |
+
7. **内存不足**
|
| 747 |
+
处理大型PDF文件时可能需要更多内存
|
| 748 |
+
|
| 749 |
+
8. **文件无法下载**
|
| 750 |
+
- 如果COS上传失败,可使用 `get_translation_result_base64()` 获取文件
|
| 751 |
+
- 检查COS存储桶的访问权限设置
|
| 752 |
+
|
| 753 |
+
### 日志查看
|
| 754 |
+
|
| 755 |
+
服务器日志包含详细的错误信息和调试信息,可以帮助定位问题。
|
| 756 |
+
|
| 757 |
+
## 许可证
|
| 758 |
+
|
| 759 |
+
MIT License
|
| 760 |
+
|
| 761 |
+
## 贡献
|
| 762 |
+
|
| 763 |
+
欢迎贡献代码和报告问题!请创建Issue或提交Pull Request。
|
| 764 |
+
|
| 765 |
+
## 更新日志
|
| 766 |
+
|
| 767 |
+
### v1.0.0 (2025-07-28)
|
| 768 |
+
- 初始版本发布
|
| 769 |
+
- 支持PDF翻译功能
|
| 770 |
+
- 支持MCP协议
|
| 771 |
+
- 支持多种客户端
|
pdftranslate_web/pdftranslate-mcp-server/config.ini
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[common]
|
| 2 |
+
cos_region = ap-nanjing
|
| 3 |
+
cos_secret_id = your_cos_secret_id
|
| 4 |
+
cos_secret_key = your_cos_secret_key
|
| 5 |
+
cos_bucket =tts-1258720957
|
pdftranslate_web/pdftranslate-mcp-server/docker-compose.yml
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version: '3.8'
|
| 2 |
+
|
| 3 |
+
services:
|
| 4 |
+
pdftranslate-mcp:
|
| 5 |
+
build:
|
| 6 |
+
context: .
|
| 7 |
+
dockerfile: Dockerfile
|
| 8 |
+
image: pdftranslate-mcp-server:latest
|
| 9 |
+
container_name: pdftranslate-mcp-server
|
| 10 |
+
restart: unless-stopped
|
| 11 |
+
ports:
|
| 12 |
+
- "8006:8006"
|
| 13 |
+
environment:
|
| 14 |
+
# OpenAI API配置 (必填)
|
| 15 |
+
- OPENAI_API_KEY=${OPENAI_API_KEY}
|
| 16 |
+
- OPENAI_MODEL=${OPENAI_MODEL:-deepseek-ai/DeepSeek-V3}
|
| 17 |
+
- OPENAI_BASE_URL=${OPENAI_BASE_URL:-https://api.siliconflow.cn/v1}
|
| 18 |
+
|
| 19 |
+
# MCP服务器配置
|
| 20 |
+
- MCP_HOST=0.0.0.0
|
| 21 |
+
- MCP_PORT=8006
|
| 22 |
+
|
| 23 |
+
# 翻译配置
|
| 24 |
+
- DEFAULT_LANG_IN=${DEFAULT_LANG_IN:-en}
|
| 25 |
+
- DEFAULT_LANG_OUT=${DEFAULT_LANG_OUT:-zh}
|
| 26 |
+
- QPS=${QPS:-4}
|
| 27 |
+
- WATERMARK_OUTPUT_MODE=${WATERMARK_OUTPUT_MODE:-no_watermark}
|
| 28 |
+
- NO_DUAL=${NO_DUAL:-false}
|
| 29 |
+
- NO_MONO=${NO_MONO:-false}
|
| 30 |
+
|
| 31 |
+
# 腾讯云COS配置 (可选,用于文件上传)
|
| 32 |
+
- COS_REGION=${COS_REGION}
|
| 33 |
+
- COS_SECRET_ID=${COS_SECRET_ID}
|
| 34 |
+
- COS_SECRET_KEY=${COS_SECRET_KEY}
|
| 35 |
+
- COS_BUCKET=${COS_BUCKET}
|
| 36 |
+
|
| 37 |
+
# 日志配置
|
| 38 |
+
- LOG_LEVEL=${LOG_LEVEL:-INFO}
|
| 39 |
+
|
| 40 |
+
volumes:
|
| 41 |
+
# 挂载配置文件 (可选)
|
| 42 |
+
- ./config.ini:/app/config.ini:ro
|
| 43 |
+
# 挂载日志目录
|
| 44 |
+
- ./logs:/app/logs
|
| 45 |
+
# 挂载临时文件目录
|
| 46 |
+
- ./temp:/app/temp
|
| 47 |
+
# 挂载上传下载目录
|
| 48 |
+
- ./uploads:/app/uploads
|
| 49 |
+
- ./downloads:/app/downloads
|
| 50 |
+
|
| 51 |
+
healthcheck:
|
| 52 |
+
test: ["CMD", "curl", "-f", "http://localhost:8006/sse"]
|
| 53 |
+
interval: 30s
|
| 54 |
+
timeout: 10s
|
| 55 |
+
retries: 3
|
| 56 |
+
start_period: 60s
|
| 57 |
+
|
| 58 |
+
networks:
|
| 59 |
+
- pdftranslate-network
|
| 60 |
+
|
| 61 |
+
networks:
|
| 62 |
+
pdftranslate-network:
|
| 63 |
+
driver: bridge
|
pdftranslate_web/pdftranslate-mcp-server/main.py
ADDED
|
@@ -0,0 +1,1100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import logging
|
| 3 |
+
import uuid
|
| 4 |
+
import os
|
| 5 |
+
import tempfile
|
| 6 |
+
import shutil
|
| 7 |
+
import json
|
| 8 |
+
import base64
|
| 9 |
+
import aiohttp
|
| 10 |
+
import configparser
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
from typing import Dict, Any, Optional, List, Union
|
| 13 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 14 |
+
from datetime import datetime
|
| 15 |
+
|
| 16 |
+
from mcp.server.fastmcp import FastMCP
|
| 17 |
+
from dotenv import load_dotenv
|
| 18 |
+
|
| 19 |
+
# 尝试导入腾讯云COS相关模块
|
| 20 |
+
try:
|
| 21 |
+
from qcloud_cos import CosConfig, CosS3Client
|
| 22 |
+
COS_AVAILABLE = True
|
| 23 |
+
print("✅ 腾讯云COS库已成功加载")
|
| 24 |
+
except ImportError as e:
|
| 25 |
+
COS_AVAILABLE = False
|
| 26 |
+
print("❌ 警告: 腾讯云COS库未安装")
|
| 27 |
+
print("📦 请使用以下命令安装:")
|
| 28 |
+
print(" pip install cos-python-sdk-v5")
|
| 29 |
+
print(" 或者")
|
| 30 |
+
print(" uv pip install cos-python-sdk-v5")
|
| 31 |
+
print(f"详细错误信息: {e}")
|
| 32 |
+
|
| 33 |
+
# 尝试导入BabelDOC相关模块
|
| 34 |
+
try:
|
| 35 |
+
import babeldoc.format.pdf.high_level
|
| 36 |
+
from babeldoc.format.pdf.translation_config import TranslationConfig, WatermarkOutputMode
|
| 37 |
+
from babeldoc.translator.translator import OpenAITranslator, set_translate_rate_limiter
|
| 38 |
+
from babeldoc.docvision.doclayout import DocLayoutModel
|
| 39 |
+
BABELDOC_AVAILABLE = True
|
| 40 |
+
print("✅ BabelDOC库已成功加载")
|
| 41 |
+
except ImportError as e:
|
| 42 |
+
BABELDOC_AVAILABLE = False
|
| 43 |
+
print("❌ 警告: BabelDOC库未安装")
|
| 44 |
+
print("📦 请使用以下命令安装BabelDOC:")
|
| 45 |
+
print(" pip install babeldoc")
|
| 46 |
+
print(" 或者")
|
| 47 |
+
print(" uv pip install babeldoc")
|
| 48 |
+
print(f"详细错误信息: {e}")
|
| 49 |
+
|
| 50 |
+
# 配置日志
|
| 51 |
+
logging.basicConfig(level=logging.INFO)
|
| 52 |
+
logger = logging.getLogger(__name__)
|
| 53 |
+
|
| 54 |
+
# 加载环境变量
|
| 55 |
+
load_dotenv()
|
| 56 |
+
|
| 57 |
+
def load_cos_config():
    """Collect the Tencent Cloud COS settings.

    Each of the four settings (region, secret_id, secret_key, bucket) is read
    from the ``COS_<NAME>`` environment variable first. Any setting that is
    still empty afterwards is looked up as ``cos_<name>`` in the ``[common]``
    section of the ini file named by ``CONFIG_INI_PATH`` (default
    ``config.ini``), provided that file exists.

    Returns:
        dict: keys ``region``, ``secret_id``, ``secret_key`` and ``bucket``;
        a value is ``None`` when neither source supplies it.
    """
    fields = ("region", "secret_id", "secret_key", "bucket")

    # Environment variables take precedence over the ini file.
    settings = {field: os.getenv(f"COS_{field.upper()}") for field in fields}

    ini_path = os.getenv('CONFIG_INI_PATH', 'config.ini')
    if not os.path.exists(ini_path):
        return settings

    parser = configparser.ConfigParser()
    parser.read(ini_path, encoding='utf-8')

    # Only fill the gaps the environment left open.
    for field in fields:
        if not settings[field]:
            settings[field] = parser.get('common', f'cos_{field}', fallback=None)

    return settings
|
| 83 |
+
|
| 84 |
+
# Global configuration, resolved once at import time from environment
# variables (the COS section additionally falls back to config.ini).
CONFIG = {
    "openai": {
        "api_key": os.getenv("OPENAI_API_KEY", ""),
        "model": os.getenv("OPENAI_MODEL", "deepseek-ai/DeepSeek-V3"),
        "base_url": os.getenv("OPENAI_BASE_URL", "https://api.siliconflow.cn/v1")
    },
    "translation": {
        "default_lang_in": os.getenv("DEFAULT_LANG_IN", "en"),
        "default_lang_out": os.getenv("DEFAULT_LANG_OUT", "zh"),
        "qps": int(os.getenv("QPS", "4")),
        "watermark_output_mode": os.getenv("WATERMARK_OUTPUT_MODE", "no_watermark"),
        "no_dual": os.getenv("NO_DUAL", "false").lower() == "true",
        "no_mono": os.getenv("NO_MONO", "false").lower() == "true"
    },
    "server": {
        "host": os.getenv("MCP_HOST", "0.0.0.0"),
        "port": int(os.getenv("MCP_PORT", "8003"))
    },
    "cos": load_cos_config()
}

# Validate the configuration: translation cannot work without an API key,
# so warn early instead of failing on the first request.
if not CONFIG["openai"]["api_key"]:
    logger.warning("未找到OpenAI API密钥!请通过环境变量OPENAI_API_KEY提供")

# Create the MCP server
mcp = FastMCP("PDFTranslate")

# In-memory task registries, keyed by task id.
# translation_tasks holds TranslationTask objects (the class is defined
# further down, hence the string forward reference); task_files maps each
# finished task to its result files by kind ("dual" / "mono").
translation_tasks: Dict[str, "TranslationTask"] = {}
task_files: Dict[str, Dict[str, Path]] = {}
|
| 116 |
+
|
| 117 |
+
async def download_file_from_url(url: str, target_path: Path) -> bool:
    """Stream the resource at ``url`` into ``target_path``.

    Args:
        url: Address of the file to fetch.
        target_path: Local path the response body is written to.

    Returns:
        bool: True when the server answered 200 and the body was written;
        False for any other status code or when any exception occurs
        (the failure is logged, never raised).
    """
    try:
        async with aiohttp.ClientSession() as http:
            async with http.get(url) as resp:
                # Anything other than a plain 200 is treated as a failure.
                if resp.status != 200:
                    logger.error(f"下载失败,HTTP状态码: {resp.status}")
                    return False

                # Write the body in 8 KiB chunks instead of buffering it whole.
                with open(target_path, 'wb') as sink:
                    async for piece in resp.content.iter_chunked(8192):
                        sink.write(piece)
                return True
    except Exception as exc:
        logger.error(f"下载文件时出错: {exc}")
        return False
|
| 142 |
+
|
| 143 |
+
def save_base64_file(base64_content: str, target_path: Path) -> bool:
    """Decode base64 text and write the resulting bytes to ``target_path``.

    Args:
        base64_content: Base64 text, optionally wrapped as a data URL
            (``data:...;base64,<payload>``).
        target_path: Local path the decoded bytes are written to.

    Returns:
        bool: True when the file was written; False when decoding or
        writing raised (the failure is logged, never raised).
    """
    try:
        payload = base64_content
        # A data URL carries its payload after the first comma.
        if ',' in payload and payload.startswith('data:'):
            payload = payload.partition(',')[2]

        decoded = base64.b64decode(payload)
        with open(target_path, 'wb') as sink:
            sink.write(decoded)
    except Exception as exc:
        logger.error(f"保存base64文件时出错: {exc}")
        return False
    return True
|
| 166 |
+
|
| 167 |
+
def validate_pdf_file(file_path: Path) -> bool:
    """Check whether a file starts with the PDF signature.

    Only the first bytes are inspected; the rest of the file is not parsed.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        bool: True when the file begins with ``%PDF-``; False otherwise,
        including when the file cannot be opened or read.
    """
    try:
        with open(file_path, 'rb') as stream:
            magic = stream.read(8)
    except Exception:
        # Unreadable or missing files are simply "not a PDF".
        return False
    return magic.startswith(b'%PDF-')
|
| 183 |
+
|
| 184 |
+
def upload_file_to_cos(file_path: Path, file_name: Optional[str] = None) -> Dict[str, Any]:
    """Upload a local file to Tencent Cloud COS.

    The object key is ``file_name`` (or the local file name when omitted)
    with a ``_YYYYmmdd_HHMMSS`` timestamp inserted before the extension to
    reduce the chance of overwriting an earlier upload.

    This function is synchronous and performs network I/O.

    Args:
        file_path: Path of the local file to upload.
        file_name: Desired object name in the bucket; defaults to
            ``file_path.name``.

    Returns:
        dict: always contains ``success`` (bool) and ``message``. On success
        it also carries ``url``, ``file_name`` (the final object key) and
        ``etag``; on failure it carries ``error`` and, when the COS
        configuration is incomplete, ``missing_config``.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote

    if not COS_AVAILABLE:
        return {
            "success": False,
            "error": "腾讯云COS库未安装,无法上传文件",
            "message": "请先安装cos-python-sdk-v5库"
        }

    cos_config = CONFIG["cos"]
    required = ("region", "secret_id", "secret_key", "bucket")
    missing = {key: not cos_config.get(key) for key in required}
    if any(missing.values()):
        return {
            "success": False,
            "error": "COS配置不完整",
            "message": "请检查config.ini或环境变量中的COS配置",
            "missing_config": missing
        }

    try:
        # Create the COS client
        config = CosConfig(
            Region=cos_config["region"],
            SecretId=cos_config["secret_id"],
            SecretKey=cos_config["secret_key"]
        )
        client = CosS3Client(config)

        # Fall back to the local file name when none was given
        if not file_name:
            file_name = file_path.name

        # Add a timestamp to avoid object-name collisions. This uses the
        # module-level ``datetime`` class rather than re-importing the
        # module locally, which shadowed that class inside this function.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        name_parts = file_name.rsplit('.', 1)
        if len(name_parts) == 2:
            file_name = f"{name_parts[0]}_{timestamp}.{name_parts[1]}"
        else:
            file_name = f"{file_name}_{timestamp}"

        # Upload the file
        with open(file_path, 'rb') as f:
            response = client.put_object(
                Bucket=cos_config["bucket"],
                Body=f,
                Key=file_name,
                EnableMD5=False
            )

        if response and response.get('ETag'):
            # Build the object URL. The key is percent-encoded so names
            # containing spaces or non-ASCII characters still yield a valid
            # link; "/" is kept so nested keys stay path-like.
            url = (
                f"https://{cos_config['bucket']}.cos.{cos_config['region']}"
                f".myqcloud.com/{quote(file_name)}"
            )
            return {
                "success": True,
                "url": url,
                "file_name": file_name,
                "etag": response['ETag'],
                "message": "文件上传成功"
            }
        else:
            return {
                "success": False,
                "error": "上传失败",
                "message": f"COS响应异常: {response}"
            }

    except Exception as e:
        logger.error(f"上传文件到COS时出错: {e}")
        return {
            "success": False,
            "error": f"上传过程出错: {str(e)}",
            "message": "请检查COS配置和网络连接"
        }
|
| 272 |
+
|
| 273 |
+
class TranslationTask:
    """Mutable record describing one translation job and its progress."""

    # Attribute names exported by to_dict(), in output order.
    _EXPORTED = (
        "task_id",
        "status",
        "progress",
        "message",
        "result_files",
        "cos_urls",
        "created_at",
        "updated_at",
    )

    def __init__(self, task_id: str):
        """Create a task in the ``pending`` state.

        Args:
            task_id: Identifier under which the task is tracked.
        """
        self.task_id = task_id
        # One of: pending, processing, completed, failed
        self.status = "pending"
        self.progress = 0.0
        self.message = "任务已创建,等待处理..."
        # Result file paths by kind
        self.result_files = {}
        # COS download URLs by kind
        self.cos_urls = {}
        # ISO-8601 timestamps (local time, no timezone)
        self.created_at = datetime.now().isoformat()
        self.updated_at = datetime.now().isoformat()

    def to_dict(self):
        """Return the task state as a plain dict."""
        return {name: getattr(self, name) for name in self._EXPORTED}
|
| 295 |
+
|
| 296 |
+
async def translate_document_async(
    task_id: str,
    pdf_file: Path,
    lang_in: str,
    lang_out: str,
    qps: int,
    no_dual: bool,
    no_mono: bool,
    watermark_output_mode: str,
    output_dir: Path
):
    """Translate one PDF in the background and record progress on its task.

    The task registered under ``task_id`` in ``translation_tasks`` is updated
    in place: status, progress, message and, once finished, the result file
    paths and COS URLs. Errors are recorded on the task and logged, never
    raised to the caller.

    Args:
        task_id: Key of the task in ``translation_tasks``.
        pdf_file: Path of the PDF to translate.
        lang_in: Source language code.
        lang_out: Target language code.
        qps: Request rate limit passed to the translator.
        no_dual: Passed through to the translation config as ``no_dual``.
        no_mono: Passed through to the translation config as ``no_mono``.
        watermark_output_mode: "watermarked" or "both"; any other value
            selects the no-watermark mode.
        output_dir: Directory the translated files are written to.
    """
    if not BABELDOC_AVAILABLE:
        task = translation_tasks[task_id]
        task.status = "failed"
        task.message = "BabelDOC库未安装,无法进行翻译"
        task.updated_at = datetime.now().isoformat()
        return

    try:
        task = translation_tasks[task_id]
        task.status = "processing"
        task.message = "正在初始化翻译器..."
        task.updated_at = datetime.now().isoformat()

        # Initialise the translator
        translator = OpenAITranslator(
            lang_in=lang_in,
            lang_out=lang_out,
            model=CONFIG["openai"]["model"],
            base_url=CONFIG["openai"]["base_url"],
            api_key=CONFIG["openai"]["api_key"],
            ignore_cache=False,
        )

        set_translate_rate_limiter(qps)

        # Load the document layout model
        doc_layout_model = DocLayoutModel.load_onnx()

        # Select the watermark mode
        watermark_mode = WatermarkOutputMode.NoWatermark
        if watermark_output_mode == "watermarked":
            watermark_mode = WatermarkOutputMode.Watermarked
        elif watermark_output_mode == "both":
            watermark_mode = WatermarkOutputMode.Both

        # Build the translation configuration
        config_obj = TranslationConfig(
            input_file=str(pdf_file),
            output_dir=str(output_dir),
            translator=translator,
            lang_in=lang_in,
            lang_out=lang_out,
            no_dual=no_dual,
            no_mono=no_mono,
            qps=qps,
            doc_layout_model=doc_layout_model,
            watermark_output_mode=watermark_mode,
            debug=False,
            pages=None,
            font=None,
            formular_font_pattern=None,
            formular_char_pattern=None,
            split_short_lines=False,
            short_line_split_factor=0.8,
            skip_clean=False,
            dual_translate_first=False,
            disable_rich_text_translate=False,
            enhance_compatibility=False,
            use_alternating_pages_dual=False,
            report_interval=0.1,
            min_text_length=5,
            split_strategy=None,
            table_model=None,
            show_char_box=False,
            skip_scanned_detection=False,
            ocr_workaround=False,
            custom_system_prompt=None,
            working_dir=None,
            add_formula_placehold_hint=False,
            glossaries=[],
            pool_max_workers=None,
            auto_extract_glossary=True,
            auto_enable_ocr_workaround=False,
            primary_font_family=None,
            only_include_translated_page=False,
            save_auto_extracted_glossary=False,
        )

        task.message = "正在翻译文档..."

        loop = asyncio.get_running_loop()

        # Run the translation and react to the events it emits
        async for event in babeldoc.format.pdf.high_level.async_translate(config_obj):
            if event["type"] == "progress_update":
                task.progress = event.get("overall_progress", 0.0)
                task.message = f"{event.get('stage', '处理中')} ({event.get('stage_current', 0)}/{event.get('stage_total', 100)})"
                task.updated_at = datetime.now().isoformat()
            elif event["type"] == "error":
                task.status = "failed"
                task.message = f"翻译失败: {event.get('error', '未知错误')}"
                task.updated_at = datetime.now().isoformat()
                logger.error(f"Translation failed for task {task_id}: {event.get('error')}")
                return
            elif event["type"] == "finish":
                result = event["translate_result"]
                task.progress = 100.0

                # Collect the result files and upload each one to COS
                result_files = {}
                cos_urls = {}
                variants = (
                    ("dual", result.dual_pdf_path, "双语版本"),
                    ("mono", result.mono_pdf_path, "单语版本"),
                )
                for kind, produced, label in variants:
                    if not (produced and Path(produced).exists()):
                        continue
                    local_path = Path(produced)
                    result_files[kind] = str(local_path)

                    task.message = f"正在上传{label}到云存储..."
                    task.updated_at = datetime.now().isoformat()
                    # upload_file_to_cos is synchronous network I/O; run it
                    # in the default executor so the event loop keeps
                    # serving other requests while the upload is in flight.
                    upload_result = await loop.run_in_executor(
                        None, upload_file_to_cos, local_path, f"{kind}_{local_path.name}"
                    )
                    if upload_result.get("success"):
                        cos_urls[kind] = upload_result["url"]
                        logger.info(f"{label}已上传到COS: {upload_result['url']}")
                    else:
                        logger.warning(f"{label}上传COS失败: {upload_result.get('error')}")

                task.result_files = result_files
                task.cos_urls = cos_urls
                task_files[task_id] = {k: Path(v) for k, v in result_files.items()}

                # Final message depends on whether any upload succeeded
                if cos_urls:
                    task.message = f"翻译完成,文件已上传到云存储。可用版本: {', '.join(cos_urls.keys())}"
                else:
                    task.message = "翻译完成,但文件上传到云存储失败,可通过其他方式获取文件"

                # Publish "completed" only now that files and URLs are
                # recorded, so a poller never sees a completed task without
                # its results.
                task.status = "completed"
                task.updated_at = datetime.now().isoformat()

                logger.info(f"Translation completed for task {task_id}")
                break
        else:
            # The event stream ended without a finish or error event; do not
            # leave the task stuck in "processing".
            task.status = "failed"
            task.message = "翻译失败: 翻译流程未返回结果"
            task.updated_at = datetime.now().isoformat()
            logger.error(f"Translation for task {task_id} ended without a finish event")

    except Exception as e:
        task = translation_tasks[task_id]
        task.status = "failed"
        task.message = f"翻译过程出错: {str(e)}"
        task.updated_at = datetime.now().isoformat()
        logger.error(f"Translation error for task {task_id}: {e}", exc_info=True)
|
| 456 |
+
|
| 457 |
+
@mcp.tool()
async def translate_pdf(
    file_input: str,
    input_type: str = "base64",
    filename: str = "document.pdf",
    lang_in: str = None,
    lang_out: str = None,
    qps: int = None,
    no_dual: bool = False,
    no_mono: bool = False,
    watermark_output_mode: str = None
) -> dict:
    """
    Translate a PDF document - supports several ways of supplying the file.

    The file is staged in a fresh temporary directory, validated, and a
    background translation task is started. The call returns immediately
    with the task id; use the status/result tools to follow up.

    Args:
        file_input: The file, interpreted according to input_type
            - input_type="base64": base64-encoded file content
            - input_type="url": URL to download the file from
            - input_type="path": local file path (local development only)
        input_type: Input kind ("base64", "url", "path")
        filename: File name (used for identification and storage); any
            directory components are discarded
        lang_in: Source language code (default: en)
        lang_out: Target language code (default: zh)
        qps: Queries-per-second limit (default: 4)
        no_dual: Disable the bilingual output (default: False)
        no_mono: Disable the translated-only output (default: False)
        watermark_output_mode: Watermark mode
            (no_watermark/watermarked/both, default: no_watermark)

    Returns:
        dict: {"task_id": str, "message": str, "status": str, ...} on
        success, or a dict with "error", "message" and "status" on failure
    """
    # BabelDOC is required for any translation
    if not BABELDOC_AVAILABLE:
        return {
            "error": "BabelDOC库未安装,无法进行翻译",
            "message": "请先安装BabelDOC库",
            "install_commands": [
                "pip install babeldoc",
                "uv pip install babeldoc"
            ],
            "status": "failed"
        }

    # An API key is required
    if not CONFIG["openai"]["api_key"]:
        return {
            "error": "未配置OpenAI API密钥",
            "message": "请在.env文件中设置OPENAI_API_KEY",
            "status": "failed"
        }

    # Validate the input type
    if input_type not in ["base64", "url", "path"]:
        return {
            "error": "不支持的输入类型",
            "message": f"支持的输入类型: base64, url, path。当前: {input_type}",
            "status": "failed"
        }

    temp_dir = None
    # Becomes True once the background task owns temp_dir; until then every
    # exit path must remove the directory (see the finally clause).
    task_started = False
    try:
        # Create the temporary working directory
        temp_dir = Path(tempfile.mkdtemp())

        # SECURITY: filename comes from the caller. Keep only its final
        # component so values such as "../../x.pdf" or "/etc/x.pdf" cannot
        # place the staged file outside temp_dir.
        filename = Path(filename.replace("\\", "/")).name or "document.pdf"

        # Make sure the file name ends with .pdf
        if not filename.lower().endswith('.pdf'):
            filename += '.pdf'

        pdf_path = temp_dir / filename

        # Stage the file according to the input type
        if input_type == "base64":
            logger.info(f"正在处理base64文件: {filename}")
            if not save_base64_file(file_input, pdf_path):
                return {
                    "error": "base64文件解码失败",
                    "message": "请检查base64编码格式是否正确",
                    "status": "failed"
                }

        elif input_type == "url":
            logger.info(f"正在从URL下载文件: {file_input}")
            if not await download_file_from_url(file_input, pdf_path):
                return {
                    "error": "文件下载失败",
                    "message": f"无法从URL下载文件: {file_input}",
                    "status": "failed"
                }

        elif input_type == "path":
            # NOTE(security): this reads an arbitrary path on the server's
            # file system on behalf of the caller; intended for local
            # development only.
            logger.info(f"正在处理本地文件: {file_input}")
            source_path = Path(file_input)
            if not source_path.exists():
                return {
                    "error": f"文件不存在: {file_input}",
                    "message": "请检查文件路径是否正确",
                    "status": "failed"
                }
            # Copy the file into the temporary directory
            shutil.copy2(source_path, pdf_path)

        # Verify the staged file looks like a PDF
        if not validate_pdf_file(pdf_path):
            return {
                "error": "文件格式验证失败",
                "message": "文件不是有效的PDF格式",
                "status": "failed"
            }

        # Enforce the size limit (100MB)
        file_size = pdf_path.stat().st_size
        max_size = 100 * 1024 * 1024  # 100MB
        if file_size > max_size:
            return {
                "error": "文件过大",
                "message": f"文件大小 {file_size/1024/1024:.1f}MB 超过限制 {max_size/1024/1024}MB",
                "status": "failed"
            }

        # Fill in defaults for omitted settings
        lang_in = lang_in or CONFIG["translation"]["default_lang_in"]
        lang_out = lang_out or CONFIG["translation"]["default_lang_out"]
        qps = qps or CONFIG["translation"]["qps"]
        watermark_output_mode = watermark_output_mode or CONFIG["translation"]["watermark_output_mode"]

        # Create the output directory before registering the task, so a
        # failure here does not leave an orphaned "pending" entry behind.
        output_dir = temp_dir / "output"
        output_dir.mkdir(exist_ok=True)

        # Register the task
        task_id = str(uuid.uuid4())
        task = TranslationTask(task_id)
        translation_tasks[task_id] = task

        # Start the background translation. The event loop only keeps weak
        # references to tasks, so hold a strong one on the task record to
        # prevent it from being garbage-collected mid-run.
        task.background_task = asyncio.create_task(translate_document_async(
            task_id, pdf_path, lang_in, lang_out, qps,
            no_dual, no_mono, watermark_output_mode, output_dir
        ))
        task_started = True

        logger.info(f"翻译任务已创建: {task_id}, 文件: {filename}")

        return {
            "task_id": task_id,
            "message": f"翻译任务已创建,正在从{lang_in}翻译到{lang_out}",
            "status": "pending",
            "file_name": filename,
            "file_size_mb": round(file_size / 1024 / 1024, 2),
            "input_type": input_type,
            "settings": {
                "source_language": lang_in,
                "target_language": lang_out,
                "qps": qps,
                "dual_output": not no_dual,
                "mono_output": not no_mono,
                "watermark_mode": watermark_output_mode
            }
        }

    except Exception as e:
        logger.error(f"创建翻译任务时出错: {e}")
        return {
            "error": f"创建翻译任务失败: {str(e)}",
            "message": "请检查文件内容和参数设置",
            "status": "failed"
        }
    finally:
        # No background task took ownership: remove the staged files so
        # rejected requests do not accumulate temporary directories.
        if temp_dir is not None and not task_started:
            shutil.rmtree(temp_dir, ignore_errors=True)
|
| 623 |
+
|
| 624 |
+
@mcp.tool()
def get_translation_status(task_id: str) -> dict:
    """
    Report the current state of a translation task.

    Args:
        task_id: Translation task id

    Returns:
        dict: the task's state as produced by its to_dict(), or an error
        dict with status "not_found" when the id is unknown
    """
    try:
        found = translation_tasks[task_id]
    except KeyError:
        return {
            "error": "任务不存在",
            "message": f"找不到任务ID: {task_id}",
            "status": "not_found"
        }
    return found.to_dict()
|
| 644 |
+
|
| 645 |
+
@mcp.tool()
def get_translation_result_base64(task_id: str, file_type: str = "dual") -> dict:
    """
    Return a translation result file as base64-encoded content.

    Args:
        task_id: Translation task id
        file_type: Which result to return (dual/mono)

    Returns:
        dict: on success, file metadata plus "base64_content" and a ready
        "data_url"; otherwise an error dict describing why the file is
        not available
    """
    # Guard 1: the task must exist.
    try:
        task = translation_tasks[task_id]
    except KeyError:
        return {
            "error": "任务不存在",
            "message": f"找不到任务ID: {task_id}",
            "status": "not_found"
        }

    # Guard 2: the task must have finished successfully.
    if task.status != "completed":
        return {
            "error": "翻译尚未完成",
            "message": f"当前任务状态: {task.status},请等待翻译完成",
            "current_status": task.status,
            "progress": task.progress
        }

    # Guard 3: the requested kind must have been produced.
    produced = task_files.get(task_id, {})
    if file_type not in produced:
        available_types = list(produced.keys())
        return {
            "error": f"文件类型 '{file_type}' 不存在",
            "message": f"可用的文件类型: {available_types}",
            "available_types": available_types
        }

    # Guard 4: the file must still be on disk.
    file_path = produced[file_type]
    if not file_path.exists():
        return {
            "error": "文件不存在",
            "message": f"翻译结果文件已被删除或移动: {file_path}",
            "status": "file_missing"
        }

    try:
        # Read the whole file and encode it as base64 text
        file_data = file_path.read_bytes()
        base64_content = base64.b64encode(file_data).decode('utf-8')
        byte_count = len(file_data)

        return {
            "success": True,
            "file_name": file_path.name,
            "file_size": byte_count,
            "file_size_mb": round(byte_count / 1024 / 1024, 2),
            "file_type": file_type,
            "base64_content": base64_content,
            "data_url": f"data:application/pdf;base64,{base64_content}",
            "message": f"成功获取{file_type}类型的翻译结果文件base64编码"
        }

    except Exception as exc:
        logger.error(f"读取文件时出错: {exc}")
        return {
            "error": f"读取文件失败: {str(exc)}",
            "message": "无法读取翻译结果文件",
            "status": "read_error"
        }
|
| 714 |
+
|
| 715 |
+
@mcp.tool()
def get_translation_result_cos_url(task_id: str, file_type: str = "dual") -> dict:
    """
    Return the COS URL of a translation result file.

    Args:
        task_id: Translation task id
        file_type: Which result to return (dual/mono)

    Returns:
        dict: on success, "cos_url" and "download_url" (same value) for the
        requested kind; otherwise an error dict describing why no URL is
        available
    """
    # The task must exist.
    try:
        task = translation_tasks[task_id]
    except KeyError:
        return {
            "error": "任务不存在",
            "message": f"找不到任务ID: {task_id}",
            "status": "not_found"
        }

    # The task must have finished successfully.
    if task.status != "completed":
        return {
            "error": "翻译尚未完成",
            "message": f"当前任务状态: {task.status},请等待翻译完成",
            "current_status": task.status,
            "progress": task.progress
        }

    # At least one upload must have succeeded.
    uploaded = getattr(task, 'cos_urls', None)
    if not uploaded:
        return {
            "error": "文件未上传到云存储",
            "message": "翻译完成但文件未成功上传到COS,请使用其他方式获取文件",
            "status": "no_cos_upload"
        }

    # The requested kind must be among the uploaded ones.
    if file_type not in uploaded:
        available_types = list(uploaded.keys())
        return {
            "error": f"文件类型 '{file_type}' 的COS URL不存在",
            "message": f"可用的文件类型: {available_types}",
            "available_types": available_types
        }

    link = uploaded[file_type]
    return {
        "success": True,
        "file_type": file_type,
        "cos_url": link,
        "message": f"成功获取{file_type}类型的翻译结果文件COS URL",
        "download_url": link
    }
|
| 765 |
+
|
| 766 |
+
@mcp.tool()
def download_translation_result(task_id: str, file_type: str = "dual") -> dict:
    """
    Return location and size information for a translation result file.

    Args:
        task_id: Translation task id
        file_type: Which result to describe (dual/mono)

    Returns:
        dict: on success, the file's path, name, size in bytes and kind;
        otherwise an error dict describing why the file is not available
    """
    # The task must exist.
    try:
        task = translation_tasks[task_id]
    except KeyError:
        return {
            "error": "任务不存在",
            "message": f"找不到任务ID: {task_id}",
            "status": "not_found"
        }

    # The task must have finished successfully.
    if task.status != "completed":
        return {
            "error": "翻译尚未完成",
            "message": f"当前任务状态: {task.status},请等待翻译完成",
            "current_status": task.status,
            "progress": task.progress
        }

    # The requested kind must have been produced.
    produced = task_files.get(task_id, {})
    if file_type not in produced:
        available_types = list(produced.keys())
        return {
            "error": f"文件类型 '{file_type}' 不存在",
            "message": f"可用的文件类型: {available_types}",
            "available_types": available_types
        }

    # The file must still be on disk.
    file_path = produced[file_type]
    if not file_path.exists():
        return {
            "error": "文件不存在",
            "message": f"翻译结果文件已被删除或移动: {file_path}",
            "status": "file_missing"
        }

    return {
        "success": True,
        "file_path": str(file_path),
        "file_name": file_path.name,
        "file_size": file_path.stat().st_size,
        "file_type": file_type,
        "message": f"找到{file_type}类型的翻译结果文件"
    }
|
| 818 |
+
|
| 819 |
+
@mcp.tool()
def check_system_status() -> dict:
    """
    Report service readiness, dependency availability and configuration.

    Returns:
        dict: availability flags for BabelDOC and the COS SDK, whether the
        API key and COS settings are configured, the effective
        configuration (secrets are not included), task counts, and setup
        instructions for anything that is missing
    """
    cos_config = CONFIG["cos"]
    cos_configured = all([cos_config.get("region"), cos_config.get("secret_id"),
                          cos_config.get("secret_key"), cos_config.get("bucket")])
    api_key_configured = bool(CONFIG["openai"]["api_key"])

    # "Active" means not yet finished; completed and failed tasks stay in
    # the registry but are no longer active.
    active_count = sum(
        1 for task in translation_tasks.values()
        if task.status in ("pending", "processing")
    )

    status = {
        "service_name": "PDFTranslate MCP Server",
        "version": "1.0.0",
        "babeldoc_available": BABELDOC_AVAILABLE,
        "cos_available": COS_AVAILABLE,
        "api_key_configured": api_key_configured,
        "cos_configured": cos_configured,
        "dependencies": {
            "babeldoc": "✅ 已安装" if BABELDOC_AVAILABLE else "❌ 未安装",
            "openai_api": "✅ 已配置" if api_key_configured else "❌ 未配置",
            "cos_sdk": "✅ 已安装" if COS_AVAILABLE else "❌ 未安装",
            "cos_config": "✅ 已配置" if cos_configured else "❌ 未配置"
        },
        "configuration": {
            "model": CONFIG["openai"]["model"],
            "base_url": CONFIG["openai"]["base_url"],
            "default_languages": f"{CONFIG['translation']['default_lang_in']} -> {CONFIG['translation']['default_lang_out']}",
            "default_qps": CONFIG["translation"]["qps"],
            # The keys always exist (possibly holding None), so a .get()
            # default would never apply; use "or" to show the placeholder.
            "cos_region": cos_config.get("region") or "未配置",
            "cos_bucket": cos_config.get("bucket") or "未配置"
        },
        "active_tasks": active_count,
        "total_tasks": len(translation_tasks),
        "ready": BABELDOC_AVAILABLE and api_key_configured,
        "cos_upload_ready": COS_AVAILABLE and cos_configured
    }

    if not BABELDOC_AVAILABLE:
        status["install_instructions"] = {
            "message": "请安装BabelDOC库以启用PDF翻译功能",
            "commands": [
                "pip install babeldoc",
                "uv pip install babeldoc"
            ]
        }

    if not COS_AVAILABLE:
        status["cos_install_instructions"] = {
            "message": "请安装腾讯云COS SDK以启用文件上传功能",
            "commands": [
                "pip install cos-python-sdk-v5",
                "uv pip install cos-python-sdk-v5"
            ]
        }

    if not api_key_configured:
        status["api_key_instructions"] = {
            "message": "请配置OpenAI API密钥",
            "steps": [
                "1. 复制 .env.example 为 .env",
                "2. 在 .env 文件中设置 OPENAI_API_KEY=your_api_key",
                "3. 重启MCP服务器"
            ]
        }

    if not cos_configured:
        status["cos_config_instructions"] = {
            "message": "请配置腾讯云COS参数以启用文件上传功能",
            "config_methods": [
                {
                    "method": "环境变量配置",
                    "steps": [
                        "设置 COS_REGION=your_region",
                        "设置 COS_SECRET_ID=your_secret_id",
                        "设置 COS_SECRET_KEY=your_secret_key",
                        "设置 COS_BUCKET=your_bucket"
                    ]
                },
                {
                    "method": "config.ini配置",
                    "steps": [
                        "在config.ini的[common]部分添加:",
                        "cos_region = your_region",
                        "cos_secret_id = your_secret_id",
                        "cos_secret_key = your_secret_key",
                        "cos_bucket = your_bucket"
                    ]
                }
            ],
            "note": "环境变量优先级高于config.ini配置"
        }

    return status
|
| 913 |
+
|
| 914 |
+
@mcp.tool()
def list_all_tasks() -> dict:
    """
    List every translation task currently tracked by the server.

    Returns:
        dict: ``total_tasks`` is the number of entries in
        ``translation_tasks``; ``tasks`` holds each task serialized
        through its ``to_dict()`` method.
    """
    task_count = len(translation_tasks)
    serialized = [entry.to_dict() for entry in translation_tasks.values()]
    return {"total_tasks": task_count, "tasks": serialized}
|
| 926 |
+
|
| 927 |
+
@mcp.tool()
def update_cos_config(
    cos_region: str | None = None,
    cos_secret_id: str | None = None,
    cos_secret_key: str | None = None,
    cos_bucket: str | None = None
) -> dict:
    """
    Update the in-memory COS settings held in ``CONFIG["cos"]``.

    Only arguments with a truthy value are applied; ``None`` and empty
    strings leave the corresponding setting untouched.

    Args:
        cos_region: COS region.
        cos_secret_id: COS secret ID.
        cos_secret_key: COS secret key.
        cos_bucket: COS bucket name.

    Returns:
        dict: ``success`` plus a human-readable ``message``. On success the
        result also carries ``updated_fields``, ``config_complete``,
        ``cos_upload_ready`` and a masked ``current_config``. When no
        argument was supplied, ``success`` is False and only the masked
        ``current_config`` is included. On an unexpected error, ``success``
        is False and ``error`` describes the failure.
    """

    def _masked_snapshot() -> dict:
        # Secrets are never echoed back, only whether they are set.
        # ``or`` (rather than a .get() default) makes a present-but-empty
        # region/bucket show up as unconfigured, matching the secret fields.
        cos = CONFIG["cos"]
        return {
            "region": cos.get("region") or "未配置",
            "secret_id": "已配置" if cos.get("secret_id") else "未配置",
            "secret_key": "已配置" if cos.get("secret_key") else "未配置",
            "bucket": cos.get("bucket") or "未配置",
        }

    try:
        # Insertion order fixes the order of names in ``updated_fields``.
        supplied = {
            "region": cos_region,
            "secret_id": cos_secret_id,
            "secret_key": cos_secret_key,
            "bucket": cos_bucket,
        }

        updated_fields = []
        for field, value in supplied.items():
            if value:
                CONFIG["cos"][field] = value
                updated_fields.append(field)

        if not updated_fields:
            return {
                "success": False,
                "message": "没有提供任何配置参数",
                "current_config": _masked_snapshot(),
            }

        # The configuration is complete only when all four settings are set.
        cos_config = CONFIG["cos"]
        is_complete = all(
            cos_config.get(key)
            for key in ("region", "secret_id", "secret_key", "bucket")
        )

        return {
            "success": True,
            "message": f"COS配置已更新: {', '.join(updated_fields)}",
            "updated_fields": updated_fields,
            "config_complete": is_complete,
            "cos_upload_ready": COS_AVAILABLE and is_complete,
            "current_config": _masked_snapshot(),
        }

    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        logger.error(f"更新COS配置时出错: {e}")
        return {
            "success": False,
            "error": f"更新配置失败: {str(e)}",
            "message": "请检查提供的配置参数"
        }
|
| 1003 |
+
|
| 1004 |
+
@mcp.tool()
def get_supported_languages() -> dict:
    """
    Report the language codes offered for translation.

    Returns:
        dict: ``languages`` maps each language code to its native display
        name; ``default_lang_in`` and ``default_lang_out`` are read from
        ``CONFIG["translation"]``.
    """
    language_names = {
        "zh": "中文",
        "en": "English",
        "ja": "日本語",
        "ko": "한국어",
        "fr": "Français",
        "de": "Deutsch",
        "es": "Español",
        "ru": "Русский",
        "it": "Italiano",
        "pt": "Português",
        "ar": "العربية",
        "hi": "हिन्दी",
    }
    result = {"languages": language_names}
    result["default_lang_in"] = CONFIG["translation"]["default_lang_in"]
    result["default_lang_out"] = CONFIG["translation"]["default_lang_out"]
    return result
|
| 1030 |
+
|
| 1031 |
+
@mcp.resource("config://")
def get_config() -> str:
    """Return the current configuration summary as a JSON string."""
    summary = {
        "service_name": "PDFTranslate MCP Server",
        "version": "1.0.0",
        "babeldoc_available": BABELDOC_AVAILABLE,
    }

    # OpenAI settings: only the model and base URL are included.
    openai_cfg = CONFIG["openai"]
    summary["openai_model"] = openai_cfg["model"]
    summary["openai_base_url"] = openai_cfg["base_url"]

    # Translation defaults.
    translation_cfg = CONFIG["translation"]
    summary["default_languages"] = {
        "input": translation_cfg["default_lang_in"],
        "output": translation_cfg["default_lang_out"],
    }
    summary["default_qps"] = translation_cfg["qps"]
    summary["watermark_mode"] = translation_cfg["watermark_output_mode"]

    return json.dumps(summary, indent=2, ensure_ascii=False)
|
| 1048 |
+
|
| 1049 |
+
@mcp.resource("tasks://")
def get_all_tasks() -> str:
    """
    Return an overview of all translation tasks as a JSON string.

    The payload contains the total task count, a per-status tally, and the
    ten most recently updated tasks (newest first) serialized via
    ``to_dict()``.
    """
    total = len(translation_tasks)

    # Tally how many tasks are in each status.
    per_status = {}
    for entry in translation_tasks.values():
        state = entry.status
        per_status[state] = per_status.get(state, 0) + 1

    # Newest first, capped at ten entries.
    newest_first = sorted(
        translation_tasks.values(),
        key=lambda entry: entry.updated_at,
        reverse=True,
    )
    latest = [entry.to_dict() for entry in newest_first[:10]]

    payload = {
        "total_tasks": total,
        "tasks_by_status": per_status,
        "recent_tasks": latest,
    }
    return json.dumps(payload, indent=2, ensure_ascii=False)
|
| 1078 |
+
|
| 1079 |
+
def main() -> None:
    """
    Initialize BabelDOC (when available) and run the MCP server over SSE.

    Exposed as a function so the ``pdftranslate-mcp = "main:main"`` console
    script declared in pyproject.toml has a callable to import.
    NOTE(review): assumes no other ``main`` is defined earlier in this
    module - confirm.
    """
    # Initialize BabelDOC if it is available; a failure is logged and the
    # server still starts.
    if BABELDOC_AVAILABLE:
        try:
            babeldoc.format.pdf.high_level.init()
            logger.info("BabelDOC initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize BabelDOC: {e}")

    # Configure the MCP server.
    mcp.settings.host = CONFIG["server"]["host"]
    mcp.settings.port = CONFIG["server"]["port"]

    # Log the effective configuration.
    logger.info("Starting PDFTranslate MCP Server in SSE mode")
    logger.info(f"Server URL: http://{CONFIG['server']['host']}:{CONFIG['server']['port']}/sse")
    logger.info(f"OpenAI Model: {CONFIG['openai']['model']}")
    logger.info(f"Default Translation: {CONFIG['translation']['default_lang_in']} -> {CONFIG['translation']['default_lang_out']}")
    logger.info(f"BabelDOC Available: {BABELDOC_AVAILABLE}")

    # Start the MCP server (SSE transport).
    mcp.run(transport="sse")


if __name__ == "__main__":
    main()
|
pdftranslate_web/pdftranslate-mcp-server/pyproject.toml
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "pdftranslate-mcp-server"
|
| 3 |
+
version = "1.0.0"
|
| 4 |
+
description = "PDF翻译MCP服务器 - 基于BabelDOC的PDF文档翻译模型上下文协议服务器"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.11"
|
| 7 |
+
authors = [
|
| 8 |
+
{name = "wwwzhouhui", email = "75271002@qq.com"}
|
| 9 |
+
]
|
| 10 |
+
classifiers = [
|
| 11 |
+
"Development Status :: 4 - Beta",
|
| 12 |
+
"Intended Audience :: Developers",
|
| 13 |
+
"License :: OSI Approved :: MIT License",
|
| 14 |
+
"Programming Language :: Python :: 3",
|
| 15 |
+
"Programming Language :: Python :: 3.11",
|
| 16 |
+
"Programming Language :: Python :: 3.12",
|
| 17 |
+
"Programming Language :: Python :: 3.13",
|
| 18 |
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
| 19 |
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
| 20 |
+
"Topic :: Text Processing :: Linguistic",
|
| 21 |
+
]
|
| 22 |
+
keywords = ["mcp", "pdf", "translation", "ai", "babel", "document"]
|
| 23 |
+
|
| 24 |
+
dependencies = [
|
| 25 |
+
"mcp[cli]>=1.9.3",
|
| 26 |
+
"fastapi>=0.104.0",
|
| 27 |
+
"uvicorn>=0.24.0",
|
| 28 |
+
"python-dotenv>=1.0.0",
|
| 29 |
+
"pydantic>=2.5.0",
|
| 30 |
+
"python-multipart>=0.0.6",
|
| 31 |
+
"aiofiles>=23.2.0",
|
| 32 |
+
"aiohttp>=3.9.0",
|
| 33 |
+
# BabelDOC core dependency - required; installed together with this package
|
| 34 |
+
"babeldoc>=0.4.16",
|
| 35 |
+
]
|
| 36 |
+
|
| 37 |
+
[project.optional-dependencies]
|
| 38 |
+
dev = [
|
| 39 |
+
"pytest>=7.4.0",
|
| 40 |
+
"pytest-asyncio>=0.21.0",
|
| 41 |
+
"black>=23.0.0",
|
| 42 |
+
"isort>=5.12.0",
|
| 43 |
+
"flake8>=6.0.0",
|
| 44 |
+
"mypy>=1.7.0",
|
| 45 |
+
]
|
| 46 |
+
|
| 47 |
+
full = [
|
| 48 |
+
"babeldoc>=0.4.16",
|
| 49 |
+
]
|
| 50 |
+
|
| 51 |
+
[project.urls]
|
| 52 |
+
Homepage = "https://github.com/wwwzhouhui/pdftranslate_web"
|
| 53 |
+
Repository = "https://github.com/wwwzhouhui/pdftranslate_web"
|
| 54 |
+
|
| 55 |
+
[project.scripts]
|
| 56 |
+
pdftranslate-mcp = "main:main"
|
| 57 |
+
|
| 58 |
+
[build-system]
|
| 59 |
+
requires = ["hatchling"]
|
| 60 |
+
build-backend = "hatchling.build"
|
| 61 |
+
|
| 62 |
+
[tool.hatch.build.targets.wheel]
|
| 63 |
+
packages = ["."]
|
| 64 |
+
|
| 65 |
+
[tool.black]
|
| 66 |
+
line-length = 100
|
| 67 |
+
target-version = ['py312']
|
| 68 |
+
include = '\.pyi?$'
|
| 69 |
+
extend-exclude = '''
|
| 70 |
+
/(
|
| 71 |
+
# directories
|
| 72 |
+
\.eggs
|
| 73 |
+
| \.git
|
| 74 |
+
| \.hg
|
| 75 |
+
| \.mypy_cache
|
| 76 |
+
| \.tox
|
| 77 |
+
| \.venv
|
| 78 |
+
| build
|
| 79 |
+
| dist
|
| 80 |
+
)/
|
| 81 |
+
'''
|
| 82 |
+
|
| 83 |
+
[tool.isort]
|
| 84 |
+
profile = "black"
|
| 85 |
+
line_length = 100
|
| 86 |
+
multi_line_output = 3
|
| 87 |
+
include_trailing_comma = true
|
| 88 |
+
force_grid_wrap = 0
|
| 89 |
+
use_parentheses = true
|
| 90 |
+
ensure_newline_before_comments = true
|
| 91 |
+
|
| 92 |
+
[tool.mypy]
|
| 93 |
+
python_version = "3.12"
|
| 94 |
+
warn_return_any = true
|
| 95 |
+
warn_unused_configs = true
|
| 96 |
+
disallow_untyped_defs = true
|
| 97 |
+
disallow_incomplete_defs = true
|
| 98 |
+
check_untyped_defs = true
|
| 99 |
+
disallow_untyped_decorators = true
|
| 100 |
+
no_implicit_optional = true
|
| 101 |
+
warn_redundant_casts = true
|
| 102 |
+
warn_unused_ignores = true
|
| 103 |
+
warn_no_return = true
|
| 104 |
+
warn_unreachable = true
|
| 105 |
+
strict_equality = true
|
| 106 |
+
|
| 107 |
+
[tool.pytest.ini_options]
|
| 108 |
+
testpaths = ["tests"]
|
| 109 |
+
python_files = ["test_*.py", "*_test.py"]
|
| 110 |
+
python_classes = ["Test*"]
|
| 111 |
+
python_functions = ["test_*"]
|
| 112 |
+
addopts = [
|
| 113 |
+
"-v",
|
| 114 |
+
"--tb=short",
|
| 115 |
+
"--strict-config",
|
| 116 |
+
"--strict-markers",
|
| 117 |
+
]
|
| 118 |
+
asyncio_mode = "auto"
|
| 119 |
+
|
| 120 |
+
[tool.coverage.run]
|
| 121 |
+
source = ["."]
|
| 122 |
+
omit = [
|
| 123 |
+
"tests/*",
|
| 124 |
+
"*/tests/*",
|
| 125 |
+
"*/test_*",
|
| 126 |
+
"*/__pycache__/*",
|
| 127 |
+
]
|
| 128 |
+
|
| 129 |
+
[tool.coverage.report]
|
| 130 |
+
exclude_lines = [
|
| 131 |
+
"pragma: no cover",
|
| 132 |
+
"def __repr__",
|
| 133 |
+
"if self.debug:",
|
| 134 |
+
"if settings.DEBUG",
|
| 135 |
+
"raise AssertionError",
|
| 136 |
+
"raise NotImplementedError",
|
| 137 |
+
"if 0:",
|
| 138 |
+
"if __name__ == .__main__.:",
|
| 139 |
+
"class .*\\bProtocol\\):",
|
| 140 |
+
"@(abc\\.)?abstractmethod",
|
| 141 |
+
]
|
pdftranslate_web/pdftranslate-mcp-server/pyproject_scnet.toml
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "pdftranslate-mcp-server"
|
| 3 |
+
version = "1.0.0"
|
| 4 |
+
description = "PDF翻译MCP服务器 - 基于BabelDOC的PDF文档翻译模型上下文协议服务器"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.11"
|
| 7 |
+
authors = [
|
| 8 |
+
{name = "wwwzhouhui", email = "75271002@qq.com"}
|
| 9 |
+
]
|
| 10 |
+
classifiers = [
|
| 11 |
+
"Development Status :: 4 - Beta",
|
| 12 |
+
"Intended Audience :: Developers",
|
| 13 |
+
"License :: OSI Approved :: MIT License",
|
| 14 |
+
"Programming Language :: Python :: 3",
|
| 15 |
+
"Programming Language :: Python :: 3.11",
|
| 16 |
+
"Programming Language :: Python :: 3.12",
|
| 17 |
+
"Programming Language :: Python :: 3.13",
|
| 18 |
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
| 19 |
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
| 20 |
+
"Topic :: Text Processing :: Linguistic",
|
| 21 |
+
]
|
| 22 |
+
keywords = ["mcp", "pdf", "translation", "ai", "babel", "document"]
|
| 23 |
+
|
| 24 |
+
dependencies = [
|
| 25 |
+
"mcp[cli]>=1.9.3",
|
| 26 |
+
"fastapi>=0.104.0",
|
| 27 |
+
"uvicorn>=0.24.0",
|
| 28 |
+
"python-dotenv>=1.0.0",
|
| 29 |
+
"pydantic>=2.5.0",
|
| 30 |
+
"python-multipart>=0.0.6",
|
| 31 |
+
"aiofiles>=23.2.0",
|
| 32 |
+
"aiohttp>=3.9.0",
|
| 33 |
+
# BabelDOC core dependency - required; installed together with this package
|
| 34 |
+
"babeldoc>=0.4.16",
|
| 35 |
+
]
|
| 36 |
+
|
| 37 |
+
[project.optional-dependencies]
|
| 38 |
+
dev = [
|
| 39 |
+
"pytest>=7.4.0",
|
| 40 |
+
"pytest-asyncio>=0.21.0",
|
| 41 |
+
"black>=23.0.0",
|
| 42 |
+
"isort>=5.12.0",
|
| 43 |
+
"flake8>=6.0.0",
|
| 44 |
+
"mypy>=1.7.0",
|
| 45 |
+
]
|
| 46 |
+
|
| 47 |
+
full = [
|
| 48 |
+
"babeldoc>=0.4.16",
|
| 49 |
+
]
|
| 50 |
+
|
| 51 |
+
[project.urls]
|
| 52 |
+
Homepage = "https://github.com/wwwzhouhui/pdftranslate_web"
|
| 53 |
+
Repository = "https://github.com/wwwzhouhui/pdftranslate_web"
|
| 54 |
+
|
| 55 |
+
[project.scripts]
|
| 56 |
+
pdftranslate-mcp = "main:main"
|
| 57 |
+
|
| 58 |
+
[build-system]
|
| 59 |
+
requires = ["hatchling"]
|
| 60 |
+
build-backend = "hatchling.build"
|
| 61 |
+
|
| 62 |
+
[tool.hatch.build.targets.wheel]
|
| 63 |
+
packages = ["."]
|
| 64 |
+
|
| 65 |
+
[tool.black]
|
| 66 |
+
line-length = 100
|
| 67 |
+
target-version = ['py312']
|
| 68 |
+
include = '\.pyi?$'
|
| 69 |
+
extend-exclude = '''
|
| 70 |
+
/(
|
| 71 |
+
# directories
|
| 72 |
+
\.eggs
|
| 73 |
+
| \.git
|
| 74 |
+
| \.hg
|
| 75 |
+
| \.mypy_cache
|
| 76 |
+
| \.tox
|
| 77 |
+
| \.venv
|
| 78 |
+
| build
|
| 79 |
+
| dist
|
| 80 |
+
)/
|
| 81 |
+
'''
|
| 82 |
+
|
| 83 |
+
[tool.isort]
|
| 84 |
+
profile = "black"
|
| 85 |
+
line_length = 100
|
| 86 |
+
multi_line_output = 3
|
| 87 |
+
include_trailing_comma = true
|
| 88 |
+
force_grid_wrap = 0
|
| 89 |
+
use_parentheses = true
|
| 90 |
+
ensure_newline_before_comments = true
|
| 91 |
+
|
| 92 |
+
[tool.mypy]
|
| 93 |
+
python_version = "3.12"
|
| 94 |
+
warn_return_any = true
|
| 95 |
+
warn_unused_configs = true
|
| 96 |
+
disallow_untyped_defs = true
|
| 97 |
+
disallow_incomplete_defs = true
|
| 98 |
+
check_untyped_defs = true
|
| 99 |
+
disallow_untyped_decorators = true
|
| 100 |
+
no_implicit_optional = true
|
| 101 |
+
warn_redundant_casts = true
|
| 102 |
+
warn_unused_ignores = true
|
| 103 |
+
warn_no_return = true
|
| 104 |
+
warn_unreachable = true
|
| 105 |
+
strict_equality = true
|
| 106 |
+
|
| 107 |
+
[tool.pytest.ini_options]
|
| 108 |
+
testpaths = ["tests"]
|
| 109 |
+
python_files = ["test_*.py", "*_test.py"]
|
| 110 |
+
python_classes = ["Test*"]
|
| 111 |
+
python_functions = ["test_*"]
|
| 112 |
+
addopts = [
|
| 113 |
+
"-v",
|
| 114 |
+
"--tb=short",
|
| 115 |
+
"--strict-config",
|
| 116 |
+
"--strict-markers",
|
| 117 |
+
]
|
| 118 |
+
asyncio_mode = "auto"
|
| 119 |
+
|
| 120 |
+
[tool.coverage.run]
|
| 121 |
+
source = ["."]
|
| 122 |
+
omit = [
|
| 123 |
+
"tests/*",
|
| 124 |
+
"*/tests/*",
|
| 125 |
+
"*/test_*",
|
| 126 |
+
"*/__pycache__/*",
|
| 127 |
+
]
|
| 128 |
+
|
| 129 |
+
[tool.coverage.report]
|
| 130 |
+
exclude_lines = [
|
| 131 |
+
"pragma: no cover",
|
| 132 |
+
"def __repr__",
|
| 133 |
+
"if self.debug:",
|
| 134 |
+
"if settings.DEBUG",
|
| 135 |
+
"raise AssertionError",
|
| 136 |
+
"raise NotImplementedError",
|
| 137 |
+
"if 0:",
|
| 138 |
+
"if __name__ == .__main__.:",
|
| 139 |
+
"class .*\\bProtocol\\):",
|
| 140 |
+
"@(abc\\.)?abstractmethod",
|
| 141 |
+
]
|
pdftranslate_web/pdftranslate-mcp-server/uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pdftranslate_web/pyproject.toml
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "pdftranslate_web"
|
| 3 |
+
version = "0.0.1"
|
| 4 |
+
description = "用户上传PDF,系统会把它翻译成其他国家语言,保持文档结构不变"
|
| 5 |
+
license = "AGPL-3.0"
|
| 6 |
+
readme = "README.md"
|
| 7 |
+
requires-python = ">=3.10,<3.14"
|
| 8 |
+
authors = [
|
| 9 |
+
{ name = "wwwzhouhui", email = "75271002@qq.com" }
|
| 10 |
+
]
|
| 11 |
+
maintainers = [
|
| 12 |
+
{ name = "wwwzhouhui", email = "75271002@qq.com" }
|
| 13 |
+
]
|
| 14 |
+
classifiers = [
|
| 15 |
+
"Programming Language :: Python :: 3",
|
| 16 |
+
"Operating System :: OS Independent",
|
| 17 |
+
]
|
| 18 |
+
keywords = ["PDF"]
|
| 19 |
+
dependencies = [
|
| 20 |
+
# Core BabelDOC translation engine
|
| 21 |
+
"babeldoc @ git+https://github.com/funstory-ai/BabelDOC.git",
|
| 22 |
+
|
| 23 |
+
# Additional dependencies for web interface
|
| 24 |
+
"httpx[socks]>=0.27.0", # HTTP client for API calls
|
| 25 |
+
"fastapi>=0.116.1", # Web framework
|
| 26 |
+
"uvicorn[standard]>=0.35.0", # ASGI server
|
| 27 |
+
"python-multipart>=0.0.20", # File upload support
|
| 28 |
+
"gradio>=5.34.1", # Web interface
|
| 29 |
+
"requests>=2.32.4", # HTTP client for gradio
|
| 30 |
+
"Pillow>=11.3.0", # Image processing for previews
|
| 31 |
+
]
|
| 32 |
+
|
| 33 |
+
[project.optional-dependencies]
|
| 34 |
+
directml = ["onnxruntime-directml>=1.16.1"]
|
| 35 |
+
cuda = ["onnxruntime-gpu>=1.16.1"]
|
| 36 |
+
memray = ["memray>=1.17.1"]
|
| 37 |
+
|
| 38 |
+
[project.urls]
|
| 39 |
+
Homepage = "https://github.com/wwwzhouhui/pdftranslate_web"
|
| 40 |
+
Issues = "https://github.com/wwwzhouhui/pdftranslate_web/issues"
|
| 41 |
+
|
| 42 |
+
[project.scripts]
|
| 43 |
+
pdftranslate = "pdftranslate_web.api_server:main"
|
| 44 |
+
|
| 45 |
+
[build-system]
|
| 46 |
+
requires = ["hatchling"]
|
| 47 |
+
build-backend = "hatchling.build"
|
| 48 |
+
|
| 49 |
+
[tool.hatch.metadata]
|
| 50 |
+
allow-direct-references = true
|
| 51 |
+
|
| 52 |
+
[tool.hatch.build.targets.wheel]
|
| 53 |
+
packages = ["src/pdftranslate_web"]
|
| 54 |
+
|
| 55 |
+
[tool.flake8]
|
| 56 |
+
ignore = ["E203", "E261", "E501", "W503", "E741", "E501"]
|
| 57 |
+
max-line-length = 88
|
| 58 |
+
|
| 59 |
+
[tool.ruff]
|
| 60 |
+
src = ["babeldoc"]
|
| 61 |
+
target-version = "py310"
|
| 62 |
+
show-fixes = true
|
| 63 |
+
|
| 64 |
+
[tool.ruff.format]
|
| 65 |
+
# Enable reformatting of code snippets in docstrings.
|
| 66 |
+
docstring-code-format = true
|
| 67 |
+
|
| 68 |
+
[tool.ruff.lint]
|
| 69 |
+
ignore = [
|
| 70 |
+
"E203", # 冒号前的空格
|
| 71 |
+
"E261", # 注释前至少两个空格
|
| 72 |
+
"E501", # 行太长
|
| 73 |
+
"E741", # 变量名歧义
|
| 74 |
+
"F841", # 未使用的变量
|
| 75 |
+
"C901", # 太复杂的函数
|
| 76 |
+
"S101", # use assert
|
| 77 |
+
"SIM", # flake8-simplify
|
| 78 |
+
"ARG002", # unused argument
|
| 79 |
+
"S110", # `try`-`except`-`pass` detected, consider logging the exception
|
| 80 |
+
"B024", # abstract class without abstract methods
|
| 81 |
+
"S112", # `try`-`except`-`continue` detected, consider logging the exception
|
| 82 |
+
"COM812", # missing-trailing-comma
|
| 83 |
+
|
| 84 |
+
]
|
| 85 |
+
select = [
|
| 86 |
+
"E", # pycodestyle 错误
|
| 87 |
+
"F", # Pyflakes
|
| 88 |
+
"N", # PEP8 命名
|
| 89 |
+
"B", # flake8-bugbear
|
| 90 |
+
"I", # isort
|
| 91 |
+
"C", # mccabe
|
| 92 |
+
"UP", # pyupgrade
|
| 93 |
+
"S", # flake8-bandit
|
| 94 |
+
"A", # flake8-builtins
|
| 95 |
+
"COM", # flake8-commas
|
| 96 |
+
"ARG", # flake8-unused-arguments
|
| 97 |
+
"PTH", # 使用 pathlib
|
| 98 |
+
]
|
| 99 |
+
|
| 100 |
+
[tool.ruff.lint.flake8-quotes]
|
| 101 |
+
docstring-quotes = "double"
|
| 102 |
+
|
| 103 |
+
[tool.ruff.lint.flake8-annotations]
|
| 104 |
+
suppress-none-returning = true
|
| 105 |
+
|
| 106 |
+
[tool.ruff.lint.isort]
|
| 107 |
+
force-single-line = true
|
| 108 |
+
|
| 109 |
+
[tool.ruff.lint.pydocstyle]
|
| 110 |
+
convention = "google"
|
| 111 |
+
|
| 112 |
+
# 设置一些规则的特定配置
|
| 113 |
+
[tool.ruff.lint.mccabe]
|
| 114 |
+
max-complexity = 10 # 函数圈复杂度阈值
|
| 115 |
+
|
| 116 |
+
[tool.ruff.lint.per-file-ignores]
|
| 117 |
+
"babeldoc/babeldoc_exception/BabelDOCException.py" = ["N999"]
|
| 118 |
+
"babeldoc/format/pdf/pdfinterp.py" = ["N"] # 忽略命名规范
|
| 119 |
+
"tests/*" = ["S101"] # 在测试文件中允许 assert
|
| 120 |
+
"**/__init__.py" = ["F401"] # 允许未使用的导入
|
| 121 |
+
# 忽略 S311 警告,因为这是有意的
|
| 122 |
+
"babeldoc/format/pdf/document_il/midend/paragraph_finder.py" = ["S311"]
|
| 123 |
+
"docs/*" = ["A001"]
|
| 124 |
+
"babeldoc/pdfminer/*" =["A","F", "I", "N", "S", "B", "C", "COM", "ARG", "PTH", "UP"]
|
| 125 |
+
[dependency-groups]
|
| 126 |
+
dev = [
|
| 127 |
+
"bumpver>=2024.1130",
|
| 128 |
+
"markdown-callouts>=0.4.0",
|
| 129 |
+
"markdown-include>=0.8.1",
|
| 130 |
+
"mkdocs-git-authors-plugin>=0.9.2",
|
| 131 |
+
"mkdocs-git-committers-plugin-2>=2.5.0",
|
| 132 |
+
"mkdocs-git-revision-date-localized-plugin>=1.3.0",
|
| 133 |
+
"mkdocs-material[recommended]>=9.6.4",
|
| 134 |
+
"pre-commit>=4.1.0",
|
| 135 |
+
"pygments>=2.19.1",
|
| 136 |
+
"ruff>=0.9.2",
|
| 137 |
+
"pytest>=8.3.4",
|
| 138 |
+
"pylance>=0.29.0",
|
| 139 |
+
"py-spy>=0.4.0",
|
| 140 |
+
]
|
| 141 |
+
|
| 142 |
+
[tool.pytest.ini_options]
|
| 143 |
+
pythonpath = [".", "src"]
|
| 144 |
+
testpaths = ["tests"]
|
| 145 |
+
|
| 146 |
+
[bumpver]
|
| 147 |
+
current_version = "0.0.1"
|
| 148 |
+
version_pattern = "MAJOR.MINOR.PATCH[.PYTAGNUM]"
|
| 149 |
+
|
| 150 |
+
[bumpver.file_patterns]
|
| 151 |
+
"pyproject.toml" = [
|
| 152 |
+
'current_version = "{version}"',
|
| 153 |
+
'version = "{version}"'
|
| 154 |
+
]
|
| 155 |
+
"babeldoc/__init__.py" = [
|
| 156 |
+
'__version__ = "{version}"'
|
| 157 |
+
]
|
| 158 |
+
"babeldoc/main.py" = [
|
| 159 |
+
'__version__ = "{version}"'
|
| 160 |
+
]
|
| 161 |
+
"babeldoc/const.py" = [
|
| 162 |
+
'__version__ = "{version}"'
|
| 163 |
+
]
|
| 164 |
+
|
| 165 |
+
[tool.uv.sources]
|
| 166 |
+
yadt = { path = ".", editable = true }
|
| 167 |
+
|
| 168 |
+
[tool.pyright]
|
| 169 |
+
pythonVersion = "3.10"
|
| 170 |
+
# typeCheckingMode = "off"
|
| 171 |
+
reportGeneralTypeIssues = false
|
| 172 |
+
reportUnknownVariableType = false
|
| 173 |
+
reportMissingParameterType = false
|
| 174 |
+
reportUnknownParameterType = false
|
pdftranslate_web/pyproject_scnet.toml
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "pdftranslate_web"
|
| 3 |
+
version = "0.0.1"
|
| 4 |
+
description = "用户上传PDF,系统会把它翻译成其他国家语言,保持文档结构不变"
|
| 5 |
+
license = "AGPL-3.0"
|
| 6 |
+
readme = "README.md"
|
| 7 |
+
requires-python = ">=3.10,<3.14"
|
| 8 |
+
authors = [
|
| 9 |
+
{ name = "wwwzhouhui", email = "75271002@qq.com" }
|
| 10 |
+
]
|
| 11 |
+
maintainers = [
|
| 12 |
+
{ name = "wwwzhouhui", email = "75271002@qq.com" }
|
| 13 |
+
]
|
| 14 |
+
classifiers = [
|
| 15 |
+
"Programming Language :: Python :: 3",
|
| 16 |
+
"Operating System :: OS Independent",
|
| 17 |
+
]
|
| 18 |
+
keywords = ["PDF"]
|
| 19 |
+
dependencies = [
|
| 20 |
+
# Core BabelDOC translation engine
|
| 21 |
+
"babeldoc @ git+https://ghfast.top//https://github.com/funstory-ai/BabelDOC.git",
|
| 22 |
+
|
| 23 |
+
"onnxruntime<1.17.0",
|
| 24 |
+
# Additional dependencies for web interface
|
| 25 |
+
"httpx[socks]>=0.27.0", # HTTP client for API calls
|
| 26 |
+
"fastapi>=0.116.1", # Web framework
|
| 27 |
+
"uvicorn[standard]>=0.35.0", # ASGI server
|
| 28 |
+
"python-multipart>=0.0.20", # File upload support
|
| 29 |
+
"gradio>=5.34.1", # Web interface
|
| 30 |
+
"requests>=2.32.4", # HTTP client for gradio
|
| 31 |
+
"Pillow>=11.3.0", # Image processing for previews
|
| 32 |
+
]
|
| 33 |
+
|
| 34 |
+
[project.optional-dependencies]
|
| 35 |
+
directml = ["onnxruntime-directml==1.15.1"]
|
| 36 |
+
cuda = ["onnxruntime-gpu==1.15.1"]
|
| 37 |
+
memray = ["memray>=1.17.1"]
|
| 38 |
+
|
| 39 |
+
[project.urls]
|
| 40 |
+
Homepage = "https://github.com/wwwzhouhui/pdftranslate_web"
|
| 41 |
+
Issues = "https://github.com/wwwzhouhui/pdftranslate_web/issues"
|
| 42 |
+
|
| 43 |
+
[project.scripts]
|
| 44 |
+
pdftranslate = "pdftranslate_web.api_server:main"
|
| 45 |
+
|
| 46 |
+
[build-system]
|
| 47 |
+
requires = ["hatchling"]
|
| 48 |
+
build-backend = "hatchling.build"
|
| 49 |
+
|
| 50 |
+
[tool.hatch.metadata]
|
| 51 |
+
allow-direct-references = true
|
| 52 |
+
|
| 53 |
+
[tool.hatch.build.targets.wheel]
|
| 54 |
+
packages = ["src/pdftranslate_web"]
|
| 55 |
+
|
| 56 |
+
[tool.flake8]
|
| 57 |
+
ignore = ["E203", "E261", "E501", "W503", "E741", "E501"]
|
| 58 |
+
max-line-length = 88
|
| 59 |
+
|
| 60 |
+
[tool.ruff]
|
| 61 |
+
src = ["babeldoc"]
|
| 62 |
+
target-version = "py310"
|
| 63 |
+
show-fixes = true
|
| 64 |
+
|
| 65 |
+
[tool.ruff.format]
|
| 66 |
+
# Enable reformatting of code snippets in docstrings.
|
| 67 |
+
docstring-code-format = true
|
| 68 |
+
|
| 69 |
+
[tool.ruff.lint]
|
| 70 |
+
ignore = [
|
| 71 |
+
"E203", # 冒号前的空格
|
| 72 |
+
"E261", # 注释前至少两个空格
|
| 73 |
+
"E501", # 行太长
|
| 74 |
+
"E741", # 变量名歧义
|
| 75 |
+
"F841", # 未使用的变量
|
| 76 |
+
"C901", # 太复杂的函数
|
| 77 |
+
"S101", # use assert
|
| 78 |
+
"SIM", # flake8-simplify
|
| 79 |
+
"ARG002", # unused argument
|
| 80 |
+
"S110", # `try`-`except`-`pass` detected, consider logging the exception
|
| 81 |
+
"B024", # abstract class without abstract methods
|
| 82 |
+
"S112", # `try`-`except`-`continue` detected, consider logging the exception
|
| 83 |
+
"COM812", # missing-trailing-comma
|
| 84 |
+
|
| 85 |
+
]
|
| 86 |
+
select = [
|
| 87 |
+
"E", # pycodestyle 错误
|
| 88 |
+
"F", # Pyflakes
|
| 89 |
+
"N", # PEP8 命名
|
| 90 |
+
"B", # flake8-bugbear
|
| 91 |
+
"I", # isort
|
| 92 |
+
"C", # mccabe
|
| 93 |
+
"UP", # pyupgrade
|
| 94 |
+
"S", # flake8-bandit
|
| 95 |
+
"A", # flake8-builtins
|
| 96 |
+
"COM", # flake8-commas
|
| 97 |
+
"ARG", # flake8-unused-arguments
|
| 98 |
+
"PTH", # 使用 pathlib
|
| 99 |
+
]
|
| 100 |
+
|
| 101 |
+
[tool.ruff.lint.flake8-quotes]
|
| 102 |
+
docstring-quotes = "double"
|
| 103 |
+
|
| 104 |
+
[tool.ruff.lint.flake8-annotations]
|
| 105 |
+
suppress-none-returning = true
|
| 106 |
+
|
| 107 |
+
[tool.ruff.lint.isort]
|
| 108 |
+
force-single-line = true
|
| 109 |
+
|
| 110 |
+
[tool.ruff.lint.pydocstyle]
|
| 111 |
+
convention = "google"
|
| 112 |
+
|
| 113 |
+
# 设置一些规则的特定配置
|
| 114 |
+
[tool.ruff.lint.mccabe]
|
| 115 |
+
max-complexity = 10 # 函数圈复杂度阈值
|
| 116 |
+
|
| 117 |
+
[tool.ruff.lint.per-file-ignores]
|
| 118 |
+
"babeldoc/babeldoc_exception/BabelDOCException.py" = ["N999"]
|
| 119 |
+
"babeldoc/format/pdf/pdfinterp.py" = ["N"] # 忽略命名规范
|
| 120 |
+
"tests/*" = ["S101"] # 在测试文件中允许 assert
|
| 121 |
+
"**/__init__.py" = ["F401"] # 允许未使用的导入
|
| 122 |
+
# 忽略 S311 警告,因为这是有意的
|
| 123 |
+
"babeldoc/format/pdf/document_il/midend/paragraph_finder.py" = ["S311"]
|
| 124 |
+
"docs/*" = ["A001"]
|
| 125 |
+
"babeldoc/pdfminer/*" =["A","F", "I", "N", "S", "B", "C", "COM", "ARG", "PTH", "UP"]
|
| 126 |
+
[dependency-groups]
|
| 127 |
+
dev = [
|
| 128 |
+
"bumpver>=2024.1130",
|
| 129 |
+
"markdown-callouts>=0.4.0",
|
| 130 |
+
"markdown-include>=0.8.1",
|
| 131 |
+
"mkdocs-git-authors-plugin>=0.9.2",
|
| 132 |
+
"mkdocs-git-committers-plugin-2>=2.5.0",
|
| 133 |
+
"mkdocs-git-revision-date-localized-plugin>=1.3.0",
|
| 134 |
+
"mkdocs-material[recommended]>=9.6.4",
|
| 135 |
+
"pre-commit>=4.1.0",
|
| 136 |
+
"pygments>=2.19.1",
|
| 137 |
+
"ruff>=0.9.2",
|
| 138 |
+
"pytest>=8.3.4",
|
| 139 |
+
"pylance>=0.29.0",
|
| 140 |
+
"py-spy>=0.4.0",
|
| 141 |
+
]
|
| 142 |
+
|
| 143 |
+
[tool.pytest.ini_options]
|
| 144 |
+
pythonpath = [".", "src"]
|
| 145 |
+
testpaths = ["tests"]
|
| 146 |
+
|
| 147 |
+
[bumpver]
|
| 148 |
+
current_version = "0.0.1"
|
| 149 |
+
version_pattern = "MAJOR.MINOR.PATCH[.PYTAGNUM]"
|
| 150 |
+
|
| 151 |
+
[bumpver.file_patterns]
|
| 152 |
+
"pyproject.toml" = [
|
| 153 |
+
'current_version = "{version}"',
|
| 154 |
+
'version = "{version}"'
|
| 155 |
+
]
|
| 156 |
+
"babeldoc/__init__.py" = [
|
| 157 |
+
'__version__ = "{version}"'
|
| 158 |
+
]
|
| 159 |
+
"babeldoc/main.py" = [
|
| 160 |
+
'__version__ = "{version}"'
|
| 161 |
+
]
|
| 162 |
+
"babeldoc/const.py" = [
|
| 163 |
+
'__version__ = "{version}"'
|
| 164 |
+
]
|
| 165 |
+
|
| 166 |
+
[tool.uv.sources]
|
| 167 |
+
yadt = { path = ".", editable = true }
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
[tool.pyright]
|
| 171 |
+
pythonVersion = "3.10"
|
| 172 |
+
# typeCheckingMode = "off"
|
| 173 |
+
reportGeneralTypeIssues = false
|
| 174 |
+
reportUnknownVariableType = false
|
| 175 |
+
reportMissingParameterType = false
|
| 176 |
+
reportUnknownParameterType = false
|
pdftranslate_web/scripts/run_gradio.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""Launcher for the BabelDOC Gradio client.

Starts the Gradio web interface of the PDF translation tool.
"""

import sys
from pathlib import Path

# Make the in-repo ``src`` directory importable when the package is not installed.
_SRC_DIR = Path(__file__).parent.parent / "src"
sys.path.insert(0, str(_SRC_DIR))

from pdftranslate_web.gradio_client import main

if __name__ == "__main__":
    main()
|
pdftranslate_web/scripts/run_server.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""Launcher for the BabelDOC translation API server.

Parses ``--host`` / ``--port`` from the command line and starts the server.
"""

import sys
from pathlib import Path

# Make the in-repo ``src`` directory importable when the package is not installed.
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from pdftranslate_web.api_server import start_server


def main():
    """Parse the command-line options, print the endpoints and run the server."""
    import argparse

    parser = argparse.ArgumentParser(description="启动BabelDOC翻译API服务器")
    parser.add_argument("--host", default="0.0.0.0", help="服务器主机地址 (默认: 0.0.0.0)")
    parser.add_argument("--port", type=int, default=8000, help="服务器端口 (默认: 8000)")
    options = parser.parse_args()

    base_url = f"http://{options.host}:{options.port}"
    print("正在启动BabelDOC翻译API服务器...")
    print(f"服务器地址: {base_url}")
    print(f"API文档: {base_url}/docs")

    start_server(host=options.host, port=options.port)


if __name__ == "__main__":
    main()
|
pdftranslate_web/src/pdftranslate_web/__init__.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
BabelDOC PDF translation package.

An AI-based PDF document translation toolkit that provides:
- a FastAPI REST API service
- a Gradio web interface
- a Python client SDK

The public names are imported lazily (PEP 562). Importing the package, or
only the HTTP client, therefore does not import the API server module, which
needs the server-side dependencies and refuses to import without an
``OPENAI_API_KEY``.
"""

__version__ = "0.0.1"
__author__ = "wwwzhouhui"

__all__ = [
    "app",
    "start_server",
    "BabelDOCClient",
    "create_gradio_interface",
    "GradioClient",
]

# Public name -> submodule that defines it.
_LAZY_EXPORTS = {
    "app": "api_server",
    "start_server": "api_server",
    "BabelDOCClient": "api_client",
    "create_gradio_interface": "gradio_client",
    "GradioClient": "gradio_client",
}

# Submodules that used to be bound as package attributes by the eager imports.
_SUBMODULES = frozenset(_LAZY_EXPORTS.values())


def __getattr__(name):
    """Resolve a public name or submodule on first attribute access.

    Raises:
        AttributeError: if *name* is neither a public export nor a submodule.
    """
    from importlib import import_module

    if name in _SUBMODULES:
        return import_module(f".{name}", __name__)

    submodule = _LAZY_EXPORTS.get(name)
    if submodule is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    value = getattr(import_module(f".{submodule}", __name__), name)
    globals()[name] = value  # cache so later lookups bypass __getattr__
    return value


def __dir__():
    """List the lazily provided names alongside the already loaded ones."""
    return sorted(set(globals()) | set(__all__) | _SUBMODULES)
|
pdftranslate_web/src/pdftranslate_web/api_client.py
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import time
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Optional, Dict, Any
|
| 5 |
+
import json
|
| 6 |
+
|
| 7 |
+
class BabelDOCClient:
    """HTTP client for the BabelDOC translation API.

    Wraps the server's ``/translate``, ``/status/{task_id}``,
    ``/download/{task_id}/{file_type}``, ``/health`` and ``/`` endpoints and
    sends every request through one shared ``requests.Session``.
    """

    def __init__(self, base_url: str = "http://localhost:8000"):
        """Create a client.

        Args:
            base_url: Root URL of the API server; trailing slashes are removed.
        """
        self.base_url = base_url.rstrip('/')
        self.session = requests.Session()

    def translate_pdf(
        self,
        pdf_path: str,
        lang_in: Optional[str] = None,
        lang_out: Optional[str] = None,
        qps: Optional[int] = None,
        no_dual: Optional[bool] = None,
        no_mono: Optional[bool] = None,
        watermark_output_mode: Optional[str] = None
    ) -> str:
        """Submit a PDF translation task.

        Options left as ``None`` are not sent, so the server falls back to
        its own configured defaults for them.

        Args:
            pdf_path: Path of the PDF file to upload.
            lang_in: Source language code.
            lang_out: Target language code.
            qps: Requests-per-second limit.
            no_dual: Do not produce the bilingual PDF.
            no_mono: Do not produce the monolingual PDF.
            watermark_output_mode: Watermark mode.

        Returns:
            The task id assigned by the server.

        Raises:
            FileNotFoundError: if ``pdf_path`` does not exist.
            requests.HTTPError: if the server answers with an error status.
        """
        if not Path(pdf_path).exists():
            raise FileNotFoundError(f"PDF文件不存在: {pdf_path}")

        options = {
            'lang_in': lang_in,
            'lang_out': lang_out,
            'qps': qps,
            'no_dual': no_dual,
            'no_mono': no_mono,
            'watermark_output_mode': watermark_output_mode,
        }
        data = {key: value for key, value in options.items() if value is not None}

        # The context manager closes the upload handle on every exit path.
        with open(pdf_path, 'rb') as pdf_file:
            files = {'file': ('document.pdf', pdf_file, 'application/pdf')}
            response = self.session.post(
                f"{self.base_url}/translate",
                files=files,
                data=data
            )

        response.raise_for_status()
        return response.json()['task_id']

    def get_status(self, task_id: str) -> Dict[str, Any]:
        """Fetch the current status of a translation task.

        Args:
            task_id: Task id returned by ``translate_pdf``.

        Returns:
            The decoded JSON status document.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        response = self.session.get(f"{self.base_url}/status/{task_id}")
        response.raise_for_status()
        return response.json()

    def wait_for_completion(self, task_id: str, check_interval: int = 5, timeout: int = 3600) -> Dict[str, Any]:
        """Poll a task until it is completed or failed.

        Args:
            task_id: Task id to poll.
            check_interval: Seconds to sleep between polls (default: 5).
            timeout: Maximum number of seconds to wait (default: 3600).

        Returns:
            The final status document (status ``completed`` or ``failed``).

        Raises:
            TimeoutError: if the task is still running when the timeout expires.
        """
        # Monotonic clock: the timeout must not be affected by wall-clock jumps.
        deadline = time.monotonic() + timeout

        while time.monotonic() < deadline:
            status = self.get_status(task_id)

            if status['status'] in ('completed', 'failed'):
                return status

            print(f"任务状态: {status['status']} | 进度: {status['progress']:.1f}% | {status['message']}")
            time.sleep(check_interval)

        raise TimeoutError(f"任务 {task_id} 在 {timeout} 秒内未完成")

    def download_result(self, task_id: str, file_type: str, output_path: str) -> bool:
        """Download one result file of a finished task.

        Args:
            task_id: Task id.
            file_type: Which result to fetch ("dual" or "mono").
            output_path: Destination file path.

        Returns:
            ``True`` when the file was written, ``False`` when the server
            answered 404 for it.

        Raises:
            requests.HTTPError: for any other error status.
        """
        url = f"{self.base_url}/download/{task_id}/{file_type}"

        # Stream the body so a large PDF is never held in memory as a whole.
        with self.session.get(url, stream=True) as response:
            if response.status_code == 404:
                return False

            response.raise_for_status()

            with open(output_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=65536):
                    if chunk:
                        f.write(chunk)

        return True

    def translate_and_download(
        self,
        pdf_path: str,
        output_dir: str,
        lang_in: Optional[str] = None,
        lang_out: Optional[str] = None,
        qps: Optional[int] = None,
        no_dual: Optional[bool] = None,
        no_mono: Optional[bool] = None,
        watermark_output_mode: Optional[str] = None,
        check_interval: int = 5,
        timeout: int = 3600
    ) -> Dict[str, str]:
        """Run the whole flow: submit the task, wait for it, download the results.

        Args:
            pdf_path: Path of the PDF file to translate.
            output_dir: Directory for the downloaded files (created if missing).
            lang_in, lang_out, qps, no_dual, no_mono, watermark_output_mode:
                Forwarded unchanged to ``translate_pdf``.
            check_interval, timeout: Forwarded to ``wait_for_completion``.

        Returns:
            Mapping of result type ("dual" / "mono") to the downloaded file path.

        Raises:
            RuntimeError: if the task ends in a state other than ``completed``.
            TimeoutError: if the task does not finish within ``timeout`` seconds.
        """
        print(f"正在提交翻译任务: {pdf_path}")
        task_id = self.translate_pdf(
            pdf_path=pdf_path,
            lang_in=lang_in,
            lang_out=lang_out,
            qps=qps,
            no_dual=no_dual,
            no_mono=no_mono,
            watermark_output_mode=watermark_output_mode
        )

        print(f"任务已创建,ID: {task_id}")

        print("等待翻译完成...")
        final_status = self.wait_for_completion(task_id, check_interval, timeout)

        if final_status['status'] != 'completed':
            raise RuntimeError(f"翻译失败: {final_status.get('message', '')}")

        print("翻译完成,正在下载结果...")

        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        # A status document without result files simply yields no downloads.
        available = final_status.get('result_files') or {}
        stem = Path(pdf_path).stem
        downloaded_files = {}

        for file_type in ('dual', 'mono'):
            if file_type not in available:
                continue

            output_file = output_path / f"{stem}.{file_type}.pdf"
            if self.download_result(task_id, file_type, str(output_file)):
                downloaded_files[file_type] = str(output_file)
                print(f"已下载 {file_type} 文件: {output_file}")

        return downloaded_files

    def health_check(self) -> bool:
        """Report whether the server's ``/health`` endpoint answers with 200.

        Returns:
            ``True`` for a 200 response; ``False`` for any other status or
            when the request itself fails.
        """
        try:
            response = self.session.get(f"{self.base_url}/health")
            return response.status_code == 200
        except requests.RequestException:
            return False

    def get_server_config(self) -> Dict[str, Any]:
        """Fetch the server information document served at ``/``.

        Returns:
            The decoded JSON document.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        response = self.session.get(f"{self.base_url}/")
        response.raise_for_status()
        return response.json()
|
| 217 |
+
|
| 218 |
+
def main():
    """Command-line client: translate one PDF and download the results.

    Returns:
        Process exit status: 0 on success, 1 when the server is unreachable
        or the translation fails.
    """
    import argparse

    parser = argparse.ArgumentParser(description="BabelDOC Translation Client")
    parser.add_argument("pdf_path", help="PDF文件路径")
    parser.add_argument("--output-dir", "-o", default="./output", help="输出目录")
    parser.add_argument("--server-url", default="http://localhost:8000", help="服务器URL")
    parser.add_argument("--lang-in", help="源语言 (可选,使用服务器默认)")
    parser.add_argument("--lang-out", help="目标语言 (可选,使用服务器默认)")
    parser.add_argument("--qps", type=int, help="每秒请求数 (可选,使用服务器默认)")
    parser.add_argument("--no-dual", action="store_true", help="不生成双语PDF")
    parser.add_argument("--no-mono", action="store_true", help="不生成单语PDF")
    parser.add_argument("--watermark-mode", choices=["watermarked", "no_watermark", "both"],
                        help="水印模式 (可选,使用服务器默认)")

    args = parser.parse_args()

    client = BabelDOCClient(args.server_url)

    if not client.health_check():
        print(f"错误: 无法连接到服务器 {args.server_url}")
        return 1

    # Show the server configuration; this is best-effort and never fatal.
    try:
        server_config = client.get_server_config()
        print("服务器配置:")
        print(f"  模型: {server_config['config']['openai_model']}")
        print(f"  默认语言: {server_config['config']['default_lang_in']} -> {server_config['config']['default_lang_out']}")
        print(f"  QPS: {server_config['config']['qps']}")
        print()
    except Exception as e:
        print(f"获取服务器配置失败: {e}")

    try:
        downloaded_files = client.translate_and_download(
            pdf_path=args.pdf_path,
            output_dir=args.output_dir,
            lang_in=args.lang_in,
            lang_out=args.lang_out,
            qps=args.qps,
            # store_true yields False when the flag is absent. Sending False
            # would override the server's own default, so pass None
            # ("not specified") unless the flag was actually given.
            no_dual=True if args.no_dual else None,
            no_mono=True if args.no_mono else None,
            watermark_output_mode=args.watermark_mode
        )
    except Exception as e:
        print(f"错误: {e}")
        return 1

    print("\n翻译完成! 下载的文件:")
    for file_type, path in downloaded_files.items():
        print(f"  {file_type}: {path}")

    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
pdftranslate_web/src/pdftranslate_web/api_server.py
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import logging
|
| 3 |
+
import uuid
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Dict, Any, Optional
|
| 6 |
+
import tempfile
|
| 7 |
+
import shutil
|
| 8 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 9 |
+
import os
|
| 10 |
+
|
| 11 |
+
from fastapi import FastAPI, File, UploadFile, HTTPException, BackgroundTasks, Form
|
| 12 |
+
from fastapi.responses import FileResponse
|
| 13 |
+
from pydantic import BaseModel
|
| 14 |
+
import uvicorn
|
| 15 |
+
from dotenv import load_dotenv
|
| 16 |
+
|
| 17 |
+
import babeldoc.format.pdf.high_level
|
| 18 |
+
from babeldoc.format.pdf.translation_config import TranslationConfig, WatermarkOutputMode
|
| 19 |
+
from babeldoc.translator.translator import OpenAITranslator, set_translate_rate_limiter
|
| 20 |
+
|
| 21 |
+
logger = logging.getLogger(__name__)
|
| 22 |
+
load_dotenv() # 自动加载同目录下的 .env 文件
|
| 23 |
+
# 从环境变量加载配置
|
| 24 |
+
def _env_flag(name, default="false"):
    """Read a boolean environment variable; 1/true/yes/on (any case, trimmed) is True."""
    return os.getenv(name, default).strip().lower() in {"1", "true", "yes", "on"}


def load_config():
    """Build the service configuration from environment variables.

    Returns:
        A dict with three sections:
        ``openai`` (``api_key``, ``model``, ``base_url``),
        ``server`` (``host``, ``port``, ``qps``) and
        ``translation`` (``default_lang_in``, ``default_lang_out``,
        ``watermark_output_mode``, ``no_dual``, ``no_mono``).

    Raises:
        ValueError: if ``SERVER_PORT`` or ``QPS`` is set to a non-integer value.
    """
    return {
        "openai": {
            "api_key": os.getenv("OPENAI_API_KEY", ""),
            "model": os.getenv("OPENAI_MODEL", "deepseek-ai/DeepSeek-V3"),
            "base_url": os.getenv("OPENAI_BASE_URL", "https://api.siliconflow.cn/v1")
        },
        "server": {
            "host": os.getenv("SERVER_HOST", "0.0.0.0"),
            "port": int(os.getenv("SERVER_PORT", "8000")),
            "qps": int(os.getenv("QPS", "4"))
        },
        "translation": {
            "default_lang_in": os.getenv("DEFAULT_LANG_IN", "en"),
            "default_lang_out": os.getenv("DEFAULT_LANG_OUT", "zh"),
            "watermark_output_mode": os.getenv("WATERMARK_OUTPUT_MODE", "no_watermark"),
            "no_dual": _env_flag("NO_DUAL"),
            "no_mono": _env_flag("NO_MONO")
        }
    }
|
| 45 |
+
|
| 46 |
+
config = load_config()
|
| 47 |
+
|
| 48 |
+
# 验证OpenAI配置
|
| 49 |
+
if not config["openai"]["api_key"]:
|
| 50 |
+
logger.error("未找到OpenAI API密钥!请通过环境变量OPENAI_API_KEY提供")
|
| 51 |
+
raise ValueError("Missing OpenAI API key")
|
| 52 |
+
|
| 53 |
+
app = FastAPI(title="BabelDOC Translation API", version="0.4.16")
|
| 54 |
+
|
| 55 |
+
class TranslationRequest(BaseModel):
    """Per-request translation options.

    Every field is optional; the translation code replaces an unset value
    with the matching entry of the module-level ``config``.
    """

    # Language pair.
    lang_in: Optional[str] = None
    lang_out: Optional[str] = None

    # Requests-per-second limit handed to the translator rate limiter.
    qps: Optional[int] = None

    # Output selection: skip the bilingual and/or the monolingual PDF.
    no_dual: Optional[bool] = None
    no_mono: Optional[bool] = None

    # Watermark mode name; "no_watermark" and "both" are recognised by the
    # translation code.
    watermark_output_mode: Optional[str] = None
|
| 62 |
+
|
| 63 |
+
class TranslationStatus(BaseModel):
    """State of one translation task as returned by ``GET /status/{task_id}``."""

    task_id: str
    # One of: pending, processing, completed, failed.
    status: str
    # Overall progress reported by the translation pipeline; set to 100.0 on completion.
    progress: float = 0.0
    # Human-readable description of the current stage or of the failure.
    message: str = ""
    # Result type ("dual" / "mono") -> path of the generated PDF on the server.
    result_files: Dict[str, str] = {}
|
| 69 |
+
|
| 70 |
+
translation_tasks: Dict[str, TranslationStatus] = {}
|
| 71 |
+
task_files: Dict[str, Dict[str, Path]] = {}
|
| 72 |
+
|
| 73 |
+
async def translate_document(
    task_id: str,
    pdf_file: Path,
    request: TranslationRequest,
    output_dir: Path
):
    """Translate one uploaded PDF and record progress in ``translation_tasks``.

    Runs as a background task. It never raises for translation problems:
    every outcome is written to the task's ``TranslationStatus`` entry
    (``processing`` while running, then ``completed`` or ``failed``).

    Args:
        task_id: Key of the task in ``translation_tasks``.
        pdf_file: Path of the uploaded PDF.
        request: Per-request options; unset values fall back to ``config``.
        output_dir: Directory that receives the generated PDFs.
    """
    task = translation_tasks[task_id]

    try:
        task.status = "processing"
        task.message = "正在初始化翻译器..."

        # Resolve the per-request options against the configured defaults once.
        lang_in = request.lang_in or config["translation"]["default_lang_in"]
        lang_out = request.lang_out or config["translation"]["default_lang_out"]
        qps = request.qps or config["server"]["qps"]
        no_dual = request.no_dual if request.no_dual is not None else config["translation"]["no_dual"]
        no_mono = request.no_mono if request.no_mono is not None else config["translation"]["no_mono"]

        # Translator built from the OpenAI settings in the configuration.
        translator = OpenAITranslator(
            lang_in=lang_in,
            lang_out=lang_out,
            model=config["openai"]["model"],
            base_url=config["openai"]["base_url"],
            api_key=config["openai"]["api_key"],
            ignore_cache=False,
        )

        set_translate_rate_limiter(qps)

        from babeldoc.docvision.doclayout import DocLayoutModel
        doc_layout_model = DocLayoutModel.load_onnx()

        # Unknown mode names keep the Watermarked default.
        watermark_output_mode = request.watermark_output_mode or config["translation"]["watermark_output_mode"]
        watermark_mode = WatermarkOutputMode.Watermarked
        if watermark_output_mode == "no_watermark":
            watermark_mode = WatermarkOutputMode.NoWatermark
        elif watermark_output_mode == "both":
            watermark_mode = WatermarkOutputMode.Both

        config_obj = TranslationConfig(
            input_file=str(pdf_file),
            font=None,
            pages=None,
            output_dir=str(output_dir),
            translator=translator,
            debug=False,
            lang_in=lang_in,
            lang_out=lang_out,
            no_dual=no_dual,
            no_mono=no_mono,
            qps=qps,
            formular_font_pattern=None,
            formular_char_pattern=None,
            split_short_lines=False,
            short_line_split_factor=0.8,
            doc_layout_model=doc_layout_model,
            skip_clean=False,
            dual_translate_first=False,
            disable_rich_text_translate=False,
            enhance_compatibility=False,
            use_alternating_pages_dual=False,
            report_interval=0.1,
            min_text_length=5,
            watermark_output_mode=watermark_mode,
            split_strategy=None,
            table_model=None,
            show_char_box=False,
            skip_scanned_detection=False,
            ocr_workaround=False,
            custom_system_prompt=None,
            working_dir=None,
            add_formula_placehold_hint=False,
            glossaries=[],
            pool_max_workers=None,
            auto_extract_glossary=True,
            auto_enable_ocr_workaround=False,
            primary_font_family=None,
            only_include_translated_page=False,
            save_auto_extracted_glossary=False,
        )

        task.message = "正在翻译文档..."

        async for event in babeldoc.format.pdf.high_level.async_translate(config_obj):
            if event["type"] == "progress_update":
                task.progress = event.get("overall_progress", 0.0)
                task.message = f"{event.get('stage', '处理中')} ({event.get('stage_current', 0)}/{event.get('stage_total', 100)})"
            elif event["type"] == "error":
                task.status = "failed"
                task.message = f"翻译失败: {event.get('error', '未知错误')}"
                logger.error("Translation failed for task %s: %s", task_id, event.get('error'))
                return
            elif event["type"] == "finish":
                result = event["translate_result"]
                task.status = "completed"
                task.progress = 100.0
                task.message = "翻译完成"

                # Only advertise result files that really exist on disk.
                result_files = {}
                if result.dual_pdf_path and Path(result.dual_pdf_path).exists():
                    result_files["dual"] = str(result.dual_pdf_path)
                if result.mono_pdf_path and Path(result.mono_pdf_path).exists():
                    result_files["mono"] = str(result.mono_pdf_path)

                task.result_files = result_files
                task_files[task_id] = {k: Path(v) for k, v in result_files.items()}

                logger.info("Translation completed for task %s", task_id)
                break

        # The event stream ended without a terminal "finish"/"error" event.
        # Mark the task failed so pollers do not wait on "processing" forever.
        if task.status == "processing":
            task.status = "failed"
            task.message = "翻译失败: 翻译流程在完成前意外结束"
            logger.error("Translation for task %s ended without a finish event", task_id)

    except Exception as e:
        task.status = "failed"
        task.message = f"翻译过程出错: {str(e)}"
        logger.exception("Translation error for task %s: %s", task_id, e)
|
| 180 |
+
|
| 181 |
+
@app.post("/translate", response_model=dict)
async def translate_pdf(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(...),
    lang_in: Optional[str] = Form(None),
    lang_out: Optional[str] = Form(None),
    qps: Optional[int] = Form(None),
    no_dual: Optional[bool] = Form(None),
    no_mono: Optional[bool] = Form(None),
    watermark_output_mode: Optional[str] = Form(None)
):
    """Accept a PDF upload and schedule its translation as a background task.

    Args:
        background_tasks: FastAPI background-task queue.
        file: The uploaded PDF.
        lang_in, lang_out, qps, no_dual, no_mono, watermark_output_mode:
            Optional per-request overrides of the server defaults.

    Returns:
        ``{"task_id": ..., "message": ...}`` for the newly created task.

    Raises:
        HTTPException: 400 when the upload has no name ending in ``.pdf``.
    """
    # The client-supplied filename is untrusted. Keep only its last path
    # component (for both "/" and "\" separators) so it cannot escape the
    # temporary directory. A missing filename is rejected like a non-PDF one.
    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
    if not safe_name.lower().endswith('.pdf'):
        raise HTTPException(status_code=400, detail="只支持PDF文件")

    task_id = str(uuid.uuid4())

    temp_dir = Path(tempfile.mkdtemp())
    pdf_path = temp_dir / safe_name
    output_dir = temp_dir / "output"
    output_dir.mkdir(exist_ok=True)

    try:
        with open(pdf_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
    except Exception:
        # No task will ever use this directory; do not leave it behind.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise

    request = TranslationRequest(
        lang_in=lang_in,
        lang_out=lang_out,
        qps=qps,
        no_dual=no_dual,
        no_mono=no_mono,
        watermark_output_mode=watermark_output_mode
    )

    translation_tasks[task_id] = TranslationStatus(
        task_id=task_id,
        status="pending",
        message="任务已创建,等待处理..."
    )

    background_tasks.add_task(
        translate_document,
        task_id,
        pdf_path,
        request,
        output_dir
    )

    return {"task_id": task_id, "message": "翻译任务已创建"}
|
| 229 |
+
|
| 230 |
+
@app.get("/status/{task_id}", response_model=TranslationStatus)
async def get_translation_status(task_id: str):
    """Return the stored status of a task, or 404 when the id is unknown."""
    try:
        return translation_tasks[task_id]
    except KeyError:
        raise HTTPException(status_code=404, detail="任务不存在")
|
| 236 |
+
|
| 237 |
+
@app.get("/download/{task_id}/{file_type}")
async def download_result(task_id: str, file_type: str):
    """Send one generated PDF of a completed task.

    Raises:
        HTTPException: 404 for an unknown task or a missing result file,
            400 while the task has not completed.
    """
    task = translation_tasks.get(task_id)
    if task is None:
        raise HTTPException(status_code=404, detail="任务不存在")

    if task.status != "completed":
        raise HTTPException(status_code=400, detail="翻译尚未完成")

    # Absent registration and a file that vanished from disk are reported alike.
    file_path = task_files.get(task_id, {}).get(file_type)
    if file_path is None or not file_path.exists():
        raise HTTPException(status_code=404, detail="文件不存在")

    return FileResponse(
        path=file_path,
        filename=file_path.name,
        media_type='application/pdf'
    )
|
| 258 |
+
|
| 259 |
+
@app.get("/health")
async def health_check():
    """Liveness probe: always answers with a fixed "healthy" document."""
    payload = {"status": "healthy", "service": "BabelDOC Translation API"}
    return payload
|
| 262 |
+
|
| 263 |
+
@app.get("/")
async def root():
    """Describe the service: version, active defaults and available endpoints."""
    translation_cfg = config["translation"]

    # Only non-secret settings are exposed; the API key is never returned.
    active_config = {
        "openai_model": config["openai"]["model"],
        "default_lang_in": translation_cfg["default_lang_in"],
        "default_lang_out": translation_cfg["default_lang_out"],
        "qps": config["server"]["qps"]
    }

    endpoints = {
        "translate": "POST /translate - 上传PDF文件进行翻译",
        "status": "GET /status/{task_id} - 查询翻译状态",
        "download": "GET /download/{task_id}/{file_type} - 下载翻译结果",
        "health": "GET /health - 健康检查"
    }

    return {
        "message": "BabelDOC Translation API",
        "version": "0.4.16",
        "config": active_config,
        "endpoints": endpoints
    }
|
| 281 |
+
|
| 282 |
+
def start_server(host: Optional[str] = None, port: Optional[int] = None):
    """Initialise BabelDOC, configure logging and run the API with uvicorn.

    Args:
        host: Interface to bind; an empty or missing value uses the configured host.
        port: Port to bind; ``None`` uses the configured port. An explicit
            ``0`` is passed through unchanged instead of being replaced.
    """
    babeldoc.format.pdf.high_level.init()

    logging.basicConfig(level=logging.INFO)
    # Keep the per-request chatter of the HTTP/OpenAI client libraries out of the log.
    logging.getLogger("httpx").setLevel("WARNING")
    logging.getLogger("openai").setLevel("WARNING")

    server_host = host or config["server"]["host"]
    # `port or ...` would silently discard a deliberate port=0.
    server_port = port if port is not None else config["server"]["port"]

    logger.info("Using OpenAI model: %s", config['openai']['model'])
    logger.info(
        "Default languages: %s -> %s",
        config['translation']['default_lang_in'],
        config['translation']['default_lang_out'],
    )

    uvicorn.run(app, host=server_host, port=server_port)


if __name__ == "__main__":
    start_server()
|
pdftranslate_web/src/pdftranslate_web/gradio_client.py
ADDED
|
@@ -0,0 +1,749 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import os
|
| 3 |
+
import tempfile
|
| 4 |
+
import time
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import Optional, Tuple, Dict, Any
|
| 7 |
+
import fitz # PyMuPDF
|
| 8 |
+
import base64
|
| 9 |
+
from io import BytesIO
|
| 10 |
+
from PIL import Image
|
| 11 |
+
|
| 12 |
+
from pdftranslate_web.api_client import BabelDOCClient
|
| 13 |
+
import json
|
| 14 |
+
|
| 15 |
+
# Administrator password configuration.
# The value is read from the ADMIN_PASSWORD environment variable and falls
# back to the literal below when the variable is not set.
# NOTE(review): the fallback is a credential committed to source control;
# consider requiring the environment variable in real deployments.
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "zhouqingYu666")
|
| 17 |
+
|
| 18 |
+
class GradioClient:
|
| 19 |
+
def __init__(self, server_url: str = "http://localhost:8000"):
    """Create the API client, a scratch directory and an empty config cache.

    Args:
        server_url: Base URL handed to ``BabelDOCClient``.
    """
    self.client = BabelDOCClient(server_url)
    # Scratch directory for uploaded inputs, previews and downloaded results.
    self.temp_dir = Path(tempfile.mkdtemp())
    self.current_task_id = None

    # Cached OpenAI settings; every entry starts out as an empty string.
    self.config_cache = dict.fromkeys(
        ("openai_api_key", "openai_model", "openai_base_url"), ""
    )

    # The project root is two directories above the one holding this module;
    # the sample PDF is looked up relative to it.
    module_dir = Path(__file__).parent
    self.project_root = module_dir.parent.parent
    self.sample_file_path = self.project_root.joinpath("simaple", "11.pdf")
|
| 32 |
+
|
| 33 |
+
def update_config(self, api_key: str = None, model: str = None, base_url: str = None) -> str:
    """Store any supplied OpenAI settings in the config cache.

    Arguments left as ``None`` keep their currently cached value; every other
    value (including an empty string) overwrites the cache entry.

    Returns:
        A confirmation message for display.
    """
    supplied = {
        "openai_api_key": api_key,
        "openai_model": model,
        "openai_base_url": base_url,
    }
    self.config_cache.update(
        {cache_key: value for cache_key, value in supplied.items() if value is not None}
    )
    return "✅ 配置已更新(下次翻译时生效)"
|
| 42 |
+
|
| 43 |
+
def get_masked_api_key(self, api_key: str) -> str:
    """Return *api_key* with its middle replaced by asterisks.

    Keys longer than eight characters keep their first and last four
    characters. Shorter non-empty keys are masked entirely, because showing
    four characters at each end would reveal the whole key. Empty or ``None``
    input is returned unchanged.

    Args:
        api_key: The secret to mask.

    Returns:
        A string of the same length as *api_key* that is safe to display.
    """
    if not api_key:
        return api_key

    visible = 4  # characters left readable at each end
    if len(api_key) <= 2 * visible:
        # Nothing would be hidden by the head/tail scheme, so hide it all.
        return "*" * len(api_key)
    return api_key[:visible] + "*" * (len(api_key) - 2 * visible) + api_key[-visible:]
|
| 48 |
+
|
| 49 |
+
def check_server_status(self) -> Tuple[str, Dict]:
    """Probe the API server and summarise its configuration as Markdown.

    Empty entries of the config cache are filled in on success: the API key
    gets a placeholder and the model is taken from the server response.

    Returns:
        ``(status_text, server_config)``. The dict is empty when the health
        check fails or when any call raises.
    """
    try:
        if not self.client.health_check():
            return "❌ 服务器离线", {}

        config = self.client.get_server_config()

        # Fill only the cache entries that are still empty.
        cache = self.config_cache
        if not cache["openai_api_key"]:
            cache["openai_api_key"] = "sk-****"  # default placeholder
        if not cache["openai_model"]:
            cache["openai_model"] = config['config']['openai_model']
        if not cache["openai_base_url"]:
            cache["openai_base_url"] = ""  # the server does not return base_url

        server_cfg = config['config']
        status_text = (
            "✅ 服务器在线\n"
            "\n"
            "**服务器配置:**\n"
            f"- 模型: {server_cfg['openai_model']}\n"
            f"- 默认语言: {server_cfg['default_lang_in']} → {server_cfg['default_lang_out']}\n"
            f"- QPS限制: {server_cfg['qps']}\n"
        )
        return status_text, server_cfg
    except Exception as e:
        return f"❌ 连接失败: {str(e)}", {}
|
| 75 |
+
|
| 76 |
+
def pdf_to_images(self, pdf_path: str, max_pages: int = 5) -> list:
    """Render the first pages of a PDF as PIL images for preview.

    Args:
        pdf_path: Path of the PDF to render. An empty or non-existent path
            yields an empty list.
        max_pages: Upper bound on the number of pages rendered.

    Returns:
        One PIL image per rendered page, or an empty list when the file is
        missing or rendering fails (the error is printed).
    """
    if not pdf_path or not os.path.exists(pdf_path):
        return []

    try:
        # The context manager closes the document even if a page fails to
        # render, so the file handle is not leaked on errors.
        with fitz.open(pdf_path) as doc:
            # Zoom factor chosen to get a reasonably sized preview.
            zoom = fitz.Matrix(1.5, 1.5)
            images = []
            for page_num in range(min(len(doc), max_pages)):
                pix = doc[page_num].get_pixmap(matrix=zoom)
                # PNG bytes live in memory, so the image stays usable after
                # the document is closed.
                images.append(Image.open(BytesIO(pix.tobytes("png"))))
            return images
    except Exception as e:
        print(f"PDF预览失败: {e}")
        return []
|
| 101 |
+
|
| 102 |
+
def translate_pdf(
    self,
    pdf_file,
    lang_in: Optional[str] = None,
    lang_out: Optional[str] = None,
    file_type: str = "dual",
    progress=gr.Progress()
) -> Tuple[str, str, list, str, str]:
    """Submit a PDF for translation, poll until it finishes and fetch the result.

    Args:
        pdf_file: Raw bytes of the uploaded PDF, or ``None`` when nothing
            was uploaded.
        lang_in: Source language; an empty value is forwarded as ``None``.
        lang_out: Target language; an empty value is forwarded as ``None``.
        file_type: Which downloaded variant to return (``"dual"`` or
            ``"mono"``).
        progress: Gradio progress callback.

    Returns:
        ``(status message, result path, preview images, task label,
        timing summary)``. The path is ``""`` and the image list empty when
        no result is available; on an unexpected exception the task label
        and timing summary are empty strings as well.
    """
    if pdf_file is None:
        return "❌ 请先上传PDF文件", "", [], "", ""

    try:
        start_time = time.time()
        start_time_str = time.strftime("%H:%M:%S", time.localtime(start_time))

        def timing_summary(end_label: str, duration_label: str) -> str:
            """Format start time, current time and elapsed time on one line."""
            end_time = time.time()
            end_time_str = time.strftime("%H:%M:%S", time.localtime(end_time))
            duration = end_time - start_time
            duration_str = f"{int(duration//60)}分{int(duration%60)}秒"
            return (
                f"开始时间: {start_time_str} | {end_label}: {end_time_str} "
                f"| {duration_label}: {duration_str}"
            )

        progress(0, desc="正在提交翻译任务...")

        # mkstemp guarantees a unique file name; a name derived from the
        # current second would let two uploads made within the same second
        # overwrite each other's input.
        fd, pdf_path = tempfile.mkstemp(
            prefix="input_", suffix=".pdf", dir=self.temp_dir
        )
        with os.fdopen(fd, "wb") as f:
            f.write(pdf_file)

        task_id = self.client.translate_pdf(
            pdf_path=str(pdf_path),
            lang_in=lang_in if lang_in else None,
            lang_out=lang_out if lang_out else None
        )
        self.current_task_id = task_id
        task_label = f"任务ID: {task_id}"

        progress(0.1, desc=f"任务已创建: {task_id[:8]}...")

        # Poll the server every two seconds, giving up after one hour.
        while time.time() - start_time < 3600:
            status = self.client.get_status(task_id)

            if status['status'] == 'completed':
                progress(1.0, desc="翻译完成,正在下载结果...")
                timing_info = timing_summary("完成时间", "总耗时")

                output_dir = self.temp_dir / f"output_{task_id[:8]}"
                output_dir.mkdir(exist_ok=True)

                # Download every variant the server produced.
                downloaded_files = {}
                for ftype in ['dual', 'mono']:
                    if ftype in status['result_files']:
                        output_file = output_dir / f"translated_{ftype}.pdf"
                        if self.client.download_result(task_id, ftype, str(output_file)):
                            downloaded_files[ftype] = str(output_file)

                if file_type not in downloaded_files:
                    return (
                        f"⚠️ 翻译完成但未找到 {file_type} 文件类型",
                        "",
                        [],
                        task_label,
                        timing_info
                    )

                result_path = downloaded_files[file_type]
                result_images = self.pdf_to_images(result_path)
                return (
                    f"✅ 翻译完成!共生成 {len(result_images)} 页内容",
                    result_path,
                    result_images,
                    task_label,
                    timing_info
                )

            elif status['status'] == 'failed':
                timing_info = timing_summary("失败时间", "耗时")
                return (
                    f"❌ 翻译失败: {status['message']}",
                    "",
                    [],
                    task_label,
                    timing_info
                )

            # Still running: mirror the server-side progress in the UI.
            progress_val = status['progress'] / 100.0
            progress(progress_val, desc=f"{status['message']}")
            time.sleep(2)

        # The loop ended without a terminal status: report a timeout.
        timing_info = timing_summary("超时时间", "耗时")
        return "❌ 翻译超时", "", [], task_label, timing_info

    except Exception as e:
        return f"❌ 翻译出错: {str(e)}", "", [], "", ""
|
| 214 |
+
|
| 215 |
+
def get_task_status(self, task_id: str) -> str:
    """Look up a task on the server and describe it as Markdown.

    Args:
        task_id: Identifier of the task; an empty value short-circuits with
            a prompt to enter one.

    Returns:
        A Markdown block with status, progress, message and the names of
        the result files, or an error message when the lookup raises.
    """
    if not task_id:
        return "请输入任务ID"

    try:
        status = self.client.get_status(task_id)
        lines = [
            "",
            f"**任务状态:** {status['status']}",
            f"**进度:** {status['progress']:.1f}%",
            f"**消息:** {status['message']}",
        ]
        result_files = status['result_files']
        file_names = ', '.join(result_files.keys()) if result_files else '无'
        lines.append(f"**结果文件:** {file_names}")
        lines.append("")
        return "\n".join(lines)
    except Exception as e:
        return f"查询失败: {str(e)}"
|
| 230 |
+
|
| 231 |
+
def load_sample_file(self) -> Tuple[bytes, list, str]:
    """Read the bundled sample PDF and build its preview.

    Returns:
        ``(file bytes, preview images, status message)``. The bytes are
        ``None`` and the image list empty when the sample is missing or
        reading it fails.
    """
    try:
        sample = self.sample_file_path
        if not sample.exists():
            return None, [], "❌ 示例文件不存在"

        file_data = sample.read_bytes()
        images = self.pdf_to_images(str(sample))
        return file_data, images, f"✅ 已加载示例文件,共 {len(images)} 页(显示前5页预览)"
    except Exception as e:
        return None, [], f"❌ 加载示例文件失败: {str(e)}"
|
| 248 |
+
|
| 249 |
+
def create_gradio_interface(server_url: str = "http://localhost:8000"):
|
| 250 |
+
"""创建Gradio界面"""
|
| 251 |
+
gradio_client = GradioClient(server_url)
|
| 252 |
+
|
| 253 |
+
with gr.Blocks(
|
| 254 |
+
title="pdftranslate PDF翻译工具",
|
| 255 |
+
theme=gr.themes.Soft(),
|
| 256 |
+
css="""
|
| 257 |
+
.main-container { max-width: 1400px; margin: 0 auto; }
|
| 258 |
+
.preview-container { height: 600px; overflow-y: auto; }
|
| 259 |
+
.status-box { background-color: #f8f9fa; padding: 15px; border-radius: 8px; }
|
| 260 |
+
|
| 261 |
+
/* 表格样式配置 */
|
| 262 |
+
.config-table {
|
| 263 |
+
border: 1px solid #e5e7eb;
|
| 264 |
+
border-radius: 8px;
|
| 265 |
+
overflow: hidden;
|
| 266 |
+
margin: 16px 0;
|
| 267 |
+
}
|
| 268 |
+
.config-header-row {
|
| 269 |
+
background-color: #6366f1 !important;
|
| 270 |
+
margin: 0 !important;
|
| 271 |
+
}
|
| 272 |
+
.config-header {
|
| 273 |
+
background-color: #6366f1;
|
| 274 |
+
color: white;
|
| 275 |
+
padding: 12px 16px;
|
| 276 |
+
margin: 0;
|
| 277 |
+
text-align: center;
|
| 278 |
+
font-weight: bold;
|
| 279 |
+
border-right: 1px solid #5b63d4;
|
| 280 |
+
}
|
| 281 |
+
.config-row {
|
| 282 |
+
border-bottom: 1px solid #e5e7eb;
|
| 283 |
+
padding: 12px 16px;
|
| 284 |
+
margin: 0 !important;
|
| 285 |
+
background-color: white;
|
| 286 |
+
}
|
| 287 |
+
.config-row:hover {
|
| 288 |
+
background-color: #f8fafc;
|
| 289 |
+
}
|
| 290 |
+
.config-row:last-child {
|
| 291 |
+
border-bottom: none;
|
| 292 |
+
}
|
| 293 |
+
.config-actions {
|
| 294 |
+
background-color: #f9fafb;
|
| 295 |
+
padding: 16px;
|
| 296 |
+
border-top: 1px solid #e5e7eb;
|
| 297 |
+
margin: 0 !important;
|
| 298 |
+
}
|
| 299 |
+
.config-status {
|
| 300 |
+
background-color: #f0f9ff;
|
| 301 |
+
padding: 12px 16px;
|
| 302 |
+
border-radius: 6px;
|
| 303 |
+
border-left: 4px solid #3b82f6;
|
| 304 |
+
margin: 16px 0;
|
| 305 |
+
}
|
| 306 |
+
"""
|
| 307 |
+
) as demo:
|
| 308 |
+
|
| 309 |
+
gr.Markdown("""
|
| 310 |
+
# 🌍 pdftranslate PDF翻译工具
|
| 311 |
+
|
| 312 |
+
上传PDF文件,选择翻译选项,即可获得翻译后的PDF文件。支持双语对照和纯翻译两种模式。
|
| 313 |
+
""")
|
| 314 |
+
|
| 315 |
+
# 顶部状态和选项栏
|
| 316 |
+
with gr.Row():
|
| 317 |
+
with gr.Column(scale=1):
|
| 318 |
+
server_status = gr.Markdown("检查服务器状态中...")
|
| 319 |
+
|
| 320 |
+
# 主要界面区域,包含配置选项卡
|
| 321 |
+
with gr.Tabs():
|
| 322 |
+
with gr.TabItem("📄 PDF翻译"):
|
| 323 |
+
# 翻译选项区域
|
| 324 |
+
with gr.Row():
|
| 325 |
+
with gr.Column(scale=1):
|
| 326 |
+
gr.Markdown("**⚙️ 翻译选项**")
|
| 327 |
+
with gr.Row():
|
| 328 |
+
lang_in = gr.Textbox(
|
| 329 |
+
label="源语言",
|
| 330 |
+
placeholder="留空使用服务器默认 (en)",
|
| 331 |
+
scale=1,
|
| 332 |
+
container=False
|
| 333 |
+
)
|
| 334 |
+
lang_out = gr.Textbox(
|
| 335 |
+
label="目标语言",
|
| 336 |
+
placeholder="留空使用服务器默认 (zh)",
|
| 337 |
+
scale=1,
|
| 338 |
+
container=False
|
| 339 |
+
)
|
| 340 |
+
|
| 341 |
+
with gr.Row():
|
| 342 |
+
file_type = gr.Radio(
|
| 343 |
+
choices=["dual", "mono"],
|
| 344 |
+
value="dual",
|
| 345 |
+
label="输出类型",
|
| 346 |
+
info="dual: 双语对照, mono: 纯翻译",
|
| 347 |
+
container=False,
|
| 348 |
+
scale=1
|
| 349 |
+
)
|
| 350 |
+
|
| 351 |
+
translate_btn = gr.Button(
|
| 352 |
+
"🚀 开始翻译",
|
| 353 |
+
variant="primary",
|
| 354 |
+
size="sm",
|
| 355 |
+
scale=1
|
| 356 |
+
)
|
| 357 |
+
|
| 358 |
+
with gr.Row():
|
| 359 |
+
# 左侧:上传和控制面板
|
| 360 |
+
with gr.Column(scale=1):
|
| 361 |
+
gr.Markdown("### 📁 文件上传")
|
| 362 |
+
|
| 363 |
+
# 示例文件加载
|
| 364 |
+
with gr.Row():
|
| 365 |
+
sample_btn = gr.Button(
|
| 366 |
+
"📋 加载示例文件 (11.pdf)",
|
| 367 |
+
variant="secondary",
|
| 368 |
+
size="sm",
|
| 369 |
+
scale=3
|
| 370 |
+
)
|
| 371 |
+
with gr.Column(scale=1, min_width=50):
|
| 372 |
+
gr.Markdown("*或*")
|
| 373 |
+
|
| 374 |
+
pdf_input = gr.File(
|
| 375 |
+
label="拖放PDF文件至此或点击选择",
|
| 376 |
+
file_types=[".pdf"],
|
| 377 |
+
type="binary"
|
| 378 |
+
)
|
| 379 |
+
|
| 380 |
+
# 原始PDF预览
|
| 381 |
+
gr.Markdown("### 📄 原始PDF预览")
|
| 382 |
+
original_preview = gr.Gallery(
|
| 383 |
+
label="原始PDF页面",
|
| 384 |
+
show_label=False,
|
| 385 |
+
elem_classes=["preview-container"],
|
| 386 |
+
columns=1,
|
| 387 |
+
rows=2,
|
| 388 |
+
height="400px",
|
| 389 |
+
show_download_button=False,
|
| 390 |
+
interactive=False
|
| 391 |
+
)
|
| 392 |
+
|
| 393 |
+
# 右侧:结果展示
|
| 394 |
+
with gr.Column(scale=1):
|
| 395 |
+
gr.Markdown("### 📊 翻译状态")
|
| 396 |
+
translation_status = gr.Markdown(
|
| 397 |
+
"等待上传文件...",
|
| 398 |
+
elem_classes=["status-box"]
|
| 399 |
+
)
|
| 400 |
+
|
| 401 |
+
gr.Markdown("### 📑 翻译结果预览")
|
| 402 |
+
result_preview = gr.Gallery(
|
| 403 |
+
label="翻译后PDF页面",
|
| 404 |
+
show_label=False,
|
| 405 |
+
elem_classes=["preview-container"],
|
| 406 |
+
columns=1,
|
| 407 |
+
rows=2,
|
| 408 |
+
height="400px",
|
| 409 |
+
show_download_button=False,
|
| 410 |
+
interactive=False
|
| 411 |
+
)
|
| 412 |
+
|
| 413 |
+
# 下载按钮
|
| 414 |
+
download_btn = gr.File(
|
| 415 |
+
label="下载翻译结果",
|
| 416 |
+
visible=False
|
| 417 |
+
)
|
| 418 |
+
|
| 419 |
+
# 任务信息
|
| 420 |
+
task_info = gr.Textbox(
|
| 421 |
+
label="任务信息",
|
| 422 |
+
interactive=False,
|
| 423 |
+
visible=False
|
| 424 |
+
)
|
| 425 |
+
|
| 426 |
+
# 底部:任务状态查询
|
| 427 |
+
with gr.Accordion("🔍 任务状态查询", open=False):
|
| 428 |
+
with gr.Row():
|
| 429 |
+
task_id_input = gr.Textbox(
|
| 430 |
+
label="任务ID",
|
| 431 |
+
placeholder="输入任务ID查询状态"
|
| 432 |
+
)
|
| 433 |
+
query_btn = gr.Button("查询", size="sm")
|
| 434 |
+
|
| 435 |
+
task_status_output = gr.Markdown("输入任务ID进行查询")
|
| 436 |
+
|
| 437 |
+
# 底部时间统计显示
|
| 438 |
+
with gr.Row():
|
| 439 |
+
timing_display = gr.Markdown(
|
| 440 |
+
"⏱️ **处理时间统计**:等待开始翻译...",
|
| 441 |
+
elem_classes=["status-box"]
|
| 442 |
+
)
|
| 443 |
+
|
| 444 |
+
with gr.TabItem("⚙️ 参数设置"):
|
| 445 |
+
gr.Markdown("### 🔧 核心API配置")
|
| 446 |
+
gr.Markdown("在此修改OpenAI API配置,修改后立即生效于下次翻译任务。")
|
| 447 |
+
|
| 448 |
+
# 用于跟踪当前会话是否已认证的内部状态
|
| 449 |
+
session_authenticated = gr.State(False)
|
| 450 |
+
|
| 451 |
+
# 管理员认证模块
|
| 452 |
+
with gr.Group() as auth_group:
|
| 453 |
+
gr.Markdown("#### **管理员认证**")
|
| 454 |
+
# Password field of the administrator gate. The placeholder deliberately does
# not mention any concrete password: the previous text named a default that
# did not match the configured fallback and hinted at credentials in the UI.
admin_password_input = gr.Textbox(
    label="请输入管理员密码以查看或修改敏感配置",
    type="password",
    placeholder="请输入通过环境变量 ADMIN_PASSWORD 设置的管理员密码"
)
|
| 459 |
+
unlock_button = gr.Button("🔓 解锁", variant="primary")
|
| 460 |
+
|
| 461 |
+
# 将所有需要被锁定的组件收集到一个列表中
|
| 462 |
+
interactive_settings_components = []
|
| 463 |
+
|
| 464 |
+
# 表格样式的配置界面
|
| 465 |
+
with gr.Group(elem_classes=["config-table"]):
|
| 466 |
+
# 表格头部
|
| 467 |
+
with gr.Row(elem_classes=["config-header-row"]):
|
| 468 |
+
with gr.Column(scale=1):
|
| 469 |
+
gr.HTML("<div class='config-header'>配置项</div>")
|
| 470 |
+
with gr.Column(scale=2):
|
| 471 |
+
gr.HTML("<div class='config-header'>当前值</div>")
|
| 472 |
+
with gr.Column(scale=1):
|
| 473 |
+
gr.HTML("<div class='config-header'>操作</div>")
|
| 474 |
+
|
| 475 |
+
# API Key 行
|
| 476 |
+
with gr.Row(elem_classes=["config-row"]):
|
| 477 |
+
with gr.Column(scale=1):
|
| 478 |
+
gr.Markdown("**OpenAI API Key**")
|
| 479 |
+
with gr.Column(scale=2):
|
| 480 |
+
# API key field; locked until the administrator gate is passed.
# The placeholder previously contained mis-encoded characters.
api_key_input = gr.Textbox(
    placeholder="输入你的API密钥",
    type="password",
    container=False,
    show_label=False,
    interactive=False
)
|
| 487 |
+
interactive_settings_components.append(api_key_input)
|
| 488 |
+
with gr.Column(scale=1):
|
| 489 |
+
show_api_key_btn = gr.Button("👁 显示", size="sm", interactive=False)
|
| 490 |
+
interactive_settings_components.append(show_api_key_btn)
|
| 491 |
+
|
| 492 |
+
# 模型名称行
|
| 493 |
+
with gr.Row(elem_classes=["config-row"]):
|
| 494 |
+
with gr.Column(scale=1):
|
| 495 |
+
gr.Markdown("**模型名称**")
|
| 496 |
+
with gr.Column(scale=2):
|
| 497 |
+
model_input = gr.Textbox(
|
| 498 |
+
placeholder="如: deepseek-ai/DeepSeek-V3",
|
| 499 |
+
container=False,
|
| 500 |
+
show_label=False,
|
| 501 |
+
interactive=False
|
| 502 |
+
)
|
| 503 |
+
interactive_settings_components.append(model_input)
|
| 504 |
+
with gr.Column(scale=1):
|
| 505 |
+
gr.HTML("<span></span>") # 空占位符
|
| 506 |
+
|
| 507 |
+
# Base URL 行
|
| 508 |
+
with gr.Row(elem_classes=["config-row"]):
|
| 509 |
+
with gr.Column(scale=1):
|
| 510 |
+
gr.Markdown("**Base URL**")
|
| 511 |
+
with gr.Column(scale=2):
|
| 512 |
+
base_url_input = gr.Textbox(
|
| 513 |
+
placeholder="如: https://api.siliconflow.cn/v1",
|
| 514 |
+
container=False,
|
| 515 |
+
show_label=False,
|
| 516 |
+
interactive=False
|
| 517 |
+
)
|
| 518 |
+
interactive_settings_components.append(base_url_input)
|
| 519 |
+
with gr.Column(scale=1):
|
| 520 |
+
gr.HTML("<span></span>") # 空占位符
|
| 521 |
+
|
| 522 |
+
# 操作按钮行
|
| 523 |
+
with gr.Row(elem_classes=["config-actions"]):
|
| 524 |
+
save_config_btn = gr.Button(
|
| 525 |
+
"💾 保存配置",
|
| 526 |
+
variant="primary",
|
| 527 |
+
size="sm",
|
| 528 |
+
interactive=False
|
| 529 |
+
)
|
| 530 |
+
interactive_settings_components.append(save_config_btn)
|
| 531 |
+
|
| 532 |
+
# 配置状态显示
|
| 533 |
+
config_status = gr.Markdown("等待配置...", elem_classes=["config-status"])
|
| 534 |
+
|
| 535 |
+
# 创建一个 State 来传递需要解锁的组件数量
|
| 536 |
+
num_components = gr.State(len(interactive_settings_components))
|
| 537 |
+
|
| 538 |
+
def update_server_status():
    """Return the status text of the server check; the config dict is dropped."""
    return gradio_client.check_server_status()[0]
|
| 541 |
+
|
| 542 |
+
def unlock_settings(password_attempt, num_components_to_unlock):
    """Validate the administrator password and unlock the settings widgets.

    Args:
        password_attempt: Text entered in the password field.
        num_components_to_unlock: Number of locked components that need an
            ``interactive=True`` update.

    Returns:
        One update per locked component, followed by an update that hides
        the authentication group and ``True`` for the session state.

    Raises:
        gr.Error: When the password does not match; the UI is left unchanged.
    """
    import hmac  # stdlib; imported locally to keep this handler self-contained

    attempt = password_attempt if isinstance(password_attempt, str) else ""
    # compare_digest avoids leaking, through timing, how much of the
    # password matched. Bytes are used so non-ASCII passwords are accepted.
    if not hmac.compare_digest(attempt.encode("utf-8"), ADMIN_PASSWORD.encode("utf-8")):
        raise gr.Error("管理员密码错误!")

    gr.Info("认证成功!设置已解锁。")
    unlock_updates = [gr.update(interactive=True) for _ in range(num_components_to_unlock)]
    # The star unpacks the per-component updates into the result tuple.
    return *unlock_updates, gr.update(visible=False), True
|
| 556 |
+
|
| 557 |
+
def toggle_api_key_visibility(api_key_value):
    """Switch the API key textbox between masked and plain display.

    A value containing ``*`` is treated as the masked form and is replaced
    by the cached original key. Any other non-empty value is stored in the
    cache and replaced by its masked form. An empty value only resets the
    field type.
    """
    if not api_key_value:
        return gr.update(type="password")

    if "*" in api_key_value:
        # Currently masked: reveal the original from the cache.
        original_key = gradio_client.config_cache.get('openai_api_key', '')
        return gr.update(value=original_key, type="text")

    # Currently plain: remember it, then show the masked form.
    gradio_client.config_cache['openai_api_key'] = api_key_value
    masked_key = gradio_client.get_masked_api_key(api_key_value)
    return gr.update(value=masked_key, type="password")
|
| 574 |
+
|
| 575 |
+
def save_config(api_key, model, base_url):
    """Save the settings form into the config cache and summarise the result.

    Args:
        api_key: Content of the API key field. A value containing ``*`` is
            the masked display form, not a real key, and is ignored so the
            cached key is kept.
        model: Model name to cache.
        base_url: Base URL to cache.

    Returns:
        The confirmation message followed by a Markdown summary of the
        cached configuration (API key masked).
    """
    # The key field is pre-filled with the masked key; saving it verbatim
    # would overwrite the real key with asterisks.
    if api_key and "*" in api_key:
        api_key = None

    status = gradio_client.update_config(api_key=api_key, model=model, base_url=base_url)

    cache = gradio_client.config_cache
    config_info = (
        "**当前配置:**\n"
        f"- API Key: {gradio_client.get_masked_api_key(cache['openai_api_key'])}\n"
        f"- 模型: {cache['openai_model'] or '未设置'}\n"
        f"- Base URL: {cache['openai_base_url'] or '未设置'}\n"
    )
    return status + "\n\n" + config_info
|
| 586 |
+
|
| 587 |
+
def load_config_from_server():
    """Load the OpenAI settings from the environment into the cache and form.

    Despite its name, this function reads only environment variables (after
    loading a ``.env`` file through python-dotenv); it does not contact the
    server.

    Returns:
        ``(masked API key, model, base URL)`` for the three input fields.
    """
    import os
    from dotenv import load_dotenv

    load_dotenv()

    env_settings = {
        "openai_api_key": os.getenv("OPENAI_API_KEY", ""),
        "openai_model": os.getenv("OPENAI_MODEL", ""),
        "openai_base_url": os.getenv("OPENAI_BASE_URL", ""),
    }
    gradio_client.config_cache.update(env_settings)

    # Only the masked form of the key is sent to the browser.
    api_key = env_settings["openai_api_key"]
    masked_api_key = gradio_client.get_masked_api_key(api_key) if api_key else ""

    return masked_api_key, env_settings["openai_model"], env_settings["openai_base_url"]
|
| 611 |
+
|
| 612 |
+
# 定期更新服务器状态
|
| 613 |
+
demo.load(update_server_status, outputs=[server_status])
|
| 614 |
+
|
| 615 |
+
# 事件处理
|
| 616 |
+
def on_pdf_upload(pdf_file):
    """Build the preview gallery for a freshly uploaded PDF.

    Args:
        pdf_file: Raw bytes of the upload, or ``None`` when the field was
            cleared.

    Returns:
        ``(preview images, status message)``.
    """
    if pdf_file is None:
        return [], "等待上传文件..."

    # mkstemp guarantees a unique file name; a name derived from the current
    # second would let simultaneous uploads overwrite each other's preview.
    fd, temp_path = tempfile.mkstemp(
        prefix="preview_", suffix=".pdf", dir=gradio_client.temp_dir
    )
    with os.fdopen(fd, "wb") as f:
        f.write(pdf_file)

    images = gradio_client.pdf_to_images(str(temp_path))
    status = f"✅ 已上传PDF文件,共 {len(images)} 页(显示前5页预览)"

    return images, status
|
| 631 |
+
|
| 632 |
+
def on_load_sample():
    """Load the sample PDF and hand it to the file component as a temp file.

    Returns:
        ``(path of the temp copy or None, preview images, status message)``.
    """
    file_data, images, status = gradio_client.load_sample_file()
    if not file_data:
        return None, [], status

    # The file component is given a path, so write the bytes to a temp copy.
    temp_sample_path = gradio_client.temp_dir / f"sample_{int(time.time())}.pdf"
    temp_sample_path.write_bytes(file_data)
    return str(temp_sample_path), images, status
|
| 643 |
+
|
| 644 |
+
def on_translate(pdf_file, lang_in, lang_out, file_type, progress=gr.Progress()):
    """Run a translation and map its outcome onto the result widgets.

    Returns:
        Values for, in order: the status text, the result gallery, the
        download component, the task info box and the timing display.
    """
    outcome = gradio_client.translate_pdf(
        pdf_file, lang_in, lang_out, file_type, progress
    )
    status, result_path, result_images, task_id, timing_info = outcome

    # Show the download/task widgets only when there is something to show.
    download_visible = bool(result_path and os.path.exists(result_path))
    task_visible = bool(task_id)

    if timing_info:
        timing_display_text = f"⏱️ **处理时间统计**:{timing_info}"
    else:
        timing_display_text = "⏱️ **处理时间统计**:处理异常"

    download_component = gr.File(value=result_path if result_path else None, visible=download_visible)
    task_component = gr.Textbox(value=task_id, visible=task_visible)
    return status, result_images, download_component, task_component, timing_display_text
|
| 664 |
+
|
| 665 |
+
# 绑定事件
|
| 666 |
+
# 管理员认证事件
|
| 667 |
+
unlock_button.click(
|
| 668 |
+
fn=unlock_settings,
|
| 669 |
+
inputs=[admin_password_input, num_components],
|
| 670 |
+
# outputs 列表现在包含所有被控制的组件、认证组和会话状态
|
| 671 |
+
outputs=interactive_settings_components + [auth_group, session_authenticated]
|
| 672 |
+
)
|
| 673 |
+
|
| 674 |
+
# 配置相关事件
|
| 675 |
+
show_api_key_btn.click(
|
| 676 |
+
toggle_api_key_visibility,
|
| 677 |
+
inputs=[api_key_input],
|
| 678 |
+
outputs=[api_key_input]
|
| 679 |
+
)
|
| 680 |
+
|
| 681 |
+
save_config_btn.click(
|
| 682 |
+
save_config,
|
| 683 |
+
inputs=[api_key_input, model_input, base_url_input],
|
| 684 |
+
outputs=[config_status]
|
| 685 |
+
)
|
| 686 |
+
|
| 687 |
+
# 页面加载时从服务器加载配置
|
| 688 |
+
demo.load(
|
| 689 |
+
load_config_from_server,
|
| 690 |
+
outputs=[api_key_input, model_input, base_url_input]
|
| 691 |
+
)
|
| 692 |
+
|
| 693 |
+
# 示例文件加载
|
| 694 |
+
sample_btn.click(
|
| 695 |
+
on_load_sample,
|
| 696 |
+
outputs=[pdf_input, original_preview, translation_status]
|
| 697 |
+
)
|
| 698 |
+
|
| 699 |
+
pdf_input.change(
|
| 700 |
+
on_pdf_upload,
|
| 701 |
+
inputs=[pdf_input],
|
| 702 |
+
outputs=[original_preview, translation_status]
|
| 703 |
+
)
|
| 704 |
+
|
| 705 |
+
translate_btn.click(
|
| 706 |
+
on_translate,
|
| 707 |
+
inputs=[pdf_input, lang_in, lang_out, file_type],
|
| 708 |
+
outputs=[translation_status, result_preview, download_btn, task_info, timing_display]
|
| 709 |
+
)
|
| 710 |
+
|
| 711 |
+
query_btn.click(
|
| 712 |
+
gradio_client.get_task_status,
|
| 713 |
+
inputs=[task_id_input],
|
| 714 |
+
outputs=[task_status_output]
|
| 715 |
+
)
|
| 716 |
+
|
| 717 |
+
return demo
|
| 718 |
+
|
| 719 |
+
def main():
    """Parse the command-line options and launch the Gradio client.

    Options: ``--server-url`` (API server address), ``--host`` and ``--port``
    (where Gradio listens) and ``--share`` (create a public link).
    """
    import argparse

    # Announce that the settings page is password protected without echoing
    # the password itself: stdout usually ends up in logs.
    print("---" * 10)
    print("INFO: 管理员密码已设置。请在参数设置页面使用该密码解锁敏感信息。")
    if not os.getenv("ADMIN_PASSWORD"):
        print("WARNING: 未设置环境变量 ADMIN_PASSWORD,正在使用内置默认密码,请在部署前修改。")
    print("---" * 10)

    parser = argparse.ArgumentParser(description="BabelDOC Gradio客户端")
    parser.add_argument("--server-url", default="http://localhost:8000", help="API服务器地址")
    parser.add_argument("--host", default="0.0.0.0", help="Gradio服务器主机")
    parser.add_argument("--port", type=int, default=7860, help="Gradio服务器端口")
    parser.add_argument("--share", action="store_true", help="创建公共链接")

    args = parser.parse_args()

    print("正在启动Gradio客户端...")
    print(f"API服务器: {args.server_url}")
    print(f"Gradio地址: http://{args.host}:{args.port}")

    demo = create_gradio_interface(args.server_url)
    demo.launch(
        server_name=args.host,
        server_port=args.port,
        share=args.share,
        show_error=True
    )
|
| 747 |
+
|
| 748 |
+
if __name__ == "__main__":
|
| 749 |
+
main()
|
pdftranslate_web/tests/test2.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Smoke check that OPENAI_API_KEY is available after loading the .env file."""
import os

from dotenv import load_dotenv

# Load variables from the .env file located by python-dotenv.
load_dotenv()

# Report only whether the key is present; printing the key itself would
# leak the secret into terminals and CI logs.
api_key = os.getenv("OPENAI_API_KEY")
print("OPENAI_API_KEY loaded" if api_key else "OPENAI_API_KEY is not set")
|
pdftranslate_web/tests/test_structure.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 基础测试文件
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
def test_project_structure():
    """Assert that the expected project directories and files exist.

    Paths are resolved relative to the parent of the directory that
    contains this test file.
    """
    project_root = Path(__file__).parent.parent

    required = (
        # Core directories
        ("src", "pdftranslate_web"),
        ("scripts",),
        ("docs",),
        # Core modules
        ("src", "pdftranslate_web", "__init__.py"),
        ("src", "pdftranslate_web", "api_server.py"),
        ("src", "pdftranslate_web", "api_client.py"),
        ("src", "pdftranslate_web", "gradio_client.py"),
        # Configuration files
        (".env.example",),
        ("pyproject.toml",),
        ("README.md",),
        # Docker files
        ("Dockerfile",),
        ("docker-compose.yml",),
    )
    for parts in required:
        assert project_root.joinpath(*parts).exists()
|
| 27 |
+
|
| 28 |
+
if __name__ == "__main__":
|
| 29 |
+
test_project_structure()
|
| 30 |
+
print("✅ 项目结构测试通过")
|
pdftranslate_web/uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
jupyterlab==4.4.5
|
| 2 |
+
tornado==6.2
|
| 3 |
+
ipywidgets
|
| 4 |
+
uv==0.8.4
|
start_server.sh
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Start JupyterLab on 0.0.0.0:7860, serving notebooks from NOTEBOOK_DIR.

# Access token; falls back to "huggingface" when JUPYTER_TOKEN is unset or empty.
JUPYTER_TOKEN="${JUPYTER_TOKEN:=huggingface}"

NOTEBOOK_DIR="/data"

# Turn off the announcements extension before launching.
jupyter labextension disable "@jupyterlab/apputils-extension:announcements"

# The tornado header sets "frame-ancestors *" so the UI may be embedded in a
# frame; cookies are marked SameSite=None/Secure; the XSRF check is disabled;
# news and update checks are switched off.
jupyter-lab \
    --ip 0.0.0.0 \
    --port 7860 \
    --no-browser \
    --allow-root \
    --ServerApp.token="$JUPYTER_TOKEN" \
    --ServerApp.tornado_settings="{'headers': {'Content-Security-Policy': 'frame-ancestors *'}}" \
    --ServerApp.cookie_options="{'SameSite': 'None', 'Secure': True}" \
    --ServerApp.disable_check_xsrf=True \
    --LabApp.news_url=None \
    --LabApp.check_for_updates_class="jupyterlab.NeverCheckForUpdate" \
    --notebook-dir="$NOTEBOOK_DIR"
|