Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
|
|
| 1 |
from fastapi import FastAPI, File, UploadFile, HTTPException, Form, Body
|
| 2 |
from fastapi.staticfiles import StaticFiles
|
| 3 |
-
from fastapi.responses import StreamingResponse, FileResponse
|
| 4 |
from fastapi.middleware.cors import CORSMiddleware
|
| 5 |
import uvicorn
|
| 6 |
import asyncio
|
|
@@ -9,8 +10,6 @@ import json
|
|
| 9 |
import webbrowser
|
| 10 |
from typing import List, Dict, Optional
|
| 11 |
import os
|
| 12 |
-
import tempfile
|
| 13 |
-
import shutil
|
| 14 |
from pathlib import Path
|
| 15 |
import pypdf
|
| 16 |
from docx import Document
|
|
@@ -25,6 +24,11 @@ import hashlib
|
|
| 25 |
import re
|
| 26 |
from io import BytesIO
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
app = FastAPI()
|
| 29 |
|
| 30 |
# 配置CORS
|
|
@@ -36,51 +40,8 @@ app.add_middleware(
|
|
| 36 |
allow_headers=["*"],
|
| 37 |
)
|
| 38 |
|
| 39 |
-
# 创建临时目录管理器
|
| 40 |
-
class TempDirManager:
|
| 41 |
-
def __init__(self):
|
| 42 |
-
self.temp_base = tempfile.mkdtemp()
|
| 43 |
-
# 创建必要的子目录
|
| 44 |
-
self.static_dir = os.path.join(self.temp_base, "static")
|
| 45 |
-
self.temp_dir = os.path.join(self.temp_base, "temp")
|
| 46 |
-
self.memory_dir = os.path.join(self.temp_base, "translation_memory")
|
| 47 |
-
|
| 48 |
-
os.makedirs(self.static_dir, exist_ok=True)
|
| 49 |
-
os.makedirs(self.temp_dir, exist_ok=True)
|
| 50 |
-
os.makedirs(self.memory_dir, exist_ok=True)
|
| 51 |
-
|
| 52 |
-
# 创建默认的index.html
|
| 53 |
-
self._create_default_index()
|
| 54 |
-
|
| 55 |
-
def _create_default_index(self):
|
| 56 |
-
index_html = """
|
| 57 |
-
<!DOCTYPE html>
|
| 58 |
-
<html>
|
| 59 |
-
<head>
|
| 60 |
-
<title>Translation Service</title>
|
| 61 |
-
<meta charset="UTF-8">
|
| 62 |
-
<style>
|
| 63 |
-
body { font-family: Arial, sans-serif; margin: 40px; }
|
| 64 |
-
h1 { color: #333; }
|
| 65 |
-
</style>
|
| 66 |
-
</head>
|
| 67 |
-
<body>
|
| 68 |
-
<h1>Translation Service</h1>
|
| 69 |
-
<p>API service is running. Please use the API endpoints to interact with the service.</p>
|
| 70 |
-
</body>
|
| 71 |
-
</html>
|
| 72 |
-
"""
|
| 73 |
-
with open(os.path.join(self.static_dir, "index.html"), "w", encoding="utf-8") as f:
|
| 74 |
-
f.write(index_html)
|
| 75 |
-
|
| 76 |
-
def cleanup(self):
|
| 77 |
-
shutil.rmtree(self.temp_base)
|
| 78 |
-
|
| 79 |
-
# 创建临时目录管理器实例
|
| 80 |
-
temp_dir_manager = TempDirManager()
|
| 81 |
-
|
| 82 |
# 挂载静态文件目录
|
| 83 |
-
app.mount("/static", StaticFiles(directory=
|
| 84 |
|
| 85 |
class DocumentSegment(BaseModel):
|
| 86 |
text: str
|
|
@@ -108,9 +69,16 @@ class ExportRequest(BaseModel):
|
|
| 108 |
|
| 109 |
class TranslationMemory:
|
| 110 |
def __init__(self):
|
| 111 |
-
self.memory_file =
|
| 112 |
-
self.
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
def save_memory(self):
|
| 116 |
with open(self.memory_file, 'w', encoding='utf-8') as f:
|
|
@@ -131,9 +99,16 @@ class TranslationMemory:
|
|
| 131 |
|
| 132 |
class TerminologyManager:
|
| 133 |
def __init__(self):
|
| 134 |
-
self.terminology_file =
|
| 135 |
-
self.
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
def save_terminology(self):
|
| 139 |
with open(self.terminology_file, 'w', encoding='utf-8') as f:
|
|
@@ -159,6 +134,7 @@ class DocumentProcessor:
|
|
| 159 |
|
| 160 |
if file_ext == 'txt':
|
| 161 |
text = content.decode('utf-8')
|
|
|
|
| 162 |
paragraphs = re.split(r'\n\s*\n|\r\n\s*\r\n', text)
|
| 163 |
for i, para in enumerate(paragraphs):
|
| 164 |
if para.strip():
|
|
@@ -170,7 +146,7 @@ class DocumentProcessor:
|
|
| 170 |
))
|
| 171 |
|
| 172 |
elif file_ext == 'pdf':
|
| 173 |
-
temp_path =
|
| 174 |
with open(temp_path, 'wb') as f:
|
| 175 |
f.write(content)
|
| 176 |
|
|
@@ -196,7 +172,7 @@ class DocumentProcessor:
|
|
| 196 |
os.remove(temp_path)
|
| 197 |
|
| 198 |
elif file_ext == 'docx':
|
| 199 |
-
temp_path =
|
| 200 |
with open(temp_path, 'wb') as f:
|
| 201 |
f.write(content)
|
| 202 |
|
|
@@ -427,6 +403,39 @@ class TranslationManager:
|
|
| 427 |
replacements[placeholder] = target_term
|
| 428 |
text_to_translate = text_to_translate.replace(source_term, placeholder)
|
| 429 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 430 |
async with aiohttp.ClientSession() as session:
|
| 431 |
try:
|
| 432 |
# 使用环境变量获取Deepl翻译API的URL
|
|
@@ -550,11 +559,6 @@ async def export_document(request: ExportRequest):
|
|
| 550 |
async def read_root():
|
| 551 |
return FileResponse('static/index.html')
|
| 552 |
|
| 553 |
-
# 程序退出时清理临时目录
|
| 554 |
-
@app.on_event("shutdown")
|
| 555 |
-
async def shutdown_event():
|
| 556 |
-
temp_dir_manager.cleanup()
|
| 557 |
-
|
| 558 |
def open_browser():
|
| 559 |
webbrowser.open('http://localhost:7860')
|
| 560 |
|
|
|
|
| 1 |
+
# main.py
|
| 2 |
from fastapi import FastAPI, File, UploadFile, HTTPException, Form, Body
|
| 3 |
from fastapi.staticfiles import StaticFiles
|
| 4 |
+
from fastapi.responses import StreamingResponse, FileResponse
|
| 5 |
from fastapi.middleware.cors import CORSMiddleware
|
| 6 |
import uvicorn
|
| 7 |
import asyncio
|
|
|
|
| 10 |
import webbrowser
|
| 11 |
from typing import List, Dict, Optional
|
| 12 |
import os
|
|
|
|
|
|
|
| 13 |
from pathlib import Path
|
| 14 |
import pypdf
|
| 15 |
from docx import Document
|
|
|
|
| 24 |
import re
|
| 25 |
from io import BytesIO
|
| 26 |
|
| 27 |
+
# Ensure the working directories used by the service exist.
# The paths are relative, so they resolve against the current working directory.
for _required_dir in ("static", "temp", "translation_memory"):
    os.makedirs(_required_dir, exist_ok=True)
|
| 31 |
+
|
| 32 |
app = FastAPI()
|
| 33 |
|
| 34 |
# 配置CORS
|
|
|
|
| 40 |
allow_headers=["*"],
|
| 41 |
)
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
# 挂载静态文件目录
|
| 44 |
+
app.mount("/static", StaticFiles(directory="static"), name="static")
|
| 45 |
|
| 46 |
class DocumentSegment(BaseModel):
|
| 47 |
text: str
|
|
|
|
| 69 |
|
| 70 |
class TranslationMemory:
|
| 71 |
def __init__(self):
|
| 72 |
+
self.memory_file = "translation_memory/memory.json"
|
| 73 |
+
self.load_memory()
|
| 74 |
+
|
| 75 |
+
def load_memory(self):
    """Populate ``self.memory`` from the JSON file at ``self.memory_file``.

    If the file does not exist, ``self.memory`` is set to an empty dict
    and ``save_memory`` is called right away so the file gets created.
    """
    # Guard clause: no file on disk yet, so start empty and persist it.
    if not os.path.exists(self.memory_file):
        self.memory = {}
        self.save_memory()
        return

    with open(self.memory_file, 'r', encoding='utf-8') as memory_fh:
        self.memory = json.load(memory_fh)
|
| 82 |
|
| 83 |
def save_memory(self):
|
| 84 |
with open(self.memory_file, 'w', encoding='utf-8') as f:
|
|
|
|
| 99 |
|
| 100 |
class TerminologyManager:
|
| 101 |
def __init__(self):
|
| 102 |
+
self.terminology_file = "translation_memory/terminology.json"
|
| 103 |
+
self.load_terminology()
|
| 104 |
+
|
| 105 |
+
def load_terminology(self):
    """Populate ``self.terminology`` from the JSON file at ``self.terminology_file``.

    If the file does not exist, ``self.terminology`` is set to an empty
    dict and ``save_terminology`` is called immediately to create the file.
    """
    terminology_missing = not os.path.exists(self.terminology_file)
    if terminology_missing:
        # First run: nothing stored yet, so start empty and write it out.
        self.terminology = {}
        self.save_terminology()
    else:
        with open(self.terminology_file, 'r', encoding='utf-8') as terms_fh:
            self.terminology = json.load(terms_fh)
|
| 112 |
|
| 113 |
def save_terminology(self):
|
| 114 |
with open(self.terminology_file, 'w', encoding='utf-8') as f:
|
|
|
|
| 134 |
|
| 135 |
if file_ext == 'txt':
|
| 136 |
text = content.decode('utf-8')
|
| 137 |
+
# 优化分段逻辑,支持更多分隔符
|
| 138 |
paragraphs = re.split(r'\n\s*\n|\r\n\s*\r\n', text)
|
| 139 |
for i, para in enumerate(paragraphs):
|
| 140 |
if para.strip():
|
|
|
|
| 146 |
))
|
| 147 |
|
| 148 |
elif file_ext == 'pdf':
|
| 149 |
+
temp_path = f"temp/{file.filename}"
|
| 150 |
with open(temp_path, 'wb') as f:
|
| 151 |
f.write(content)
|
| 152 |
|
|
|
|
| 172 |
os.remove(temp_path)
|
| 173 |
|
| 174 |
elif file_ext == 'docx':
|
| 175 |
+
temp_path = f"temp/{file.filename}"
|
| 176 |
with open(temp_path, 'wb') as f:
|
| 177 |
f.write(content)
|
| 178 |
|
|
|
|
| 403 |
replacements[placeholder] = target_term
|
| 404 |
text_to_translate = text_to_translate.replace(source_term, placeholder)
|
| 405 |
|
| 406 |
+
# async with aiohttp.ClientSession() as session:
|
| 407 |
+
# try:
|
| 408 |
+
# # DeepL翻译
|
| 409 |
+
# async with session.post(
|
| 410 |
+
# 'https://api.deeplx.org/..../translate',
|
| 411 |
+
# json={
|
| 412 |
+
# "text": text_to_translate,
|
| 413 |
+
# "source_lang": source_lang,
|
| 414 |
+
# "target_lang": target_lang
|
| 415 |
+
# }
|
| 416 |
+
# ) as response:
|
| 417 |
+
# result = await response.json()
|
| 418 |
+
# if result.get('code') == 200:
|
| 419 |
+
# translated_text = result['data']
|
| 420 |
+
# # 恢复术语替换
|
| 421 |
+
# for placeholder, term in replacements.items():
|
| 422 |
+
# translated_text = translated_text.replace(placeholder, term)
|
| 423 |
+
|
| 424 |
+
# # 保存到翻译记忆
|
| 425 |
+
# self.memory.add_translation(text, translated_text, source_lang, target_lang)
|
| 426 |
+
|
| 427 |
+
# return {
|
| 428 |
+
# 'translated': translated_text,
|
| 429 |
+
# 'alternatives': result.get('alternatives', []),
|
| 430 |
+
# 'from_memory': False,
|
| 431 |
+
# 'confidence': 0.8 if replacements else 0.7
|
| 432 |
+
# }
|
| 433 |
+
# else:
|
| 434 |
+
# raise HTTPException(status_code=500, detail="Translation API error")
|
| 435 |
+
# except Exception as e:
|
| 436 |
+
# raise HTTPException(status_code=500, detail=str(e))
|
| 437 |
+
|
| 438 |
+
|
| 439 |
async with aiohttp.ClientSession() as session:
|
| 440 |
try:
|
| 441 |
# 使用环境变量获取Deepl翻译API的URL
|
|
|
|
| 559 |
async def read_root():
|
| 560 |
return FileResponse('static/index.html')
|
| 561 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 562 |
def open_browser():
|
| 563 |
webbrowser.open('http://localhost:7860')
|
| 564 |
|