mistpe committed on
Commit
92cb7b7
·
verified ·
1 Parent(s): 7993fc5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -60
app.py CHANGED
@@ -1,6 +1,7 @@
 
1
  from fastapi import FastAPI, File, UploadFile, HTTPException, Form, Body
2
  from fastapi.staticfiles import StaticFiles
3
- from fastapi.responses import StreamingResponse, FileResponse, HTMLResponse
4
  from fastapi.middleware.cors import CORSMiddleware
5
  import uvicorn
6
  import asyncio
@@ -9,8 +10,6 @@ import json
9
  import webbrowser
10
  from typing import List, Dict, Optional
11
  import os
12
- import tempfile
13
- import shutil
14
  from pathlib import Path
15
  import pypdf
16
  from docx import Document
@@ -25,6 +24,11 @@ import hashlib
25
  import re
26
  from io import BytesIO
27
 
 
 
 
 
 
28
  app = FastAPI()
29
 
30
  # 配置CORS
@@ -36,51 +40,8 @@ app.add_middleware(
36
  allow_headers=["*"],
37
  )
38
 
39
- # 创建临时目录管理器
40
- class TempDirManager:
41
- def __init__(self):
42
- self.temp_base = tempfile.mkdtemp()
43
- # 创建必要的子目录
44
- self.static_dir = os.path.join(self.temp_base, "static")
45
- self.temp_dir = os.path.join(self.temp_base, "temp")
46
- self.memory_dir = os.path.join(self.temp_base, "translation_memory")
47
-
48
- os.makedirs(self.static_dir, exist_ok=True)
49
- os.makedirs(self.temp_dir, exist_ok=True)
50
- os.makedirs(self.memory_dir, exist_ok=True)
51
-
52
- # 创建默认的index.html
53
- self._create_default_index()
54
-
55
- def _create_default_index(self):
56
- index_html = """
57
- <!DOCTYPE html>
58
- <html>
59
- <head>
60
- <title>Translation Service</title>
61
- <meta charset="UTF-8">
62
- <style>
63
- body { font-family: Arial, sans-serif; margin: 40px; }
64
- h1 { color: #333; }
65
- </style>
66
- </head>
67
- <body>
68
- <h1>Translation Service</h1>
69
- <p>API service is running. Please use the API endpoints to interact with the service.</p>
70
- </body>
71
- </html>
72
- """
73
- with open(os.path.join(self.static_dir, "index.html"), "w", encoding="utf-8") as f:
74
- f.write(index_html)
75
-
76
- def cleanup(self):
77
- shutil.rmtree(self.temp_base)
78
-
79
- # 创建临时目录管理器实例
80
- temp_dir_manager = TempDirManager()
81
-
82
  # 挂载静态文件目录
83
- app.mount("/static", StaticFiles(directory=temp_dir_manager.static_dir), name="static")
84
 
85
  class DocumentSegment(BaseModel):
86
  text: str
@@ -108,9 +69,16 @@ class ExportRequest(BaseModel):
108
 
109
  class TranslationMemory:
110
  def __init__(self):
111
- self.memory_file = os.path.join(temp_dir_manager.memory_dir, "memory.json")
112
- self.memory = {}
113
- self.save_memory()
 
 
 
 
 
 
 
114
 
115
  def save_memory(self):
116
  with open(self.memory_file, 'w', encoding='utf-8') as f:
@@ -131,9 +99,16 @@ class TranslationMemory:
131
 
132
  class TerminologyManager:
133
  def __init__(self):
134
- self.terminology_file = os.path.join(temp_dir_manager.memory_dir, "terminology.json")
135
- self.terminology = {}
136
- self.save_terminology()
 
 
 
 
 
 
 
137
 
138
  def save_terminology(self):
139
  with open(self.terminology_file, 'w', encoding='utf-8') as f:
@@ -159,6 +134,7 @@ class DocumentProcessor:
159
 
160
  if file_ext == 'txt':
161
  text = content.decode('utf-8')
 
162
  paragraphs = re.split(r'\n\s*\n|\r\n\s*\r\n', text)
163
  for i, para in enumerate(paragraphs):
164
  if para.strip():
@@ -170,7 +146,7 @@ class DocumentProcessor:
170
  ))
171
 
172
  elif file_ext == 'pdf':
173
- temp_path = os.path.join(temp_dir_manager.temp_dir, file.filename)
174
  with open(temp_path, 'wb') as f:
175
  f.write(content)
176
 
@@ -196,7 +172,7 @@ class DocumentProcessor:
196
  os.remove(temp_path)
197
 
198
  elif file_ext == 'docx':
199
- temp_path = os.path.join(temp_dir_manager.temp_dir, file.filename)
200
  with open(temp_path, 'wb') as f:
201
  f.write(content)
202
 
@@ -427,6 +403,39 @@ class TranslationManager:
427
  replacements[placeholder] = target_term
428
  text_to_translate = text_to_translate.replace(source_term, placeholder)
429
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
  async with aiohttp.ClientSession() as session:
431
  try:
432
  # 使用环境变量获取Deepl翻译API的URL
@@ -550,11 +559,6 @@ async def export_document(request: ExportRequest):
550
  async def read_root():
551
  return FileResponse('static/index.html')
552
 
553
- # 程序退出时清理临时目录
554
- @app.on_event("shutdown")
555
- async def shutdown_event():
556
- temp_dir_manager.cleanup()
557
-
558
  def open_browser():
559
  webbrowser.open('http://localhost:7860')
560
 
 
1
+ # main.py
2
  from fastapi import FastAPI, File, UploadFile, HTTPException, Form, Body
3
  from fastapi.staticfiles import StaticFiles
4
+ from fastapi.responses import StreamingResponse, FileResponse
5
  from fastapi.middleware.cors import CORSMiddleware
6
  import uvicorn
7
  import asyncio
 
10
  import webbrowser
11
  from typing import List, Dict, Optional
12
  import os
 
 
13
  from pathlib import Path
14
  import pypdf
15
  from docx import Document
 
24
  import re
25
  from io import BytesIO
26
 
27
+ # 创建必要的目录
28
+ os.makedirs("static", exist_ok=True)
29
+ os.makedirs("temp", exist_ok=True)
30
+ os.makedirs("translation_memory", exist_ok=True)
31
+
32
  app = FastAPI()
33
 
34
  # 配置CORS
 
40
  allow_headers=["*"],
41
  )
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  # 挂载静态文件目录
44
+ app.mount("/static", StaticFiles(directory="static"), name="static")
45
 
46
  class DocumentSegment(BaseModel):
47
  text: str
 
69
 
70
  class TranslationMemory:
71
  def __init__(self):
72
+ self.memory_file = "translation_memory/memory.json"
73
+ self.load_memory()
74
+
75
+ def load_memory(self):
76
+ if os.path.exists(self.memory_file):
77
+ with open(self.memory_file, 'r', encoding='utf-8') as f:
78
+ self.memory = json.load(f)
79
+ else:
80
+ self.memory = {}
81
+ self.save_memory()
82
 
83
  def save_memory(self):
84
  with open(self.memory_file, 'w', encoding='utf-8') as f:
 
99
 
100
  class TerminologyManager:
101
  def __init__(self):
102
+ self.terminology_file = "translation_memory/terminology.json"
103
+ self.load_terminology()
104
+
105
+ def load_terminology(self):
106
+ if os.path.exists(self.terminology_file):
107
+ with open(self.terminology_file, 'r', encoding='utf-8') as f:
108
+ self.terminology = json.load(f)
109
+ else:
110
+ self.terminology = {}
111
+ self.save_terminology()
112
 
113
  def save_terminology(self):
114
  with open(self.terminology_file, 'w', encoding='utf-8') as f:
 
134
 
135
  if file_ext == 'txt':
136
  text = content.decode('utf-8')
137
+ # 优化分段逻辑,支持更多分隔符
138
  paragraphs = re.split(r'\n\s*\n|\r\n\s*\r\n', text)
139
  for i, para in enumerate(paragraphs):
140
  if para.strip():
 
146
  ))
147
 
148
  elif file_ext == 'pdf':
149
+ temp_path = f"temp/{file.filename}"
150
  with open(temp_path, 'wb') as f:
151
  f.write(content)
152
 
 
172
  os.remove(temp_path)
173
 
174
  elif file_ext == 'docx':
175
+ temp_path = f"temp/{file.filename}"
176
  with open(temp_path, 'wb') as f:
177
  f.write(content)
178
 
 
403
  replacements[placeholder] = target_term
404
  text_to_translate = text_to_translate.replace(source_term, placeholder)
405
 
406
+ # async with aiohttp.ClientSession() as session:
407
+ # try:
408
+ # # DeepL翻译
409
+ # async with session.post(
410
+ # 'https://api.deeplx.org/..../translate',
411
+ # json={
412
+ # "text": text_to_translate,
413
+ # "source_lang": source_lang,
414
+ # "target_lang": target_lang
415
+ # }
416
+ # ) as response:
417
+ # result = await response.json()
418
+ # if result.get('code') == 200:
419
+ # translated_text = result['data']
420
+ # # 恢复术语替换
421
+ # for placeholder, term in replacements.items():
422
+ # translated_text = translated_text.replace(placeholder, term)
423
+
424
+ # # 保存到翻译记忆
425
+ # self.memory.add_translation(text, translated_text, source_lang, target_lang)
426
+
427
+ # return {
428
+ # 'translated': translated_text,
429
+ # 'alternatives': result.get('alternatives', []),
430
+ # 'from_memory': False,
431
+ # 'confidence': 0.8 if replacements else 0.7
432
+ # }
433
+ # else:
434
+ # raise HTTPException(status_code=500, detail="Translation API error")
435
+ # except Exception as e:
436
+ # raise HTTPException(status_code=500, detail=str(e))
437
+
438
+
439
  async with aiohttp.ClientSession() as session:
440
  try:
441
  # 使用环境变量获取Deepl翻译API的URL
 
559
  async def read_root():
560
  return FileResponse('static/index.html')
561
 
 
 
 
 
 
562
  def open_browser():
563
  webbrowser.open('http://localhost:7860')
564