Arsive2 committed on
Commit
8720cc4
·
1 Parent(s): d0d0352

updated api server

Browse files
Files changed (2) hide show
  1. api_server.py +19 -2
  2. app/models/html_processor.py +23 -0
api_server.py CHANGED
@@ -47,6 +47,7 @@ class TranslationRequest(BaseModel):
47
  text: str
48
  source_lang_code: str
49
  target_lang_code: str
 
50
 
51
  class TranslationResponse(BaseModel):
52
  translated_text: str
@@ -55,6 +56,7 @@ class HTMLTranslationRequest(BaseModel):
55
  html: str
56
  source_lang_code: str
57
  target_lang_code: str
 
58
 
59
  class HTMLTranslationResponse(BaseModel):
60
  translated_html: str
@@ -93,6 +95,9 @@ async def translate_text(request: TranslationRequest):
93
  try:
94
  logger.info(f"Translating from {request.source_lang_code} to {request.target_lang_code}")
95
 
 
 
 
96
  chunks = text_chunker.create_chunks(request.text)
97
  translated_chunks = []
98
 
@@ -125,6 +130,14 @@ async def translate_html(request: HTMLTranslationRequest):
125
  if not text_fragments:
126
  return {"translated_html": request.html} # No text to translate
127
 
 
 
 
 
 
 
 
 
128
  translated_fragments = []
129
 
130
  batch_size = 10
@@ -155,6 +168,7 @@ async def process_document(
155
  file: UploadFile = File(...),
156
  source_lang_code: str = Form(...),
157
  target_lang_code: str = Form(...),
 
158
  use_ocr: bool = Form(False)
159
  ):
160
  """Process and translate document (PDF or image)"""
@@ -175,6 +189,10 @@ async def process_document(
175
  status_code=400,
176
  detail="No text could be extracted from the document"
177
  )
 
 
 
 
178
 
179
  translated_text = model.translate(
180
  extracted_text,
@@ -191,5 +209,4 @@ async def process_document(
191
  raise HTTPException(status_code=500, detail=str(e))
192
 
193
  if __name__ == "__main__":
194
- uvicorn.run("api_server:app", host="0.0.0.0", port=7860, reload=True)
195
-
 
47
  text: str
48
  source_lang_code: str
49
  target_lang_code: str
50
+ special_token: str = ""
51
 
52
  class TranslationResponse(BaseModel):
53
  translated_text: str
 
56
  html: str
57
  source_lang_code: str
58
  target_lang_code: str
59
+ special_token: str = ""
60
 
61
  class HTMLTranslationResponse(BaseModel):
62
  translated_html: str
 
95
  try:
96
  logger.info(f"Translating from {request.source_lang_code} to {request.target_lang_code}")
97
 
98
+ if request.special_token:
99
+ logger.info(f"Using special language token: {request.special_token}")
100
+
101
  chunks = text_chunker.create_chunks(request.text)
102
  translated_chunks = []
103
 
 
130
  if not text_fragments:
131
  return {"translated_html": request.html} # No text to translate
132
 
133
+ # Apply special token to each text fragment if needed
134
+ if request.special_token:
135
+ logger.info(f"Using special language token for HTML: {request.special_token}")
136
+ text_fragments = html_processor.prepare_fragments_with_token(
137
+ text_fragments,
138
+ request.special_token
139
+ )
140
+
141
  translated_fragments = []
142
 
143
  batch_size = 10
 
168
  file: UploadFile = File(...),
169
  source_lang_code: str = Form(...),
170
  target_lang_code: str = Form(...),
171
+ special_token: str = Form(""),
172
  use_ocr: bool = Form(False)
173
  ):
174
  """Process and translate document (PDF or image)"""
 
189
  status_code=400,
190
  detail="No text could be extracted from the document"
191
  )
192
+
193
+ if special_token:
194
+ logger.info(f"Using special language token for document: {special_token}")
195
+ extracted_text = f"{special_token}{extracted_text}"
196
 
197
  translated_text = model.translate(
198
  extracted_text,
 
209
  raise HTTPException(status_code=500, detail=str(e))
210
 
211
  if __name__ == "__main__":
212
+ uvicorn.run("api_server:app", host="0.0.0.0", port=7860, reload=True)
 
app/models/html_processor.py CHANGED
@@ -100,3 +100,26 @@ class HTMLProcessor:
100
  except Exception as e:
101
  logger.error(f"Error replacing text in HTML: {str(e)}")
102
  return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  except Exception as e:
101
  logger.error(f"Error replacing text in HTML: {str(e)}")
102
  return ""
103
+
104
def prepare_fragments_with_token(self, fragments: List[str], special_token: str) -> List[str]:
    """Prefix each non-blank text fragment with a special language token.

    Args:
        fragments: Text fragments extracted from an HTML document.
        special_token: Target-language token to prepend (e.g. '>>tam<<').
            An empty token means no tagging is required.

    Returns:
        A new list where every fragment containing non-whitespace text is
        prefixed with the token; blank/whitespace-only fragments are kept
        unchanged so document spacing survives translation.
    """
    # Nothing to tag — hand back the input untouched.
    if not special_token:
        return fragments

    # Tag only fragments that hold visible text; whitespace-only ones
    # pass through as-is.
    return [
        f"{special_token}{fragment}" if fragment.strip() else fragment
        for fragment in fragments
    ]