Updated API server
Browse files- api_server.py +19 -2
- app/models/html_processor.py +23 -0
api_server.py
CHANGED
|
@@ -47,6 +47,7 @@ class TranslationRequest(BaseModel):
|
|
| 47 |
text: str
|
| 48 |
source_lang_code: str
|
| 49 |
target_lang_code: str
|
|
|
|
| 50 |
|
| 51 |
class TranslationResponse(BaseModel):
|
| 52 |
translated_text: str
|
|
@@ -55,6 +56,7 @@ class HTMLTranslationRequest(BaseModel):
|
|
| 55 |
html: str
|
| 56 |
source_lang_code: str
|
| 57 |
target_lang_code: str
|
|
|
|
| 58 |
|
| 59 |
class HTMLTranslationResponse(BaseModel):
|
| 60 |
translated_html: str
|
|
@@ -93,6 +95,9 @@ async def translate_text(request: TranslationRequest):
|
|
| 93 |
try:
|
| 94 |
logger.info(f"Translating from {request.source_lang_code} to {request.target_lang_code}")
|
| 95 |
|
|
|
|
|
|
|
|
|
|
| 96 |
chunks = text_chunker.create_chunks(request.text)
|
| 97 |
translated_chunks = []
|
| 98 |
|
|
@@ -125,6 +130,14 @@ async def translate_html(request: HTMLTranslationRequest):
|
|
| 125 |
if not text_fragments:
|
| 126 |
return {"translated_html": request.html} # No text to translate
|
| 127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
translated_fragments = []
|
| 129 |
|
| 130 |
batch_size = 10
|
|
@@ -155,6 +168,7 @@ async def process_document(
|
|
| 155 |
file: UploadFile = File(...),
|
| 156 |
source_lang_code: str = Form(...),
|
| 157 |
target_lang_code: str = Form(...),
|
|
|
|
| 158 |
use_ocr: bool = Form(False)
|
| 159 |
):
|
| 160 |
"""Process and translate document (PDF or image)"""
|
|
@@ -175,6 +189,10 @@ async def process_document(
|
|
| 175 |
status_code=400,
|
| 176 |
detail="No text could be extracted from the document"
|
| 177 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
|
| 179 |
translated_text = model.translate(
|
| 180 |
extracted_text,
|
|
@@ -191,5 +209,4 @@ async def process_document(
|
|
| 191 |
raise HTTPException(status_code=500, detail=str(e))
|
| 192 |
|
| 193 |
if __name__ == "__main__":
|
| 194 |
-
uvicorn.run("api_server:app", host="0.0.0.0", port=7860, reload=True)
|
| 195 |
-
|
|
|
|
| 47 |
text: str
|
| 48 |
source_lang_code: str
|
| 49 |
target_lang_code: str
|
| 50 |
+
special_token: str = ""
|
| 51 |
|
class TranslationResponse(BaseModel):
    """Response payload carrying the translated plain text."""

    # Full translated text, chunks re-joined server-side.
    translated_text: str
|
|
|
|
| 56 |
html: str
|
| 57 |
source_lang_code: str
|
| 58 |
target_lang_code: str
|
| 59 |
+
special_token: str = ""
|
| 60 |
|
class HTMLTranslationResponse(BaseModel):
    """Response payload carrying the translated HTML document."""

    # Original markup with text fragments replaced by their translations.
    translated_html: str
|
|
|
|
| 95 |
try:
|
| 96 |
logger.info(f"Translating from {request.source_lang_code} to {request.target_lang_code}")
|
| 97 |
|
| 98 |
+
if request.special_token:
|
| 99 |
+
logger.info(f"Using special language token: {request.special_token}")
|
| 100 |
+
|
| 101 |
chunks = text_chunker.create_chunks(request.text)
|
| 102 |
translated_chunks = []
|
| 103 |
|
|
|
|
| 130 |
if not text_fragments:
|
| 131 |
return {"translated_html": request.html} # No text to translate
|
| 132 |
|
| 133 |
+
# Apply special token to each text fragment if needed
|
| 134 |
+
if request.special_token:
|
| 135 |
+
logger.info(f"Using special language token for HTML: {request.special_token}")
|
| 136 |
+
text_fragments = html_processor.prepare_fragments_with_token(
|
| 137 |
+
text_fragments,
|
| 138 |
+
request.special_token
|
| 139 |
+
)
|
| 140 |
+
|
| 141 |
translated_fragments = []
|
| 142 |
|
| 143 |
batch_size = 10
|
|
|
|
| 168 |
file: UploadFile = File(...),
|
| 169 |
source_lang_code: str = Form(...),
|
| 170 |
target_lang_code: str = Form(...),
|
| 171 |
+
special_token: str = Form(""),
|
| 172 |
use_ocr: bool = Form(False)
|
| 173 |
):
|
| 174 |
"""Process and translate document (PDF or image)"""
|
|
|
|
| 189 |
status_code=400,
|
| 190 |
detail="No text could be extracted from the document"
|
| 191 |
)
|
| 192 |
+
|
| 193 |
+
if special_token:
|
| 194 |
+
logger.info(f"Using special language token for document: {special_token}")
|
| 195 |
+
extracted_text = f"{special_token}{extracted_text}"
|
| 196 |
|
| 197 |
translated_text = model.translate(
|
| 198 |
extracted_text,
|
|
|
|
| 209 |
raise HTTPException(status_code=500, detail=str(e))
|
| 210 |
|
| 211 |
if __name__ == "__main__":
|
| 212 |
+
uvicorn.run("api_server:app", host="0.0.0.0", port=7860, reload=True)
|
|
|
app/models/html_processor.py
CHANGED
|
@@ -100,3 +100,26 @@ class HTMLProcessor:
|
|
| 100 |
except Exception as e:
|
| 101 |
logger.error(f"Error replacing text in HTML: {str(e)}")
|
| 102 |
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
except Exception as e:
|
| 101 |
logger.error(f"Error replacing text in HTML: {str(e)}")
|
| 102 |
return ""
|
| 103 |
+
|
| 104 |
+
def prepare_fragments_with_token(self, fragments: List[str], special_token: str) -> List[str]:
    """Prefix each non-blank text fragment with a special language token.

    Args:
        fragments: Text fragments extracted from the HTML document.
        special_token: Special language token to prepend (e.g. '>>tam<<').
            An empty token is a no-op.

    Returns:
        A new list with the token prepended to every fragment containing
        non-whitespace text; whitespace-only fragments pass through
        unchanged so purely structural gaps in the markup keep their
        original content.
    """
    if not special_token:
        # Nothing to prepend — return the input unchanged.
        return fragments
    # Comprehension replaces the manual append loop (same behavior,
    # idiomatic form); blank fragments are deliberately left untouched.
    return [
        f"{special_token}{fragment}" if fragment.strip() else fragment
        for fragment in fragments
    ]