angre369 committed on
Commit
79ea48e
·
1 Parent(s): bf0364c

fix: handle long text translation by chunking

Browse files
Files changed (1) hide show
  1. app.py +29 -2
app.py CHANGED
@@ -57,8 +57,35 @@ async def translate_text(request: TranslationRequest):
57
  if not state["model_loaded"]:
58
  return JSONResponse(content={"message": "Model is not loaded yet"}, status_code=503)
59
 
60
- result = state["translator"](request.text)
61
- return {"translated_text": result[0]["translation_text"]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  @app.get("/health")
64
  async def health_check():
 
57
  if not state["model_loaded"]:
58
  return JSONResponse(content={"message": "Model is not loaded yet"}, status_code=503)
59
 
60
+ # Split the text into chunks
61
+ text_chunks = split_text(request.text)
62
+
63
+ # Translate each chunk
64
+ translated_chunks = []
65
+ for chunk in text_chunks:
66
+ # The translator returns a list of dictionaries
67
+ translated_chunk = state["translator"](chunk, max_length=512)
68
+ translated_chunks.append(translated_chunk[0]['translation_text'])
69
+
70
+ # Join the translated chunks
71
+ translated_text = "".join(translated_chunks)
72
+
73
+ return {"translated_text": translated_text}
74
+
75
def split_text(text: str, max_length: int = 512):
    """Split *text* into non-empty chunks of at most *max_length* characters.

    Each chunk is cut at the last space found inside the current
    ``max_length``-character window so that words are kept whole; when the
    window contains no space, the text is hard-split at ``max_length``.
    Whitespace at the start of the remaining text is dropped after every cut.

    Empty chunks are never returned: an empty input yields ``[]``, and a
    remainder that is empty after stripping is not appended.

    Args:
        text: The text to split.
        max_length: Maximum number of characters per chunk; must be positive.

    Returns:
        A list of chunk strings, in their original order.

    Raises:
        ValueError: If ``max_length`` is not positive (the splitting loop
            could otherwise never make progress).
    """
    if max_length <= 0:
        raise ValueError("max_length must be a positive integer")

    text_chunks = []
    while len(text) > max_length:
        # Find the last space inside the window to avoid splitting words.
        split_at = text.rfind(' ', 0, max_length)
        if split_at == -1:
            # No space found, split at max_length.
            split_at = max_length
        chunk = text[:split_at]
        # split_at is 0 when the text starts with a space; skip the empty
        # piece. The lstrip() below still consumes that space, so the loop
        # always shrinks the text.
        if chunk:
            text_chunks.append(chunk)
        text = text[split_at:].lstrip()
    # The remainder is empty for empty input or when only whitespace was left.
    if text:
        text_chunks.append(text)
    return text_chunks
89
 
90
  @app.get("/health")
91
  async def health_check():