SCBconsulting committed on
Commit
8d0f5c4
·
verified ·
1 Parent(s): 58d7808

Update utils/translator.py

Browse files
Files changed (1) hide show
  1. utils/translator.py +5 -3
utils/translator.py CHANGED
@@ -17,7 +17,7 @@ except OSError:
17
  spacy.cli.download("en_core_web_sm")
18
  nlp = spacy.load("en_core_web_sm")
19
 
20
- def split_into_chunks(text, max_chunk_len=500):
21
  """
22
  Split input into sentence-aware chunks using spaCy.
23
  """
@@ -39,14 +39,16 @@ def split_into_chunks(text, max_chunk_len=500):
39
 
40
  def translate_text(text):
41
  """
42
- Translate contract text from English to Brazilian Portuguese using Hugging Face model.
43
- Sentence-level chunking and timing included.
44
  """
45
  import time
46
 
47
  if not text.strip():
48
  return "No input to translate."
49
 
 
 
50
  chunks = split_into_chunks(text)
51
  translated_chunks = []
52
 
 
17
  spacy.cli.download("en_core_web_sm")
18
  nlp = spacy.load("en_core_web_sm")
19
 
20
+ def split_into_chunks(text, max_chunk_len=300):
21
  """
22
  Split input into sentence-aware chunks using spaCy.
23
  """
 
39
 
40
  def translate_text(text):
41
  """
42
+ Translate contract from English to Brazilian Portuguese using Hugging Face model.
43
+ Includes chunking, cleanup, and progress timing.
44
  """
45
  import time
46
 
47
  if not text.strip():
48
  return "No input to translate."
49
 
50
+ # Clean input
51
+ text = text.replace("\n", " ").replace(" ", " ").strip()
52
  chunks = split_into_chunks(text)
53
  translated_chunks = []
54