Spaces:
Sleeping
Sleeping
Update utils/translator.py
Browse files- utils/translator.py +5 -3
utils/translator.py
CHANGED
|
@@ -17,7 +17,7 @@ except OSError:
|
|
| 17 |
spacy.cli.download("en_core_web_sm")
|
| 18 |
nlp = spacy.load("en_core_web_sm")
|
| 19 |
|
| 20 |
-
def split_into_chunks(text, max_chunk_len=500):
|
| 21 |
"""
|
| 22 |
Split input into sentence-aware chunks using spaCy.
|
| 23 |
"""
|
|
@@ -39,14 +39,16 @@ def split_into_chunks(text, max_chunk_len=500):
|
|
| 39 |
|
| 40 |
def translate_text(text):
|
| 41 |
"""
|
| 42 |
-
Translate contract
|
| 43 |
-
|
| 44 |
"""
|
| 45 |
import time
|
| 46 |
|
| 47 |
if not text.strip():
|
| 48 |
return "No input to translate."
|
| 49 |
|
|
|
|
|
|
|
| 50 |
chunks = split_into_chunks(text)
|
| 51 |
translated_chunks = []
|
| 52 |
|
|
|
|
| 17 |
spacy.cli.download("en_core_web_sm")
|
| 18 |
nlp = spacy.load("en_core_web_sm")
|
| 19 |
|
| 20 |
+
def split_into_chunks(text, max_chunk_len=300):
|
| 21 |
"""
|
| 22 |
Split input into sentence-aware chunks using spaCy.
|
| 23 |
"""
|
|
|
|
| 39 |
|
| 40 |
def translate_text(text):
|
| 41 |
"""
|
| 42 |
+
Translate contract from English to Brazilian Portuguese using Hugging Face model.
|
| 43 |
+
Includes chunking, cleanup, and progress timing.
|
| 44 |
"""
|
| 45 |
import time
|
| 46 |
|
| 47 |
if not text.strip():
|
| 48 |
return "No input to translate."
|
| 49 |
|
| 50 |
+
# Clean input
|
| 51 |
+
text = text.replace("\n", " ").replace("  ", " ").strip()
|
| 52 |
chunks = split_into_chunks(text)
|
| 53 |
translated_chunks = []
|
| 54 |
|