Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -26,57 +26,61 @@ def preprocess_capitalization(text: str) -> str:
|
|
| 26 |
|
| 27 |
return " ".join(processed_words)
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
def preprocess_text(text: str):
|
| 30 |
"""Process text and return corrections with position information."""
|
| 31 |
result = {
|
| 32 |
-
"
|
| 33 |
-
"
|
| 34 |
-
"
|
| 35 |
}
|
| 36 |
|
| 37 |
# Apply capitalization preprocessing
|
| 38 |
capitalized_text = preprocess_capitalization(text)
|
| 39 |
if capitalized_text != text:
|
| 40 |
-
result["
|
| 41 |
-
|
| 42 |
-
"corrected": capitalized_text
|
| 43 |
-
})
|
| 44 |
text = capitalized_text # Update text for further processing
|
| 45 |
|
| 46 |
-
#
|
| 47 |
-
doc = nlp(text)
|
| 48 |
-
|
| 49 |
-
# TextBlob spell check with position tracking
|
| 50 |
blob = TextBlob(text)
|
| 51 |
corrected = str(blob.correct())
|
| 52 |
if corrected != text:
|
| 53 |
-
result["
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
# Transformer spell check
|
| 59 |
spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
|
| 60 |
if spell_checked != text and spell_checked != corrected:
|
| 61 |
-
result["
|
| 62 |
-
|
| 63 |
-
"corrected": spell_checked
|
| 64 |
-
})
|
| 65 |
|
| 66 |
-
# Add entities
|
|
|
|
| 67 |
result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
|
| 68 |
-
result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]
|
| 69 |
|
| 70 |
-
return
|
| 71 |
|
| 72 |
def preprocess_and_forward(text: str):
|
| 73 |
"""Process text and forward to translation service."""
|
| 74 |
-
|
| 75 |
|
| 76 |
-
# Forward
|
| 77 |
client = Client("Frenchizer/space_17")
|
| 78 |
try:
|
| 79 |
-
translation = client.predict(
|
| 80 |
return translation, preprocessing_result
|
| 81 |
except Exception as e:
|
| 82 |
return f"Error: {str(e)}", preprocessing_result
|
|
|
|
| 26 |
|
| 27 |
return " ".join(processed_words)
|
| 28 |
|
| 29 |
+
def find_differences(original: str, corrected: str):
    """Return the character positions at which two strings differ.

    The strings are compared index by index; no alignment is attempted, so
    an insertion or deletion in the middle of the text causes the characters
    after it to be reported as differing as well.

    Args:
        original: Text before correction.
        corrected: Text after correction.

    Returns:
        A list of dicts, one per differing index, with the keys
        ``"position"`` (0-based index), ``"original"`` and ``"corrected"``
        (the character found on each side). When one string is longer than
        the other, the surplus characters are reported with ``""`` standing
        in for the missing side.
    """
    # Imported locally so this block does not rely on the file's import list.
    from itertools import zip_longest

    # zip() would stop at the shorter string and silently drop any characters
    # appended or removed at the end; zip_longest keeps them visible.
    return [
        {"position": index, "original": orig_char, "corrected": corr_char}
        for index, (orig_char, corr_char) in enumerate(
            zip_longest(original, corrected, fillvalue="")
        )
        if orig_char != corr_char
    ]
|
| 40 |
+
|
| 41 |
def preprocess_text(text: str):
    """Run the correction pipeline on *text* and report what changed.

    The stages run in order, each one consuming the previous stage's output:
    ``preprocess_capitalization``, ``TextBlob(...).correct()``, then the
    ``spell_checker`` pipeline. Entities are extracted with ``nlp`` from the
    final text, so they describe the same string that is returned as
    ``"corrected_text"``.

    Args:
        text: The raw input text.

    Returns:
        A dict with the keys:
        ``"corrected_text"`` - the output of the last stage (equal to the
        input when no stage changed anything, never left empty for a
        non-empty input that needed no correction);
        ``"differences"`` - ``find_differences`` between the input as given
        and ``"corrected_text"``, so positions refer to the caller's text;
        ``"entities"`` - a list of ``{"text", "label"}`` dicts built from
        ``doc.ents``.
    """
    original_text = text

    # Apply capitalization preprocessing
    text = preprocess_capitalization(text)

    # TextBlob spell check
    text = str(TextBlob(text).correct())

    # Transformer spell check; its output becomes the working text so the
    # entities below are extracted from what is actually returned.
    text = spell_checker(text, max_length=512)[0]['generated_text']

    # Add entities
    doc = nlp(text)

    return {
        "corrected_text": text,
        # Diff once against the caller's input instead of overwriting the
        # list after every stage, which discarded earlier corrections.
        "differences": find_differences(original_text, text),
        "entities": [{"text": ent.text, "label": ent.label_} for ent in doc.ents],
    }
|
| 75 |
|
| 76 |
def preprocess_and_forward(text: str):
    """Process text and forward it to the translation service.

    Args:
        text: The raw input text.

    Returns:
        A ``(translation, preprocessing_result)`` tuple, where
        ``preprocessing_result`` is the dict returned by ``preprocess_text``.
        If creating the client or calling the service raises, the first
        element is the string ``"Error: <message>"`` instead.
    """
    preprocessing_result = preprocess_text(text)

    # "corrected_text" may be empty when no correction stage changed the
    # input; fall back to the text as given so the translator is not handed
    # an empty string for input that simply needed no fixes.
    text_to_translate = preprocessing_result["corrected_text"] or text

    try:
        # Forward corrected text to translation service. The client is built
        # inside the try so a failure to connect is reported through the
        # same error tuple as a failed prediction.
        # NOTE(review): Client is presumably gradio_client.Client - confirm
        # against the file's imports.
        client = Client("Frenchizer/space_17")
        translation = client.predict(text_to_translate)
    except Exception as e:
        return f"Error: {str(e)}", preprocessing_result
    return translation, preprocessing_result
|