Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,7 +3,6 @@ from transformers import pipeline
|
|
| 3 |
import spacy
|
| 4 |
from gradio_client import Client
|
| 5 |
import re
|
| 6 |
-
import httpx
|
| 7 |
|
| 8 |
# Initialize models
|
| 9 |
nlp = spacy.load("en_core_web_sm")
|
|
@@ -15,50 +14,52 @@ def preprocess_capitalization(text: str) -> str:
|
|
| 15 |
processed_words = []
|
| 16 |
|
| 17 |
for word in words:
|
|
|
|
| 18 |
if re.match(r"^[A-Z]+$", word):
|
| 19 |
processed_words.append(word) # Leave acronyms unchanged
|
|
|
|
| 20 |
elif re.search(r"[A-Z]", word) and re.search(r"[a-z]", word):
|
| 21 |
processed_words.append(word[0].upper() + word[1:].lower()) # Correct capitalization
|
| 22 |
else:
|
| 23 |
-
processed_words.append(word)
|
| 24 |
|
| 25 |
return " ".join(processed_words)
|
| 26 |
|
| 27 |
def preprocess_text(text: str):
|
| 28 |
"""Process text and return corrections with position information."""
|
| 29 |
result = {
|
| 30 |
-
"
|
|
|
|
|
|
|
|
|
|
| 31 |
}
|
| 32 |
|
| 33 |
-
#
|
| 34 |
capitalized_text = preprocess_capitalization(text)
|
| 35 |
if capitalized_text != text:
|
| 36 |
-
result["
|
| 37 |
"original": text,
|
| 38 |
"corrected": capitalized_text,
|
| 39 |
"type": "spell"
|
| 40 |
})
|
| 41 |
text = capitalized_text # Update text for further processing
|
| 42 |
|
| 43 |
-
#
|
| 44 |
-
spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
# If the difference is only punctuation, skip adding it here
|
| 48 |
-
pass
|
| 49 |
-
elif spell_checked != text:
|
| 50 |
-
result["suggestions"].append({
|
| 51 |
"original": text,
|
| 52 |
"corrected": spell_checked,
|
| 53 |
"type": "spell"
|
| 54 |
})
|
| 55 |
-
text = spell_checked # Update text only for true spell corrections
|
| 56 |
|
| 57 |
-
#
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
| 62 |
"type": "other"
|
| 63 |
})
|
| 64 |
|
|
@@ -70,15 +71,14 @@ def preprocess_text(text: str):
|
|
| 70 |
return text, result
|
| 71 |
|
| 72 |
def preprocess_and_forward(text: str):
|
| 73 |
-
"""Process text and forward to translation service
|
| 74 |
original_text, preprocessing_result = preprocess_text(text)
|
| 75 |
|
|
|
|
|
|
|
| 76 |
try:
|
| 77 |
-
client = Client("Frenchizer/space_21", httpx_timeout=httpx.Timeout(60.0))
|
| 78 |
translation = client.predict(original_text)
|
| 79 |
return translation, preprocessing_result
|
| 80 |
-
except httpx.ReadTimeout:
|
| 81 |
-
return "Error: Translation service timed out after 60 seconds. Please try again later.", preprocessing_result
|
| 82 |
except Exception as e:
|
| 83 |
return f"Error: {str(e)}", preprocessing_result
|
| 84 |
|
|
|
|
| 3 |
import spacy
|
| 4 |
from gradio_client import Client
|
| 5 |
import re
|
|
|
|
| 6 |
|
| 7 |
# Initialize models
|
| 8 |
nlp = spacy.load("en_core_web_sm")
|
|
|
|
| 14 |
processed_words = []
|
| 15 |
|
| 16 |
for word in words:
|
| 17 |
+
# Check if the word is an acronym (all uppercase letters)
|
| 18 |
if re.match(r"^[A-Z]+$", word):
|
| 19 |
processed_words.append(word) # Leave acronyms unchanged
|
| 20 |
+
# Check if the word has mixed capitalization (e.g., "HEllo")
|
| 21 |
elif re.search(r"[A-Z]", word) and re.search(r"[a-z]", word):
|
| 22 |
processed_words.append(word[0].upper() + word[1:].lower()) # Correct capitalization
|
| 23 |
else:
|
| 24 |
+
processed_words.append(word) # Leave other words unchanged
|
| 25 |
|
| 26 |
return " ".join(processed_words)
|
| 27 |
|
| 28 |
def preprocess_text(text: str):
|
| 29 |
"""Process text and return corrections with position information."""
|
| 30 |
result = {
|
| 31 |
+
"spell_suggestions": [],
|
| 32 |
+
"other_suggestions": [],
|
| 33 |
+
"entities": [],
|
| 34 |
+
"tags": []
|
| 35 |
}
|
| 36 |
|
| 37 |
+
# Apply capitalization preprocessing
|
| 38 |
capitalized_text = preprocess_capitalization(text)
|
| 39 |
if capitalized_text != text:
|
| 40 |
+
result["spell_suggestions"].append({
|
| 41 |
"original": text,
|
| 42 |
"corrected": capitalized_text,
|
| 43 |
"type": "spell"
|
| 44 |
})
|
| 45 |
text = capitalized_text # Update text for further processing
|
| 46 |
|
| 47 |
+
# Transformer spell check
|
| 48 |
+
spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
|
| 49 |
+
if spell_checked != text:
|
| 50 |
+
result["spell_suggestions"].append({
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
"original": text,
|
| 52 |
"corrected": spell_checked,
|
| 53 |
"type": "spell"
|
| 54 |
})
|
|
|
|
| 55 |
|
| 56 |
+
# Add example other suggestions (you would replace this with your actual logic)
|
| 57 |
+
for word in text.split():
|
| 58 |
+
if word.endswith('.') or word.endswith('?') or word.endswith('!'):
|
| 59 |
+
continue
|
| 60 |
+
result["other_suggestions"].append({
|
| 61 |
+
"original": word,
|
| 62 |
+
"corrected": word + "!",
|
| 63 |
"type": "other"
|
| 64 |
})
|
| 65 |
|
|
|
|
| 71 |
return text, result
|
| 72 |
|
| 73 |
def preprocess_and_forward(text: str):
    """Preprocess text and forward it to the translation service.

    Args:
        text: The raw user input.

    Returns:
        A ``(translation, preprocessing_result)`` tuple. ``translation`` is
        whatever the remote Space's ``predict`` call returns; if connecting
        to or calling the Space fails, it is an ``"Error: ..."`` string
        instead. ``preprocessing_result`` is the suggestions dict produced
        by ``preprocess_text`` and is returned in both cases.
    """
    # preprocess_text returns the text it ended up with, which can differ
    # from the input (e.g. after capitalization fixes), plus its suggestions.
    processed_text, preprocessing_result = preprocess_text(text)

    try:
        # Build the client inside the try block: creating it contacts the
        # Space, so a sleeping or unreachable Space must be reported through
        # the same "Error: ..." return path as a failed prediction rather
        # than escaping as an unhandled exception.
        client = Client("Frenchizer/space_21")
        translation = client.predict(processed_text)
    except Exception as e:
        # Top-level boundary for the UI: surface the failure as text and
        # still hand back the preprocessing suggestions.
        return f"Error: {e}", preprocessing_result
    return translation, preprocessing_result
|
| 84 |
|