Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -39,34 +39,31 @@ def preprocess_text(text: str):
|
|
| 39 |
if capitalized_text != text:
|
| 40 |
result["spell_suggestions"].append({
|
| 41 |
"original": text,
|
| 42 |
-
"corrected": capitalized_text
|
| 43 |
-
"source": "capitalization" # Track the source of the correction
|
| 44 |
})
|
| 45 |
text = capitalized_text # Update text for further processing
|
| 46 |
|
| 47 |
-
#
|
|
|
|
|
|
|
|
|
|
| 48 |
blob = TextBlob(text)
|
| 49 |
corrected = str(blob.correct())
|
| 50 |
if corrected != text:
|
| 51 |
-
print(f"TextBlob suggestion: {text} → {corrected}") # Debugging
|
| 52 |
result["spell_suggestions"].append({
|
| 53 |
"original": text,
|
| 54 |
-
"corrected": corrected
|
| 55 |
-
"source": "TextBlob" # Track the source of the correction
|
| 56 |
})
|
| 57 |
-
|
| 58 |
# Transformer spell check
|
| 59 |
spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
|
| 60 |
if spell_checked != text and spell_checked != corrected:
|
| 61 |
-
print(f"Transformer suggestion: {text} → {spell_checked}") # Debugging
|
| 62 |
result["spell_suggestions"].append({
|
| 63 |
"original": text,
|
| 64 |
-
"corrected": spell_checked
|
| 65 |
-
"source": "Transformer" # Track the source of the correction
|
| 66 |
})
|
| 67 |
|
| 68 |
# Add entities and tags
|
| 69 |
-
doc = nlp(text)
|
| 70 |
result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
|
| 71 |
result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]
|
| 72 |
|
|
|
|
| 39 |
if capitalized_text != text:
|
| 40 |
result["spell_suggestions"].append({
|
| 41 |
"original": text,
|
| 42 |
+
"corrected": capitalized_text
|
|
|
|
| 43 |
})
|
| 44 |
text = capitalized_text # Update text for further processing
|
| 45 |
|
| 46 |
+
# Find and record positions of corrections
|
| 47 |
+
doc = nlp(text)
|
| 48 |
+
|
| 49 |
+
# TextBlob spell check with position tracking
|
| 50 |
blob = TextBlob(text)
|
| 51 |
corrected = str(blob.correct())
|
| 52 |
if corrected != text:
|
|
|
|
| 53 |
result["spell_suggestions"].append({
|
| 54 |
"original": text,
|
| 55 |
+
"corrected": corrected
|
|
|
|
| 56 |
})
|
| 57 |
+
|
| 58 |
# Transformer spell check
|
| 59 |
spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
|
| 60 |
if spell_checked != text and spell_checked != corrected:
|
|
|
|
| 61 |
result["spell_suggestions"].append({
|
| 62 |
"original": text,
|
| 63 |
+
"corrected": spell_checked
|
|
|
|
| 64 |
})
|
| 65 |
|
| 66 |
# Add entities and tags
|
|
|
|
| 67 |
result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
|
| 68 |
result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]
|
| 69 |
|