Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -27,7 +27,7 @@ def preprocess_text(text: str, is_spell_corrected: bool = False):
|
|
| 27 |
"""Process text and return corrections with position information."""
|
| 28 |
result = {
|
| 29 |
"spell_suggestions": [],
|
| 30 |
-
"other_suggestions": [], #
|
| 31 |
"entities": [],
|
| 32 |
"tags": []
|
| 33 |
}
|
|
@@ -51,17 +51,29 @@ def preprocess_text(text: str, is_spell_corrected: bool = False):
|
|
| 51 |
})
|
| 52 |
text = spell_checked # Update text after spell correction
|
| 53 |
|
| 54 |
-
# Add
|
| 55 |
-
|
| 56 |
-
for
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
# Add entities and tags
|
| 64 |
-
doc = nlp(text)
|
| 65 |
result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
|
| 66 |
result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]
|
| 67 |
|
|
|
|
| 27 |
"""Process text and return corrections with position information."""
|
| 28 |
result = {
|
| 29 |
"spell_suggestions": [],
|
| 30 |
+
"other_suggestions": [], # For NLP-based style/grammar suggestions
|
| 31 |
"entities": [],
|
| 32 |
"tags": []
|
| 33 |
}
|
|
|
|
| 51 |
})
|
| 52 |
text = spell_checked # Update text after spell correction
|
| 53 |
|
| 54 |
+
# Add NLP-based "other" suggestions using spaCy
|
| 55 |
+
doc = nlp(text)
|
| 56 |
+
for token in doc:
|
| 57 |
+
# Example: Suggest adding an article before a noun if missing
|
| 58 |
+
if token.pos_ == "NOUN" and token.dep_ != "compound" and token.i > 0:
|
| 59 |
+
prev_token = doc[token.i - 1]
|
| 60 |
+
if prev_token.pos_ not in ("DET", "PRON") and not prev_token.text.endswith("'s"):
|
| 61 |
+
suggested = f"{text[:token.idx]}the {text[token.idx:]}"
|
| 62 |
+
result["other_suggestions"].append({
|
| 63 |
+
"original": text,
|
| 64 |
+
"corrected": suggested
|
| 65 |
+
})
|
| 66 |
+
# Example: Suggest "is" for subject-verb agreement (rudimentary)
|
| 67 |
+
elif token.pos_ == "NOUN" and token.dep_ == "nsubj" and token.i + 1 < len(doc):
|
| 68 |
+
next_token = doc[token.i + 1]
|
| 69 |
+
if next_token.pos_ != "VERB":
|
| 70 |
+
suggested = f"{text[:next_token.idx]}is {text[next_token.idx:]}"
|
| 71 |
+
result["other_suggestions"].append({
|
| 72 |
+
"original": text,
|
| 73 |
+
"corrected": suggested
|
| 74 |
+
})
|
| 75 |
|
| 76 |
# Add entities and tags
|
|
|
|
| 77 |
result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
|
| 78 |
result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]
|
| 79 |
|