space_22

Sleeping

App Files Community

Frenchizer committed on Feb 25, 2025

Commit

330dfff

verified ·

1 Parent(s): 575f1d9

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -34

app.py CHANGED Viewed

@@ -27,53 +27,31 @@ def preprocess_text(text: str, is_spell_corrected: bool = False):
     """Process text and return corrections with position information."""
     result = {
         "spell_suggestions": [],
-        "other_suggestions": [],  # For NLP-based style/grammar suggestions
         "entities": [],
         "tags": []
     }
-    # Apply capitalization preprocessing
     capitalized_text = preprocess_capitalization(text)
-    if capitalized_text != text:
         result["spell_suggestions"].append({
             "original": text,
             "corrected": capitalized_text
         })
         text = capitalized_text  # Update text for further processing
-    # Transformer spell check, skipped if already spell-corrected
-    if not is_spell_corrected:
-        spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
-        if spell_checked != text:
-            result["spell_suggestions"].append({
-                "original": text,
-                "corrected": spell_checked
-            })
-            text = spell_checked  # Update text after spell correction
-    # Add NLP-based "other" suggestions using spaCy
-    doc = nlp(text)
-    for token in doc:
-        # Example: Suggest adding an article before a noun if missing
-        if token.pos_ == "NOUN" and token.dep_ != "compound" and token.i > 0:
-            prev_token = doc[token.i - 1]
-            if prev_token.pos_ not in ("DET", "PRON") and not prev_token.text.endswith("'s"):
-                suggested = f"{text[:token.idx]}the {text[token.idx:]}"
-                result["other_suggestions"].append({
-                    "original": text,
-                    "corrected": suggested
-                })
-        # Example: Suggest "is" for subject-verb agreement (rudimentary)
-        elif token.pos_ == "NOUN" and token.dep_ == "nsubj" and token.i + 1 < len(doc):
-            next_token = doc[token.i + 1]
-            if next_token.pos_ != "VERB":
-                suggested = f"{text[:next_token.idx]}is {text[next_token.idx:]}"
-                result["other_suggestions"].append({
-                    "original": text,
-                    "corrected": suggested
-                })
     # Add entities and tags
     result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
     result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]

     """Process text and return corrections with position information."""
     result = {
         "spell_suggestions": [],
+        "other_suggestions": [],  # For spell_checker suggestions
         "entities": [],
         "tags": []
     }
+    # Apply capitalization preprocessing (spell suggestions)
     capitalized_text = preprocess_capitalization(text)
+    if capitalized_text != text and not is_spell_corrected:
         result["spell_suggestions"].append({
             "original": text,
             "corrected": capitalized_text
         })
         text = capitalized_text  # Update text for further processing
+    # Transformer spell check (other suggestions)
+    spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
+    if spell_checked != text:
+        result["other_suggestions"].append({
+            "original": text,
+            "corrected": spell_checked
+        })
+        text = spell_checked  # Update text after spell correction
     # Add entities and tags
+    doc = nlp(text)
     result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
     result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]