Frenchizer committed on
Commit
575f1d9
·
verified ·
1 Parent(s): 708dbc3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -10
app.py CHANGED
@@ -27,7 +27,7 @@ def preprocess_text(text: str, is_spell_corrected: bool = False):
27
  """Process text and return corrections with position information."""
28
  result = {
29
  "spell_suggestions": [],
30
- "other_suggestions": [], # Added to distinguish style suggestions
31
  "entities": [],
32
  "tags": []
33
  }
@@ -51,17 +51,29 @@ def preprocess_text(text: str, is_spell_corrected: bool = False):
51
  })
52
  text = spell_checked # Update text after spell correction
53
 
54
- # Add style suggestions (other_suggestions)
55
- words = text.split()
56
- for word in words:
57
- if not word.endswith(('.', '?', '!')): # Skip if already punctuated
58
- result["other_suggestions"].append({
59
- "original": word,
60
- "corrected": word + "!"
61
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  # Add entities and tags
64
- doc = nlp(text)
65
  result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
66
  result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]
67
 
 
27
  """Process text and return corrections with position information."""
28
  result = {
29
  "spell_suggestions": [],
30
+ "other_suggestions": [], # For NLP-based style/grammar suggestions
31
  "entities": [],
32
  "tags": []
33
  }
 
51
  })
52
  text = spell_checked # Update text after spell correction
53
 
54
+ # Add NLP-based "other" suggestions using spaCy
55
+ doc = nlp(text)
56
+ for token in doc:
57
+ # Example: Suggest adding an article before a noun if missing
58
+ if token.pos_ == "NOUN" and token.dep_ != "compound" and token.i > 0:
59
+ prev_token = doc[token.i - 1]
60
+ if prev_token.pos_ not in ("DET", "PRON") and not prev_token.text.endswith("'s"):
61
+ suggested = f"{text[:token.idx]}the {text[token.idx:]}"
62
+ result["other_suggestions"].append({
63
+ "original": text,
64
+ "corrected": suggested
65
+ })
66
+ # Example: Suggest "is" for subject-verb agreement (rudimentary)
67
+ elif token.pos_ == "NOUN" and token.dep_ == "nsubj" and token.i + 1 < len(doc):
68
+ next_token = doc[token.i + 1]
69
+ if next_token.pos_ != "VERB":
70
+ suggested = f"{text[:next_token.idx]}is {text[next_token.idx:]}"
71
+ result["other_suggestions"].append({
72
+ "original": text,
73
+ "corrected": suggested
74
+ })
75
 
76
  # Add entities and tags
 
77
  result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
78
  result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]
79