Frenchizer committed on
Commit
330dfff
·
verified ·
1 Parent(s): 575f1d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -34
app.py CHANGED
@@ -27,53 +27,31 @@ def preprocess_text(text: str, is_spell_corrected: bool = False):
27
  """Process text and return corrections with position information."""
28
  result = {
29
  "spell_suggestions": [],
30
- "other_suggestions": [], # For NLP-based style/grammar suggestions
31
  "entities": [],
32
  "tags": []
33
  }
34
 
35
- # Apply capitalization preprocessing
36
  capitalized_text = preprocess_capitalization(text)
37
- if capitalized_text != text:
38
  result["spell_suggestions"].append({
39
  "original": text,
40
  "corrected": capitalized_text
41
  })
42
  text = capitalized_text # Update text for further processing
43
 
44
- # Transformer spell check, skipped if already spell-corrected
45
- if not is_spell_corrected:
46
- spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
47
- if spell_checked != text:
48
- result["spell_suggestions"].append({
49
- "original": text,
50
- "corrected": spell_checked
51
- })
52
- text = spell_checked # Update text after spell correction
53
-
54
- # Add NLP-based "other" suggestions using spaCy
55
- doc = nlp(text)
56
- for token in doc:
57
- # Example: Suggest adding an article before a noun if missing
58
- if token.pos_ == "NOUN" and token.dep_ != "compound" and token.i > 0:
59
- prev_token = doc[token.i - 1]
60
- if prev_token.pos_ not in ("DET", "PRON") and not prev_token.text.endswith("'s"):
61
- suggested = f"{text[:token.idx]}the {text[token.idx:]}"
62
- result["other_suggestions"].append({
63
- "original": text,
64
- "corrected": suggested
65
- })
66
- # Example: Suggest "is" for subject-verb agreement (rudimentary)
67
- elif token.pos_ == "NOUN" and token.dep_ == "nsubj" and token.i + 1 < len(doc):
68
- next_token = doc[token.i + 1]
69
- if next_token.pos_ != "VERB":
70
- suggested = f"{text[:next_token.idx]}is {text[next_token.idx:]}"
71
- result["other_suggestions"].append({
72
- "original": text,
73
- "corrected": suggested
74
- })
75
 
76
  # Add entities and tags
 
77
  result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
78
  result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]
79
 
 
27
  """Process text and return corrections with position information."""
28
  result = {
29
  "spell_suggestions": [],
30
+ "other_suggestions": [], # For spell_checker suggestions
31
  "entities": [],
32
  "tags": []
33
  }
34
 
35
+ # Apply capitalization preprocessing (spell suggestions)
36
  capitalized_text = preprocess_capitalization(text)
37
+ if capitalized_text != text and not is_spell_corrected:
38
  result["spell_suggestions"].append({
39
  "original": text,
40
  "corrected": capitalized_text
41
  })
42
  text = capitalized_text # Update text for further processing
43
 
44
+ # Transformer spell check (other suggestions)
45
+ spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
46
+ if spell_checked != text:
47
+ result["other_suggestions"].append({
48
+ "original": text,
49
+ "corrected": spell_checked
50
+ })
51
+ text = spell_checked # Update text after spell correction
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  # Add entities and tags
54
+ doc = nlp(text)
55
  result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
56
  result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]
57