Frenchizer committed on
Commit
a3aabf5
·
verified ·
1 Parent(s): 7821050

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -11
app.py CHANGED
@@ -39,34 +39,31 @@ def preprocess_text(text: str):
39
  if capitalized_text != text:
40
  result["spell_suggestions"].append({
41
  "original": text,
42
- "corrected": capitalized_text,
43
- "source": "capitalization" # Track the source of the correction
44
  })
45
  text = capitalized_text # Update text for further processing
46
 
47
- # TextBlob spell check
 
 
 
48
  blob = TextBlob(text)
49
  corrected = str(blob.correct())
50
  if corrected != text:
51
- print(f"TextBlob suggestion: {text} → {corrected}") # Debugging
52
  result["spell_suggestions"].append({
53
  "original": text,
54
- "corrected": corrected,
55
- "source": "TextBlob" # Track the source of the correction
56
  })
57
-
58
  # Transformer spell check
59
  spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
60
  if spell_checked != text and spell_checked != corrected:
61
- print(f"Transformer suggestion: {text} → {spell_checked}") # Debugging
62
  result["spell_suggestions"].append({
63
  "original": text,
64
- "corrected": spell_checked,
65
- "source": "Transformer" # Track the source of the correction
66
  })
67
 
68
  # Add entities and tags
69
- doc = nlp(text)
70
  result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
71
  result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]
72
 
 
39
  if capitalized_text != text:
40
  result["spell_suggestions"].append({
41
  "original": text,
42
+ "corrected": capitalized_text
 
43
  })
44
  text = capitalized_text # Update text for further processing
45
 
46
+ # Find and record positions of corrections
47
+ doc = nlp(text)
48
+
49
+ # TextBlob spell check with position tracking
50
  blob = TextBlob(text)
51
  corrected = str(blob.correct())
52
  if corrected != text:
 
53
  result["spell_suggestions"].append({
54
  "original": text,
55
+ "corrected": corrected
 
56
  })
57
+
58
  # Transformer spell check
59
  spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
60
  if spell_checked != text and spell_checked != corrected:
 
61
  result["spell_suggestions"].append({
62
  "original": text,
63
+ "corrected": spell_checked
 
64
  })
65
 
66
  # Add entities and tags
 
67
  result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
68
  result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]
69