Sagar32 committed on
Commit
a1741e1
·
verified ·
1 Parent(s): 78fbd27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -7
app.py CHANGED
@@ -4,14 +4,12 @@ from collections import defaultdict
4
 
5
  import torch
6
  import gradio as gr
 
7
 
8
  from gensim.models import FastText as FT
9
  from transformers import AutoTokenizer, MT5ForConditionalGeneration
10
  from huggingface_hub import snapshot_download
11
 
12
- from nltk.corpus import words
13
- import nltk
14
- nltk.download('words', quiet=True)
15
 
16
  # =========================
17
  # 2) Auth (optional if repo is public)
@@ -55,11 +53,12 @@ for entry in phonetic_data.values():
55
  variant_to_base[v.lower()] = base
56
 
57
 
58
- ENGLISH_WORDS = set()
59
  try:
60
- ENGLISH_WORDS = set(w.lower() for w in words.words())
61
  except:
62
- pass
 
 
63
  # --- Normalization hyperparams
64
  TOP_K = 8
65
  SIM_THRESHOLD = 0.65
@@ -79,7 +78,9 @@ def preprocess_sentence(sentence: str) -> str:
79
  return " ".join(out)
80
 
81
  def is_english_word(word: str) -> bool:
82
- return word.lower() in ENGLISH_WORDS
 
 
83
 
84
  def normalize_word(word: str) -> str:
85
  if is_english_word(word):
 
4
 
5
  import torch
6
  import gradio as gr
7
+ import enchant
8
 
9
  from gensim.models import FastText as FT
10
  from transformers import AutoTokenizer, MT5ForConditionalGeneration
11
  from huggingface_hub import snapshot_download
12
 
 
 
 
13
 
14
  # =========================
15
  # 2) Auth (optional if repo is public)
 
53
  variant_to_base[v.lower()] = base
54
 
55
 
 
56
  try:
57
+ english_dict = enchant.Dict("en_US")
58
  except:
59
+ english_dict = None
60
+
61
+
62
  # --- Normalization hyperparams
63
  TOP_K = 8
64
  SIM_THRESHOLD = 0.65
 
78
  return " ".join(out)
79
 
80
  def is_english_word(word: str) -> bool:
81
+ if english_dict is None:
82
+ return False
83
+ return english_dict.check(word)
84
 
85
  def normalize_word(word: str) -> str:
86
  if is_english_word(word):