ajayinsac committed on
Commit
183842b
·
verified ·
1 Parent(s): 51abd9e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -15
app.py CHANGED
@@ -10,14 +10,13 @@ import os
10
  import re
11
  import string
12
  from collections import OrderedDict
13
-
14
  import gradio as gr
15
 
16
- # Detect if running on Hugging Face Spaces (don't use share=True there)
17
  IN_SPACES = bool(os.getenv("SPACE_ID") or os.getenv("HF_SPACE_ID"))
18
 
19
- # ---- Optional NLTK pieces (no downloads at startup) ----
20
- # Use real stopwords/lemmatizer if available; otherwise fall back.
21
  try:
22
  import nltk # noqa: F401
23
  from nltk.corpus import stopwords as nltk_stopwords
@@ -29,18 +28,27 @@ except Exception:
29
  "there","these","they","this","to","was","will","with","were","from","your"
30
  }
31
 
 
 
 
 
32
  try:
 
33
  from nltk.stem import WordNetLemmatizer
34
- _lemmatizer = WordNetLemmatizer()
35
- _use_porter = False
36
- except Exception:
37
  try:
 
 
 
 
38
  from nltk.stem import PorterStemmer
39
  _stemmer = PorterStemmer()
40
  _use_porter = True
41
- except Exception:
42
- _lemmatizer = None
43
- _use_porter = None
 
 
44
 
45
 
46
  # ---- Pipeline helpers ----
@@ -68,11 +76,25 @@ def remove_stopwords(tokens):
68
  return [w for w in tokens if w not in _STOPWORDS]
69
 
70
  def lemmatize_list(tokens):
71
- if _use_porter is True:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  return [_stemmer.stem(w) for w in tokens]
73
- elif _use_porter is False:
74
- return [_lemmatizer.lemmatize(w) for w in tokens]
75
  else:
 
76
  return tokens
77
 
78
 
@@ -126,10 +148,16 @@ iface = gr.Interface(
126
  inputs=gr.Textbox(lines=4, label="Enter text to normalize"),
127
  outputs=gr.HTML(label="Step-by-step normalization"),
128
  examples=[[ex] for ex in examples],
 
 
129
  title="Text Normalization Pipeline",
130
  description="Enter text or select an example to see each step of the normalization process."
131
  )
132
 
133
  if __name__ == "__main__":
134
- # share=True only when running locally (avoids Spaces warning)
135
- iface.launch(server_name="0.0.0.0", server_port=7860, share=(not IN_SPACES))
 
 
 
 
 
10
  import re
11
  import string
12
  from collections import OrderedDict
 
13
  import gradio as gr
14
 
15
+ # Detect if running on Hugging Face Spaces (avoid share=True there)
16
  IN_SPACES = bool(os.getenv("SPACE_ID") or os.getenv("HF_SPACE_ID"))
17
 
18
+ # ---- Optional NLTK pieces (NO downloads at startup) ----
19
+ # Use real stopwords if available; otherwise fall back to a small set.
20
  try:
21
  import nltk # noqa: F401
22
  from nltk.corpus import stopwords as nltk_stopwords
 
28
  "there","these","they","this","to","was","will","with","were","from","your"
29
  }
30
 
31
# Select the token-reduction backend once, at import time.  WordNet
# lemmatization is used only when its corpus is actually installed
# (probed via nltk.data.find, which never downloads); otherwise we
# fall back to Porter stemming.  If NLTK itself is unusable, all
# three sentinels stay unset so lemmatize_list acts as identity.
_use_porter = True
_lemmatizer = _stemmer = None
try:
    import nltk
    from nltk.stem import WordNetLemmatizer

    try:
        # Probe the data index directly — presence of the package does
        # not imply the *wordnet* corpus is on disk.
        nltk.data.find("corpora/wordnet")
        _lemmatizer = WordNetLemmatizer()
        _use_porter = False
    except LookupError:
        from nltk.stem import PorterStemmer

        _stemmer = PorterStemmer()
        _use_porter = True
except Exception:
    # NLTK absent or broken: flag every backend as unavailable.
    _lemmatizer = _stemmer = None
    _use_porter = None
52
 
53
 
54
  # ---- Pipeline helpers ----
 
76
  return [w for w in tokens if w not in _STOPWORDS]
77
 
78
def lemmatize_list(tokens):
    """Return *tokens* reduced to base forms.

    Uses the WordNet lemmatizer when one was configured at import time,
    Porter stemming otherwise, and leaves the tokens untouched when no
    backend is available.  A LookupError raised while lemmatizing
    (wordnet corpus missing at runtime, e.g. during example caching)
    permanently demotes the pipeline to Porter stemming.
    """
    global _use_porter, _lemmatizer, _stemmer
    if _use_porter is False and _lemmatizer is not None:
        try:
            return [_lemmatizer.lemmatize(tok) for tok in tokens]
        except LookupError:
            # The wordnet corpus vanished at runtime: swap in Porter
            # stemming and retry with it.
            try:
                from nltk.stem import PorterStemmer
                _stemmer = PorterStemmer()
                _use_porter = True
                return [_stemmer.stem(tok) for tok in tokens]
            except Exception:
                return tokens
    if _use_porter is True and _stemmer is not None:
        return [_stemmer.stem(tok) for tok in tokens]
    # No usable backend: identity.
    return tokens
99
 
100
 
 
148
  inputs=gr.Textbox(lines=4, label="Enter text to normalize"),
149
  outputs=gr.HTML(label="Step-by-step normalization"),
150
  examples=[[ex] for ex in examples],
151
+ cache_examples=False, # <-- avoid startup caching (which runs the fn at launch)
152
+ allow_flagging="never",
153
  title="Text Normalization Pipeline",
154
  description="Enter text or select an example to see each step of the normalization process."
155
  )
156
 
157
if __name__ == "__main__":
    # Launch the Gradio app.  A public share link is requested only for
    # local runs; on HF Spaces (detected via IN_SPACES) share=True would
    # merely trigger a warning.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "ssr_mode": False,  # SSR off: avoids blank/fragile startup
        "share": not IN_SPACES,
    }
    iface.launch(**launch_options)