Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,14 +10,13 @@ import os
|
|
| 10 |
import re
|
| 11 |
import string
|
| 12 |
from collections import OrderedDict
|
| 13 |
-
|
| 14 |
import gradio as gr
|
| 15 |
|
| 16 |
-
# Detect if running on Hugging Face Spaces (
|
| 17 |
IN_SPACES = bool(os.getenv("SPACE_ID") or os.getenv("HF_SPACE_ID"))
|
| 18 |
|
| 19 |
-
# ---- Optional NLTK pieces (
|
| 20 |
-
# Use real stopwords
|
| 21 |
try:
|
| 22 |
import nltk # noqa: F401
|
| 23 |
from nltk.corpus import stopwords as nltk_stopwords
|
|
@@ -29,18 +28,27 @@ except Exception:
|
|
| 29 |
"there","these","they","this","to","was","will","with","were","from","your"
|
| 30 |
}
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
try:
|
|
|
|
| 33 |
from nltk.stem import WordNetLemmatizer
|
| 34 |
-
|
| 35 |
-
_use_porter = False
|
| 36 |
-
except Exception:
|
| 37 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
from nltk.stem import PorterStemmer
|
| 39 |
_stemmer = PorterStemmer()
|
| 40 |
_use_porter = True
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
| 44 |
|
| 45 |
|
| 46 |
# ---- Pipeline helpers ----
|
|
@@ -68,11 +76,25 @@ def remove_stopwords(tokens):
|
|
| 68 |
return [w for w in tokens if w not in _STOPWORDS]
|
| 69 |
|
| 70 |
def lemmatize_list(tokens):
|
| 71 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
return [_stemmer.stem(w) for w in tokens]
|
| 73 |
-
elif _use_porter is False:
|
| 74 |
-
return [_lemmatizer.lemmatize(w) for w in tokens]
|
| 75 |
else:
|
|
|
|
| 76 |
return tokens
|
| 77 |
|
| 78 |
|
|
@@ -126,10 +148,16 @@ iface = gr.Interface(
|
|
| 126 |
inputs=gr.Textbox(lines=4, label="Enter text to normalize"),
|
| 127 |
outputs=gr.HTML(label="Step-by-step normalization"),
|
| 128 |
examples=[[ex] for ex in examples],
|
|
|
|
|
|
|
| 129 |
title="Text Normalization Pipeline",
|
| 130 |
description="Enter text or select an example to see each step of the normalization process."
|
| 131 |
)
|
| 132 |
|
| 133 |
if __name__ == "__main__":
|
| 134 |
-
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
import re
|
| 11 |
import string
|
| 12 |
from collections import OrderedDict
|
|
|
|
| 13 |
import gradio as gr
|
| 14 |
|
| 15 |
+
# Detect if running on Hugging Face Spaces (avoid share=True there)
|
| 16 |
IN_SPACES = bool(os.getenv("SPACE_ID") or os.getenv("HF_SPACE_ID"))
|
| 17 |
|
| 18 |
+
# ---- Optional NLTK pieces (NO downloads at startup) ----
|
| 19 |
+
# Use real stopwords if available; otherwise fall back to a small set.
|
| 20 |
try:
|
| 21 |
import nltk # noqa: F401
|
| 22 |
from nltk.corpus import stopwords as nltk_stopwords
|
|
|
|
| 28 |
"there","these","they","this","to","was","will","with","were","from","your"
|
| 29 |
}
|
| 30 |
|
| 31 |
+
# Decide lemmatizer vs stemmer based on whether the *corpus* exists
# (no NLTK downloads are attempted here — see module header).
_use_porter = True
_lemmatizer = None
_stemmer = None
try:
    import nltk
    from nltk.stem import WordNetLemmatizer
    # Only use WordNetLemmatizer if the *wordnet* corpus is present
    try:
        nltk.data.find("corpora/wordnet")
        _lemmatizer = WordNetLemmatizer()
        _use_porter = False
    except LookupError:
        # wordnet corpus is not installed: fall back to Porter stemming,
        # which needs no corpus data.
        from nltk.stem import PorterStemmer
        _stemmer = PorterStemmer()
        _use_porter = True
except Exception:
    # If NLTK isn't fully available, fall back to identity later
    # (lemmatize_list returns tokens unchanged when _use_porter is None).
    _lemmatizer = None
    _stemmer = None
    _use_porter = None
|
| 52 |
|
| 53 |
|
| 54 |
# ---- Pipeline helpers ----
|
|
|
|
| 76 |
return [w for w in tokens if w not in _STOPWORDS]
|
| 77 |
|
| 78 |
def lemmatize_list(tokens):
    """Lemmatize if wordnet is present; otherwise stem; otherwise identity.

    Also guards against runtime LookupError during example caching."""
    # Globals are reassigned here when WordNet turns out to be missing at
    # call time, so later calls take the Porter path directly.
    global _use_porter, _lemmatizer, _stemmer
    if _use_porter is False and _lemmatizer is not None:
        try:
            return [_lemmatizer.lemmatize(w) for w in tokens]
        except LookupError:
            # WordNet corpus not actually present; switch to Porter
            try:
                from nltk.stem import PorterStemmer
                _stemmer = PorterStemmer()
                _use_porter = True
                return [_stemmer.stem(w) for w in tokens]
            except Exception:
                # Even Porter is unavailable — give the tokens back untouched.
                return tokens
    elif _use_porter is True and _stemmer is not None:
        return [_stemmer.stem(w) for w in tokens]
    else:
        # Last resort: return as-is
        return tokens
|
| 99 |
|
| 100 |
|
|
|
|
| 148 |
inputs=gr.Textbox(lines=4, label="Enter text to normalize"),
|
| 149 |
outputs=gr.HTML(label="Step-by-step normalization"),
|
| 150 |
examples=[[ex] for ex in examples],
|
| 151 |
+
cache_examples=False, # <-- avoid startup caching (which runs the fn at launch)
|
| 152 |
+
allow_flagging="never",
|
| 153 |
title="Text Normalization Pipeline",
|
| 154 |
description="Enter text or select an example to see each step of the normalization process."
|
| 155 |
)
|
| 156 |
|
| 157 |
if __name__ == "__main__":
    iface.launch(
        server_name="0.0.0.0",  # bind all interfaces so the container/Space can reach it
        server_port=7860,  # default port expected by Hugging Face Spaces
        ssr_mode=False,  # <-- disable SSR (prevents blank/fragile startup)
        share=(not IN_SPACES),  # <-- no share warning on Spaces; public link when local
    )
|