ajayinsac committed on
Commit
51abd9e
·
verified ·
1 Parent(s): 4657ed8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -16
app.py CHANGED
@@ -13,11 +13,11 @@ from collections import OrderedDict
13
 
14
  import gradio as gr
15
 
16
- # Detect if running on Hugging Face Spaces (so we don't set share=True there)
17
  IN_SPACES = bool(os.getenv("SPACE_ID") or os.getenv("HF_SPACE_ID"))
18
 
19
  # ---- Optional NLTK pieces (no downloads at startup) ----
20
- # We try to use real stopwords & lemmatizer if available; otherwise fall back.
21
  try:
22
  import nltk # noqa: F401
23
  from nltk.corpus import stopwords as nltk_stopwords
@@ -39,15 +39,13 @@ except Exception:
39
  _stemmer = PorterStemmer()
40
  _use_porter = True
41
  except Exception:
42
- # Last-resort fallback: identity lemmatizer
43
  _lemmatizer = None
44
  _use_porter = None
45
 
46
 
47
  # ---- Pipeline helpers ----
48
  def tokenize(text: str):
49
- # Simple, dependency-free tokenizer:
50
- # split into "word" blocks and single non-space symbols to preserve punctuation step
51
  return re.findall(r"\w+|[^\w\s]", text or "", flags=re.UNICODE)
52
 
53
  def remove_non_ascii(tokens):
@@ -75,7 +73,6 @@ def lemmatize_list(tokens):
75
  elif _use_porter is False:
76
  return [_lemmatizer.lemmatize(w) for w in tokens]
77
  else:
78
- # identity if no lemmatizer/stemmer
79
  return tokens
80
 
81
 
@@ -105,7 +102,7 @@ def normalize(text: str) -> OrderedDict:
105
  return steps
106
 
107
 
108
- # ---- Gradio wiring (keeps your original Interface style) ----
109
  examples = [
110
  "The quick brown fox jumps over the lazy dog!",
111
  "NLTK is a leading platform for building Python programs to work with human language data.",
@@ -115,14 +112,14 @@ examples = [
115
 
116
  def show_steps(text):
117
  steps = normalize(text)
118
- html = []
119
  for step, value in steps.items():
120
  if isinstance(value, list):
121
  pretty = " ".join(value)
122
- html.append(f"<b>{step}</b>: {pretty} <small>({len(value)} tokens)</small>")
123
  else:
124
- html.append(f"<b>{step}</b>: {value}")
125
- return "<br>".join(html)
126
 
127
  iface = gr.Interface(
128
  fn=show_steps,
@@ -130,11 +127,9 @@ iface = gr.Interface(
130
  outputs=gr.HTML(label="Step-by-step normalization"),
131
  examples=[[ex] for ex in examples],
132
  title="Text Normalization Pipeline",
133
- description="Enter text or select an example to see each step of the normalization process.",
134
  )
135
 
136
  if __name__ == "__main__":
137
- iface.launch(
138
- server_name="0.0.0.0",
139
- server_port=7860,
140
- share=not
 
13
 
14
  import gradio as gr
15
 
16
+ # Detect if running on Hugging Face Spaces (don't use share=True there)
17
  IN_SPACES = bool(os.getenv("SPACE_ID") or os.getenv("HF_SPACE_ID"))
18
 
19
  # ---- Optional NLTK pieces (no downloads at startup) ----
20
+ # Use real stopwords/lemmatizer if available; otherwise fall back.
21
  try:
22
  import nltk # noqa: F401
23
  from nltk.corpus import stopwords as nltk_stopwords
 
39
  _stemmer = PorterStemmer()
40
  _use_porter = True
41
  except Exception:
 
42
  _lemmatizer = None
43
  _use_porter = None
44
 
45
 
46
  # ---- Pipeline helpers ----
47
  def tokenize(text: str):
48
+ # Simple, dependency-free tokenizer: words or single non-space symbols
 
49
  return re.findall(r"\w+|[^\w\s]", text or "", flags=re.UNICODE)
50
 
51
  def remove_non_ascii(tokens):
 
73
  elif _use_porter is False:
74
  return [_lemmatizer.lemmatize(w) for w in tokens]
75
  else:
 
76
  return tokens
77
 
78
 
 
102
  return steps
103
 
104
 
105
+ # ---- Gradio wiring ----
106
  examples = [
107
  "The quick brown fox jumps over the lazy dog!",
108
  "NLTK is a leading platform for building Python programs to work with human language data.",
 
112
 
113
  def show_steps(text):
114
  steps = normalize(text)
115
+ parts = []
116
  for step, value in steps.items():
117
  if isinstance(value, list):
118
  pretty = " ".join(value)
119
+ parts.append(f"<b>{step}</b>: {pretty} <small>({len(value)} tokens)</small>")
120
  else:
121
+ parts.append(f"<b>{step}</b>: {value}")
122
+ return "<br>".join(parts)
123
 
124
  iface = gr.Interface(
125
  fn=show_steps,
 
127
  outputs=gr.HTML(label="Step-by-step normalization"),
128
  examples=[[ex] for ex in examples],
129
  title="Text Normalization Pipeline",
130
+ description="Enter text or select an example to see each step of the normalization process."
131
  )
132
 
133
  if __name__ == "__main__":
134
+ # share=True only when running locally (avoids Spaces warning)
135
+ iface.launch(server_name="0.0.0.0", server_port=7860, share=(not IN_SPACES))