whuang06 committed on
Commit
d25e34a
·
verified ·
1 Parent(s): e2cb0f6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -24
app.py CHANGED
@@ -49,13 +49,12 @@ def split_text(text):
49
  def process(text, excluded=[], lang="fr", scaling=5, upperbnd=float("inf"), lowerbnd=0):
50
  tokens = set(split_text(text))
51
  wordlist = []
52
-
53
- for phrase in tokens:
54
  if phrase not in excluded:
55
  result = get_relevance(phrase, lang, scaling)
56
- if 0 < result <= upperbnd and result >= lowerbnd:
57
  wordlist.append([phrase, result])
58
-
59
  wordlist = sorted(wordlist, key=lambda x: x[1], reverse=True)
60
  return wordlist
61
 
@@ -91,31 +90,37 @@ common_words = {
91
  excluded = st.text_input("Common words to exclude:", common_words[lang])
92
  excluded = excluded.replace(" ", "").lower().split(",")
93
 
94
- upper_bound = st.number_input('Upper bound N-gram score', 0.0, 1000.0, value=100.0)
95
- lower_bound = st.number_input('Lower bound N-gram score', 0.0, 1000.0, value=0.01)
96
 
97
  langMP = {"French": "fr", "German": "de", "Spanish": "es"}
98
 
99
- output = process(text, excluded, lang, 5, upperbnd=upper_bound, lowerbnd=lower_bound)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
- df = pd.DataFrame(output, columns=["Word", "N-Gram"])
 
102
 
103
- fig, ax = plt.subplots()
104
- ax.spines['top'].set_visible(False)
105
- ax.spines['right'].set_visible(False)
106
- ax.spines['bottom'].set_visible(False)
107
- ax.spines['left'].set_visible(False)
108
- ax.barh(df["Word"], df["N-Gram"])
109
- # ax.get_xaxis().set_ticks([])
110
- ax.set_ylabel("Words")
111
 
112
- st.subheader("Word Relevance")
113
- st.pyplot(fig)
114
 
115
- definitions = []
116
- langcode = langMP[lang]
117
- for word in df["Word"].tolist():
118
- definitions.append(f'<a target="_blank" href="https://www.wordreference.com/{langcode}en/{word}">{word}</a>')
119
 
120
- st.subheader("WordReference Links")
121
- st.markdown("<br>".join(definitions), unsafe_allow_html=True)
 
def process(text, excluded=(), lang="fr", scaling=5, upperbnd=float("inf"), lowerbnd=0):
    """Score the distinct tokens of ``text`` and return those within the bounds.

    Every distinct token produced by ``split_text(text)`` that is not listed
    in ``excluded`` is scored with ``get_relevance(token, lang, scaling)``.
    Tokens whose score lies in the inclusive range ``[lowerbnd, upperbnd]``
    are kept.

    Progress is reported through the module-level ``my_bar`` progress widget,
    which must already exist when this function is called.

    Args:
        text: Input handed to ``split_text``.
        excluded: Iterable of tokens to skip.
        lang: Forwarded unchanged to ``get_relevance``.
        scaling: Forwarded unchanged to ``get_relevance``.
        upperbnd: Largest score that is kept (inclusive).
        lowerbnd: Smallest score that is kept (inclusive).

    Returns:
        A list of ``[token, score]`` pairs sorted by score, highest first.
    """
    tokens = set(split_text(text))
    # Build the exclusion lookup once so each membership test is O(1)
    # instead of a scan over the whole exclusion list per token.
    skipped = set(excluded)
    total = len(tokens)
    wordlist = []
    for position, phrase in enumerate(tokens, start=1):
        fraction = position / total
        my_bar.progress(fraction, text=f"Calculating N-grams {round(fraction * 100)}%")
        if phrase in skipped:
            continue
        result = get_relevance(phrase, lang, scaling)
        if lowerbnd <= result <= upperbnd:
            wordlist.append([phrase, result])

    return sorted(wordlist, key=lambda pair: pair[1], reverse=True)
60
 
 
# Imported locally because the file's top-level import block is outside this
# section; both are standard-library helpers used to sanitise the link markup.
from html import escape
from urllib.parse import quote

# Comma-separated exclusion list, pre-filled with the entry of common_words
# for the selected language.
excluded = st.text_input("Common words to exclude:", common_words[lang])
# Drop spaces and lowercase before splitting on commas.
excluded = excluded.replace(" ", "").lower().split(",")

# Inclusive score window forwarded to process().
upper_bound = st.number_input('Upper bound N-gram score', 0.0, 1000.0, value=10.0)
lower_bound = st.number_input('Lower bound N-gram score', 0.0, 1000.0, value=1e-19)

# Maps the language key to the code used in WordReference URLs.
langMP = {"French": "fr", "German": "de", "Spanish": "es"}

if st.button("Calculate"):
    # process() reports its progress through this module-level widget.
    my_bar = st.progress(0, text="Calculating N-grams 0%")
    try:
        # NOTE(review): process() defaults lang to "fr", but `lang` here is a
        # key of langMP (e.g. "French") - confirm get_relevance expects that
        # value rather than langMP[lang].
        output = process(text, excluded, lang, 5, upperbnd=upper_bound, lowerbnd=lower_bound)

        df = pd.DataFrame(output, columns=["Word", "N-Gram"])

        # Horizontal bar chart of the scores with the frame hidden.
        fig, ax = plt.subplots()
        for side in ("top", "right", "bottom", "left"):
            ax.spines[side].set_visible(False)
        ax.barh(df["Word"], df["N-Gram"])
        ax.set_ylabel("Words")

        st.subheader("Word Relevance")
        st.pyplot(fig)
        # Release the figure; otherwise every rerun leaves one more open.
        plt.close(fig)

        # The words originate from user-supplied text and are rendered with
        # unsafe_allow_html=True, so percent-encode them for the URL path and
        # HTML-escape them for the visible link text.
        langcode = langMP[lang]
        definitions = [
            f'<a target="_blank" href="https://www.wordreference.com/{langcode}en/{quote(str(word), safe="")}">{escape(str(word))}</a>'
            for word in df["Word"].tolist()
        ]

        st.subheader("WordReference Links")
        st.markdown("<br>".join(definitions), unsafe_allow_html=True)
    finally:
        # Always remove the progress bar, even if scoring or rendering failed.
        my_bar.empty()
 
 
 
126