Update app.py
Browse files
app.py
CHANGED
|
@@ -49,13 +49,12 @@ def split_text(text):
|
|
| 49 |
def process(text, excluded=[], lang="fr", scaling=5, upperbnd=float("inf"), lowerbnd=0):
|
| 50 |
tokens = set(split_text(text))
|
| 51 |
wordlist = []
|
| 52 |
-
|
| 53 |
-
|
| 54 |
if phrase not in excluded:
|
| 55 |
result = get_relevance(phrase, lang, scaling)
|
| 56 |
-
if
|
| 57 |
wordlist.append([phrase, result])
|
| 58 |
-
|
| 59 |
wordlist = sorted(wordlist, key=lambda x: x[1], reverse=True)
|
| 60 |
return wordlist
|
| 61 |
|
|
@@ -91,31 +90,37 @@ common_words = {
|
|
| 91 |
excluded = st.text_input("Common words to exclude:", common_words[lang])
|
| 92 |
excluded = excluded.replace(" ", "").lower().split(",")
|
| 93 |
|
| 94 |
-
upper_bound = st.number_input('Upper bound N-gram score', 0.0, 1000.0, value=
|
| 95 |
-
lower_bound = st.number_input('Lower bound N-gram score', 0.0, 1000.0, value=
|
| 96 |
|
| 97 |
langMP = {"French": "fr", "German": "de", "Spanish": "es"}
|
| 98 |
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
-
|
|
|
|
| 102 |
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
ax.spines['left'].set_visible(False)
|
| 108 |
-
ax.barh(df["Word"], df["N-Gram"])
|
| 109 |
-
# ax.get_xaxis().set_ticks([])
|
| 110 |
-
ax.set_ylabel("Words")
|
| 111 |
|
| 112 |
-
st.subheader("
|
| 113 |
-
st.
|
| 114 |
|
| 115 |
-
|
| 116 |
-
langcode = langMP[lang]
|
| 117 |
-
for word in df["Word"].tolist():
|
| 118 |
-
definitions.append(f'<a target="_blank" href="https://www.wordreference.com/{langcode}en/{word}">{word}</a>')
|
| 119 |
|
| 120 |
-
st.subheader("WordReference Links")
|
| 121 |
-
st.markdown("<br>".join(definitions), unsafe_allow_html=True)
|
|
|
|
| 49 |
def process(text, excluded=None, lang="fr", scaling=5, upperbnd=float("inf"), lowerbnd=0):
    """Score every unique token of *text* and return sorted [word, score] pairs.

    Parameters
    ----------
    text : str
        Raw input text; tokenised with ``split_text()``.
    excluded : iterable of str, optional
        Words to skip entirely. Defaults to no exclusions.
    lang : str
        Language code forwarded to ``get_relevance()``.
    scaling : int
        Scaling factor forwarded to ``get_relevance()``.
    upperbnd, lowerbnd : float
        Keep only words whose relevance score lies in [lowerbnd, upperbnd].

    Returns
    -------
    list[list]
        ``[word, score]`` pairs sorted by score, highest first.

    NOTE(review): this function updates the module-level Streamlit progress
    bar ``my_bar`` created by the "Calculate" button handler — it must exist
    before ``process`` is called; confirm call order.
    """
    # Fix the shared-mutable-default pitfall: the previous signature used
    # `excluded=[]`. An empty tuple preserves the "nothing excluded" default.
    if excluded is None:
        excluded = ()
    tokens = set(split_text(text))
    total = len(tokens)  # hoisted: loop-invariant
    wordlist = []
    for i, phrase in enumerate(tokens):
        # One progress tick per token so long texts give visible feedback.
        my_bar.progress(
            (i + 1) / total,
            text=f"Calculating N-grams {round((i + 1)/total * 100)}%",
        )
        if phrase not in excluded:
            result = get_relevance(phrase, lang, scaling)
            # Drop words whose score falls outside the requested window.
            if lowerbnd <= result <= upperbnd:
                wordlist.append([phrase, result])
    # Most relevant words first.
    return sorted(wordlist, key=lambda x: x[1], reverse=True)
|
| 60 |
|
|
|
|
| 90 |
excluded = st.text_input("Common words to exclude:", common_words[lang])
excluded = excluded.replace(" ", "").lower().split(",")

# Relevance-score window: words scoring outside [lower_bound, upper_bound]
# are dropped by process().
upper_bound = st.number_input('Upper bound N-gram score', 0.0, 1000.0, value=10.0)
lower_bound = st.number_input('Lower bound N-gram score', 0.0, 1000.0, value=1e-19)

# Map the UI language label to the two-letter code used in the
# WordReference dictionary URLs.
langMP = {"French": "fr", "German": "de", "Spanish": "es"}

if st.button("Calculate"):
    # Progress bar read by process() while it scores each token.
    my_bar = st.progress(0, text="Calculating N-grams 0%")

    output = process(text, excluded, lang, 5, upperbnd=upper_bound, lowerbnd=lower_bound)

    df = pd.DataFrame(output, columns=["Word", "N-Gram"])

    # Horizontal bar chart of word relevance with the plot frame hidden.
    fig, ax = plt.subplots()
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)
    ax.barh(df["Word"], df["N-Gram"])
    # ax.get_xaxis().set_ticks([])
    ax.set_ylabel("Words")

    st.subheader("Word Relevance")
    st.pyplot(fig)

    # One WordReference dictionary link per surviving word.
    langcode = langMP[lang]
    definitions = [
        f'<a target="_blank" href="https://www.wordreference.com/{langcode}en/{word}">{word}</a>'
        for word in df["Word"].tolist()
    ]

    st.subheader("WordReference Links")
    st.markdown("<br>".join(definitions), unsafe_allow_html=True)

    # Clear the progress bar once results are rendered.
    my_bar.empty()
|
|
|
|
|
|
|
|
|
|
| 126 |
|
|
|
|
|
|