Update app.py
Browse files
app.py
CHANGED
|
@@ -49,13 +49,12 @@ def split_text(text):
|
|
| 49 |
def process(text, excluded=[], lang="fr", scaling=5, upperbnd=float("inf"), lowerbnd=0):
|
| 50 |
tokens = set(split_text(text))
|
| 51 |
wordlist = []
|
| 52 |
-
|
| 53 |
-
|
| 54 |
if phrase not in excluded:
|
| 55 |
result = get_relevance(phrase, lang, scaling)
|
| 56 |
-
if
|
| 57 |
wordlist.append([phrase, result])
|
| 58 |
-
|
| 59 |
wordlist = sorted(wordlist, key=lambda x: x[1], reverse=True)
|
| 60 |
return wordlist
|
| 61 |
|
|
@@ -91,31 +90,37 @@ common_words = {
|
|
| 91 |
excluded = st.text_input("Common words to exclude:", common_words[lang])
|
| 92 |
excluded = excluded.replace(" ", "").lower().split(",")
|
| 93 |
|
| 94 |
-
upper_bound = st.number_input('Upper bound N-gram score', 0.0, 1000.0, value=
|
| 95 |
-
lower_bound = st.number_input('Lower bound N-gram score', 0.0, 1000.0, value=
|
| 96 |
|
| 97 |
langMP = {"French": "fr", "German": "de", "Spanish": "es"}
|
| 98 |
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
-
|
|
|
|
| 102 |
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
ax.spines['left'].set_visible(False)
|
| 108 |
-
ax.barh(df["Word"], df["N-Gram"])
|
| 109 |
-
# ax.get_xaxis().set_ticks([])
|
| 110 |
-
ax.set_ylabel("Words")
|
| 111 |
|
| 112 |
-
st.subheader("
|
| 113 |
-
st.
|
| 114 |
|
| 115 |
-
|
| 116 |
-
langcode = langMP[lang]
|
| 117 |
-
for word in df["Word"].tolist():
|
| 118 |
-
definitions.append(f'<a target="_blank" href="https://www.wordreference.com/{langcode}en/{word}">{word}</a>')
|
| 119 |
|
| 120 |
-
st.subheader("WordReference Links")
|
| 121 |
-
st.markdown("<br>".join(definitions), unsafe_allow_html=True)
|
|
|
|
| 49 |
def process(text, excluded=None, lang="fr", scaling=5, upperbnd=float("inf"), lowerbnd=0):
    """Score every unique token of *text* and return sorted [word, score] pairs.

    Parameters
    ----------
    text : str
        Raw input text; tokenised with ``split_text()``.
    excluded : iterable of str, optional
        Words to skip entirely. Defaults to no exclusions.
    lang : str
        Language code forwarded to ``get_relevance()``.
    scaling : int
        Scaling factor forwarded to ``get_relevance()``.
    upperbnd, lowerbnd : float
        Keep only words whose relevance score lies in [lowerbnd, upperbnd].

    Returns
    -------
    list[list]
        ``[word, score]`` pairs sorted by score, highest first.

    NOTE(review): this function updates the module-level Streamlit progress
    bar ``my_bar`` created by the "Calculate" button handler — it must exist
    before ``process`` is called; confirm call order.
    """
    # Fix the shared-mutable-default pitfall: the previous signature used
    # `excluded=[]`. An empty tuple preserves the "nothing excluded" default.
    if excluded is None:
        excluded = ()
    tokens = set(split_text(text))
    total = len(tokens)  # hoisted: loop-invariant
    wordlist = []
    for i, phrase in enumerate(tokens):
        # One progress tick per token so long texts give visible feedback.
        my_bar.progress(
            (i + 1) / total,
            text=f"Calculating N-grams {round((i + 1)/total * 100)}%",
        )
        if phrase not in excluded:
            result = get_relevance(phrase, lang, scaling)
            # Drop words whose score falls outside the requested window.
            if lowerbnd <= result <= upperbnd:
                wordlist.append([phrase, result])
    # Most relevant words first.
    return sorted(wordlist, key=lambda x: x[1], reverse=True)
|
| 60 |
|
|
|
|
| 90 |
excluded = st.text_input("Common words to exclude:", common_words[lang])
excluded = excluded.replace(" ", "").lower().split(",")

# Relevance-score window: words scoring outside [lower_bound, upper_bound]
# are dropped by process().
upper_bound = st.number_input('Upper bound N-gram score', 0.0, 1000.0, value=10.0)
lower_bound = st.number_input('Lower bound N-gram score', 0.0, 1000.0, value=1e-19)

# Map the UI language label to the two-letter code used in the
# WordReference dictionary URLs.
langMP = {"French": "fr", "German": "de", "Spanish": "es"}

if st.button("Calculate"):
    # Progress bar read by process() while it scores each token.
    my_bar = st.progress(0, text="Calculating N-grams 0%")

    output = process(text, excluded, lang, 5, upperbnd=upper_bound, lowerbnd=lower_bound)

    df = pd.DataFrame(output, columns=["Word", "N-Gram"])

    # Horizontal bar chart of word relevance with the plot frame hidden.
    fig, ax = plt.subplots()
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)
    ax.barh(df["Word"], df["N-Gram"])
    # ax.get_xaxis().set_ticks([])
    ax.set_ylabel("Words")

    st.subheader("Word Relevance")
    st.pyplot(fig)

    # One WordReference dictionary link per surviving word.
    langcode = langMP[lang]
    definitions = [
        f'<a target="_blank" href="https://www.wordreference.com/{langcode}en/{word}">{word}</a>'
        for word in df["Word"].tolist()
    ]

    st.subheader("WordReference Links")
    st.markdown("<br>".join(definitions), unsafe_allow_html=True)

    # Clear the progress bar once results are rendered.
    my_bar.empty()
|
|
|
|
|
|
|
|
|
|
| 126 |
|
|
|
|
|
|