Update app.py
Browse files
app.py
CHANGED
|
@@ -2,13 +2,15 @@ import requests
|
|
| 2 |
import spacy
|
| 3 |
import json
|
| 4 |
import time
|
|
|
|
| 5 |
import streamlit as st
|
| 6 |
import pandas as pd
|
| 7 |
import matplotlib.pyplot as plt
|
| 8 |
|
| 9 |
headers = {'Accept': 'application/json'}
|
| 10 |
languages = {"French": "30", "German": "31", "Spanish": "32"}
|
| 11 |
-
|
|
|
|
| 12 |
try:
|
| 13 |
nlp = spacy.load("en_core_web_sm")
|
| 14 |
except:
|
|
@@ -20,13 +22,17 @@ except:
|
|
| 20 |
# 32 - spanish
|
| 21 |
|
| 22 |
def get_relevance(text, language, scaling=5):
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
| 24 |
r = requests.get(link, headers=headers)
|
| 25 |
try:
|
| 26 |
ngrams = r.json()[0]['timeseries']
|
| 27 |
avg = sum(ngrams) / len(ngrams)
|
| 28 |
except:
|
| 29 |
avg = 0.0
|
|
|
|
| 30 |
return avg * pow(10, scaling)
|
| 31 |
|
| 32 |
def remove_punc(text):
|
|
@@ -88,13 +94,18 @@ common_words = {
|
|
| 88 |
}
|
| 89 |
|
| 90 |
excluded = st.text_input("Common words to exclude:", common_words[lang])
|
|
|
|
|
|
|
| 91 |
excluded = excluded.replace(" ", "").lower().split(",")
|
| 92 |
|
|
|
|
| 93 |
upper_bound = st.number_input('Upper bound N-gram score', 0.0, 1000.0, value=10.0)
|
| 94 |
lower_bound = st.number_input('Lower bound N-gram score', 0.0, 1000.0, value=1e-19)
|
| 95 |
|
| 96 |
langMP = {"French": "fr", "German": "de", "Spanish": "es"}
|
| 97 |
|
|
|
|
|
|
|
| 98 |
if st.button("Calculate"):
|
| 99 |
my_bar = st.progress(0, text="Calculating N-grams 0%")
|
| 100 |
|
|
@@ -102,7 +113,8 @@ if st.button("Calculate"):
|
|
| 102 |
|
| 103 |
df = pd.DataFrame(output, columns=["Word", "N-Gram"])
|
| 104 |
|
| 105 |
-
fig, ax = plt.subplots()
|
|
|
|
| 106 |
ax.spines['top'].set_visible(False)
|
| 107 |
ax.spines['right'].set_visible(False)
|
| 108 |
ax.spines['bottom'].set_visible(False)
|
|
@@ -112,6 +124,7 @@ if st.button("Calculate"):
|
|
| 112 |
ax.set_ylabel("Words")
|
| 113 |
|
| 114 |
st.subheader("Word Relevance")
|
|
|
|
| 115 |
st.pyplot(fig)
|
| 116 |
|
| 117 |
definitions = []
|
|
|
|
| 2 |
import spacy
|
| 3 |
import json
|
| 4 |
import time
|
| 5 |
+
import datetime
|
| 6 |
import streamlit as st
|
| 7 |
import pandas as pd
|
| 8 |
import matplotlib.pyplot as plt
|
| 9 |
|
| 10 |
headers = {'Accept': 'application/json'}
|
| 11 |
languages = {"French": "30", "German": "31", "Spanish": "32"}
|
| 12 |
+
if "memo" not in st.session_state:
|
| 13 |
+
st.session_state["memo"] = {}
|
| 14 |
try:
|
| 15 |
nlp = spacy.load("en_core_web_sm")
|
| 16 |
except:
|
|
|
|
| 22 |
# 32 - spanish
|
| 23 |
|
| 24 |
def get_relevance(text, language, scaling=5):
    """Return the scaled average Google Books N-gram frequency of *text*.

    The Google Books N-gram JSON endpoint is queried for the years
    1990-2019 with smoothing disabled, using the corpus id looked up in
    the module-level ``languages`` mapping. The yearly ``timeseries`` of
    the first returned entry is averaged and multiplied by
    ``10 ** scaling``.

    Results are memoised in ``st.session_state["memo"]`` under the key
    ``f"{text}_{language}_{scaling}"`` so that Streamlit reruns do not
    repeat the HTTP request.

    Args:
        text: Word or phrase to look up; spaces separate the tokens.
        language: Key of the module-level ``languages`` mapping.
        scaling: Power of ten the average frequency is multiplied by.

    Returns:
        The scaled average frequency, or ``0.0`` when the response holds
        no usable time series (e.g. the phrase is unknown or the body is
        not the expected JSON).

    Raises:
        KeyError: If *language* is not a key of ``languages``.
        requests.exceptions.RequestException: If the HTTP request itself
            fails or times out.
    """
    memo = st.session_state["memo"]
    # Build the cache key once instead of re-formatting it for every access.
    key = f"{text}_{language}_{scaling}"
    if key in memo:
        return memo[key]

    # Let requests encode the query: spaces still become '+', exactly like
    # the former manual join, while characters such as '&' or '#' can no
    # longer break the query string.
    params = {
        "content": text,
        "year_start": 1990,
        "year_end": 2019,
        "corpus": languages[language],
        "smoothing": 0,
    }
    # A timeout keeps a stalled connection from freezing the app forever.
    r = requests.get(
        "https://books.google.com/ngrams/json",
        params=params,
        headers=headers,
        timeout=10,
    )
    try:
        ngrams = r.json()[0]['timeseries']
        avg = sum(ngrams) / len(ngrams)
    except (ValueError, LookupError, TypeError, ZeroDivisionError):
        # Non-JSON body, empty result list, missing key, or empty series:
        # treat the phrase as having no measurable frequency.
        avg = 0.0

    score = avg * pow(10, scaling)
    # Only memoise answers from successful responses, so that a transient
    # server error or rate limit is not remembered as a permanent 0.0.
    if r.ok:
        memo[key] = score
    return score
|
| 37 |
|
| 38 |
def remove_punc(text):
|
|
|
|
| 94 |
}
|
| 95 |
|
| 96 |
excluded = st.text_input("Common words to exclude:", common_words[lang])
|
| 97 |
+
|
| 98 |
+
|
| 99 |
excluded = excluded.replace(" ", "").lower().split(",")
|
| 100 |
|
| 101 |
+
|
| 102 |
upper_bound = st.number_input('Upper bound N-gram score', 0.0, 1000.0, value=10.0)
|
| 103 |
lower_bound = st.number_input('Lower bound N-gram score', 0.0, 1000.0, value=1e-19)
|
| 104 |
|
| 105 |
langMP = {"French": "fr", "German": "de", "Spanish": "es"}
|
| 106 |
|
| 107 |
+
|
| 108 |
+
|
| 109 |
if st.button("Calculate"):
|
| 110 |
my_bar = st.progress(0, text="Calculating N-grams 0%")
|
| 111 |
|
|
|
|
| 113 |
|
| 114 |
df = pd.DataFrame(output, columns=["Word", "N-Gram"])
|
| 115 |
|
| 116 |
+
fig, ax = plt.subplots(figsize=(5, int((len(set(df["Word"].tolist()))) ** 0.6)))
|
| 117 |
+
|
| 118 |
ax.spines['top'].set_visible(False)
|
| 119 |
ax.spines['right'].set_visible(False)
|
| 120 |
ax.spines['bottom'].set_visible(False)
|
|
|
|
| 124 |
ax.set_ylabel("Words")
|
| 125 |
|
| 126 |
st.subheader("Word Relevance")
|
| 127 |
+
|
| 128 |
st.pyplot(fig)
|
| 129 |
|
| 130 |
definitions = []
|