whuang06 committed on
Commit
bfbd92e
·
verified ·
1 Parent(s): d25e34a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -3
app.py CHANGED
@@ -2,13 +2,15 @@ import requests
2
  import spacy
3
  import json
4
  import time
 
5
  import streamlit as st
6
  import pandas as pd
7
  import matplotlib.pyplot as plt
8
 
9
  headers = {'Accept': 'application/json'}
10
  languages = {"French": "30", "German": "31", "Spanish": "32"}
11
-
 
12
  try:
13
  nlp = spacy.load("en_core_web_sm")
14
  except:
@@ -20,13 +22,17 @@ except:
20
  # 32 - spanish
21
 
22
  def get_relevance(text, language, scaling=5):
23
- link = f"https://books.google.com/ngrams/json?content={'+'.join(text.split(' '))}&year_start=2000&year_end=2019&corpus={languages[language]}&smoothing=0"
 
 
 
24
  r = requests.get(link, headers=headers)
25
  try:
26
  ngrams = r.json()[0]['timeseries']
27
  avg = sum(ngrams) / len(ngrams)
28
  except:
29
  avg = 0.0
 
30
  return avg * pow(10, scaling)
31
 
32
  def remove_punc(text):
@@ -88,13 +94,18 @@ common_words = {
88
  }
89
 
90
  excluded = st.text_input("Common words to exclude:", common_words[lang])
 
 
91
  excluded = excluded.replace(" ", "").lower().split(",")
92
 
 
93
  upper_bound = st.number_input('Upper bound N-gram score', 0.0, 1000.0, value=10.0)
94
  lower_bound = st.number_input('Lower bound N-gram score', 0.0, 1000.0, value=1e-19)
95
 
96
  langMP = {"French": "fr", "German": "de", "Spanish": "es"}
97
 
 
 
98
  if st.button("Calculate"):
99
  my_bar = st.progress(0, text="Calculating N-grams 0%")
100
 
@@ -102,7 +113,8 @@ if st.button("Calculate"):
102
 
103
  df = pd.DataFrame(output, columns=["Word", "N-Gram"])
104
 
105
- fig, ax = plt.subplots()
 
106
  ax.spines['top'].set_visible(False)
107
  ax.spines['right'].set_visible(False)
108
  ax.spines['bottom'].set_visible(False)
@@ -112,6 +124,7 @@ if st.button("Calculate"):
112
  ax.set_ylabel("Words")
113
 
114
  st.subheader("Word Relevance")
 
115
  st.pyplot(fig)
116
 
117
  definitions = []
 
2
  import spacy
3
  import json
4
  import time
5
+ import datetime
6
  import streamlit as st
7
  import pandas as pd
8
  import matplotlib.pyplot as plt
9
 
10
  headers = {'Accept': 'application/json'}
11
  languages = {"French": "30", "German": "31", "Spanish": "32"}
12
+ if "memo" not in st.session_state:
13
+ st.session_state["memo"] = {}
14
  try:
15
  nlp = spacy.load("en_core_web_sm")
16
  except:
 
22
  # 32 - spanish
23
 
24
  def get_relevance(text, language, scaling=5):
25
+ if f"{text}_{language}_{scaling}" in st.session_state["memo"]:
26
+ return st.session_state["memo"][f"{text}_{language}_{scaling}"]
27
+
28
+ link = f"https://books.google.com/ngrams/json?content={'+'.join(text.split(' '))}&year_start=1990&year_end=2019&corpus={languages[language]}&smoothing=0"
29
  r = requests.get(link, headers=headers)
30
  try:
31
  ngrams = r.json()[0]['timeseries']
32
  avg = sum(ngrams) / len(ngrams)
33
  except:
34
  avg = 0.0
35
+ st.session_state["memo"][f"{text}_{language}_{scaling}"] = avg * pow(10, scaling)
36
  return avg * pow(10, scaling)
37
 
38
  def remove_punc(text):
 
94
  }
95
 
96
  excluded = st.text_input("Common words to exclude:", common_words[lang])
97
+
98
+
99
  excluded = excluded.replace(" ", "").lower().split(",")
100
 
101
+
102
  upper_bound = st.number_input('Upper bound N-gram score', 0.0, 1000.0, value=10.0)
103
  lower_bound = st.number_input('Lower bound N-gram score', 0.0, 1000.0, value=1e-19)
104
 
105
  langMP = {"French": "fr", "German": "de", "Spanish": "es"}
106
 
107
+
108
+
109
  if st.button("Calculate"):
110
  my_bar = st.progress(0, text="Calculating N-grams 0%")
111
 
 
113
 
114
  df = pd.DataFrame(output, columns=["Word", "N-Gram"])
115
 
116
+ fig, ax = plt.subplots(figsize=(5, int((len(set(df["Word"].tolist()))) ** 0.6)))
117
+
118
  ax.spines['top'].set_visible(False)
119
  ax.spines['right'].set_visible(False)
120
  ax.spines['bottom'].set_visible(False)
 
124
  ax.set_ylabel("Words")
125
 
126
  st.subheader("Word Relevance")
127
+
128
  st.pyplot(fig)
129
 
130
  definitions = []