jadenhochh committed on
Commit
6ed60e1
·
verified ·
1 Parent(s): 4a6436f

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +12 -6
src/streamlit_app.py CHANGED
@@ -7,6 +7,7 @@ import io
7
  import re
8
  from nltk.stem import PorterStemmer
9
  import nltk
 
10
 
11
  nltk.download('stopwords')
12
 
@@ -26,14 +27,19 @@ tfidf_matrix, tfidf_vectorizer, df = load_data()
26
  st.title("Arxiv Expert Finder")
27
  st.sidebar.header("Query")
28
 
 
 
 
 
 
 
 
 
 
 
29
  user_query = st.text_input("Suchtext eingeben", "")
30
 
31
- if user_query:
32
- # Remove numbers and special characters, convert to lowercase
33
- user_query = re.sub(r'[^a-zA-Z\s]', ' ', user_query).lower()
34
- # Stem words
35
- stemmer = PorterStemmer()
36
- user_query = " ".join([stemmer.stem(word) for word in user_query.split()])
37
 
38
  num_experts = st.sidebar.number_input("Anzahl Experten", min_value=1, max_value=10, value=5, step=1)
39
 
 
7
  import re
8
  from nltk.stem import PorterStemmer
9
  import nltk
10
+ from functools import lru_cache
11
 
12
  nltk.download('stopwords')
13
 
 
27
  st.title("Arxiv Expert Finder")
28
  st.sidebar.header("Query")
29
 
30
+ @lru_cache(maxsize=200_000)
31
+ def stem_cached(w: str) -> str:
32
+ return stemmer.stem(w)
33
+
34
+ def text_reinigen_fast(text: str) -> str:
35
+ if not isinstance(text, str) or not text:
36
+ return ""
37
+ words = re_words.findall(text.lower())
38
+ return " ".join(stem_cached(w) for w in words if w not in stop)
39
+
40
  user_query = st.text_input("Suchtext eingeben", "")
41
 
42
+ user_query = text_reinigen_fast(user_query)
 
 
 
 
 
43
 
44
  num_experts = st.sidebar.number_input("Anzahl Experten", min_value=1, max_value=10, value=5, step=1)
45