jadenhochh committed on
Commit
1aaa40d
·
verified ·
1 Parent(s): 6193c09

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +13 -4
src/streamlit_app.py CHANGED
@@ -2,11 +2,12 @@ import streamlit as st
2
  import pandas as pd
3
  from sklearn.metrics.pairwise import cosine_similarity
4
  import joblib
5
-
6
- st.title("Arxiv Expert Finder")
7
-
8
  from pathlib import Path
9
  import urllib.request
 
 
 
 
10
 
11
  # URLs
12
  BASE_URL = "https://huggingface.co/datasets/jadenhochh/TF_IDF/resolve/main"
@@ -34,7 +35,15 @@ except Exception as e:
34
  print(f"Fehler: {e}")
35
 
36
  st.sidebar.header("Query")
37
- user_query = st.text_input("Suchtext eingeben", "")
 
 
 
 
 
 
 
 
38
  num_experts = st.sidebar.number_input("Anzahl Experten", min_value=1, max_value=10, value=5, step=1)
39
 
40
  if user_query:
 
2
  import pandas as pd
3
  from sklearn.metrics.pairwise import cosine_similarity
4
  import joblib
 
 
 
5
  from pathlib import Path
6
  import urllib.request
7
+ import re
8
+ from nltk.stem import PorterStemmer
9
+
10
+ st.title("Arxiv Expert Finder")
11
 
12
  # URLs
13
  BASE_URL = "https://huggingface.co/datasets/jadenhochh/TF_IDF/resolve/main"
 
35
  print(f"Fehler: {e}")
36
 
37
  st.sidebar.header("Query")
38
+
39
+ if user_query:
40
+ # Remove numbers and special characters, convert to lowercase
41
+ user_query = re.sub(r'[^a-zA-Z\s]', ' ', user_query).lower()
42
+ # Stem words
43
+ stemmer = PorterStemmer()
44
+ user_query = " ".join([stemmer.stem(word) for word in user_query.split()])
45
+
46
+
47
  num_experts = st.sidebar.number_input("Anzahl Experten", min_value=1, max_value=10, value=5, step=1)
48
 
49
  if user_query: