Spaces:
Running
Running
Update src/streamlit_app.py
Browse files
- src/streamlit_app.py +13 -4
src/streamlit_app.py
CHANGED
|
@@ -2,11 +2,12 @@ import streamlit as st
|
|
| 2 |
import pandas as pd
|
| 3 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 4 |
import joblib
|
| 5 |
-
|
| 6 |
-
st.title("Arxiv Expert Finder")
|
| 7 |
-
|
| 8 |
from pathlib import Path
|
| 9 |
import urllib.request
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
# URLs
|
| 12 |
BASE_URL = "https://huggingface.co/datasets/jadenhochh/TF_IDF/resolve/main"
|
|
@@ -34,7 +35,15 @@ except Exception as e:
|
|
| 34 |
print(f"Fehler: {e}")
|
| 35 |
|
| 36 |
st.sidebar.header("Query")
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
num_experts = st.sidebar.number_input("Anzahl Experten", min_value=1, max_value=10, value=5, step=1)
|
| 39 |
|
| 40 |
if user_query:
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 4 |
import joblib
|
|
|
|
|
|
|
|
|
|
| 5 |
from pathlib import Path
|
| 6 |
import urllib.request
|
| 7 |
+
import re
|
| 8 |
+
from nltk.stem import PorterStemmer
|
| 9 |
+
|
| 10 |
+
st.title("Arxiv Expert Finder")
|
| 11 |
|
| 12 |
# URLs
|
| 13 |
BASE_URL = "https://huggingface.co/datasets/jadenhochh/TF_IDF/resolve/main"
|
|
|
|
| 35 |
print(f"Fehler: {e}")
|
| 36 |
|
| 37 |
st.sidebar.header("Query")
|
| 38 |
+
|
| 39 |
+
if user_query:
|
| 40 |
+
# Remove numbers and special characters, convert to lowercase
|
| 41 |
+
user_query = re.sub(r'[^a-zA-Z\s]', ' ', user_query).lower()
|
| 42 |
+
# Stem words
|
| 43 |
+
stemmer = PorterStemmer()
|
| 44 |
+
user_query = " ".join([stemmer.stem(word) for word in user_query.split()])
|
| 45 |
+
|
| 46 |
+
|
| 47 |
num_experts = st.sidebar.number_input("Anzahl Experten", min_value=1, max_value=10, value=5, step=1)
|
| 48 |
|
| 49 |
if user_query:
|