Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +15 -5
src/streamlit_app.py
CHANGED
|
@@ -2,17 +2,28 @@ import streamlit as st
|
|
| 2 |
import pandas as pd
|
| 3 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 4 |
import joblib
|
|
|
|
|
|
|
| 5 |
import re
|
| 6 |
from nltk.stem import PorterStemmer
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
tfidf_matrix =
|
| 10 |
-
tfidf_vectorizer = joblib.load(r'https://huggingface.co/datasets/jadenhochh/TF_IDF/resolve/main/tfidf_vectorizer.pkl')
|
| 11 |
-
df = pd.read_csv("https://huggingface.co/datasets/jadenhochh/TF_IDF/resolve/main/clean_processed_dataset.csv")
|
| 12 |
|
| 13 |
st.title("Arxiv Expert Finder")
|
| 14 |
st.sidebar.header("Query")
|
| 15 |
-
|
|
|
|
| 16 |
|
| 17 |
if user_query:
|
| 18 |
# Remove numbers and special characters, convert to lowercase
|
|
@@ -21,7 +32,6 @@ if user_query:
|
|
| 21 |
stemmer = PorterStemmer()
|
| 22 |
user_query = " ".join([stemmer.stem(word) for word in user_query.split()])
|
| 23 |
|
| 24 |
-
|
| 25 |
num_experts = st.sidebar.number_input("Anzahl Experten", min_value=1, max_value=10, value=5, step=1)
|
| 26 |
|
| 27 |
if user_query:
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 4 |
import joblib
|
| 5 |
+
import requests
|
| 6 |
+
import io
|
| 7 |
import re
|
| 8 |
from nltk.stem import PorterStemmer
|
| 9 |
|
| 10 |
+
@st.cache_resource
def load_data():
    """Download and cache the TF-IDF artifacts and the processed dataset.

    Cached by Streamlit's ``st.cache_resource`` so the downloads and
    unpickling happen once per server process, not on every rerun.

    Returns:
        tuple: ``(tfidf_matrix, tfidf_vectorizer, df)`` — the unpickled
        TF-IDF matrix and fitted vectorizer, and the processed papers
        DataFrame loaded from the Hugging Face dataset repo.

    Raises:
        requests.HTTPError: if either artifact download returns an error
            status (fail fast instead of handing joblib an HTML error body).
        requests.Timeout: if a download stalls past the timeout.
    """
    # Always pass a timeout: without one, a stalled connection would hang
    # the Streamlit app indefinitely on first load.
    matrix_res = requests.get(
        'https://huggingface.co/datasets/jadenhochh/TF_IDF/resolve/main/tfidf_matrix.pkl',
        timeout=60,
    )
    matrix_res.raise_for_status()
    vectorizer_res = requests.get(
        'https://huggingface.co/datasets/jadenhochh/TF_IDF/resolve/main/tfidf_vectorizer.pkl',
        timeout=60,
    )
    vectorizer_res.raise_for_status()

    # NOTE(review): joblib.load unpickles arbitrary objects and can execute
    # code — acceptable only because these URLs point at our own dataset
    # repo; never load pickles from untrusted sources.
    tfidf_matrix = joblib.load(io.BytesIO(matrix_res.content))
    tfidf_vectorizer = joblib.load(io.BytesIO(vectorizer_res.content))

    # pandas reads the CSV straight from the URL (no pickle risk here).
    df = pd.read_csv("https://huggingface.co/datasets/jadenhochh/TF_IDF/resolve/main/clean_processed_dataset.csv")

    return tfidf_matrix, tfidf_vectorizer, df

tfidf_matrix, tfidf_vectorizer, df = load_data()
|
|
|
|
|
|
|
| 22 |
|
| 23 |
st.title("Arxiv Expert Finder")
|
| 24 |
st.sidebar.header("Query")
|
| 25 |
+
|
| 26 |
+
user_query = st.text_input("Suchtext eingeben", "")
|
| 27 |
|
| 28 |
if user_query:
|
| 29 |
# Remove numbers and special characters, convert to lowercase
|
|
|
|
| 32 |
stemmer = PorterStemmer()
|
| 33 |
user_query = " ".join([stemmer.stem(word) for word in user_query.split()])
|
| 34 |
|
|
|
|
| 35 |
num_experts = st.sidebar.number_input("Anzahl Experten", min_value=1, max_value=10, value=5, step=1)
|
| 36 |
|
| 37 |
if user_query:
|