jadenhochh committed on
Commit
72dc1c4
·
verified ·
1 Parent(s): beedacb

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +15 -5
src/streamlit_app.py CHANGED
@@ -2,17 +2,28 @@ import streamlit as st
2
  import pandas as pd
3
  from sklearn.metrics.pairwise import cosine_similarity
4
  import joblib
 
 
5
  import re
6
  from nltk.stem import PorterStemmer
7
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- tfidf_matrix = joblib.load(r'https://huggingface.co/datasets/jadenhochh/TF_IDF/resolve/main/tfidf_matrix.pkl')
10
- tfidf_vectorizer = joblib.load(r'https://huggingface.co/datasets/jadenhochh/TF_IDF/resolve/main/tfidf_vectorizer.pkl')
11
- df = pd.read_csv("https://huggingface.co/datasets/jadenhochh/TF_IDF/resolve/main/clean_processed_dataset.csv")
12
 
13
  st.title("Arxiv Expert Finder")
14
  st.sidebar.header("Query")
15
- user_query = st.sidebar.text_input("Suchtext eingeben", "")
 
16
 
17
  if user_query:
18
  # Remove numbers and special characters, convert to lowercase
@@ -21,7 +32,6 @@ if user_query:
21
  stemmer = PorterStemmer()
22
  user_query = " ".join([stemmer.stem(word) for word in user_query.split()])
23
 
24
-
25
  num_experts = st.sidebar.number_input("Anzahl Experten", min_value=1, max_value=10, value=5, step=1)
26
 
27
  if user_query:
 
2
  import pandas as pd
3
  from sklearn.metrics.pairwise import cosine_similarity
4
  import joblib
5
+ import requests
6
+ import io
7
  import re
8
  from nltk.stem import PorterStemmer
9
 
10
@st.cache_resource
def load_data():
    """Download and deserialize the TF-IDF artifacts and the dataset.

    Wrapped in ``st.cache_resource`` so the downloads and unpickling run
    only once per Streamlit server process, not on every rerun.

    Returns:
        tuple: ``(tfidf_matrix, tfidf_vectorizer, df)`` — the precomputed
        TF-IDF document matrix, the fitted vectorizer, and the cleaned
        dataset as a pandas DataFrame.

    Raises:
        requests.HTTPError: if either pickle download returns an HTTP error.
        requests.Timeout: if a download exceeds the timeout.
    """
    base = "https://huggingface.co/datasets/jadenhochh/TF_IDF/resolve/main"

    # Use a timeout so the app cannot hang forever on a stalled download,
    # and fail fast with a clear HTTP error instead of handing an HTML
    # error page to joblib (which would die with a cryptic pickle error).
    matrix_res = requests.get(base + "/tfidf_matrix.pkl", timeout=60)
    matrix_res.raise_for_status()
    vectorizer_res = requests.get(base + "/tfidf_vectorizer.pkl", timeout=60)
    vectorizer_res.raise_for_status()

    # NOTE(review): joblib.load unpickles arbitrary objects — acceptable
    # here only because the artifacts come from the author's own dataset
    # repo; do not point these URLs at untrusted sources.
    tfidf_matrix = joblib.load(io.BytesIO(matrix_res.content))
    tfidf_vectorizer = joblib.load(io.BytesIO(vectorizer_res.content))

    df = pd.read_csv(base + "/clean_processed_dataset.csv")

    return tfidf_matrix, tfidf_vectorizer, df
20
 
21
# Fetch the cached artifacts once at startup; these three module-level
# names are consumed by the query-handling code further down the script.
tfidf_matrix, tfidf_vectorizer, df = load_data()

st.title("Arxiv Expert Finder")
st.sidebar.header("Query")

# Free-text search box in the main page area (label is German for
# "enter search text"); empty default so the query branch below is
# skipped until the user types something.
user_query = st.text_input("Suchtext eingeben", "")
27
 
28
  if user_query:
29
  # Remove numbers and special characters, convert to lowercase
 
32
  stemmer = PorterStemmer()
33
  user_query = " ".join([stemmer.stem(word) for word in user_query.split()])
34
 
 
35
  num_experts = st.sidebar.number_input("Anzahl Experten", min_value=1, max_value=10, value=5, step=1)
36
 
37
  if user_query: