Spaces:

jadenhochh
/

Expert_Finder_TF_IDF

Sleeping

jadenhochh committed on Jan 10

Commit

3c748a9

verified ·

1 Parent(s): 36bff7f

Update src/streamlit_app.py

Files changed (1) hide show

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,31 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

 import streamlit as st
+import pandas as pd
+from sklearn.metrics.pairwise import cosine_similarity
+import joblib
+st.title("Arxiv Expert Finder")
+
+# Base URL of the Hugging Face dataset repository hosting the artifacts.
+DATA_BASE_URL = "https://huggingface.co/datasets/jadenhochh/TF_IDF/resolve/main"
+# Hits with a cosine similarity below this value are not displayed.
+MIN_SIMILARITY = 0.1
+
+
+def _load_remote_joblib(url):
+    """Download a joblib artifact from *url* and deserialize it.
+
+    ``joblib.load`` only accepts a local path or an open file object, not a
+    URL, so the payload is fetched first and passed on as an in-memory buffer.
+    """
+    import io
+    from urllib.request import urlopen
+
+    # NOTE(security): joblib files are pickles; loading them can execute
+    # arbitrary code. Only point this at a repository you trust.
+    with urlopen(url) as response:
+        return joblib.load(io.BytesIO(response.read()))
+
+
+@st.cache_resource
+def load_artifacts():
+    """Return the TF-IDF matrix, the vectorizer and the dataset table.
+
+    Cached because Streamlit re-runs the whole script on every widget
+    interaction; without the cache each rerun would download everything again.
+    """
+    matrix = _load_remote_joblib(f"{DATA_BASE_URL}/tfidf_matrix.pkl")
+    vectorizer = _load_remote_joblib(f"{DATA_BASE_URL}/tfidf_vectorizer.pkl")
+    table = pd.read_csv(f"{DATA_BASE_URL}/clean_processed_dataset.csv")
+    return matrix, vectorizer, table
+
+
+tfidf_matrix, tfidf_vectorizer, df = load_artifacts()
+
+st.sidebar.header("Query")
+user_query = st.text_input("Suchtext eingeben", "")
+num_experts = st.sidebar.number_input("Anzahl Experten", min_value=1, max_value=10, value=5, step=1)
+
+# A whitespace-only input carries no search terms; treat it like an empty one.
+query = user_query.strip()
+if query:
+    query_vector = tfidf_vectorizer.transform([query])
+    similarities = cosine_similarity(query_vector, tfidf_matrix).flatten()
+    # Index the scores like ``df`` so each hit can be looked up with ``df.loc``.
+    scores = pd.Series(similarities, index=df.index)
+    # Drop weak matches first, then keep the best ``num_experts`` of the rest.
+    top_results = (
+        scores[scores >= MIN_SIMILARITY]
+        .sort_values(ascending=False)
+        .head(int(num_experts))
+    )
+    if top_results.empty:
+        # Give explicit feedback instead of silently rendering nothing.
+        st.info("Keine passenden Ergebnisse gefunden.")
+    for rank, (idx, score) in enumerate(top_results.items(), 1):
+        row = df.loc[idx]
+        st.write(f"**Rank:** {rank} | **Similarity Score:** {score:.4f} | **Index:** {idx}")
+        st.write(f"**Autoren:** {row['authors']}")
+        st.write(f"**Titel:** {row['title']}")
+        with st.expander("Abstract anzeigen"):
+            st.write(row['abstract'])
+        st.divider()