Spaces:

jadenhoch
/

Expert-Finder-BiCA-base

Sleeping

App Files Files Community

jadenhoch committed 21 days ago

Commit

77d191a

verified ·

1 Parent(s): ad7c1d7

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +47 -39

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,48 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

 import streamlit as st
+import pandas as pd
+import numpy as np
+from sentence_transformers import SentenceTransformer, util
+from huggingface_hub import hf_hub_download
+import os
+# Page-level configuration is issued before any other Streamlit element.
+st.set_page_config(
+    page_title="ArXiv Expert Finder",
+    page_icon="🔬",
+    layout="wide",
+)
+st.title("ArXiv Expert Finder")
+@st.cache_resource
+def load_model():
+    """Instantiate the ``bisectgroup/BiCA-base`` sentence encoder.
+
+    Decorated with ``st.cache_resource`` so the encoder object is built
+    once and then reused instead of being reloaded on each script rerun.
+
+    Returns:
+        The ``SentenceTransformer`` instance for the BiCA-base checkpoint.
+    """
+    model_name = "bisectgroup/BiCA-base"
+    encoder = SentenceTransformer(model_name)
+    return encoder
+@st.cache_resource
+def load_data():
+    """Download the paper table and its precomputed embedding matrix.
+
+    Both files are fetched through ``hf_hub_download`` and then read from
+    the returned local paths.
+
+    ``st.cache_resource`` is used rather than ``st.cache_data`` on purpose:
+    ``cache_data`` serializes the return value and hands out a fresh copy on
+    every call, which is wasteful for a whole-corpus table plus embedding
+    array. ``cache_resource`` returns the same objects each time, so callers
+    must treat them as read-only.
+
+    Returns:
+        A ``(DataFrame, ndarray)`` tuple: the table read from
+        ``arxiv_2025_zstd.parquet`` and the array loaded from
+        ``BiCA-base.npy``.
+        NOTE(review): presumably row ``i`` of the array is the embedding of
+        row ``i`` of the table; confirm the two files are kept aligned.
+    """
+    # The parquet file is stored in the Space repository itself.
+    parquet_path = hf_hub_download(
+        repo_id="jadenhoch/Expert-Finder-BiCA-base",
+        filename="arxiv_2025_zstd.parquet",
+        repo_type="space",
+    )
+    # The embeddings are stored in a separate dataset repository.
+    npy_path = hf_hub_download(
+        repo_id="jadenhoch/BiCA-base",
+        filename="BiCA-base.npy",
+        repo_type="dataset",
+    )
+    return pd.read_parquet(parquet_path), np.load(npy_path)
+# --- Search UI -------------------------------------------------------------
+# Load the encoder and the corpus (table + embeddings) via the cached loaders.
+model = load_model()
+df, corpus_embeddings = load_data()
+# Sidebar control for how many hits to display (1..20, default 6).
+top_k = st.sidebar.slider("Number of results", 1, 20, 6)
+query = st.text_area("🔍 Text eingeben:", height=200)
+if st.button("Suchen"):
+    if not query.strip():
+        # An empty or whitespace-only query has nothing to embed; tell the
+        # user instead of silently doing nothing or searching on blanks.
+        st.warning("Bitte zuerst einen Text eingeben.")
+    else:
+        query_emb = model.encode(query)
+        # semantic_search returns one hit list per query; a single query was
+        # passed, so take the first list.
+        results = util.semantic_search(query_emb, corpus_embeddings, top_k=top_k)[0]
+        for rank, hit in enumerate(results, 1):
+            idx = hit["corpus_id"]
+            # Look the row up once instead of once per displayed field.
+            row = df.iloc[idx]
+            st.markdown(f"### {rank} | Similarity Score: {hit['score']:.4f} | Index: {idx}")
+            st.write(f"**Autoren:** {row['authors']}")
+            st.write(f"**Titel:** {row['title']}")
+            with st.expander("Abstract"):
+                st.write(row['abstract'])
+            st.divider()