Spaces:

nullHawk
/

arxive-semantic-search

Sleeping

App Files Files Community

nullHawk committed on Nov 16, 2025

Commit

2f9fb02

verified ·

1 Parent(s): c184121

add: streamlit app

Browse files

Files changed (1) hide show

app.py +102 -0

app.py ADDED Viewed

	@@ -0,0 +1,102 @@

+from huggingface_hub import hf_hub_download
+from gensim.models import Word2Vec
+import faiss
+import streamlit as st
+import pandas as pd
+import dask.dataframe as dd
+@st.cache_data
+def get_dask_df(df_path='bin/data.parquet'):
+    return dd.read_parquet(df_path)
+@st.cache_data
+def query_rows(rows: list):
+    """Unfinished stub: obtains the Dask DataFrame but does nothing with it.
+
+    As written, the function only calls ``get_dask_df()`` and then falls off
+    the end, so it returns ``None`` and ``rows`` is never used.
+
+    Args:
+        rows: Currently unused. Judging by the name, presumably the row
+            identifiers to fetch from the dataset -- TODO confirm.
+    """
+    # TODO(review): select `rows` from `df` and return the result; the
+    # intended lookup (by index label vs. position) is not visible here.
+    df = get_dask_df()
+@st.cache_data
+def get_model():
+    model_path = hf_hub_download(
+        repo_id="nullHawk/word2vec-skipgram-arxive",
+        filename="word2vec_arxiv_skipgram.model"
+    )
+    model_npy_path = hf_hub_download(
+        repo_id="nullHawk/word2vec-skipgram-arxive",
+        filename="word2vec_arxiv_skipgram.model.syn1neg.npy"
+    )
+    model_wv_path2 = hf_hub_download(
+        repo_id="nullHawk/word2vec-skipgram-arxive",
+        filename="word2vec_arxiv_skipgram.model.wv.vectors.npy"
+    )
+    return Word2Vec.load(model_path)
+@st.cache_data
+def get_faiss_index():
+    return faiss.read_index("faiss_index.bin")
+# --------------------------------------------------------------
+# Placeholder: You will plug your search code here.
+# Should return a list of paper dicts with:
+# { "title": ..., "authors": ..., "abstract": ..., "url": ... }
+# --------------------------------------------------------------
+def run_semantic_search(query, top_k):
+    # ---- Replace with your search logic ----
+    # Example dummy results:
+    return [
+        {
+            "title": "Example Paper Title",
+            "authors": "John Doe, Jane Smith",
+            "abstract": "This is a sample abstract describing the research paper...",
+            "url": "https://arxiv.org/abs/1234.5678"
+        }
+    ] * top_k
+# ----------------------------------
+# Streamlit Page Setup
+# ----------------------------------
+st.set_page_config(page_title="ArXiv Semantic Search", layout="wide")
+st.title("🔎 ArXiv Semantic Search Engine")
+st.write("Search over millions of research papers using semantic similarity.")
+# Sidebar
+st.sidebar.header("⚙️ Search Options")
+top_k = st.sidebar.slider("Top K Results", 5, 50, 10)
+# Main Search Bar
+query = st.text_input(
+    "Enter your search query:",
+    placeholder="e.g. diffusion models for text-to-image, graph neural networks, LLM alignment..."
+)
+search_button = st.button("Search")
+# --------------------------------------------------------------
+# Handle search click
+# --------------------------------------------------------------
+if search_button and query.strip():
+    with st.spinner("Searching... 🚀"):
+        results = run_semantic_search(query, top_k)
+    st.subheader(f"Top {top_k} Results")
+    # ----------------------------------------------------------
+    # Display results (card-style)
+    # ----------------------------------------------------------
+    for i, paper in enumerate(results, start=1):
+        st.markdown(f"### **{i}. {paper['title']}**")
+        st.markdown(f"**Authors:** {paper['authors']}")
+        st.markdown(f"[🔗 View on arXiv]({paper['url']})")
+        with st.expander("Abstract Preview"):
+            st.write(paper["abstract"][:600] + "...")
+        st.markdown("---")