# Streamlit semantic-search app — commit 2f9fb02 ("add: streamlit app") by nullHawk.
from huggingface_hub import hf_hub_download
from gensim.models import Word2Vec
import faiss
import streamlit as st
import pandas as pd
import dask.dataframe as dd
@st.cache_data
def get_dask_df(df_path='bin/data.parquet'):
    """Open the paper-metadata parquet file as a lazy Dask DataFrame.

    Args:
        df_path: path to the parquet dataset (default: bin/data.parquet).

    Returns:
        dask.dataframe.DataFrame — lazy; nothing is read until computed.
    """
    frame = dd.read_parquet(df_path)
    return frame
@st.cache_data
def query_rows(rows: list):
    """Materialize the requested rows of the paper dataframe.

    NOTE(review): the original body loaded the dataframe, never used
    `rows`, and implicitly returned None — it was clearly incomplete.
    This selects the requested row labels and computes them to pandas.

    Args:
        rows: row labels to select (forwarded to ``DataFrame.loc``).

    Returns:
        pandas.DataFrame with the selected rows.
    """
    df = get_dask_df()
    # .loc on a Dask DataFrame stays lazy; .compute() materializes to pandas.
    return df.loc[rows].compute()
@st.cache_resource
def get_model():
    """Download and load the pretrained Word2Vec skip-gram model from the Hub.

    Uses st.cache_resource (not st.cache_data): the model is a large
    global resource, and cache_data would pickle the return value on
    every access.

    The model was saved with its big arrays in separate ``.npy`` sidecar
    files; ``Word2Vec.load`` resolves them relative to the main model
    file, so both sidecars must be downloaded into the same cache
    snapshot before loading.

    Returns:
        gensim.models.Word2Vec: the loaded model.
    """
    model_path = hf_hub_download(
        repo_id="nullHawk/word2vec-skipgram-arxive",
        filename="word2vec_arxiv_skipgram.model"
    )
    # Downloaded for their side effect only: they must sit next to the
    # main model file so Word2Vec.load() can find them.
    hf_hub_download(
        repo_id="nullHawk/word2vec-skipgram-arxive",
        filename="word2vec_arxiv_skipgram.model.syn1neg.npy"
    )
    hf_hub_download(
        repo_id="nullHawk/word2vec-skipgram-arxive",
        filename="word2vec_arxiv_skipgram.model.wv.vectors.npy"
    )
    return Word2Vec.load(model_path)
@st.cache_resource
def get_faiss_index():
    """Load the prebuilt FAISS index from disk.

    Uses st.cache_resource because a faiss.Index is an unpicklable
    native-memory object; st.cache_data would try (and fail) to
    serialize the return value.

    Returns:
        faiss.Index read from ``faiss_index.bin`` in the working directory.
    """
    return faiss.read_index("faiss_index.bin")
# --------------------------------------------------------------
# Placeholder: You will plug your search code here.
# Should return a list of paper dicts with:
# { "title": ..., "authors": ..., "abstract": ..., "url": ... }
# --------------------------------------------------------------
def run_semantic_search(query, top_k):
    """Placeholder search: return ``top_k`` dummy paper records.

    Args:
        query: free-text search query (unused by this placeholder).
        top_k: number of results to return.

    Returns:
        list of dicts with keys "title", "authors", "abstract", "url".
    """
    # ---- Replace with your search logic ----
    # Example dummy results:
    template = {
        "title": "Example Paper Title",
        "authors": "John Doe, Jane Smith",
        "abstract": "This is a sample abstract describing the research paper...",
        "url": "https://arxiv.org/abs/1234.5678"
    }
    # dict(template) gives each result its own dict; the original
    # `[d] * top_k` aliased ONE shared mutable dict across all results,
    # so mutating one result would have mutated them all.
    return [dict(template) for _ in range(top_k)]
# ----------------------------------
# Streamlit Page Setup
# ----------------------------------
# Top-level Streamlit script: it re-runs top-to-bottom on every user
# interaction, and the order of st.* calls defines the page layout.
# NOTE(review): the emoji below were mojibake (UTF-8 bytes decoded as
# cp1252, e.g. "πŸ”Ž"); re-encoded to the intended characters.
st.set_page_config(page_title="ArXiv Semantic Search", layout="wide")
st.title("🔎 ArXiv Semantic Search Engine")
st.write("Search over millions of research papers using semantic similarity.")

# Sidebar
st.sidebar.header("⚙️ Search Options")
top_k = st.sidebar.slider("Top K Results", 5, 50, 10)  # min 5, max 50, default 10

# Main Search Bar
query = st.text_input(
    "Enter your search query:",
    placeholder="e.g. diffusion models for text-to-image, graph neural networks, LLM alignment..."
)
search_button = st.button("Search")

# --------------------------------------------------------------
# Handle search click
# --------------------------------------------------------------
# Guard: only search when the button was clicked AND the query is
# non-empty after stripping whitespace.
if search_button and query.strip():
    with st.spinner("Searching... 🚀"):
        results = run_semantic_search(query, top_k)

    st.subheader(f"Top {top_k} Results")

    # ----------------------------------------------------------
    # Display results (card-style)
    # ----------------------------------------------------------
    for i, paper in enumerate(results, start=1):
        st.markdown(f"### **{i}. {paper['title']}**")
        st.markdown(f"**Authors:** {paper['authors']}")
        st.markdown(f"[🔗 View on arXiv]({paper['url']})")
        with st.expander("Abstract Preview"):
            # Truncate long abstracts to the first 600 characters.
            st.write(paper["abstract"][:600] + "...")
        st.markdown("---")