import requests
import streamlit as st
from datasets import load_dataset

# Set the browser tab title and switch the app to the wide layout.
st.set_page_config(layout="wide", page_title="PWC Explorer")

# Seed session state only for keys that are not present yet, so values
# already stored for this session are left untouched.
_SESSION_DEFAULTS = {
    "current_view": "trending",  # name of the view the router renders
    "page_number": 0,            # zero-based page index used for offsets
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# Top navigation bar: a brand label followed by one button per view.
#
# Each entry is (button label, view name written to
# st.session_state.current_view, whether choosing it also resets
# st.session_state.page_number to the first page).
#
# NOTE(review): the labels in the source were mojibake (emoji bytes decoded
# with the wrong charset). "Trending" and "Methods" are restored to the emoji
# their surviving characters and context point to. The brand, Search,
# Browse SOTA and Datasets icons lost every distinguishing byte, so those
# literals are kept exactly as found -- TODO restore the intended emoji.
NAV_ITEMS = (
    ("🔥 Trending", "trending", True),
    ("π Search", "search", True),
    ("π Browse SOTA", "sota", False),
    ("π Datasets", "datasets", False),
    ("🧩 Methods", "methods", False),
)

nav_container = st.container()
with nav_container:
    # One equal-width column for the brand plus one per navigation button.
    brand_col, *button_cols = st.columns(len(NAV_ITEMS) + 1)

    with brand_col:
        st.markdown("### π PWC")

    for nav_col, (nav_label, nav_view, resets_page) in zip(button_cols, NAV_ITEMS):
        with nav_col:
            if st.button(nav_label, use_container_width=True):
                st.session_state.current_view = nav_view
                if resets_page:
                    st.session_state.page_number = 0
                # Rerun immediately so the router below renders the new view.
                st.rerun()

    st.divider()

# ---------------------------------------------------------------------------
# View router: renders the page named by st.session_state.current_view.
# ---------------------------------------------------------------------------

PAGE_SIZE = 10  # papers per page, shared by the trending pager and search
ABSTRACT_PREVIEW_CHARS = 500  # abstracts are cut to this many characters
PAPERS_DATASET = "pwc-archive/papers-with-abstracts"
SEARCH_URL = "https://datasets-server.huggingface.co/search"
SEARCH_TIMEOUT_SECONDS = 15  # requests applies no timeout unless one is given


def _paper_title(paper):
    """Return the paper's title, or a placeholder when it is missing/empty."""
    return paper.get("title") or "Untitled"


def _abstract_preview(paper, limit=ABSTRACT_PREVIEW_CHARS):
    """Return at most ``limit`` characters of the paper's abstract.

    A missing or ``None`` abstract yields an empty string instead of raising,
    and the trailing ellipsis is added only when text was actually cut off.
    """
    abstract = paper.get("abstract") or ""
    if len(abstract) <= limit:
        return abstract
    return abstract[:limit] + "..."


@st.cache_data(ttl=3600, max_entries=64, show_spinner=False)
def _load_trending_page(offset, page_size):
    """Stream ``page_size`` papers from the archive after skipping ``offset``.

    The result is cached per (offset, page_size) so that script reruns do not
    re-open and re-skip the streamed dataset for a page already fetched.
    """
    dataset = load_dataset(PAPERS_DATASET, split="train", streaming=True)
    return list(dataset.skip(offset).take(page_size))


def _search_papers(query, offset, length):
    """Query the datasets-server search endpoint for ``query``.

    Returns the list of matching row dicts (possibly empty), or ``None`` when
    the request fails, times out, answers with a non-200 status, or returns a
    body that is not a JSON object.
    """
    params = {
        "dataset": PAPERS_DATASET,
        "config": "default",
        "split": "train",
        "query": query,
        "offset": offset,
        "length": length,
    }
    try:
        response = requests.get(
            SEARCH_URL, params=params, timeout=SEARCH_TIMEOUT_SECONDS
        )
        if response.status_code != 200:
            return None
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Connection errors, timeouts and undecodable bodies all mean the
        # service is unusable right now; the caller shows a single message.
        return None
    if not isinstance(payload, dict):
        return None
    return [item["row"] for item in payload.get("rows", [])]


# NOTE(review): several labels below were mojibake in the source. The pager
# arrows and the trending icon are restored (the "Previous" literal was also
# split across two lines by a stray line break). The Search and
# State-of-the-Art icons are not recoverable from what survived, so those
# literals are kept as found -- TODO restore the intended emoji.
current_view = st.session_state.current_view

if current_view == "trending":
    st.subheader("🔥 Trending Research Papers")

    # Pager: previous / current page indicator / next.
    prev_col, page_col, next_col = st.columns([1, 2, 1])
    with prev_col:
        if st.button("⬅️ Previous", disabled=(st.session_state.page_number == 0)):
            st.session_state.page_number -= 1
            st.rerun()
    with page_col:
        st.write(f"Center: Page {st.session_state.page_number + 1}")
    with next_col:
        if st.button("Next ➡️"):
            st.session_state.page_number += 1
            st.rerun()

    offset = st.session_state.page_number * PAGE_SIZE
    with st.spinner("Loading papers..."):
        papers = _load_trending_page(offset, PAGE_SIZE)

    if not papers:
        # Paging past the end of the stream yields nothing; say so rather
        # than rendering a silently empty page.
        st.info("No more papers to show.")

    for paper in papers:
        with st.container():
            st.markdown(f"### {_paper_title(paper)}")
            st.write(_abstract_preview(paper))
            if paper.get("arxiv_id"):
                st.link_button(
                    "View on ArXiv", f"https://arxiv.org/abs/{paper['arxiv_id']}"
                )
            st.divider()

elif current_view == "search":
    st.subheader("π Global Archive Search")
    st.info("Search through the entire 1GB archive of 500,000+ papers.")

    query = st.text_input(
        "Enter keywords (e.g., 'Diffusion Models', 'LLM')", key="global_search"
    )
    # Whitespace-only input is treated as "nothing typed yet".
    query = query.strip()

    if query:
        with st.spinner("Searching..."):
            papers = _search_papers(
                query, st.session_state.page_number * PAGE_SIZE, PAGE_SIZE
            )

        if papers is None:
            st.error("The search service is currently unavailable.")
        elif not papers:
            st.warning("No matches found.")
        else:
            for paper in papers:
                with st.expander(_paper_title(paper)):
                    st.write(_abstract_preview(paper))
                    if paper.get("arxiv_id"):
                        st.link_button(
                            "ArXiv", f"https://arxiv.org/abs/{paper['arxiv_id']}"
                        )
    else:
        st.write("Type something above to begin your search.")

elif current_view == "sota":
    st.title("π State-of-the-Art")
    st.info("Leaderboards coming soon! This page will show rankings for different ML tasks.")
    if st.button("Back to Trending"):
        st.session_state.current_view = "trending"
        st.rerun()

else:
    # Any view without a dedicated page yet (e.g. "datasets", "methods").
    st.title("Under Construction")
    st.write(f"The **{st.session_state.current_view}** section is currently being indexed.")