pwc / src /streamlit_app.py
ale93111's picture
update
874a388
import streamlit as st
import requests
from datasets import load_dataset
st.set_page_config(page_title="PWC Explorer", layout="wide")

# 1. Initialize Session States
# Only keys that are missing get seeded, so values already present in the
# session state are never overwritten.
_SESSION_DEFAULTS = {"current_view": "trending", "page_number": 0}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
# --- 2. Top Navigation Bar ---
# Each entry is (button label, view key written to st.session_state.current_view).
# Driving the bar from one table keeps every view switch handled identically.
NAV_ITEMS = (
    ("🔥 Trending", "trending"),
    ("🔍 Search", "search"),
    ("🏆 Browse SOTA", "sota"),
    ("📊 Datasets", "datasets"),
    ("🧩 Methods", "methods"),
)

nav_container = st.container()
with nav_container:
    # One column for the brand label plus one equal-width column per button.
    brand_col, *button_cols = st.columns([1] * (len(NAV_ITEMS) + 1))
    with brand_col:
        st.markdown("### 📚 PWC")
    for column, (label, view) in zip(button_cols, NAV_ITEMS):
        with column:
            if st.button(label, use_container_width=True):
                st.session_state.current_view = view
                # Reset pagination on every view switch so a stale page
                # offset never carries over from one view into another.
                st.session_state.page_number = 0
                st.rerun()
st.divider()
# --- 3. View Logic ---
PAGE_SIZE = 10  # Rows per page, shared by the trending and search views.
ABSTRACT_PREVIEW_CHARS = 500  # Maximum abstract characters shown per paper.
DATASET_ID = "pwc-archive/papers-with-abstracts"
SEARCH_URL = "https://datasets-server.huggingface.co/search"
SEARCH_TIMEOUT_SECONDS = 15  # Keep a stalled request from hanging the page.


def _abstract_preview(paper, limit=ABSTRACT_PREVIEW_CHARS):
    """Return at most ``limit`` characters of the paper's abstract.

    A missing or null abstract yields a placeholder instead of raising, and
    the trailing ellipsis is added only when text was actually cut off.
    """
    abstract = paper.get("abstract") or ""
    if not abstract:
        return "No abstract available."
    if len(abstract) <= limit:
        return abstract
    return abstract[:limit] + "..."


def _paper_title(paper):
    """Return the paper's title, or a placeholder when it is missing or empty."""
    return paper.get("title") or "Untitled paper"


@st.cache_data(show_spinner=False, max_entries=64)
def _load_trending_page(offset, size):
    """Return ``size`` dataset rows starting at ``offset`` as a list of dicts.

    The result is cached per (offset, size) so that reruns and revisited
    pages do not stream the dataset again.
    """
    dataset = load_dataset(DATASET_ID, split="train", streaming=True)
    return list(dataset.skip(offset).take(size))


current_view = st.session_state.current_view

# --- VIEW: TRENDING ---
if current_view == "trending":
    st.subheader("🔥 Trending Research Papers")

    # Pagination UI
    prev_col, label_col, next_col = st.columns([1, 2, 1])
    with prev_col:
        if st.button("⬅️ Previous", disabled=(st.session_state.page_number == 0)):
            st.session_state.page_number -= 1
            st.rerun()  # Redraw so the page label and data match the new page.
    with label_col:
        st.write(f"Center: Page {st.session_state.page_number + 1}")
    with next_col:
        if st.button("Next ➡️"):
            st.session_state.page_number += 1
            st.rerun()

    # Data Fetching
    offset = st.session_state.page_number * PAGE_SIZE
    with st.spinner("Loading papers..."):
        papers = _load_trending_page(offset, PAGE_SIZE)

    # Display Papers
    if not papers:
        st.info("No more papers to show on this page.")
    for paper in papers:
        with st.container():
            st.markdown(f"### {_paper_title(paper)}")
            st.write(_abstract_preview(paper))
            if paper.get("arxiv_id"):
                st.link_button("View on ArXiv", f"https://arxiv.org/abs/{paper['arxiv_id']}")
            st.divider()

# --- VIEW: SEARCH ---
elif current_view == "search":
    st.subheader("🔍 Global Archive Search")
    st.info("Search through the entire 1GB archive of 500,000+ papers.")
    query = st.text_input("Enter keywords (e.g., 'Diffusion Models', 'LLM')", key="global_search")
    if query:
        params = {
            "dataset": DATASET_ID,
            # Per the original author's note, the request errors without an
            # explicit config name.
            "config": "default",
            "split": "train",
            "query": query,
            "offset": st.session_state.page_number * PAGE_SIZE,
            "length": PAGE_SIZE,
        }

        # ``rows`` stays None when the service could not be reached or did
        # not answer with HTTP 200; an empty list means "no matches".
        rows = None
        with st.spinner("Searching..."):
            try:
                response = requests.get(SEARCH_URL, params=params, timeout=SEARCH_TIMEOUT_SECONDS)
                if response.status_code == 200:
                    payload = response.json()
                    rows = payload.get("rows", []) if isinstance(payload, dict) else []
            except (requests.RequestException, ValueError):
                # Network failure, timeout, or a body that is not valid JSON.
                rows = None

        if rows is None:
            st.error("The search service is currently unavailable.")
        else:
            papers = [item["row"] for item in rows]
            if not papers:
                st.warning("No matches found.")
            for paper in papers:
                with st.expander(_paper_title(paper)):
                    st.write(_abstract_preview(paper))
                    if paper.get("arxiv_id"):
                        st.link_button("ArXiv", f"https://arxiv.org/abs/{paper['arxiv_id']}")
    else:
        st.write("Type something above to begin your search.")

# --- VIEW: SOTA ---
elif current_view == "sota":
    st.title("🏆 State-of-the-Art")
    st.info("Leaderboards coming soon! This page will show rankings for different ML tasks.")
    if st.button("Back to Trending"):
        st.session_state.current_view = "trending"
        st.rerun()

# --- Any other view (e.g. datasets, methods) has no page yet ---
else:
    st.title("Under Construction")
    st.write(f"The **{st.session_state.current_view}** section is currently being indexed.")