Spaces:

ale93111
/

pwc

Sleeping

App Files Files Community

pwc / src /streamlit_app.py

ale93111

update

874a388 7 days ago

raw

history blame contribute delete

4.74 kB

	import streamlit as st
	import requests
	from datasets import load_dataset

	# Page-level configuration.
	st.set_page_config(page_title="PWC Explorer", layout="wide")

	# 1. Initialize Session States
	# Seed each key only when it is absent, so values already stored in the
	# session (the active view and the current page index) are left untouched.
	_SESSION_DEFAULTS = {
	    "current_view": "trending",
	    "page_number": 0,
	}
	for _state_key, _initial_value in _SESSION_DEFAULTS.items():
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = _initial_value

	# --- 2. Top Navigation Bar ---
	# One row of six equal-width columns: the brand label, then one button per view.
	# Each entry is (button label, view name, reset page_number on switch?).
	_NAV_BUTTONS = (
	    ("🔥 Trending", "trending", True),
	    ("🔍 Search", "search", True),
	    ("🏆 Browse SOTA", "sota", False),
	    ("📊 Datasets", "datasets", False),
	    ("🧩 Methods", "methods", False),
	)

	nav_container = st.container()
	with nav_container:
	    _brand_col, *_button_cols = st.columns([1] * 6)

	    with _brand_col:
	        st.markdown("### 📚 PWC")

	    for _nav_col, (_label, _view, _resets_page) in zip(_button_cols, _NAV_BUTTONS):
	        with _nav_col:
	            if st.button(_label, use_container_width=True):
	                st.session_state.current_view = _view
	                if _resets_page:
	                    # Only the views that read page_number start over at page 0.
	                    st.session_state.page_number = 0
	                st.rerun()

	st.divider()

	# --- 3. View Logic ---
	# Exactly one view is rendered per script run, selected by
	# st.session_state.current_view.

	PAGE_SIZE = 10  # papers per page; shared by the trending and search views
	ABSTRACT_PREVIEW_CHARS = 500  # maximum abstract characters shown per paper
	SEARCH_TIMEOUT_SECONDS = 15  # without a timeout a stalled request blocks the run


	def _abstract_preview(paper, limit=ABSTRACT_PREVIEW_CHARS):
	    """Return at most *limit* characters of the paper's abstract.

	    A missing or ``None`` abstract yields an empty string, and the
	    ellipsis is appended only when text was actually cut off.
	    """
	    abstract = paper.get("abstract") or ""
	    if len(abstract) <= limit:
	        return abstract
	    return abstract[:limit] + "..."


	# --- VIEW: TRENDING ---
	if st.session_state.current_view == "trending":
	    st.subheader("🔥 Trending Research Papers")

	    # Pagination UI
	    c1, c2, c3 = st.columns([1, 2, 1])

	    with c1:
	        # Disabled on the first page so page_number never goes negative.
	        if st.button("⬅️ Previous", disabled=(st.session_state.page_number == 0)):
	            st.session_state.page_number -= 1
	            st.rerun()  # Refresh to update data
	    with c2:
	        st.write(f"Center: Page {st.session_state.page_number + 1}")
	    with c3:
	        if st.button("Next ➡️"):
	            st.session_state.page_number += 1
	            st.rerun()

	    # Data Fetching: only the rows of the current page are materialised.
	    offset = st.session_state.page_number * PAGE_SIZE
	    with st.spinner("Loading papers..."):
	        dataset = load_dataset("pwc-archive/papers-with-abstracts", split="train", streaming=True)
	        papers = list(dataset.skip(offset).take(PAGE_SIZE))

	    # Paging past the last row yields an empty list; say so instead of
	    # rendering a blank page.
	    if not papers:
	        st.info("No more papers to show.")

	    # Display Papers
	    for p in papers:
	        with st.container():
	            st.markdown(f"### {p['title']}")
	            st.write(_abstract_preview(p))
	            if p.get('arxiv_id'):
	                st.link_button("View on ArXiv", f"https://arxiv.org/abs/{p['arxiv_id']}")
	            st.divider()

	# --- VIEW: SEARCH (The New Section) ---
	elif st.session_state.current_view == "search":
	    st.subheader("🔍 Global Archive Search")
	    st.info("Search through the entire 1GB archive of 500,000+ papers.")

	    query = st.text_input("Enter keywords (e.g., 'Diffusion Models', 'LLM')", key="global_search")

	    if query:
	        search_url = "https://datasets-server.huggingface.co/search"
	        params = {
	            "dataset": "pwc-archive/papers-with-abstracts",
	            "config": "default",  # sent explicitly alongside dataset and split
	            "split": "train",
	            "query": query,
	            "offset": st.session_state.page_number * PAGE_SIZE,
	            "length": PAGE_SIZE,
	        }

	        try:
	            with st.spinner("Searching..."):
	                response = requests.get(search_url, params=params, timeout=SEARCH_TIMEOUT_SECONDS)
	                response.raise_for_status()
	                payload = response.json()
	        except (requests.RequestException, ValueError):
	            # Connection failure, timeout, HTTP error status or a non-JSON
	            # body: all are reported as the service being unavailable.
	            st.error("The search service is currently unavailable.")
	        else:
	            papers = [item['row'] for item in payload.get('rows', [])]

	            if not papers:
	                st.warning("No matches found.")

	            for p in papers:
	                with st.expander(p['title']):
	                    st.write(_abstract_preview(p))
	                    if p.get('arxiv_id'):
	                        st.link_button("ArXiv", f"https://arxiv.org/abs/{p['arxiv_id']}")
	    else:
	        st.write("Type something above to begin your search.")

	# --- VIEW: SOTA ---
	elif st.session_state.current_view == "sota":
	    st.title("🏆 State-of-the-Art")
	    st.info("Leaderboards coming soon! This page will show rankings for different ML tasks.")
	    if st.button("Back to Trending"):
	        st.session_state.current_view = "trending"
	        st.rerun()

	# --- Any other view (e.g. "datasets", "methods") ---
	else:
	    st.title("Under Construction")
	    st.write(f"The {st.session_state.current_view} section is currently being indexed.")