"""Streamlit explorer for the Papers-with-Code archive dataset.

Renders a top navigation bar and dispatches to one of several views
(trending, search, SOTA, ...) selected via ``st.session_state``.
"""

import streamlit as st
import requests
from datasets import load_dataset

st.set_page_config(page_title="PWC Explorer", layout="wide")

# --- 1. Session state -------------------------------------------------------
# current_view: which page body renders below the nav bar.
# page_number: zero-based page index used by the paginated views
# (trending, search).
if "current_view" not in st.session_state:
    st.session_state.current_view = "trending"
if "page_number" not in st.session_state:
    st.session_state.page_number = 0

# --- 2. Top navigation bar --------------------------------------------------
# (button label, view key) pairs, rendered one per column.
_NAV_ITEMS = [
    ("🔥 Trending", "trending"),
    ("🔍 Search", "search"),
    ("🏆 Browse SOTA", "sota"),
    ("📊 Datasets", "datasets"),
    ("🧩 Methods", "methods"),
]

with st.container():
    # One extra column up front for the brand mark keeps the layout balanced.
    columns = st.columns([1] * (len(_NAV_ITEMS) + 1))
    with columns[0]:
        st.markdown("### 📚 PWC")
    for col, (label, view) in zip(columns[1:], _NAV_ITEMS):
        with col:
            if st.button(label, use_container_width=True):
                st.session_state.current_view = view
                # Always reset pagination on a view switch so no view ever
                # starts on a stale page offset left over from another view.
                st.session_state.page_number = 0
                st.rerun()

st.divider()

# --- 3. View Logic ---
PAGE_SIZE = 10  # papers shown per page in the trending and search views
REQUEST_TIMEOUT = 10  # seconds; never issue an unbounded HTTP request


def _snippet(text, limit=500):
    """Return the first *limit* characters of *text*, ellipsized only when cut.

    Tolerates ``None``/missing abstracts, which occur in the archive dataset.
    """
    text = text or ""
    return text[:limit] + "..." if len(text) > limit else text


# --- VIEW: TRENDING ---
if st.session_state.current_view == "trending":
    st.subheader("🔥 Trending Research Papers")

    # Pagination controls: Previous is disabled on the first page.
    c1, c2, c3 = st.columns([1, 2, 1])
    with c1:
        if st.button("⬅️ Previous", disabled=(st.session_state.page_number == 0)):
            st.session_state.page_number -= 1
            st.rerun()  # rerun so the fetch below uses the new offset
    with c2:
        st.write(f"Page {st.session_state.page_number + 1}")
    with c3:
        if st.button("Next ➡️"):
            st.session_state.page_number += 1
            st.rerun()

    # Stream only the current page: skip to the page offset and take one
    # page, never materializing the full (large) split in memory.
    offset = st.session_state.page_number * PAGE_SIZE
    with st.spinner("Loading papers..."):
        dataset = load_dataset(
            "pwc-archive/papers-with-abstracts", split="train", streaming=True
        )
        papers = list(dataset.skip(offset).take(PAGE_SIZE))

    if not papers:
        # Paging past the end of the stream yields an empty page.
        st.warning("No more papers — you've reached the end of the archive.")

    for p in papers:
        with st.container():
            st.markdown(f"### {p['title']}")
            st.write(_snippet(p.get("abstract")))
            if p.get("arxiv_id"):
                st.link_button(
                    "View on ArXiv", f"https://arxiv.org/abs/{p['arxiv_id']}"
                )
            st.divider()

# --- VIEW: SEARCH ---
elif st.session_state.current_view == "search":
    st.subheader("🔍 Global Archive Search")
    st.info("Search through the entire 1GB archive of 500,000+ papers.")

    query = st.text_input(
        "Enter keywords (e.g., 'Diffusion Models', 'LLM')", key="global_search"
    )

    if query:
        # Server-side full-text search via the HF datasets-server API.
        # The endpoint requires an explicit "config" parameter.
        search_url = "https://datasets-server.huggingface.co/search"
        params = {
            "dataset": "pwc-archive/papers-with-abstracts",
            "config": "default",
            "split": "train",
            "query": query,
            "offset": st.session_state.page_number * PAGE_SIZE,
            "length": PAGE_SIZE,
        }
        with st.spinner("Searching..."):
            try:
                # Bounded timeout so a slow/unreachable service cannot
                # hang the app; connection errors degrade to the same
                # "unavailable" message as a non-200 response.
                response = requests.get(
                    search_url, params=params, timeout=REQUEST_TIMEOUT
                )
            except requests.RequestException:
                response = None

        if response is not None and response.status_code == 200:
            data = response.json()
            # .get guards against a malformed payload missing "rows".
            papers = [item["row"] for item in data.get("rows", [])]
            if not papers:
                st.warning("No matches found.")
            for p in papers:
                with st.expander(p["title"]):
                    st.write(_snippet(p.get("abstract")))
                    if p.get("arxiv_id"):
                        st.link_button(
                            "ArXiv", f"https://arxiv.org/abs/{p['arxiv_id']}"
                        )
        else:
            st.error("The search service is currently unavailable.")
    else:
        st.write("Type something above to begin your search.")

# --- VIEW: SOTA ---
elif st.session_state.current_view == "sota":
    st.title("🏆 State-of-the-Art")
    st.info(
        "Leaderboards coming soon! This page will show rankings for different ML tasks."
    )
    if st.button("Back to Trending"):
        st.session_state.current_view = "trending"
        # Reset pagination like the nav bar does, so trending restarts
        # from page 1 rather than a stale offset.
        st.session_state.page_number = 0
        st.rerun()

# --- VIEW: fallback (datasets, methods, ...) ---
else:
    st.title("Under Construction")
    st.write(
        f"The **{st.session_state.current_view}** section is currently being indexed."
    )