File size: 4,742 Bytes
e268701
3f8e6b7
5a3b6f5
e268701
3f8e6b7
e268701
5ecb57c
 
3f8e6b7
85f9682
 
e268701
3f8e6b7
5ecb57c
 
3f8e6b7
2d9884b
5ecb57c
 
 
 
 
 
3f8e6b7
 
5ecb57c
3f8e6b7
 
 
 
 
5ecb57c
 
3f8e6b7
 
5ecb57c
 
3f8e6b7
 
5ecb57c
 
3f8e6b7
5ecb57c
 
 
 
 
3f8e6b7
5ecb57c
3f8e6b7
 
5ecb57c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3f8e6b7
 
 
 
 
 
 
 
 
874a388
 
 
 
 
 
 
 
 
 
 
3f8e6b7
874a388
3f8e6b7
 
 
 
 
 
 
 
 
874a388
3f8e6b7
 
 
 
 
 
5ecb57c
3f8e6b7
5ecb57c
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import streamlit as st
import requests
from datasets import load_dataset

st.set_page_config(page_title="PWC Explorer", layout="wide")

# 1. Initialize Session States
# Seed defaults only when absent; Streamlit keeps session_state across reruns.
for _key, _default in {"current_view": "trending", "page_number": 0}.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default

# --- 2. Top Navigation Bar ---
nav_container = st.container()
with nav_container:
    # Six equal-width columns keep the bar visually balanced.
    nav_cols = st.columns([1, 1, 1, 1, 1, 1])

    with nav_cols[0]:
        st.markdown("### 📚 PWC")

    # (button label, target view, reset pagination when switching?)
    nav_specs = [
        ("🔥 Trending", "trending", True),
        ("🔍 Search", "search", True),
        ("🏆 Browse SOTA", "sota", False),
        ("📊 Datasets", "datasets", False),
        ("🧩 Methods", "methods", False),
    ]
    for col, (label, view, reset_page) in zip(nav_cols[1:], nav_specs):
        with col:
            if st.button(label, use_container_width=True):
                st.session_state.current_view = view
                if reset_page:
                    st.session_state.page_number = 0  # Reset page when switching
                st.rerun()

st.divider()

# --- 3. View Logic ---

# --- VIEW: TRENDING ---
if st.session_state.current_view == "trending":
    st.subheader("🔥 Trending Research Papers")

    # Pagination UI
    PAGE_SIZE = 10
    c1, c2, c3 = st.columns([1, 2, 1])

    with c1:
        if st.button("⬅️ Previous", disabled=(st.session_state.page_number == 0)):
            st.session_state.page_number -= 1
            st.rerun()  # Refresh to update data
    with c2:
        # Show a 1-based page number (dropped the leftover "Center:" debug prefix).
        st.write(f"Page {st.session_state.page_number + 1}")
    with c3:
        if st.button("Next ➡️"):
            st.session_state.page_number += 1
            st.rerun()

    # Data Fetching: stream only the current page instead of downloading the archive.
    offset = st.session_state.page_number * PAGE_SIZE
    with st.spinner("Loading papers..."):
        dataset = load_dataset("pwc-archive/papers-with-abstracts", split="train", streaming=True)
        papers = list(dataset.skip(offset).take(PAGE_SIZE))

    # Display Papers
    if not papers:
        # Paging past the end of the stream yields an empty page; say so
        # instead of rendering a blank screen.
        st.warning("No more papers on this page — go back a page.")
    for p in papers:
        with st.container():
            st.markdown(f"### {p['title']}")
            # Some records have a null abstract; guard before slicing, and only
            # append an ellipsis when the text was actually truncated.
            abstract = p.get('abstract') or ""
            st.write(abstract[:500] + "..." if len(abstract) > 500 else abstract)
            if p.get('arxiv_id'):
                st.link_button("View on ArXiv", f"https://arxiv.org/abs/{p['arxiv_id']}")
            st.divider()
# --- VIEW: SEARCH (The New Section) ---
elif st.session_state.current_view == "search":
    st.subheader("πŸ” Global Archive Search")
    st.info("Search through the entire 1GB archive of 500,000+ papers.")
    
    query = st.text_input("Enter keywords (e.g., 'Diffusion Models', 'LLM')", key="global_search")
    
    if query:
        # Important: Added &config=default
        search_url = "https://datasets-server.huggingface.co/search"
        params = {
            "dataset": "pwc-archive/papers-with-abstracts",
            "config": "default", # <--- This fixes your error
            "split": "train",
            "query": query,
            "offset": st.session_state.page_number * 10,
            "length": 10
        }

        with st.spinner("Searching..."):
            response = requests.get(search_url, params=params)
            if response.status_code == 200:
                data = response.json()
                papers = [item['row'] for item in data['rows']]
                
                if not papers:
                    st.warning("No matches found.")
                
                for p in papers:
                    with st.expander(p['title']):
                        st.write(p['abstract'][:500] + "...")
                        if p.get('arxiv_id'):
                            st.link_button("ArXiv", f"https://arxiv.org/abs/{p['arxiv_id']}")
            else:
                st.error("The search service is currently unavailable.")
    else:
        st.write("Type something above to begin your search.")

# --- VIEW: SOTA ---
elif st.session_state.current_view == "sota":
    st.title("πŸ† State-of-the-Art")
    st.info("Leaderboards coming soon! This page will show rankings for different ML tasks.")
    if st.button("Back to Trending"):
        st.session_state.current_view = "trending"
        st.rerun()

else:
    st.title("Under Construction")
    st.write(f"The **{st.session_state.current_view}** section is currently being indexed.")