File size: 3,825 Bytes
04041cc
0b51cd7
04041cc
0b51cd7
 
 
04041cc
 
4bb959f
7d73f05
f95907e
4683184
a3643f7
4683184
 
 
 
 
d0a0b7a
4683184
 
 
0b51cd7
 
 
 
 
 
 
 
e469e5f
022682f
0a40ba4
0b51cd7
e469e5f
0b51cd7
 
 
 
792575c
0b51cd7
 
 
 
 
 
 
792575c
 
0b51cd7
fd2e156
792575c
 
 
fd2e156
792575c
fd2e156
 
792575c
0b51cd7
b662fa8
0b51cd7
e469e5f
0b51cd7
 
b9e7b32
 
8458d3b
b9e7b32
 
 
0b51cd7
 
 
e469e5f
0b51cd7
 
 
 
 
b9e7b32
 
0b51cd7
 
 
 
 
 
 
 
 
 
b9e7b32
 
 
0b51cd7
fd2e156
792575c
0b51cd7
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
"""
Streamlit app for the research assistant.

- Search: search arXiv by field
- Digest: generate a digest of the latest papers
- Ideate: generate project ideas

"""
import pathlib, tempfile
import os
# Set up for Hugging Face Spaces: point every cache location at a single
# directory under the system temp dir.
# NOTE(review): this sits ahead of the remaining imports, presumably so the
# variables are already set when those libraries are loaded -- keep it there.
CACHE_DIR = pathlib.Path(tempfile.gettempdir()) / "hf_cache"
# Create the directory up front so no consumer has to cope with a missing path.
CACHE_DIR.mkdir(parents=True, exist_ok=True)
for var in (
    "XDG_CACHE_HOME",
    "HF_HOME",
    "HF_HUB_CACHE",
    "TRANSFORMERS_CACHE",
    "SENTENCE_TRANSFORMERS_HOME",
    "TRITON_CACHE_DIR",
):
    # Every cache-related variable resolves to the same CACHE_DIR path.
    os.environ[var] = str(CACHE_DIR)
    
import streamlit as st
from datetime import date
from config     import MAX_RESULTS
from scrape     import scrape
from digest     import build_html
from ideate     import ideate_from_topic, ideate_from_ids
from helpers    import render_rows, rows_by_tag
from db         import get_conn
from summarise     import summarise_by_tag



# Configure the page title and a wide layout, then create the three tabs.
st.set_page_config(page_title="Research Assistant", layout="wide")
# The emoji are written as \U escapes so the labels stay intact even if this
# file is re-encoded; the previous literals had decayed into mojibake.
tab1, tab2, tab3 = st.tabs([
    "\U0001F50D Search",  # magnifying glass
    "\U0001F4D1 Digest",  # bookmark tabs
    "\U0001F4A1 Ideate",  # light bulb
])


# --- Search tab: query for papers and show the ones returned by scrape() ---
with tab1:
    st.header("Search for papers you have not yet read")

    # Four side-by-side text inputs, one per search field.
    c1, c2, c3, c4 = st.columns(4)
    topic = c1.text_input("Topic")
    title = c2.text_input("Title")
    author = c3.text_input("Author")
    category = c4.text_input("Category (e.g. cs.CL)")
    k = st.slider("Max papers", 5, 50, 25)

    if st.button("Run search"):
        # Bundle the text filters so the scrape() call stays on one line.
        search_filters = dict(topic=topic, title=title, author=author, category=category)
        with st.spinner("Finding new papers for your search..."):
            search_results = scrape(max_results=k, **search_filters)

        if not search_results:
            st.info("No new papers found for this search. All recent papers on this topic are already in your database.")
        else:
            st.success(f"Found {len(search_results)} new papers for your search!")
            # render_rows is fed (title, authors, abstract, published) tuples.
            row_fields = ('title', 'authors', 'abstract', 'published')
            paper_rows = [tuple(paper[field] for field in row_fields)
                          for paper in search_results]
            results_html = render_rows(paper_rows)
            st.components.v1.html(results_html, height=600, scrolling=True)


# --- Digest tab: summarise on demand, then list the rows matching the tag ---
with tab2:
    st.header("Get a digest from the latest papers you have previously scraped")
    d_topic = st.text_input("Keyword to match tags", value="large language")

    # Summarisation only runs when the button is pressed.
    digest_requested = st.button("Generate digest")
    if digest_requested:
        with st.spinner("Finding papers and summarising them..."):
            summarise_by_tag(d_topic)

    # The row lookup happens on every script run, button press or not.
    rows = rows_by_tag(d_topic, MAX_RESULTS)
    if rows:
        digest_html = render_rows(rows)
        st.components.v1.html(digest_html, height=800, scrolling=True)
    else:
        st.info("No papers found; try the Search tab.")

def _show_ideas(ideas, missing_msg):
    """Render ideation output, or `missing_msg` when the result is None.

    Args:
        ideas: Value returned by one of the ideate_* helpers; None is treated
            as "no matching papers".
        missing_msg: Text shown through st.info when `ideas` is None.
    """
    if ideas is None:
        st.info(missing_msg)
    else:
        # Fenced code block keeps the generated text's own line breaks.
        st.markdown(f"```\n{ideas}\n```")


# --- Ideate tab: brainstorm ideas from a keyword or from explicit arXiv IDs ---
with tab3:
    st.header("Brainstorm new research ideas based on previously scraped papers")
    mode = st.radio("Context source", ["Keyword", "ArXiv IDs"])

    if mode == "Keyword":
        kw = st.text_input("Keyword")
        if st.button("Ideate"):
            with st.spinner("Thinking of new ideas..."):
                ideas = ideate_from_topic(kw)
            _show_ideas(
                ideas,
                "No papers in the database match that keyword. "
                "Try running a search in the **Search** tab first.",
            )

    else:
        ids_in = st.text_area("Comma-separated IDs",
                              placeholder="2406.01234,2405.01234")
        if st.button("Ideate"):
            # Split on commas and drop blanks so stray separators are harmless.
            ids = [x.strip() for x in ids_in.split(",") if x.strip()]
            if not ids:
                # Nothing usable was typed: say so instead of querying with an
                # empty list and then reporting that "those IDs" are missing.
                st.warning("Enter at least one arXiv ID.")
            else:
                with st.spinner("Thinking of new ideas..."):
                    ideas = ideate_from_ids(ids)
                _show_ideas(
                    ideas,
                    "Those IDs aren't in the database yet. "
                    "Fetch them via the Search tab, then try again.",
                )