"""
Streamlit app for the research assistant.
- Search: search arXiv by field
- Digest: generate a digest of the latest papers
- Ideate: generate project ideas
"""
import pathlib
import tempfile
import os

# Hugging Face Spaces only allows writes under the temp directory, so point
# every cache-location env var the ML libraries consult at one writable path.
# These must be set BEFORE importing transformers / sentence-transformers,
# which read them at import time.
CACHE_DIR = pathlib.Path(tempfile.gettempdir()) / "hf_cache"
for var in (
    "XDG_CACHE_HOME",  # generic fallback; was previously set with a duplicated path expression
    "HF_HOME",
    "HF_HUB_CACHE",
    "TRANSFORMERS_CACHE",
    "SENTENCE_TRANSFORMERS_HOME",
    "TRITON_CACHE_DIR",
):
    os.environ[var] = str(CACHE_DIR)
import streamlit as st
from datetime import date
from config import MAX_RESULTS
from scrape import scrape
from digest import build_html
from ideate import ideate_from_topic, ideate_from_ids
from helpers import render_rows, rows_by_tag
from db import get_conn
from summarise import summarise_by_tag
# Page config must be the first Streamlit call in the script.
st.set_page_config(page_title="Research Assistant", layout="wide")
# NOTE(review): the original labels contained mojibake ("π Search", etc.),
# presumably garbled emoji — restored to plausible glyphs; confirm intent.
tab1, tab2, tab3 = st.tabs(["🔍 Search", "📰 Digest", "💡 Ideate"])
with tab1:
    # Search arXiv for papers not yet stored in the local database.
    st.header("Search for papers you have not yet read")
    col_topic, col_title, col_author, col_cat = st.columns(4)
    topic = col_topic.text_input("Topic")
    title = col_title.text_input("Title")
    author = col_author.text_input("Author")
    category = col_cat.text_input("Category (e.g. cs.CL)")
    k = st.slider("Max papers", 5, 50, 25)
    if st.button("Run search"):
        with st.spinner("Finding new papers for your search..."):
            search_results = scrape(
                max_results=k,
                topic=topic,
                title=title,
                author=author,
                category=category,
            )
        if not search_results:
            st.info("No new papers found for this search. All recent papers on this topic are already in your database.")
        else:
            st.success(f"Found {len(search_results)} new papers for your search!")
            # Reshape result dicts into the tuple rows render_rows expects.
            paper_rows = []
            for paper in search_results:
                paper_rows.append(
                    (paper['title'], paper['authors'], paper['abstract'], paper['published'])
                )
            st.components.v1.html(render_rows(paper_rows), height=600, scrolling=True)
with tab2:
    # Summarise previously scraped papers whose tags match a keyword.
    st.header("Get a digest from the latest papers you have previously scraped")
    d_topic = st.text_input("Keyword to match tags", value="large language")
    if st.button("Generate digest"):
        with st.spinner("Finding papers and summarising them..."):
            summarise_by_tag(d_topic)
            rows = rows_by_tag(d_topic, MAX_RESULTS)
        if rows:
            st.components.v1.html(render_rows(rows), height=800, scrolling=True)
        else:
            st.info("No papers found; try the Search tab.")
with tab3:
    # Generate research ideas grounded in papers already in the database,
    # using either a keyword match or explicit arXiv IDs as context.
    st.header("Brainstorm new research ideas based on previously scraped papers")
    mode = st.radio("Context source", ["Keyword", "ArXiv IDs"])

    def _show_ideas(ideas, missing_msg):
        # Render generated ideas in a fenced block, or hint at the Search tab
        # when the database held no matching context (ideas is None).
        if ideas is None:
            st.info(missing_msg)
        else:
            st.markdown(f"```\n{ideas}\n```")

    if mode == "Keyword":
        kw = st.text_input("Keyword")
        if st.button("Ideate"):
            with st.spinner("Thinking of new ideas..."):
                ideas = ideate_from_topic(kw)
            _show_ideas(
                ideas,
                "No papers in the database match that keyword. Try running a search in the **Search** tab first.",
            )
    else:
        ids_in = st.text_area("Comma-separated IDs", placeholder="2406.01234,2405.01234")
        if st.button("Ideate"):
            with st.spinner("Thinking of new ideas..."):
                ids = [token.strip() for token in ids_in.split(",") if token.strip()]
                ideas = ideate_from_ids(ids)
            _show_ideas(
                ideas,
                "Those IDs aren't in the database yet. Fetch them via the Search tab, then try again.",
            )