research_assistant / src /streamlit_app.py
Sam-Oliveira
Change TAB 1 retrieval logic
792575c
"""
Streamlit app for the research assistant.
- Search: search arXiv by field
- Digest: generate a digest of the latest papers
- Ideate: generate project ideas
"""
import pathlib, tempfile
import os
# Set up for Hugging Face Spaces: point every cache location named below at a
# single folder under the system temp directory. This must stay above the
# third-party imports that follow (presumably so those libraries read the
# variables when they are first loaded -- confirm before moving it).
CACHE_DIR = pathlib.Path(tempfile.gettempdir()) / "hf_cache"
os.environ["XDG_CACHE_HOME"] = str(CACHE_DIR)
os.environ.update(
    dict.fromkeys(
        (
            "HF_HOME",
            "HF_HUB_CACHE",
            "TRANSFORMERS_CACHE",
            "SENTENCE_TRANSFORMERS_HOME",
            "TRITON_CACHE_DIR",
        ),
        str(CACHE_DIR),
    )
)
import streamlit as st
from datetime import date
from config import MAX_RESULTS
from scrape import scrape
from digest import build_html
from ideate import ideate_from_topic, ideate_from_ids
from helpers import render_rows, rows_by_tag
from db import get_conn
from summarise import summarise_by_tag
# Page-wide settings; this is the first Streamlit call the script makes.
st.set_page_config(page_title="Research Assistant", layout="wide")

# One tab per workflow: search, digest, ideate. The labels contain literal
# emoji, so this file must be saved and served as UTF-8.
tab1, tab2, tab3 = st.tabs(["🔍 Search", "📑 Digest", "💡 Ideate"])
with tab1:
    st.header("Search for papers you have not yet read")

    # Four side-by-side free-text filters; each is passed to scrape() as-is.
    col_topic, col_title, col_author, col_category = st.columns(4)
    topic = col_topic.text_input("Topic")
    title = col_title.text_input("Title")
    author = col_author.text_input("Author")
    category = col_category.text_input("Category (e.g. cs.CL)")

    # Result cap: selectable from 5 to 50, starting at 25.
    k = st.slider("Max papers", 5, 50, 25)

    if st.button("Run search"):
        with st.spinner("Finding new papers for your search..."):
            search_results = scrape(
                max_results=k,
                topic=topic,
                title=title,
                author=author,
                category=category,
            )

        if not search_results:
            st.info("No new papers found for this search. All recent papers on this topic are already in your database.")
        else:
            st.success(f"Found {len(search_results)} new papers for your search!")
            # Reshape each result mapping into the
            # (title, authors, abstract, published) tuple that render_rows
            # is given here.
            paper_rows = [
                tuple(paper[field] for field in ("title", "authors", "abstract", "published"))
                for paper in search_results
            ]
            st.components.v1.html(render_rows(paper_rows), height=600, scrolling=True)
with tab2:
    st.header("Get a digest from the latest papers you have previously scraped")
    d_topic = st.text_input("Keyword to match tags", value="large language")

    if st.button("Generate digest"):
        with st.spinner("Finding papers and summarising them..."):
            # Summarise first, then read back up to MAX_RESULTS rows for
            # the same keyword.
            summarise_by_tag(d_topic)
            rows = rows_by_tag(d_topic, MAX_RESULTS)

        if rows:
            st.components.v1.html(render_rows(rows), height=800, scrolling=True)
        else:
            st.info("No papers found; try the Search tab.")
def _show_ideas(ideas, empty_message):
    """Display generated ideas, or *empty_message* when *ideas* is None.

    Args:
        ideas: Text returned by one of the ideate helpers, or None.
        empty_message: Notice shown via ``st.info`` when *ideas* is None.
    """
    if ideas is None:
        st.info(empty_message)
    else:
        # Wrap the text in a Markdown code fence before rendering it.
        st.markdown(f"```\n{ideas}\n```")


with tab3:
    st.header("Brainstorm new research ideas based on previously scraped papers")
    mode = st.radio("Context source", ["Keyword", "ArXiv IDs"])

    if mode == "Keyword":
        kw = st.text_input("Keyword")
        if st.button("Ideate"):
            with st.spinner("Thinking of new ideas..."):
                ideas = ideate_from_topic(kw)
            _show_ideas(
                ideas,
                "No papers in the database match that keyword. "
                "Try running a search in the **Search** tab first.",
            )
    else:
        ids_in = st.text_area("Comma-separated IDs",
                              placeholder="2406.01234,2405.01234")
        if st.button("Ideate"):
            # Split on commas, trim whitespace and drop empty fragments.
            ids = [x.strip() for x in ids_in.split(",") if x.strip()]
            if not ids:
                # Nothing usable was typed: skip the lookup instead of
                # reporting that (non-existent) IDs are missing.
                st.warning("Enter at least one arXiv ID, separated by commas.")
            else:
                with st.spinner("Thinking of new ideas..."):
                    ideas = ideate_from_ids(ids)
                _show_ideas(
                    ideas,
                    "Those IDs aren't in the database yet. "
                    "Fetch them via the Search tab, then try again.",
                )