Spaces:

SamOliveira
/

research_assistant

Sleeping

research_assistant / src /streamlit_app.py

Sam-Oliveira

Change TAB 1 retrieval logic

792575c 8 months ago

3.83 kB

	"""
	Streamlit app for the research assistant.

	- Search: search arXiv by field
	- Digest: generate a digest of the latest papers
	- Ideate: generate project ideas

	"""
	import pathlib, tempfile
	import os
	# Hugging Face Spaces setup: point every cache-related environment variable
	# at one "hf_cache" directory under the system temp dir. This runs before the
	# remaining imports, presumably so those libraries see the values at import
	# time (confirm before moving this block).
	CACHE_DIR = pathlib.Path(tempfile.gettempdir()) / "hf_cache"
	_CACHE_ENV_VARS = (
	    "XDG_CACHE_HOME",
	    "HF_HOME",
	    "HF_HUB_CACHE",
	    "TRANSFORMERS_CACHE",
	    "SENTENCE_TRANSFORMERS_HOME",
	    "TRITON_CACHE_DIR",
	)
	# Unconditional assignment: any value already present in the environment is
	# overwritten, exactly as a plain os.environ[...] = ... would do.
	os.environ.update(dict.fromkeys(_CACHE_ENV_VARS, str(CACHE_DIR)))

	import streamlit as st
	from datetime import date
	from config import MAX_RESULTS
	from scrape import scrape
	from digest import build_html
	from ideate import ideate_from_topic, ideate_from_ids
	from helpers import render_rows, rows_by_tag
	from db import get_conn
	from summarise import summarise_by_tag



	# Page-level configuration, then one tab per workflow. The tab objects are
	# used as context managers by the sections further down.
	st.set_page_config(layout="wide", page_title="Research Assistant")
	_TAB_LABELS = ["🔍 Search", "📑 Digest", "💡 Ideate"]
	tab1, tab2, tab3 = st.tabs(_TAB_LABELS)


	# --- Search tab: collect search criteria, run scrape(), list the results ---
	with tab1:
	    st.header("Search for papers you have not yet read")

	    # Four side-by-side text inputs, one per search criterion.
	    topic_col, title_col, author_col, category_col = st.columns(4)
	    topic = topic_col.text_input("Topic")
	    title = title_col.text_input("Title")
	    author = author_col.text_input("Author")
	    category = category_col.text_input("Category (e.g. cs.CL)")
	    max_papers = st.slider("Max papers", 5, 50, 25)

	    if st.button("Run search"):
	        # Only the scrape call sits under the spinner; rendering happens
	        # once it has finished.
	        with st.spinner("Finding new papers for your search..."):
	            search_results = scrape(
	                max_results=max_papers,
	                topic=topic,
	                title=title,
	                author=author,
	                category=category,
	            )

	        if not search_results:
	            st.info("No new papers found for this search. All recent papers on this topic are already in your database.")
	        else:
	            st.success(f"Found {len(search_results)} new papers for your search!")
	            # Reshape each result into the tuple layout handed to render_rows:
	            # (title, authors, abstract, published).
	            row_fields = ("title", "authors", "abstract", "published")
	            paper_rows = [
	                tuple(paper[field] for field in row_fields)
	                for paper in search_results
	            ]
	            results_html = render_rows(paper_rows)
	            st.components.v1.html(results_html, height=600, scrolling=True)


	# --- Digest tab: summarise papers matching a tag keyword and show them ---
	with tab2:
	    st.header("Get a digest from the latest papers you have previously scraped")
	    tag_keyword = st.text_input("Keyword to match tags", value="large language")

	    if st.button("Generate digest"):
	        with st.spinner("Finding papers and summarising them..."):
	            # summarise_by_tag runs first and its return value is unused;
	            # presumably it stores summaries that rows_by_tag then reads
	            # back (confirm in summarise.py / helpers.py).
	            summarise_by_tag(tag_keyword)
	            digest_rows = rows_by_tag(tag_keyword, MAX_RESULTS)

	        if digest_rows:
	            digest_html = render_rows(digest_rows)
	            st.components.v1.html(digest_html, height=800, scrolling=True)
	        else:
	            st.info("No papers found; try the Search tab.")

	def _show_ideas(ideas, empty_message):
	    """Render ideation output, or an informational note when there is none.

	    Args:
	        ideas: Value returned by ``ideate_from_topic`` / ``ideate_from_ids``.
	            ``None`` is treated as "no matching papers"; anything else is
	            interpolated into a fenced Markdown block.
	        empty_message: Text shown via ``st.info`` when ``ideas`` is ``None``.
	    """
	    if ideas is None:
	        st.info(empty_message)
	    else:
	        st.markdown(f"```\n{ideas}\n```")


	# --- Ideate tab: brainstorm ideas from a keyword or from explicit IDs ---
	with tab3:
	    st.header("Brainstorm new research ideas based on previously scraped papers")
	    mode = st.radio("Context source", ["Keyword", "ArXiv IDs"])

	    if mode == "Keyword":
	        kw = st.text_input("Keyword")
	        if st.button("Ideate"):
	            with st.spinner("Thinking of new ideas..."):
	                ideas = ideate_from_topic(kw)
	            _show_ideas(
	                ideas,
	                "No papers in the database match that keyword. "
	                "Try running a search in the Search tab first.",
	            )

	    else:
	        ids_in = st.text_area("Comma-separated IDs",
	                              placeholder="2406.01234,2405.01234")
	        if st.button("Ideate"):
	            # Strip each comma-separated piece once and drop empty pieces.
	            ids = [part for part in map(str.strip, ids_in.split(",")) if part]
	            if not ids:
	                # Blank or comma-only input: skip the ideation call instead
	                # of invoking it with an empty ID list.
	                st.warning("Enter at least one arXiv ID.")
	            else:
	                with st.spinner("Thinking of new ideas..."):
	                    ideas = ideate_from_ids(ids)
	                _show_ideas(
	                    ideas,
	                    "Those IDs aren't in the database yet. "
	                    "Fetch them via the Search tab, then try again.",
	                )