Spaces:

Brainiac77
/

Paper-Scholar

Build error

App Files Files Community

BRAINIAC2677 committed on Jan 19, 2025

Commit

e1eba48

1 Parent(s): 39a70a4

v1

Browse files

Files changed (6) hide show

Pipfile +19 -0
Pipfile.lock +0 -0
README.md +13 -13
app/__init__.py +0 -0
app/search.py +70 -0
main.py +38 -0

Pipfile ADDED Viewed

	@@ -0,0 +1,19 @@

+[[source]]
+url = "https://pypi.org/simple"
+verify_ssl = true
+name = "pypi"
+[packages]
+streamlit = "*"
+langchain = "*"
+# app/search.py imports langchain_community and langchain_huggingface;
+# both are separate distributions and must be declared explicitly.
+langchain-community = "*"
+langchain-huggingface = "*"
+ollama = "*"
+llama-index = "*"
+tiktoken = "*"
+faiss-cpu = "*"
+arxiv = "*"
+[dev-packages]
+[requires]
+python_version = "3.12"
+python_full_version = "3.12.1"

Pipfile.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

README.md CHANGED Viewed

@@ -1,14 +1,14 @@
----
-title: Paper Scholar
-emoji: 📊
-colorFrom: purple
-colorTo: yellow
-sdk: streamlit
-sdk_version: 1.41.1
-app_file: app.py
-pinned: false
-license: apache-2.0
-short_description: Paper Scholar is a research paper search and analysis tool.
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+# Paper Scholar
+Paper Scholar is a research paper search and analysis tool that integrates open-source LLMs for document understanding and querying.
+## Features
+- Search for research papers from arXiv or Google Scholar.
+- Chatbox to query specific papers.
+- Dark-themed UI with yellow highlights.
+## Installation
+1. Clone the repository and open it in a GitHub Codespace.
+2. Install dependencies:
+   ```bash
+   pipenv install

app/__init__.py ADDED Viewed

File without changes

app/search.py ADDED Viewed

	@@ -0,0 +1,70 @@

+import arxiv
+import faiss
+from langchain.vectorstores import FAISS
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.docstore.in_memory import InMemoryDocstore
+def fetch_papers(query, max_results=60):
+    search = arxiv.Search(
+        query=query,
+        max_results=max_results,
+        sort_by=arxiv.SortCriterion.Relevance
+    )
+    papers = []
+    for result in search.results():
+        papers.append({
+            "title": result.title,
+            "summary": result.summary,
+            "url": result.entry_id
+        })
+    return papers
+# Initialize embeddings and FAISS vector store
+embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+index = faiss.IndexFlatL2(len(embeddings.embed_query("hello world")))
+vector_store = FAISS(
+    embedding_function=embeddings,
+    index=index,
+    docstore=InMemoryDocstore(),
+    index_to_docstore_id={},
+)
+def index_papers(papers, vector_store=vector_store):
+    new_papers = []
+    for paper in papers:
+        # Check if a document with the same URL already exists
+        existing_docs = vector_store.similarity_search_with_score(
+            query="",  # You'll need to provide a query here
+            n_results=1,
+            filter={"url": paper["url"]}
+        )
+        if not existing_docs:
+            new_papers.append(paper)
+    if new_papers:
+        documents = [
+            {"text": paper["summary"], "metadata": {"title": paper["title"], "url": paper["url"]}}
+            for paper in new_papers
+        ]
+        vector_store.add_texts(
+            texts=[doc["text"] for doc in documents],
+            metadatas=[doc["metadata"] for doc in documents]
+        )
+    return vector_store
+def search_papers(query, vector_store, top_k=5):
+    results = vector_store.similarity_search(query, k=top_k)
+    return [{"title": result.metadata["title"], "summary": result.page_content, "url": result.metadata["url"]} for result in results]

main.py ADDED Viewed

	@@ -0,0 +1,38 @@

+import streamlit as st
+from app.search import fetch_papers, index_papers, search_papers, vector_store
+# Set page configuration
+st.set_page_config(
+    page_title="Paper Scholar",
+    page_icon=":page_with_curl:",
+    layout="centered",
+    initial_sidebar_state="expanded"
+)
+st.title(":page_with_curl: Paper Scholar")
+# User control for number of shown papers
+n_shown_paper = st.slider("Number of papers to display:", min_value=1, max_value=20, value=5, step=1)
+search_multiplier = 5
+top_k = n_shown_paper
+max_results = search_multiplier * top_k
+# Search bar for papers
+query = st.text_input("Search for research papers:")
+if query:
+    with st.spinner("Fetching and indexing papers..."):
+        papers = fetch_papers(query, max_results=max_results)
+        vector_store = index_papers(papers)
+        results = search_papers(query, vector_store, top_k=top_k)
+    st.subheader("Search Results")
+    for result in results:
+        # Display title with a link to the full paper
+        st.markdown(f"### [{result['title']}]({result['url']})")
+        # Foldable summary using expander
+        with st.expander("View Summary"):
+            st.write(result['summary'])
+        st.markdown("---")