Spaces:

QuantumLearner
/

Space24

Sleeping

App Files Files Community

cmagganas committed on Jun 10, 2023

Commit

90b2c39

0 Parent(s):

initial commit

Browse files

Files changed (7) hide show

.chainlit/.langchain.db +0 -0
.chainlit/config.toml +29 -0
.gitignore +4 -0
Dockerfile +11 -0
app.py +102 -0
chainlit.md +14 -0
requirements.txt +7 -0

.chainlit/.langchain.db ADDED Viewed

Binary file (12.3 kB). View file

.chainlit/config.toml ADDED Viewed

	@@ -0,0 +1,29 @@

+[project]
+# Name of the app and chatbot.
+name = "Chatbot"
+# Description of the app and chatbot. This is used for HTML tags.
+# description = ""
+# If true (default), the app will be available to anonymous users (once deployed).
+# If false, users will need to authenticate and be part of the project to use the app.
+public = true
+# The project ID (found on https://cloud.chainlit.io).
+# If provided, all the message data will be stored in the cloud.
+# The project ID is required when public is set to false.
+#id = ""
+# Whether to enable telemetry (default: true). No personal data is collected.
+enable_telemetry = true
+# List of environment variables to be provided by each user to use the app.
+user_env = []
+# Hide the chain of thought details from the user in the UI.
+hide_cot = false
+# Link to your github repo. This will add a github button in the UI's header.
+# github = ""
+# Limit the number of requests per user.
+#request_limit = "10 per day"

.gitignore ADDED Viewed

	@@ -0,0 +1,4 @@

+.env
+.vscode
+.chroma
+__pycache__

Dockerfile ADDED Viewed

	@@ -0,0 +1,11 @@

+FROM python:3.9
+# Run as a non-root user with UID 1000.
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+WORKDIR $HOME/app
+# Copy only the requirements file first so the dependency layer is rebuilt
+# only when requirements.txt changes. Docker does not expand "~" in COPY
+# destinations, so the path is spelled out with $HOME.
+COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
+RUN pip install -r requirements.txt
+# Copy the application once, owned by the runtime user.
+COPY --chown=user . $HOME/app
+CMD ["chainlit", "run", "app.py", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,102 @@

+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.document_loaders import PyMuPDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import Chroma
+from langchain.chains import RetrievalQAWithSourcesChain
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts.chat import (
+    ChatPromptTemplate,
+    SystemMessagePromptTemplate,
+    HumanMessagePromptTemplate,
+)
+import os
+import arxiv
+import chainlit as cl
+from chainlit import user_session
+@cl.langchain_factory
+def init():
+    arxiv_query = None
+    # Wait for the user to ask an Arxiv question
+    while arxiv_query == None:
+        arxiv_query = cl.AskUserMessage(
+            content="Please enter a topic to begin!", timeout=15
+        ).send()
+    # Obtain the top 30 results from Arxiv for the query
+    search = arxiv.Search(
+        query=arxiv_query["content"],
+        max_results=30,
+        sort_by=arxiv.SortCriterion.Relevance,
+    )
+    # download each of the pdfs
+    pdf_data = []
+    for result in search.results():
+        loader = PyMuPDFLoader(result.pdf_url)
+        loaded_pdf = loader.load()
+        for document in loaded_pdf:
+            document.metadata["source"] = result.entry_id
+            document.metadata["file_path"] = result.pdf_url
+            document.metadata["title"] = result.title
+            pdf_data.append(document)
+    # Create a Chroma vector store
+    embeddings = OpenAIEmbeddings(
+        disallowed_special=(),
+    )
+    docsearch = Chroma.from_documents(pdf_data, embeddings)
+    # Create a chain that uses the Chroma vector store
+    chain = RetrievalQAWithSourcesChain.from_chain_type(
+        ChatOpenAI(
+            model_name="gpt-4",
+            temperature=0,
+        ),
+        chain_type="stuff",
+        retriever=docsearch.as_retriever(),
+        return_source_documents=True,
+    )
+    # Let the user know that the system is ready
+    cl.Message(
+        content=f"We found a few papers about `{arxiv_query['content']}` you can now ask questions!"
+    ).send()
+    return chain
+@cl.langchain_postprocess
+def process_response(res):
+    answer = res["answer"]
+    source_elements_dict = {}
+    source_elements = []
+    for idx, source in enumerate(res["source_documents"]):
+        title = source.metadata["title"]
+        if title not in source_elements_dict:
+            source_elements_dict[title] = {
+                "page_number": [source.metadata["page"]],
+                "url": source.metadata["file_path"],
+            }
+        else:
+            source_elements_dict[title]["page_number"].append(source.metadata["page"])
+        # sort the page numbers
+        source_elements_dict[title]["page_number"].sort()
+    for title, source in source_elements_dict.items():
+        # create a string for the page numbers
+        page_numbers = ", ".join([str(x) for x in source["page_number"]])
+        text_for_source = f"Page Number(s): {page_numbers}\nURL: {source['url']}"
+        source_elements.append(
+            cl.Text(name=title, text=text_for_source, display="inline")
+        )
+    cl.Message(content=answer, elements=source_elements).send()

chainlit.md ADDED Viewed

	@@ -0,0 +1,14 @@

+# Welcome to Chainlit! 🚀🤖
+Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
+## Useful Links 🔗
+- **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
+- **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/ZThrUxbAYw) to ask questions, share your projects, and connect with other developers! 💬
+We can't wait to see what you create with Chainlit! Happy coding! 💻😊
+## Welcome screen
+To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+arxiv==1.4.7
+langchain==0.0.193
+chainlit
+openai
+chromadb
+tiktoken
+pymupdf