cmagganas committed on
Commit
90b2c39
·
0 Parent(s):

initial commit

Browse files
Files changed (7) hide show
  1. .chainlit/.langchain.db +0 -0
  2. .chainlit/config.toml +29 -0
  3. .gitignore +4 -0
  4. Dockerfile +11 -0
  5. app.py +102 -0
  6. chainlit.md +14 -0
  7. requirements.txt +7 -0
.chainlit/.langchain.db ADDED
Binary file (12.3 kB). View file
 
.chainlit/config.toml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ # Name of the app and chatbot.
3
+ name = "Chatbot"
4
+ # Description of the app and chatbot. This is used for HTML tags.
5
+ # description = ""
6
+
7
+ # If true (default), the app will be available to anonymous users (once deployed).
8
+ # If false, users will need to authenticate and be part of the project to use the app.
9
+ public = true
10
+
11
+ # The project ID (found on https://cloud.chainlit.io).
12
+ # If provided, all the message data will be stored in the cloud.
13
+ # The project ID is required when public is set to false.
14
+ #id = ""
15
+
16
+ # Whether to enable telemetry (default: true). No personal data is collected.
17
+ enable_telemetry = true
18
+
19
+ # List of environment variables to be provided by each user to use the app.
20
+ user_env = []
21
+
22
+ # Hide the chain of thought details from the user in the UI.
23
+ hide_cot = false
24
+
25
+ # Link to your github repo. This will add a github button in the UI's header.
26
+ # github = ""
27
+
28
+ # Limit the number of requests per user.
29
+ #request_limit = "10 per day"
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ .env
2
+ .vscode
3
+ .chroma
4
+ __pycache__
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9

# Create an unprivileged user and run everything that follows as that user.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
WORKDIR $HOME/app

# Copy only the dependency manifest first so the pip layer stays cached until
# requirements.txt itself changes. COPY does not expand `~`, so the
# destination is spelled with $HOME instead.
COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
RUN pip install -r requirements.txt

# Copy the application sources, owned by the runtime user.
COPY --chown=user . $HOME/app

CMD ["chainlit", "run", "app.py", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.embeddings.openai import OpenAIEmbeddings
2
+ from langchain.document_loaders import PyMuPDFLoader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.vectorstores import Chroma
5
+ from langchain.chains import RetrievalQAWithSourcesChain
6
+ from langchain.chat_models import ChatOpenAI
7
+ from langchain.prompts.chat import (
8
+ ChatPromptTemplate,
9
+ SystemMessagePromptTemplate,
10
+ HumanMessagePromptTemplate,
11
+ )
12
+ import os
13
+ import arxiv
14
+ import chainlit as cl
15
+ from chainlit import user_session
16
+
17
@cl.langchain_factory
def init():
    """Build a question-answering chain over arXiv papers on a user-chosen topic.

    Asks the user for a topic, searches arXiv for it, loads the PDF of every
    result, indexes the loaded documents in a Chroma vector store and wraps
    that store in a ``RetrievalQAWithSourcesChain``.

    Returns:
        The chain, configured to return the retrieved source documents
        alongside each answer.
    """
    arxiv_query = None

    # Keep asking until a reply comes back; the loop repeats for as long as
    # ``send()`` yields ``None`` (presumably when the timeout elapses without
    # an answer -- confirm against the Chainlit version in use).
    while arxiv_query is None:
        arxiv_query = cl.AskUserMessage(
            content="Please enter a topic to begin!", timeout=15
        ).send()

    topic = arxiv_query["content"]

    # Obtain the top 30 results from arXiv for the query, most relevant first.
    search = arxiv.Search(
        query=topic,
        max_results=30,
        sort_by=arxiv.SortCriterion.Relevance,
    )

    # Load each PDF and stamp every loaded document with the paper's identity
    # so answers can later be attributed to a title and URL.
    pdf_data = []
    for result in search.results():
        for document in PyMuPDFLoader(result.pdf_url).load():
            document.metadata["source"] = result.entry_id
            document.metadata["file_path"] = result.pdf_url
            document.metadata["title"] = result.title
            pdf_data.append(document)

    # Create a Chroma vector store from the loaded documents.
    embeddings = OpenAIEmbeddings(
        disallowed_special=(),
    )
    docsearch = Chroma.from_documents(pdf_data, embeddings)

    # Create a chain that answers questions using the Chroma vector store.
    chain = RetrievalQAWithSourcesChain.from_chain_type(
        ChatOpenAI(
            model_name="gpt-4",
            temperature=0,
        ),
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        return_source_documents=True,
    )

    # Let the user know that the system is ready.
    cl.Message(
        content=f"We found a few papers about `{topic}` you can now ask questions!"
    ).send()

    return chain
70
+
71
+
72
@cl.langchain_postprocess
def process_response(res):
    """Send the chain's answer together with one source element per paper.

    Groups the retrieved source documents by paper title, collecting the page
    of every document, then sends a message whose content is the answer and
    whose inline text elements list each paper's pages and URL.

    Args:
        res: Chain output; read for the ``"answer"`` and
            ``"source_documents"`` keys. Each source document's metadata is
            read for ``"title"``, ``"page"`` and ``"file_path"``.
    """
    answer = res["answer"]

    # title -> {"page_number": [...], "url": ...}, in first-seen order.
    sources_by_title = {}
    for document in res["source_documents"]:
        metadata = document.metadata
        title = metadata["title"]

        if title not in sources_by_title:
            sources_by_title[title] = {
                "page_number": [metadata["page"]],
                "url": metadata["file_path"],
            }
        else:
            sources_by_title[title]["page_number"].append(metadata["page"])

    source_elements = []
    for title, source in sources_by_title.items():
        # Sort once per paper here rather than after every append.
        # NOTE(review): the "page" values come straight from the PDF loader
        # and may be 0-based -- confirm whether they should be shown as +1.
        page_numbers = ", ".join(str(page) for page in sorted(source["page_number"]))
        text_for_source = f"Page Number(s): {page_numbers}\nURL: {source['url']}"
        source_elements.append(
            cl.Text(name=title, text=text_for_source, display="inline")
        )

    cl.Message(content=answer, elements=source_elements).send()
101
+
102
+
chainlit.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Welcome to Chainlit! 🚀🤖
2
+
3
+ Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
4
+
5
+ ## Useful Links 🔗
6
+
7
+ - **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
8
+ - **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/ZThrUxbAYw) to ask questions, share your projects, and connect with other developers! 💬
9
+
10
+ We can't wait to see what you create with Chainlit! Happy coding! 💻😊
11
+
12
+ ## Welcome screen
13
+
14
+ To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ arxiv==1.4.7
2
+ langchain==0.0.193
3
+ chainlit
4
+ openai
5
+ chromadb
6
+ tiktoken
7
+ pymupdf