praneeth dodedu committed
Commit 46b9702 · 1 Parent(s): 2992cdd
Files changed (3):
  1. app-backup.py +0 -98
  2. app.py +86 -17
  3. ingest.py +29 -0
app-backup.py DELETED
@@ -1,98 +0,0 @@
- from langchain import PromptTemplate
- from langchain.embeddings import HuggingFaceEmbeddings
- from langchain.vectorstores import FAISS
- from langchain.llms import CTransformers
- from langchain.chains import RetrievalQA
- import chainlit as cl
-
- DB_FAISS_PATH = 'vectorstore/db_faiss'
-
- custom_prompt_template = """Use the following pieces of information to answer the user's question.
- If you don't know the answer, just say that you don't know, don't try to make up an answer.
-
- Context: {context}
- Question: {question}
-
- Only return the helpful answer below and nothing else.
- Helpful answer:
- """
-
-
- def set_custom_prompt():
-     """
-     Prompt template for QA retrieval for each vectorstore
-     """
-     prompt = PromptTemplate(template=custom_prompt_template,
-                             input_variables=['context', 'question'])
-     return prompt
-
-
- # Retrieval QA Chain
- def retrieval_qa_chain(llm, prompt, db):
-     qa_chain = RetrievalQA.from_chain_type(llm=llm,
-                                            chain_type='stuff',
-                                            retriever=db.as_retriever(search_kwargs={'k': 2}),
-                                            return_source_documents=True,
-                                            chain_type_kwargs={'prompt': prompt}
-                                            )
-     return qa_chain
-
-
- # Loading the model
- def load_llm():
-     # Load the locally downloaded model here
-     llm = CTransformers(
-         model="llama-2-7b-chat.ggmlv3.q8_0.bin",
-         model_type="llama",
-         max_new_tokens=512,
-         temperature=0.5
-     )
-     return llm
-
-
- # QA Model Function
- def qa_bot():
-     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
-                                        model_kwargs={'device': 'cpu'})
-     db = FAISS.load_local(DB_FAISS_PATH, embeddings)
-     llm = load_llm()
-     qa_prompt = set_custom_prompt()
-     qa = retrieval_qa_chain(llm, qa_prompt, db)
-
-     return qa
-
-
- # output function
- def final_result(query):
-     qa_result = qa_bot()
-     response = qa_result({'query': query})
-     print(response)
-     return response
-
-
- # chain lit code
- @cl.on_chat_start
- async def start():
-     chain = qa_bot()
-     msg = cl.Message(content="Starting the bot...")
-     await msg.send()
-     msg.content = "Hi, Welcome to Ryder Bot. What is your query?"
-     await msg.update()
-     cl.user_session.set("chain", chain)
-
-
- @cl.on_message
- async def main(message):
-     chain = cl.user_session.get("chain")
-     cb = cl.AsyncLangchainCallbackHandler(
-         stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"]
-     )
-     cb.answer_reached = True
-     res = await chain.acall(message, callbacks=[cb])
-     answer = res["result"]
-     sources = res["source_documents"]
-
-     if not sources:
-         answer += "\nNo sources found"
-
-     await cl.Message(content=answer).send()
app.py CHANGED
@@ -1,29 +1,98 @@
+ from langchain import PromptTemplate
  from langchain.embeddings import HuggingFaceEmbeddings
  from langchain.vectorstores import FAISS
- from langchain.document_loaders import PyPDFLoader, DirectoryLoader
- from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.llms import CTransformers
+ from langchain.chains import RetrievalQA
+ import chainlit as cl

- DATA_PATH = 'data/'
  DB_FAISS_PATH = 'vectorstore/db_faiss'

+ custom_prompt_template = """Use the following pieces of information to answer the user's question.
+ If you don't know the answer, just say that you don't know, don't try to make up an answer.

- # Create vector database
- def create_vector_db():
-     loader = DirectoryLoader(DATA_PATH,
-                              glob='*.pdf',
-                              loader_cls=PyPDFLoader)
+ Context: {context}
+ Question: {question}

-     documents = loader.load()
-     text_splitter = RecursiveCharacterTextSplitter(chunk_size=500,
-                                                    chunk_overlap=50)
-     texts = text_splitter.split_documents(documents)
+ Only return the helpful answer below and nothing else.
+ Helpful answer:
+ """

-     embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
+
+ def set_custom_prompt():
+     """
+     Prompt template for QA retrieval for each vectorstore
+     """
+     prompt = PromptTemplate(template=custom_prompt_template,
+                             input_variables=['context', 'question'])
+     return prompt
+
+
+ # Retrieval QA Chain
+ def retrieval_qa_chain(llm, prompt, db):
+     qa_chain = RetrievalQA.from_chain_type(llm=llm,
+                                            chain_type='stuff',
+                                            retriever=db.as_retriever(search_kwargs={'k': 2}),
+                                            return_source_documents=True,
+                                            chain_type_kwargs={'prompt': prompt}
+                                            )
+     return qa_chain
+
+
+ # Loading the model
+ def load_llm():
+     # Load the locally downloaded model here
+     llm = CTransformers(
+         model="llama-2-7b-chat.ggmlv3.q8_0.bin",
+         model_type="llama",
+         max_new_tokens=512,
+         temperature=0.5
+     )
+     return llm
+
+
+ # QA Model Function
+ def qa_bot():
+     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
                                         model_kwargs={'device': 'cpu'})
+     db = FAISS.load_local(DB_FAISS_PATH, embeddings)
+     llm = load_llm()
+     qa_prompt = set_custom_prompt()
+     qa = retrieval_qa_chain(llm, qa_prompt, db)
+
+     return qa
+
+
+ # output function
+ def final_result(query):
+     qa_result = qa_bot()
+     response = qa_result({'query': query})
+     print(response)
+     return response
+
+
+ # chain lit code
+ @cl.on_chat_start
+ async def start():
+     chain = qa_bot()
+     msg = cl.Message(content="Starting the bot...")
+     await msg.send()
+     msg.content = "Hi, Welcome to Ryder Bot. What is your query?"
+     await msg.update()
+     cl.user_session.set("chain", chain)
+

-     db = FAISS.from_documents(texts, embeddings)
-     db.save_local(DB_FAISS_PATH)
+ @cl.on_message
+ async def main(message):
+     chain = cl.user_session.get("chain")
+     cb = cl.AsyncLangchainCallbackHandler(
+         stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"]
+     )
+     cb.answer_reached = True
+     res = await chain.acall(message, callbacks=[cb])
+     answer = res["result"]
+     sources = res["source_documents"]

+     if not sources:
+         answer += "\nNo sources found"

- if __name__ == "__main__":
-     create_vector_db()
+     await cl.Message(content=answer).send()
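Outside the Chainlit UI, the refactored app.py can also be exercised directly through final_result. A minimal sketch, assuming the FAISS index already exists at vectorstore/db_faiss and llama-2-7b-chat.ggmlv3.q8_0.bin is present in the working directory (the query string is only a placeholder):

    from app import final_result

    response = final_result("What topics does the document cover?")  # placeholder query
    print(response["result"])             # generated answer
    print(response["source_documents"])   # retrieved chunks backing the answer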
 
ingest.py ADDED
@@ -0,0 +1,29 @@
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.vectorstores import FAISS
+ from langchain.document_loaders import PyPDFLoader, DirectoryLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+ DATA_PATH = 'data/'
+ DB_FAISS_PATH = 'vectorstore/db_faiss'
+
+
+ # Create vector database
+ def create_vector_db():
+     loader = DirectoryLoader(DATA_PATH,
+                              glob='*.pdf',
+                              loader_cls=PyPDFLoader)
+
+     documents = loader.load()
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=500,
+                                                    chunk_overlap=50)
+     texts = text_splitter.split_documents(documents)
+
+     embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
+                                        model_kwargs={'device': 'cpu'})
+
+     db = FAISS.from_documents(texts, embeddings)
+     db.save_local(DB_FAISS_PATH)
+
+
+ if __name__ == "__main__":
+     create_vector_db()
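Taken together, the expected workflow is a sketch along these lines, assuming the source PDFs have been placed under data/ and the Chainlit CLI is installed:

    # one-off: load and split the PDFs in data/, then save the FAISS index to vectorstore/db_faiss
    python ingest.py

    # serve the retrieval QA bot defined in app.py
    chainlit run app.py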