Spaces:

Kimty
/

Chatbot_LLMs

Runtime error

App Files Files Community

Kimty committed on Nov 15, 2023

Commit

e0e6e48

1 Parent(s): f95e787

Upload file

Browse files

Files changed (4) hide show

app.py +47 -0
config.py +24 -0
edubot.py +60 -0
vector_db.py +34 -0

app.py ADDED Viewed

	@@ -0,0 +1,47 @@

+from edubot import EduBotCreator
+from config import *
+import streamlit as st
+from streamlit_chat import message
+@st.cache_resource(show_spinner=True)
+def create_edubot():
+    edubotcreator = EduBotCreator()
+    edubot = edubotcreator.create_edubot()
+    return edubot
+# Module-level bot instance; infer_edubot() below calls it with each prompt.
+edubot = create_edubot()
+def infer_edubot(prompt):
+    model_out = edubot(prompt)
+    answer = model_out['result']
+    return answer
+def display_conversation(history):
+    for i in range(len(history["assistant"])):
+        message(history["user"][i], is_user=True, key=str(i) + "_user")
+        message(history["assistant"][i],key=str(i))
+def main():
+    st.title("Edubot: Your Smart Education Sidekick 📚🤖")
+    st.subheader("A bot created using Langchain 🦜 to run on cpu making your learning process easier")
+    user_input = st.text_input("Enter your query")
+    if "assistant" not in st.session_state:
+        st.session_state["assistant"] = ["I am ready to help you"]
+    if "user" not in st.session_state:
+        st.session_state["user"] = ["Hey there!"]
+    if st.button("Answer"):
+        answer = infer_edubot({'query': user_input})
+        st.session_state["user"].append(user_input)
+        st.session_state["assistant"].append(answer)
+        if st.session_state["assistant"]:
+            display_conversation(st.session_state)
+# Entry point: build the page when this file is executed as the main script.
+if __name__ == "__main__":
+    main()

config.py ADDED Viewed

	@@ -0,0 +1,24 @@

+DATA_DIR_PATH = "data/"
+VECTOR_DB_PATH = "faiss/education"
+CHUNK_SIZE = 500
+CHUNK_OVERLAP = 200
+EMBEDDER = "thenlper/gte-large"
+DEVICE = "cpu"
+PROMPT_TEMPLATE = '''
+With the information provided try to answer the question.
+If you cant answer the question based on the information either say you cant find an answer or unable to find an answer.
+So try to understand in depth about the context and answer only based on the information provided. Dont generate irrelevant answers
+Context: {context}
+Question: {question}
+Do provide only helpful answers
+Helpful answer:
+'''
+INP_VARS = ['context', 'question']
+CHAIN_TYPE = "stuff"
+SEARCH_KWARGS = {'k': 2}
+MODEL_CKPT = "res/llama-2-7b-chat.ggmlv3.q4_1.bin"
+MODEL_TYPE = "llama"
+MAX_NEW_TOKENS = 512
+TEMPERATURE = 0.9

edubot.py ADDED Viewed

	@@ -0,0 +1,60 @@

+from langchain import PromptTemplate
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.llms import CTransformers
+from langchain.chains import RetrievalQA
+from config import *
+class EduBotCreator:
+    def __init__(self):
+        self.prompt_temp = PROMPT_TEMPLATE
+        self.input_variables = INP_VARS
+        self.chain_type = CHAIN_TYPE
+        self.search_kwargs = SEARCH_KWARGS
+        self.embedder = EMBEDDER
+        self.vector_db_path = VECTOR_DB_PATH
+        self.model_ckpt = MODEL_CKPT
+        self.model_type = MODEL_TYPE
+        self.max_new_tokens = MAX_NEW_TOKENS
+        self.temperature = TEMPERATURE
+    def create_custom_prompt(self):
+        custom_prompt_temp = PromptTemplate(template=self.prompt_temp,
+                            input_variables=self.input_variables)
+        return custom_prompt_temp
+    def load_llm(self):
+        llm = CTransformers(
+                model = self.model_ckpt,
+                model_type=self.model_type,
+                max_new_tokens = self.max_new_tokens,
+                temperature = self.temperature
+            )
+        return llm
+    def load_vectordb(self):
+        hfembeddings = HuggingFaceEmbeddings(
+                            model_name=self.embedder,
+                            model_kwargs={'device': 'cpu'}
+                        )
+        vector_db = FAISS.load_local(self.vector_db_path, hfembeddings)
+        return vector_db
+    def create_bot(self, custom_prompt, vectordb, llm):
+        retrieval_qa_chain = RetrievalQA.from_chain_type(
+                                llm=llm,
+                                chain_type=self.chain_type,
+                                retriever=vectordb.as_retriever(search_kwargs=self.search_kwargs),
+                                return_source_documents=True,
+                                chain_type_kwargs={"prompt": custom_prompt}
+                            )
+        return retrieval_qa_chain
+    def create_edubot(self):
+        self.custom_prompt = self.create_custom_prompt()
+        self.vector_db = self.load_vectordb()
+        self.llm = self.load_llm()
+        self.bot = self.create_bot(self.custom_prompt, self.vector_db, self.llm)
+        return self.bot

vector_db.py ADDED Viewed

	@@ -0,0 +1,34 @@

+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.document_loaders import PyPDFLoader, DirectoryLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from config import *
+def faiss_vector_db():
+    dir_loader = DirectoryLoader(
+                            DATA_DIR_PATH,
+                            glob='*.pdf',
+                            loader_cls=PyPDFLoader
+                        )
+    docs = dir_loader.load()
+    print("PDFs Loaded")
+    txt_splitter = RecursiveCharacterTextSplitter(
+                            chunk_size=CHUNK_SIZE,
+                            chunk_overlap=CHUNK_OVERLAP
+                        )
+    inp_txt = txt_splitter.split_documents(docs)
+    print("Data Chunks Created")
+    hfembeddings = HuggingFaceEmbeddings(
+                            model_name=EMBEDDER,
+                            model_kwargs={'device': 'cpu'}
+                        )
+    db = FAISS.from_documents(inp_txt, hfembeddings)
+    db.save_local(VECTOR_DB_PATH)
+    print("Vector Store Creation Completed")
+# Build the vector store only when this file is executed as a script.
+if __name__ == "__main__":
+    faiss_vector_db()