hi-tech committed on
Commit
4cc5b4d
·
verified ·
1 Parent(s): 8ea6c84

Upload 8 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ vector_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ import os
3
+
4
# Load variables from a .env file into the process environment, then pick up
# the OpenAI credential from there (None when the variable is not set).
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
6
+
7
+ import gradio as gr
8
+ from langchain_community.document_loaders import PyPDFLoader
9
+ from langchain_text_splitters import CharacterTextSplitter
10
+ from langchain_openai import OpenAIEmbeddings
11
+ from langchain_community.vectorstores import Chroma
12
+ from langchain_openai import ChatOpenAI
13
+ from langchain.prompts import PromptTemplate
14
+ from langchain.chains.question_answering import load_qa_chain
15
+
16
# Splits loaded documents on newlines into chunks of up to 1000 characters
# (measured with len) that overlap by 200 characters.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)

# OpenAI clients shared by indexing (embeddings) and answering (chat model).
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
llm = ChatOpenAI(model="gpt-4-1106-preview", api_key=OPENAI_API_KEY)

# PDFs to index and the directory where the Chroma index is persisted.
uploaded_files = ["./pdf/knowledgebase.pdf"]
vectordb_path = "./vector_db"

# Handle to the Chroma store; stays None until something creates or opens it.
vectorstore = None
25
+
26
+
27
def create_vectordb():
    """Build (or extend) the persisted Chroma index from ``uploaded_files``.

    Every PDF listed in the module-level ``uploaded_files`` is loaded with
    ``PyPDFLoader`` and split with the shared ``text_splitter``. The first
    non-empty batch of chunks creates the store under ``vectordb_path``;
    each later batch is appended to that same store. The store is published
    through the module-level ``vectorstore``.

    Files that produce no chunks are skipped with a message instead of
    being passed on to Chroma as an empty batch.
    """
    global vectorstore

    for pdf_path in uploaded_files:
        pages = PyPDFLoader(pdf_path).load()
        chunks = text_splitter.split_documents(pages)

        # Nothing to embed for this file (e.g. no extractable text).
        if not chunks:
            print(f"No text extracted from {pdf_path}; skipping")
            continue

        if vectorstore is None:
            # First batch: create the store and persist it on disk.
            vectorstore = Chroma.from_documents(
                documents=chunks,
                embedding=embeddings,
                persist_directory=vectordb_path,
            )
        else:
            # Store already exists: append this file's chunks to it.
            vectorstore.add_documents(chunks)
43
+
44
+
45
def rag_bot(query, chat_history):
    """Answer ``query`` using documents retrieved from the Chroma index.

    The documents returned by a similarity search for ``query`` are stuffed
    into a single prompt together with the query and sent to ``llm``.

    Args:
        query: The user's message.
        chat_history: Conversation so far, as supplied by the caller.
            Currently unused: retrieval and the prompt only see ``query``.

    Returns:
        The ``output_text`` string produced by the QA chain.
    """
    global vectorstore

    print(f"Received query: {query}")

    # Newlines are spelled out explicitly so the prompt text does not depend
    # on how this function happens to be indented.
    template = (
        "Please answer to human's input based on context. If the input is not "
        "mentioned in context, output something like 'I don't know'.\n"
        "Context: {context}\n"
        "Human: {human_input}\n"
        "Your Response as Chatbot:"
    )
    prompt_s = PromptTemplate(
        input_variables=["human_input", "context"], template=template
    )

    # Open the persisted index once and reuse the handle on later queries
    # (also picks up a store already built by create_vectordb()).
    if vectorstore is None:
        vectorstore = Chroma(
            persist_directory=vectordb_path, embedding_function=embeddings
        )

    docs = vectorstore.similarity_search(query)

    stuff_chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt_s)

    # invoke() replaces the deprecated Chain.__call__; like the old call with
    # return_only_outputs=False, the result holds inputs and outputs.
    output = stuff_chain.invoke({"input_documents": docs, "human_input": query})

    final_answer = output["output_text"]
    print(f"Final Answer ---> {final_answer}")

    return final_answer
73
+
74
+
75
def chat(query, chat_history):
    """Chat callback for the UI: forward both arguments to ``rag_bot``.

    Returns whatever ``rag_bot`` returns for the given query and history.
    """
    return rag_bot(query, chat_history)
78
+
79
+
80
if __name__ == "__main__":
    # The index under vector_db/ is committed alongside this script, so it is
    # not rebuilt at startup. Call create_vectordb() here to regenerate it
    # from the PDFs listed in uploaded_files.

    # Chat window with custom avatars (user first, then bot).
    chatbot = gr.Chatbot(avatar_images=["user.jpg", "bot.png"], height=600)

    demo = gr.ChatInterface(
        fn=chat,
        title="RAG Chatbot Prototype",
        multimodal=False,
        chatbot=chatbot,
    )
    demo.launch(debug=True, share=True)
bot.png ADDED
user.jpg ADDED
vector_db/8a199d61-18fe-45a3-9a2c-fafc0783cc78/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f18abd8c514282db82706e52b0a33ed659cd534e925a6f149deb7af9ce34bd8e
3
+ size 6284000
vector_db/8a199d61-18fe-45a3-9a2c-fafc0783cc78/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:effaa959ce2b30070fdafc2fe82096fc46e4ee7561b75920dd3ce43d09679b21
3
+ size 100
vector_db/8a199d61-18fe-45a3-9a2c-fafc0783cc78/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e65d7c29560358d84b96d1093011ba25fc1ba008e41867ed1d65c5fc2bf7f34a
3
+ size 4000
vector_db/8a199d61-18fe-45a3-9a2c-fafc0783cc78/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
3
+ size 0
vector_db/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b649ee0abb2614aee92ad4a81b099ea91f995c54032ec392a4cf2c0e35493a0
3
+ size 3260416