Krish30 committed on
Commit
2d12c4f
·
verified ·
1 Parent(s): 1991671

Upload 4 files

Browse files
Files changed (4) hide show
  1. config.json +1 -0
  2. main.py +82 -0
  3. requirements.txt +9 -0
  4. vectorize_documents.py +45 -0
config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"GROQ_API_KEY": "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"}
main.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+
4
+ import streamlit as st
5
+ from langchain_huggingface import HuggingFaceEmbeddings
6
+ from langchain_chroma import Chroma
7
+ from langchain_groq import ChatGroq
8
+ from langchain.memory import ConversationBufferMemory
9
+ from langchain.chains import ConversationalRetrievalChain
10
+
11
+ from vectorize_documents import embeddings
12
+
13
+
14
# Directory that contains this script; config.json and the vector DB
# directory are resolved relative to it rather than to the current
# working directory.
working_dir = os.path.dirname(os.path.abspath(__file__))

# Read the config inside a context manager so the file handle is closed
# deterministically instead of being left for the garbage collector.
# NOTE(review): config.json stores the Groq API key in plain text; it should
# be kept out of version control.
with open(os.path.join(working_dir, "config.json"), encoding="utf-8") as config_file:
    config_data = json.load(config_file)

GROQ_API_KEY = config_data["GROQ_API_KEY"]
# Exported to the environment; presumably this is where ChatGroq (constructed
# below without an explicit key) looks it up.
os.environ["GROQ_API_KEY"] = GROQ_API_KEY
18
+
19
+
20
def setup_vectorstore():
    """Open the Chroma vector store kept in ``vector_db_dir``.

    The directory is looked up under ``working_dir`` (the folder holding
    this script), and the store is wired to a ``HuggingFaceEmbeddings``
    model built with its default settings.

    Returns:
        The ``Chroma`` instance bound to that directory.
    """
    return Chroma(
        persist_directory=f"{working_dir}/vector_db_dir",
        embedding_function=HuggingFaceEmbeddings(),
    )
26
+
27
def chat_chain(vectorstore, model="llama-3.1-70b-versatile", temperature=0):
    """Build a conversational retrieval chain on top of ``vectorstore``.

    Args:
        vectorstore: Vector store exposing ``as_retriever()``; its retriever
            supplies the documents handed to the LLM.
        model: Groq model identifier passed to ``ChatGroq``. The default is
            the value that used to be hard-coded here.
            NOTE(review): Groq appears to have retired this model id --
            confirm against Groq's deprecations page and pass a supported
            one if so.
        temperature: Sampling temperature passed to ``ChatGroq``.

    Returns:
        A ``ConversationalRetrievalChain`` with buffer memory attached. It is
        configured to return source documents, and callers read the reply
        from the ``"answer"`` key of its result.
    """
    llm = ChatGroq(
        model=model,
        temperature=temperature,
    )
    retriever = vectorstore.as_retriever()
    # output_key tells the memory which of the chain's outputs to record;
    # the chain also returns source documents, so the answer key is named
    # explicitly.
    memory = ConversationBufferMemory(
        llm=llm,
        output_key="answer",
        memory_key="chat_history",
        return_messages=True,
    )
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        memory=memory,
        verbose=True,
        return_source_documents=True,
    )
    return chain
48
+
49
# ---- Streamlit chat page -------------------------------------------------
st.set_page_config(
    page_title="WhatsApp FAQ AI",
    page_icon="🤖AI",
    layout="centered",
)

st.title("🤖AI WhatsApp FAQ")

# Everything stored in st.session_state is created only when its key is
# missing, so the transcript, vector store and chain are built once and then
# reused.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

if "vectorstore" not in st.session_state:
    st.session_state.vectorstore = setup_vectorstore()

if "conversational_chain" not in st.session_state:
    st.session_state.conversational_chain = chat_chain(st.session_state.vectorstore)

# Replay the transcript accumulated so far.
for message in st.session_state.chat_history:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

user_input = st.chat_input("Ask AI....")

if user_input:
    st.session_state.chat_history.append({"role": "user", "content": user_input})

    with st.chat_message("user"):
        st.markdown(user_input)

    with st.chat_message("assistant"):
        # Use invoke() rather than calling the chain object directly:
        # Chain.__call__ is deprecated in LangChain and emits a warning.
        response = st.session_state.conversational_chain.invoke({"question": user_input})
        assistant_response = response["answer"]
        st.markdown(assistant_response)
        st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
82
+
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ streamlit==1.38.0
2
+ langchain-community==0.2.16
3
+ langchain-text-splitters==0.2.4
4
+ langchain-chroma==0.1.3
5
+ langchain-huggingface==0.0.3
6
+ langchain-groq==0.1.9
7
+ unstructured==0.15.0
8
+ unstructured[pdf]==0.15.0
9
+ nltk==3.8.1
vectorize_documents.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.document_loaders import UnstructuredFileLoader
2
+ from langchain_community.document_loaders import DirectoryLoader
3
+ from langchain_text_splitters import CharacterTextSplitter
4
+ from langchain_huggingface import HuggingFaceEmbeddings
5
+ from langchain_chroma import Chroma
6
+
7
+
8
+ # Define a function to perform vectorization
9
def vectorize_documents(
    data_dir="Data",
    persist_directory="vector_db_dir",
    chunk_size=2000,
    chunk_overlap=500,
):
    """Load PDFs, split them into chunks and store them in a Chroma DB.

    Args:
        data_dir: Directory searched for ``*.pdf`` files. A relative path is
            resolved against the current working directory.
        persist_directory: Directory the Chroma database is written to. A
            relative path is resolved against the current working directory.
        chunk_size: ``chunk_size`` passed to ``CharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` passed to ``CharacterTextSplitter``.

    Returns:
        None. Prints a confirmation line once the store has been written.
    """
    # Named differently from the module-level ``embeddings`` so the exported
    # instance is not shadowed inside this function.
    embedding_model = HuggingFaceEmbeddings()

    loader = DirectoryLoader(
        path=data_dir,
        glob="./*.pdf",
        loader_cls=UnstructuredFileLoader,
    )
    documents = loader.load()

    # Split the loaded documents into overlapping chunks.
    text_splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    text_chunks = text_splitter.split_documents(documents)

    # Build the store for its side effect of writing to persist_directory;
    # the returned handle is not needed here.
    Chroma.from_documents(
        documents=text_chunks,
        embedding=embedding_model,
        persist_directory=persist_directory,
    )

    print("Documents Vectorized and saved in VectorDB")
37
+
38
+
39
# Module-level embedding model, made available to importers of this module.
embeddings = HuggingFaceEmbeddings()


# Build the vector store only when this file is executed as a script;
# a plain import must not trigger vectorization.
if __name__ == "__main__":
    vectorize_documents()