Krish30 committed on
Commit
64d661c
·
verified ·
1 Parent(s): 76b72b5

Upload 2 files

Browse files
Files changed (2) hide show
  1. main.py +97 -0
  2. vectorize_data_pgvector.py +56 -0
main.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import streamlit as st
4
+ from langchain_huggingface import HuggingFaceEmbeddings
5
+ from langchain_postgres.vectorstores import PGVector
6
+ from langchain_groq import ChatGroq
7
+ from langchain.memory import ConversationBufferMemory
8
+ from langchain.chains import ConversationalRetrievalChain
9
+
10
+ # Load the embeddings function
11
+ from vectorize_data_pgvector import embeddings # Assuming embeddings are imported from your previous script
12
+
13
+ # Load configuration
14
# Resolve config.json relative to this file so the app works from any cwd.
working_dir = os.path.dirname(os.path.abspath(__file__))
# Open with a context manager so the file handle is closed deterministically
# (the bare json.load(open(...)) form leaves it to the garbage collector).
with open(os.path.join(working_dir, "config.json"), encoding="utf-8") as config_file:
    config_data = json.load(config_file)
GROQ_API_KEY = config_data["GROQ_API_KEY"]
# Also export the key through the environment (presumably where the Groq
# client looks it up -- confirm against langchain_groq).
os.environ["GROQ_API_KEY"] = GROQ_API_KEY
18
+
19
+ # Define the connection string and collection name for PostgreSQL
20
# Allow the database URL to be overridden through the environment so that
# credentials do not have to be edited in source; the original hard-coded
# value remains the fallback so existing local setups keep working.
# NOTE(review): the fallback still embeds a password -- move it to config.json
# or the environment. Also confirm the driver: langchain_postgres documents
# "postgresql+psycopg://" (psycopg3) rather than psycopg2.
connection_string = os.environ.get(
    "PGVECTOR_CONNECTION_STRING",
    "postgresql+psycopg2://postgres:krishna23@localhost:5432/vector_db",
)
collection_name = "whatsapp_chatbot"
22
+
23
+ # Set up the PGVector-based vectorstore
24
def setup_vectorstore():
    """Build the PGVector store the chatbot retrieves from.

    Uses the module-level ``connection_string`` and ``collection_name`` and
    the ``embeddings`` object imported from ``vectorize_data_pgvector``.

    Returns:
        The configured ``PGVector`` instance.
    """
    # Reuse the embeddings instance imported at module level rather than
    # constructing a second HuggingFaceEmbeddings(): this avoids loading the
    # model twice and keeps query embedding on the same object that
    # vectorize_documents() uses for ingestion.
    return PGVector(
        embeddings=embeddings,
        connection=connection_string,
        collection_name=collection_name,
    )
33
+
34
+ # Set up the conversational chain
35
def chat_chain(vectorstore):
    """Create the conversational retrieval chain for the chat UI.

    Args:
        vectorstore: Vector store exposing ``as_retriever()``; its retriever
            supplies the context documents for each question.

    Returns:
        A ``ConversationalRetrievalChain`` with buffer memory. Its result
        exposes the reply under the ``"answer"`` key and also returns the
        source documents.
    """
    # NOTE(review): confirm this model id is still served by Groq; hosted
    # model names are retired periodically.
    llm = ChatGroq(
        model="llama-3.1-70b-versatile",
        temperature=0,
    )
    retriever = vectorstore.as_retriever()
    # ConversationBufferMemory stores the raw transcript and does not use an
    # LLM, so the stray ``llm=`` argument has been dropped. ``output_key`` is
    # required because the chain returns both "answer" and source documents
    # and the memory must know which one to record.
    memory = ConversationBufferMemory(
        output_key="answer",
        memory_key="chat_history",
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        memory=memory,
        verbose=True,
        return_source_documents=True,
    )
56
+
57
+ # Streamlit UI setup
58
st.set_page_config(page_title="WhatsApp FAQ AI", page_icon="🤖AI", layout="centered")

st.title("🤖AI WhatsApp FAQ")

# Populate per-session state once; Streamlit re-executes this script on every
# interaction, so each entry is only created when it is missing.
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []

if "vectorstore" not in st.session_state:
    st.session_state["vectorstore"] = setup_vectorstore()

if "conversational_chain" not in st.session_state:
    st.session_state["conversational_chain"] = chat_chain(
        st.session_state["vectorstore"]
    )
75
+
76
+ # Display chat history
77
# Replay the stored transcript so earlier turns stay visible after a rerun.
for message in st.session_state.chat_history:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# User input
user_input = st.chat_input("Ask AI....")

if user_input:
    # Record and echo the user's message.
    st.session_state.chat_history.append({"role": "user", "content": user_input})

    with st.chat_message("user"):
        st.markdown(user_input)

    with st.chat_message("assistant"):
        # Use .invoke() rather than calling the chain object directly; the
        # direct-call form is deprecated in LangChain.
        response = st.session_state.conversational_chain.invoke(
            {"question": user_input}
        )
        assistant_response = response["answer"]
        st.markdown(assistant_response)

    # Record the assistant's reply so it is replayed on the next rerun.
    st.session_state.chat_history.append(
        {"role": "assistant", "content": assistant_response}
    )
vectorize_data_pgvector.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.document_loaders import UnstructuredFileLoader
2
+ from langchain_community.document_loaders import DirectoryLoader
3
+ from langchain_text_splitters import CharacterTextSplitter
4
+ from langchain_huggingface import HuggingFaceEmbeddings
5
+ from langchain_postgres.vectorstores import PGVector
6
+
7
+ # Loading the embedding model
8
+ embeddings = HuggingFaceEmbeddings()
9
+
10
+ # Define a function to perform vectorization
11
def vectorize_documents():
    """Load PDFs from ``Data``, split them into chunks and store them in PGVector.

    The chunks are embedded with the module-level ``embeddings`` object and
    written to the ``whatsapp_chatbot`` collection. The database URL can be
    overridden with the ``PGVECTOR_CONNECTION_STRING`` environment variable;
    otherwise the original hard-coded URL is used.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    import os  # local import: this module's top-level imports do not include os

    try:
        loader = DirectoryLoader(
            path="Data",
            glob="./*.pdf",
            loader_cls=UnstructuredFileLoader,
        )
        documents = loader.load()

        if not documents:
            print("No documents found in the specified directory.")
            return

        # Splitting the text and creating chunks of these documents.
        text_splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=500)
        text_chunks = text_splitter.split_documents(documents)

        # Storing in PostgreSQL - PGVector
        # NOTE(review): the fallback URL embeds a password; prefer supplying
        # the connection string through the environment.
        connection_string = os.environ.get(
            "PGVECTOR_CONNECTION_STRING",
            "postgresql+psycopg2://postgres:krishna23@localhost:5432/vector_db",
        )
        collection_name = "whatsapp_chatbot"

        # The returned store object is not needed here, so it is not bound.
        PGVector.from_documents(
            embedding=embeddings,
            documents=text_chunks,
            collection_name=collection_name,
            connection=connection_string,
        )

        print("Documents vectorized successfully and stored in PGVector.")

    except Exception as e:
        # Best-effort boundary for the script entry point: report the failure
        # instead of raising.
        print(f"An error occurred: {e}")
53
+
54
+ # Main guard to prevent execution on import
55
+ if __name__ == "__main__":
56
+ vectorize_documents()