Spaces:

Krish30
/

whatsapp_chatbot_pgvector

Sleeping

App Files Files Community

Krish30 committed on Oct 19, 2024

Commit

64d661c

verified ·

1 Parent(s): 76b72b5

Upload 2 files

Browse files

Files changed (2) hide show

main.py +97 -0
vectorize_data_pgvector.py +56 -0

main.py ADDED Viewed

	@@ -0,0 +1,97 @@

+import os
+import json
+import streamlit as st
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_postgres.vectorstores import PGVector
+from langchain_groq import ChatGroq
+from langchain.memory import ConversationBufferMemory
+from langchain.chains import ConversationalRetrievalChain
+# Load the embeddings function
+from vectorize_data_pgvector import embeddings  # Assuming embeddings are imported from your previous script
+# Load configuration from config.json located next to this script.
+# The file is opened in a context manager so the handle is closed
+# deterministically instead of being left to the garbage collector.
+working_dir = os.path.dirname(os.path.abspath(__file__))
+with open(f"{working_dir}/config.json", encoding="utf-8") as config_file:
+    config_data = json.load(config_file)
+GROQ_API_KEY = config_data["GROQ_API_KEY"]
+# ChatGroq is constructed below without an explicit key, so the key is
+# exported through the process environment.
+os.environ["GROQ_API_KEY"] = GROQ_API_KEY
+# Define the connection string and collection name for PostgreSQL.
+# NOTE(review): the database password is hard-coded here; consider moving it
+# into config.json or an environment variable alongside GROQ_API_KEY.
+connection_string = "postgresql+psycopg2://postgres:krishna23@localhost:5432/vector_db"
+collection_name = "whatsapp_chatbot"
+# Set up the PGVector-based vectorstore
+def setup_vectorstore():
+    """Return a PGVector store bound to the configured PostgreSQL collection.
+
+    The store uses the module-level ``embeddings`` object imported from
+    ``vectorize_data_pgvector``. That is the same instance the ingestion
+    script passes to ``PGVector.from_documents``, so queries are embedded
+    with the model that produced the stored vectors, and the model is not
+    instantiated a second time here.
+
+    Returns:
+        A ``PGVector`` instance for ``collection_name`` reachable through
+        ``connection_string``.
+    """
+    return PGVector(
+        embeddings=embeddings,
+        connection=connection_string,
+        collection_name=collection_name,
+    )
+# Set up the conversational chain
+def chat_chain(vectorstore):
+    """Build the retrieval-augmented conversation chain for ``vectorstore``.
+
+    A Groq-hosted chat model (temperature 0) is combined with a retriever
+    derived from ``vectorstore`` and a buffer memory that keeps the running
+    conversation under the ``chat_history`` key.
+
+    Args:
+        vectorstore: Object exposing ``as_retriever()``; in this script it is
+            the PGVector store returned by ``setup_vectorstore``.
+
+    Returns:
+        The ``ConversationalRetrievalChain`` produced by ``from_llm``. It is
+        configured to return source documents, and the memory is told to
+        record the ``"answer"`` output.
+    """
+    groq_llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0)
+    conversation_memory = ConversationBufferMemory(
+        llm=groq_llm,
+        output_key="answer",
+        memory_key="chat_history",
+        return_messages=True,
+    )
+    return ConversationalRetrievalChain.from_llm(
+        llm=groq_llm,
+        retriever=vectorstore.as_retriever(),
+        chain_type="stuff",
+        memory=conversation_memory,
+        verbose=True,
+        return_source_documents=True,
+    )
+# Streamlit page chrome: browser-tab metadata first, then the visible heading.
+st.set_page_config(page_title="WhatsApp FAQ AI", page_icon="🤖AI", layout="centered")
+st.title("🤖AI WhatsApp FAQ")
+# Populate per-session state only when a key is missing, so the transcript,
+# the vector store and the chain built from it are created once per session.
+if "chat_history" not in st.session_state:
+    st.session_state["chat_history"] = []
+if "vectorstore" not in st.session_state:
+    st.session_state["vectorstore"] = setup_vectorstore()
+if "conversational_chain" not in st.session_state:
+    st.session_state["conversational_chain"] = chat_chain(
+        st.session_state["vectorstore"]
+    )
+# Replay the stored transcript, one chat bubble per recorded message.
+for past_message in st.session_state["chat_history"]:
+    role, content = past_message["role"], past_message["content"]
+    with st.chat_message(role):
+        st.markdown(content)
+# User input: read one prompt from the chat box (falsy when nothing was sent).
+user_input = st.chat_input("Ask AI....")
+if user_input:
+    # Record and echo the user's message before querying the chain.
+    st.session_state.chat_history.append({"role": "user", "content": user_input})
+    with st.chat_message("user"):
+        st.markdown(user_input)
+    with st.chat_message("assistant"):
+        # Use the Runnable ``invoke`` entry point; calling the chain object
+        # directly (``chain({...})``) is deprecated in current LangChain.
+        response = st.session_state.conversational_chain.invoke(
+            {"question": user_input}
+        )
+        assistant_response = response["answer"]
+        st.markdown(assistant_response)
+        # Persist the reply so it is replayed on the next script run.
+        st.session_state.chat_history.append(
+            {"role": "assistant", "content": assistant_response}
+        )

vectorize_data_pgvector.py ADDED Viewed

	@@ -0,0 +1,56 @@

+from langchain_community.document_loaders import UnstructuredFileLoader
+from langchain_community.document_loaders import DirectoryLoader
+from langchain_text_splitters import CharacterTextSplitter
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_postgres.vectorstores import PGVector
+# Embedding model, instantiated at import time with default settings.
+# It is used by vectorize_documents() below and is also imported by main.py
+# (`from vectorize_data_pgvector import embeddings`).
+embeddings = HuggingFaceEmbeddings()
+# Define a function to perform vectorization
+def vectorize_documents(
+    data_path="Data",
+    connection_string="postgresql+psycopg2://postgres:krishna23@localhost:5432/vector_db",
+    collection_name="whatsapp_chatbot",
+):
+    """Load PDFs, split them into chunks and store their vectors in PGVector.
+
+    Args:
+        data_path: Directory scanned for ``*.pdf`` files (top level only).
+        connection_string: SQLAlchemy URL of the PostgreSQL database.
+            NOTE(review): the default embeds a password; prefer passing the
+            URL from configuration or the environment.
+        collection_name: PGVector collection that receives the chunks.
+
+    Returns:
+        None. Progress and failures are reported with ``print``. Any
+        exception raised while loading, splitting or storing is caught and
+        printed rather than propagated, since this runs as a one-shot script.
+    """
+    try:
+        # Load every PDF found directly under the data directory.
+        loader = DirectoryLoader(
+            path=data_path,
+            glob="./*.pdf",
+            loader_cls=UnstructuredFileLoader,
+        )
+        documents = loader.load()
+        if not documents:
+            print("No documents found in the specified directory.")
+            return
+        # Split the documents into overlapping character-based chunks.
+        text_splitter = CharacterTextSplitter(
+            chunk_size=2000,
+            chunk_overlap=500,
+        )
+        text_chunks = text_splitter.split_documents(documents)
+        # Embed the chunks with the module-level model and store them in
+        # PostgreSQL; the returned store object is not needed afterwards.
+        PGVector.from_documents(
+            embedding=embeddings,
+            documents=text_chunks,
+            collection_name=collection_name,
+            connection=connection_string,
+        )
+        print("Documents vectorized successfully and stored in PGVector.")
+    except Exception as e:
+        print(f"An error occurred: {e}")
+# Run the ingestion only when this file is executed as a script. Importing
+# the module (as main.py does to obtain `embeddings`) must not trigger
+# vectorization.
+if __name__ == "__main__":
+    vectorize_documents()