"""RAG chatbot for the Halo Designs website.

Scrapes the site, indexes it in FAISS, and serves a Gradio chat UI backed by
a LangChain agent that routes between a general-purpose LLM tool and a
document-retrieval (RAG) tool.
"""

import os

import gradio as gr
from langchain.agents import AgentType, initialize_agent
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.tools import Tool
from langchain.vectorstores import FAISS

# --- Configuration -----------------------------------------------------------
# Fail fast with a clear message. The original `os.environ[...] =
# os.getenv(...)` raises a confusing TypeError when the variable is unset,
# because os.environ values must be str, never None.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise EnvironmentError("OPENAI_API_KEY environment variable is not set.")
os.environ["OPENAI_API_KEY"] = api_key

# --- Ingest website content --------------------------------------------------
url = "https://www.halodesigns.in/"
loader = WebBaseLoader(url)
documents = loader.load()

# Split into overlapping chunks so each embedded passage stays small and
# adjacent chunks share context at the seams.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = text_splitter.split_documents(documents)

# Build the FAISS index and a top-3 similarity retriever.
# BUGFIX: the original passed `search_k=3`, which as_retriever silently
# ignores; the supported way to cap results is `search_kwargs={"k": 3}`.
embeddings = OpenAIEmbeddings()
vector_store = FAISS.from_documents(docs, embeddings)
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

# --- LLM and RAG chain -------------------------------------------------------
llm = ChatOpenAI(model="gpt-4o-mini")

retrieval_qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
)


def document_retrieval(query: str) -> str:
    """Answer *query* from the indexed website content via the RAG chain."""
    return retrieval_qa_chain({"query": query})["result"]


def get_document_summary() -> str:
    """Return a detailed summary of the indexed content.

    The summary is embedded in the retrieval tool's description so the agent
    knows what the RAG tool covers.
    """
    summary_response = retrieval_qa_chain(
        {"query": "Summarize the document in detail. Do Not Miss Any points."}
    )
    return summary_response["result"]


def get_document_questions() -> str:
    """Return the questions answerable from the indexed content.

    Like the summary, this is embedded in the retrieval tool's description
    to guide the agent's tool routing.
    """
    # BUGFIX: the original string literal contained a raw newline (a
    # SyntaxError); the prompt is now a single joined string.
    questions_response = retrieval_qa_chain(
        {
            "query": (
                "List all the possible questions based on the given context. "
                "Do Not Miss Any questions."
            )
        }
    )
    return questions_response["result"]


# Precompute once at startup; both values are baked into the tool description.
document_summary = get_document_summary()
document_questions = get_document_questions()

# --- Tools -------------------------------------------------------------------
llm_tool = Tool(
    name="General Query LLM",
    func=lambda q: llm.predict(q),
    description=(
        "Uses LLM to answer general knowledge questions (e.g., greetings, "
        "sports, world events). Does NOT handle RAG-related queries."
    ),
)

document_retrieval_tool = Tool(
    name="Document Retrieval",
    func=document_retrieval,
    description=(
        f"This tool retrieves information that contains following information: \n"
        f"{document_summary}\n"
        f"Also the following questions: \n"
        f"{document_questions}"
    ),
)

# --- Agent -------------------------------------------------------------------
# NOTE(review): ZERO_SHOT_REACT_DESCRIPTION does not consume chat history, so
# this memory has no effect on prompts; CONVERSATIONAL_REACT_DESCRIPTION is
# the usual choice when memory should matter. Kept as-is to preserve
# behavior — confirm intent before switching.
memory = ConversationBufferMemory(memory_key="chat_history")

agent = initialize_agent(
    tools=[llm_tool, document_retrieval_tool],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    memory=memory,
)


def chatbot_response(user_input, history):
    """Gradio ChatInterface callback.

    Runs the agent on *user_input*; *history* is accepted per the ChatInterface
    contract but unused (the agent keeps its own memory).
    """
    try:
        return agent.run(user_input)
    except Exception as e:
        # Surface agent/tool failures in the chat window rather than
        # crashing the UI; the message text is preserved from the original.
        return f"Error: {e}"


# --- UI ----------------------------------------------------------------------
gr.ChatInterface(
    fn=chatbot_response,
    title="Halo Designs Chatbot",
    theme="soft",
).launch(debug=True)