File size: 3,511 Bytes
86d383a
 
 
 
 
 
 
 
 
 
 
 
 
 
536fc4e
86d383a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d317d73
 
86d383a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import gradio as gr
import os
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import WebBaseLoader
from langchain.tools import Tool
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, AgentType
from langchain.memory import ConversationBufferMemory

# Set OpenAI API key for the embeddings and chat models.
# Fail fast with a clear message: the original `os.environ[...] = os.getenv(...)`
# raises an opaque TypeError ("str expected, not NoneType") when the
# variable is unset, because environ values must be strings.
_openai_key = os.getenv("OPENAI_API_KEY")
if not _openai_key:
    raise EnvironmentError("OPENAI_API_KEY environment variable is not set.")
os.environ["OPENAI_API_KEY"] = _openai_key

# Load the website content that grounds the chatbot's answers.
url = "https://www.halodesigns.in/"
loader = WebBaseLoader(url)
documents = loader.load()

# Split the page text into overlapping chunks so each embedding
# captures one focused piece of context.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = text_splitter.split_documents(documents)

# Embed the chunks and index them in an in-memory FAISS vector store.
embeddings = OpenAIEmbeddings()
vector_store = FAISS.from_documents(docs, embeddings)
# BUG FIX: as_retriever() has no `search_k` parameter — the number of
# chunks returned per query is configured via search_kwargs={"k": ...}.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# Chat model shared by the direct-LLM tool, the QA chain, and the agent.
llm = ChatOpenAI(model="gpt-4o-mini")

# Retrieval entry point wrapped by the "Document Retrieval" tool below.
def document_retrieval(query: str):
    """Answer *query* from the website index via the RetrievalQA chain."""
    response = retrieval_qa_chain({"query": query})
    return response["result"]

# QA chain: fetch relevant chunks from the retriever, then have the LLM
# answer from them; the source chunks are returned alongside the answer.
retrieval_qa_chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=llm,
    return_source_documents=True,
)

def get_document_summary():
    """Ask the QA chain for a detailed summary of the indexed site content."""
    response = retrieval_qa_chain(
        {"query": "Summarize the document in detail. Do Not Miss Any points."}
    )
    return response["result"]


# Computed once at startup; embedded in the retrieval tool's description.
document_summary = get_document_summary()

def get_document_questions():
    """Ask the QA chain to enumerate questions the indexed content can answer."""
    response = retrieval_qa_chain(
        {"query": "List all the possible questions based on the given context. Do Not Miss Any questions."}
    )
    return response["result"]


# Computed once at startup; embedded in the retrieval tool's description.
document_questions = get_document_questions()


# Tool for general chit-chat and world-knowledge questions; llm.predict is
# passed directly instead of wrapping it in an equivalent one-arg lambda.
llm_tool = Tool(
    name="General Query LLM",
    func=llm.predict,
    description="Uses LLM to answer general knowledge questions (e.g., greetings, sports, world events). Does NOT handle RAG-related queries.",
)

# Tool that answers from the indexed website; the startup-generated summary
# and question list advertise to the agent what this tool can handle.
document_retrieval_tool = Tool(
    name="Document Retrieval",
    func=document_retrieval,
    description=(
        f"This tool retrieves information that contains following information: \n{document_summary}\nAlso the following questions: \n{document_questions}"
    ),
)

# Conversation memory for the agent.
# NOTE(review): ZERO_SHOT_REACT_DESCRIPTION's default prompt has no
# chat_history slot, so this memory may never reach the prompt — confirm,
# or consider a conversational agent type if history matters.
memory = ConversationBufferMemory(memory_key="chat_history")

# ReAct-style agent that chooses between the two tools for each query.
agent = initialize_agent(
    tools=[llm_tool, document_retrieval_tool],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    memory=memory,
    verbose=True,
)

# Gradio chat callback.
def chatbot_response(user_input, history):
    """Route the user's message through the agent and return its reply.

    ``history`` is supplied by gr.ChatInterface but is unused here — the
    agent keeps its own ConversationBufferMemory. Any failure is surfaced
    to the UI as an error string rather than crashing the interface.
    """
    try:
        return agent.run(user_input)
    except Exception as e:
        return f"Error: {e}"

# Build and launch the chat UI; launch() blocks until the server stops.
chat_ui = gr.ChatInterface(
    fn=chatbot_response,
    title="Halo Designs Chatbot",
    theme="soft",
)
chat_ui.launch(debug=True)