File size: 4,079 Bytes
5717062
a41e511
 
744b8c2
a41e511
 
 
 
 
744b8c2
7323361
5717062
744b8c2
 
 
 
 
a41e511
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
744b8c2
 
 
 
 
 
 
 
 
a41e511
744b8c2
a41e511
744b8c2
a41e511
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36b1b62
a41e511
 
e6d8c7b
a41e511
 
 
 
 
d1b7cba
a41e511
 
 
 
 
36b1b62
a41e511
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import os
import gradio as gr
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch

# Load the DialoGPT causal language model and its tokenizer once at import
# time and wrap them in a text-generation pipeline that chat_with_bot uses
# to produce answers.
model_name = "microsoft/DialoGPT-medium"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# device=0 selects the first CUDA device when one is available; -1 is the
# pipeline's value for running on CPU.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)

# Embedding model used to vectorise document chunks when the Chroma store is
# built in process_documents.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Module-level state shared by the handlers below:
#   vectorstore  - None until process_documents has built a Chroma store;
#                  chat_with_bot refuses to answer while it is None.
#   chat_history - list of (message, response) tuples appended to by
#                  chat_with_bot on every answered message.
vectorstore = None
chat_history = []

# Build the knowledge base from uploaded PDFs and website URLs
def process_documents(pdf_files, website_urls):
    """Load PDFs and web pages, split them into chunks and build the vector store.

    On success the module-level ``vectorstore`` is replaced with a new Chroma
    store built from the loaded chunks. When there is nothing to index the
    existing ``vectorstore`` is left untouched.

    Args:
        pdf_files: Uploaded PDF files, or ``None``/empty. Each item may be a
            path string or an object exposing the path as ``.name``.
        website_urls: URLs separated by newlines, or ``None``/empty. Blank
            lines and surrounding whitespace are ignored.

    Returns:
        A human-readable status message for the UI.
    """
    global vectorstore
    documents = []

    # Process PDFs
    for pdf in pdf_files or []:
        # NOTE(review): depending on the Gradio version/`type` setting the
        # upload arrives as a plain path or as a tempfile-like wrapper with
        # `.name`; accept both.
        pdf_path = pdf if isinstance(pdf, str) else pdf.name
        documents.extend(PyPDFLoader(pdf_path).load())

    # Process websites; splitlines() also copes with "\r\n" line endings, and
    # blank entries are dropped so the loader is never given an empty URL.
    urls = [
        line.strip()
        for line in (website_urls or "").splitlines()
        if line.strip()
    ]
    if urls:
        documents.extend(WebBaseLoader(urls).load())

    # Nothing supplied: report it instead of building an empty store.
    if not documents:
        return "No documents to process. Please upload PDFs or provide website URLs."

    # Split documents
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(documents)

    # Documents without extractable text yield no chunks to index.
    if not splits:
        return "No text could be extracted from the provided documents."

    # Create vector store
    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
    return "Documents processed successfully!"

# RAG chatbot function
def chat_with_bot(message, history):
    """Answer a user message with retrieval-augmented generation.

    Retrieves the 3 most similar chunks from the module-level ``vectorstore``,
    fills them into the support-assistant prompt and lets the text-generation
    pipeline produce the answer. Each answered exchange is appended to the
    module-level ``chat_history`` as a ``(message, response)`` tuple.

    Args:
        message: The user's question.
        history: Conversation history supplied by the chat UI; not used here.

    Returns:
        The generated answer, or a hint to load documents first when no
        vector store has been built yet.
    """
    global vectorstore, chat_history

    if vectorstore is None:
        return "Please upload PDFs or provide website URLs first."

    # Set up retriever
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

    # Define prompt template
    prompt_template = """
    You are a helpful customer support assistant. Use the provided context to answer the user's question accurately and politely. If the context doesn't contain relevant information, provide a general helpful response.

    Context: {context}

    Question: {question}

    Answer:
    """
    prompt = PromptTemplate.from_template(prompt_template)

    # Create RAG chain
    def format_docs(docs):
        """Join the retrieved documents' text into one context string."""
        return "\n\n".join(doc.page_content for doc in docs)

    def generate_response(prompt_value):
        """Run the generation pipeline on the rendered prompt and return only the new text."""
        # The prompt step emits a PromptValue, not a str; the pipeline needs
        # plain text, so render it first (plain strings pass through).
        if hasattr(prompt_value, "to_string"):
            input_text = prompt_value.to_string()
        else:
            input_text = str(prompt_value)

        # max_new_tokens bounds only the generated part; a total max_length
        # would be consumed by the long retrieved context. return_full_text
        # makes the pipeline drop the prompt instead of stripping it by
        # string replacement.
        # NOTE(review): prompt plus new tokens must still fit the model's
        # context window - confirm for the configured chunk size and k.
        outputs = generator(
            input_text,
            max_new_tokens=128,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
            return_full_text=False,
        )
        return outputs[0]["generated_text"].strip()

    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | generate_response
        | StrOutputParser()
    )

    # Get response
    response = rag_chain.invoke(message)
    chat_history.append((message, response))
    return response

# Gradio interface: a document-loading section (PDF upload + URL box +
# "Process Documents" button with a status box) followed by the chat panel.
# Components are declared in the order they should appear on the page.
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# Customer Support Chatbot")
    gr.Markdown("Upload PDFs and/or provide website URLs to initialize the knowledge base, then chat with the bot.")

    # Inputs for the knowledge base, placed side by side.
    with gr.Row():
        pdf_input = gr.File(label="Upload PDFs", file_types=[".pdf"], file_count="multiple")
        website_input = gr.Textbox(label="Website URLs (one per line)", placeholder="https://example.com")

    process_button = gr.Button("Process Documents")
    process_output = gr.Textbox(label="Processing Status")

    # Chat panel; every user message is routed to chat_with_bot.
    chatbot = gr.ChatInterface(
        fn=chat_with_bot,
        title="Chat with Support Bot",
        description="Ask your customer support questions here."
    )

    # Clicking the button runs process_documents on the two inputs and shows
    # its returned status message in the status box.
    process_button.click(
        fn=process_documents,
        inputs=[pdf_input, website_input],
        outputs=process_output
    )

# Launch the app (runs at import time; there is no __main__ guard)
demo.launch()