"""Gradio customer-support chatbot using retrieval-augmented generation.

Uploaded PDFs and web pages are split into chunks, embedded into a Chroma
vector store, and the chunks most similar to each user question are placed
into a prompt for a DialoGPT text-generation pipeline.
"""

import os

import gradio as gr
import torch
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Initialize DialoGPT model and tokenizer
model_name = "microsoft/DialoGPT-medium"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,
)

# Initialize embeddings
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Token budgeting. The prompt and the generated answer share one context
# window, so the retrieved context is capped to leave room for both.
# NOTE(review): 1024 is the assumed fallback window for a GPT-2 style model;
# confirm against the model config if the model is swapped.
MODEL_WINDOW_TOKENS = getattr(model.config, "max_position_embeddings", 1024)
MAX_NEW_TOKENS = 128
# Reserved for the prompt template text plus the user's question.
PROMPT_RESERVE_TOKENS = 256
MAX_CONTEXT_TOKENS = max(
    MODEL_WINDOW_TOKENS - MAX_NEW_TOKENS - PROMPT_RESERVE_TOKENS, 1
)

RAG_PROMPT = PromptTemplate.from_template(
    """
You are a helpful customer support assistant. Use the provided context to answer the user's question accurately and politely.
If the context doesn't contain relevant information, provide a general helpful response.

Context: {context}

Question: {question}

Answer:
"""
)

# Global variables
vectorstore = None
# NOTE: module-level, so it is shared by every session of this process, and
# nothing in this file reads it back; it only accumulates (message, response).
chat_history = []


def process_documents(pdf_files, website_urls):
    """Build the global vector store from uploaded PDFs and website URLs.

    Args:
        pdf_files: Uploaded files from the ``gr.File`` input (or ``None``).
            Each item is either a path string or an object whose ``name``
            attribute is the path.
        website_urls: Newline-separated URLs from the textbox (or ``None``).

    Returns:
        A status message for the "Processing Status" textbox.
    """
    global vectorstore
    documents = []

    # Process PDFs
    for pdf in pdf_files or []:
        pdf_path = getattr(pdf, "name", pdf)
        documents.extend(PyPDFLoader(pdf_path).load())

    # Process websites: ignore blank lines and surrounding whitespace so a
    # trailing newline does not become an empty URL.
    urls = [
        line.strip()
        for line in (website_urls or "").splitlines()
        if line.strip()
    ]
    if urls:
        documents.extend(WebBaseLoader(urls).load())

    # Split documents
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200
    )
    splits = text_splitter.split_documents(documents)
    if not splits:
        # Nothing to index; keep any previously built store untouched.
        return "No documents to process. Please upload PDFs or provide website URLs."

    # Create vector store
    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
    return "Documents processed successfully!"


def _format_docs(docs):
    """Join retrieved chunks into one context string capped at MAX_CONTEXT_TOKENS."""
    context = "\n\n".join(doc.page_content for doc in docs)
    token_ids = tokenizer(
        context, truncation=True, max_length=MAX_CONTEXT_TOKENS
    )["input_ids"]
    return tokenizer.decode(token_ids, skip_special_tokens=True)


def _generate_response(prompt_value):
    """Run the text-generation pipeline on a rendered prompt and return the reply."""
    # The prompt step of the chain emits a PromptValue, not a plain string.
    if hasattr(prompt_value, "to_string"):
        prompt_text = prompt_value.to_string()
    else:
        prompt_text = str(prompt_value)

    outputs = generator(
        prompt_text,
        # Bound only the generated part; a total max_length would be consumed
        # by the prompt itself.
        max_new_tokens=MAX_NEW_TOKENS,
        num_return_sequences=1,
        # Return just the continuation instead of stripping the prompt by
        # string replacement.
        return_full_text=False,
        pad_token_id=tokenizer.eos_token_id,
    )
    return outputs[0]["generated_text"].strip()


def chat_with_bot(message, history):
    """Answer ``message`` using chunks retrieved from the vector store.

    Args:
        message: The user's question.
        history: Conversation history supplied by ``gr.ChatInterface``;
            not used when building the prompt.

    Returns:
        The generated answer, or a hint to load documents first when no
        vector store has been built yet.
    """
    if vectorstore is None:
        return "Please upload PDFs or provide website URLs first."

    # The retriever depends on the current store, so the chain is assembled
    # per call; the prompt and helpers are module-level.
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
    rag_chain = (
        {"context": retriever | _format_docs, "question": RunnablePassthrough()}
        | RAG_PROMPT
        | _generate_response
        | StrOutputParser()
    )

    response = rag_chain.invoke(message)
    chat_history.append((message, response))
    return response


# Gradio interface
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# Customer Support Chatbot")
    gr.Markdown(
        "Upload PDFs and/or provide website URLs to initialize the knowledge base, then chat with the bot."
    )

    with gr.Row():
        pdf_input = gr.File(
            label="Upload PDFs", file_types=[".pdf"], file_count="multiple"
        )
        website_input = gr.Textbox(
            label="Website URLs (one per line)",
            placeholder="https://example.com",
        )

    process_button = gr.Button("Process Documents")
    process_output = gr.Textbox(label="Processing Status")

    chatbot = gr.ChatInterface(
        fn=chat_with_bot,
        title="Chat with Support Bot",
        description="Ask your customer support questions here.",
    )

    process_button.click(
        fn=process_documents,
        inputs=[pdf_input, website_input],
        outputs=process_output,
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()