Spaces:

iajitpanday
/

vBot-1.7

Runtime error

App Files Files Community

vBot-1.7 / app.py

iajitpanday

Update app.py

744b8c2 verified 9 months ago

raw

history blame contribute delete

4.08 kB

	import os
	import gradio as gr
	from langchain_community.vectorstores import Chroma
	from langchain_huggingface import HuggingFaceEmbeddings
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader
	from langchain_core.prompts import PromptTemplate
	from langchain_core.runnables import RunnablePassthrough
	from langchain_core.output_parsers import StrOutputParser
	from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
	import torch

	# Load the DialoGPT checkpoint and its tokenizer once at import time
	model_name = "microsoft/DialoGPT-medium"
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForCausalLM.from_pretrained(model_name)

	# Use the first CUDA device when one is available, otherwise pass -1
	if torch.cuda.is_available():
	    _pipeline_device = 0
	else:
	    _pipeline_device = -1

	# Text-generation pipeline wrapping the model and tokenizer loaded above
	generator = pipeline(
	    "text-generation",
	    model=model,
	    tokenizer=tokenizer,
	    device=_pipeline_device,
	)

	# Embedding model handed to Chroma when the vector store is built
	embeddings = HuggingFaceEmbeddings(
	    model_name="sentence-transformers/all-MiniLM-L6-v2",
	)

	# Shared module state
	vectorstore = None  # assigned by process_documents; None until documents are processed
	chat_history = []  # (message, response) tuples appended by chat_with_bot

	# Function to process PDFs and websites
	def process_documents(pdf_files, website_urls):
	    """Load PDFs and web pages, split them, and rebuild the global vector store.

	    Args:
	        pdf_files: Iterable of uploaded PDFs, or None. Each item is either a
	            filesystem path string or an object whose ``name`` attribute is
	            the path (which of the two Gradio passes depends on its
	            version/config -- TODO confirm for the deployed version).
	        website_urls: Text containing one URL per line, or None/empty.
	            Blank lines and surrounding whitespace are ignored.

	    Returns:
	        A status message describing the outcome. The global ``vectorstore``
	        is only replaced when at least one text chunk was produced.
	    """
	    global vectorstore
	    documents = []

	    # Process PDFs (accept both plain path strings and file-like wrappers)
	    for pdf in pdf_files or []:
	        pdf_path = pdf if isinstance(pdf, str) else pdf.name
	        documents.extend(PyPDFLoader(pdf_path).load())

	    # Process websites; drop blank lines so empty strings never reach the loader
	    stripped_lines = (line.strip() for line in (website_urls or "").splitlines())
	    urls = [url for url in stripped_lines if url]
	    if urls:
	        documents.extend(WebBaseLoader(urls).load())

	    # Nothing was supplied or loaded: keep any existing store and tell the user
	    if not documents:
	        return "No documents to process. Please upload PDFs or provide website URLs."

	    # Split documents
	    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
	    splits = text_splitter.split_documents(documents)

	    # Sources without extractable text yield no chunks; avoid building an
	    # index from an empty list
	    if not splits:
	        return "No text could be extracted from the provided documents."

	    # Create vector store
	    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
	    return "Documents processed successfully!"

	# RAG chatbot function
	def chat_with_bot(message, history):
	    """Answer a user message with retrieval-augmented generation.

	    Retrieves the 3 most similar chunks from the global ``vectorstore``,
	    fills them into a support-assistant prompt together with the question,
	    and generates a reply with the module-level ``generator`` pipeline.

	    Args:
	        message: The user's question.
	        history: Conversation history supplied by the chat UI; not read here.

	    Returns:
	        The generated reply, or a notice asking the user to load documents
	        first when no vector store has been built yet.
	    """
	    global vectorstore, chat_history

	    if vectorstore is None:
	        return "Please upload PDFs or provide website URLs first."

	    # Set up retriever
	    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

	    # Define prompt template
	    prompt_template = """
	You are a helpful customer support assistant. Use the provided context to answer the user's question accurately and politely. If the context doesn't contain relevant information, provide a general helpful response.

	Context: {context}

	Question: {question}

	Answer:
	"""
	    prompt = PromptTemplate.from_template(prompt_template)

	    def format_docs(docs):
	        # Join the retrieved chunks into one context string
	        return "\n\n".join(doc.page_content for doc in docs)

	    def generate_response(prompt_value):
	        # The prompt step emits a PromptValue rather than a str; the
	        # Hugging Face pipeline needs plain text.
	        if hasattr(prompt_value, "to_string"):
	            input_text = prompt_value.to_string()
	        else:
	            input_text = str(prompt_value)

	        # max_new_tokens bounds only the reply, so a long retrieved context
	        # cannot exhaust the budget the way a total max_length would.
	        # return_full_text=False makes the pipeline return just the
	        # continuation instead of stripping the prompt by string replace.
	        # NOTE(review): prompt plus reply must still fit the model's context
	        # window -- confirm against the chunk size used at indexing time.
	        outputs = generator(
	            input_text,
	            max_new_tokens=128,
	            num_return_sequences=1,
	            pad_token_id=tokenizer.eos_token_id,
	            return_full_text=False,
	        )
	        return outputs[0]["generated_text"].strip()

	    # Create RAG chain: retrieve -> format context -> fill prompt -> generate
	    rag_chain = (
	        {"context": retriever | format_docs, "question": RunnablePassthrough()}
	        | prompt
	        | generate_response
	        | StrOutputParser()
	    )

	    # Get response
	    response = rag_chain.invoke(message)
	    chat_history.append((message, response))
	    return response

	# Build the Gradio UI: document inputs, a processing status box, and the chat panel
	with gr.Blocks(theme="soft") as demo:
	    gr.Markdown(value="# Customer Support Chatbot")
	    gr.Markdown(
	        value="Upload PDFs and/or provide website URLs to initialize the knowledge base, then chat with the bot."
	    )

	    # Knowledge-base sources sit side by side
	    with gr.Row():
	        pdf_input = gr.File(
	            file_count="multiple",
	            file_types=[".pdf"],
	            label="Upload PDFs",
	        )
	        website_input = gr.Textbox(
	            placeholder="https://example.com",
	            label="Website URLs (one per line)",
	        )

	    process_button = gr.Button(value="Process Documents")
	    process_output = gr.Textbox(label="Processing Status")

	    # Chat panel backed by chat_with_bot
	    chatbot = gr.ChatInterface(
	        chat_with_bot,
	        title="Chat with Support Bot",
	        description="Ask your customer support questions here.",
	    )

	    # Clicking the button runs process_documents and shows its status message
	    process_button.click(
	        process_documents,
	        [pdf_input, website_input],
	        process_output,
	    )

	# Start serving the app
	demo.launch()