iajitpanday committed on
Commit
5717062
·
verified ·
1 Parent(s): fee555b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -112
app.py CHANGED
@@ -1,71 +1,71 @@
1
  import gradio as gr
2
- import torch
3
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 
 
 
 
 
 
4
  from langchain.embeddings import HuggingFaceEmbeddings
5
  from langchain.vectorstores import FAISS
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  from langchain.document_loaders import PyPDFLoader, WebBaseLoader
8
  from langchain.chains import RetrievalQA
9
  from langchain.llms import HuggingFacePipeline
10
- import os
11
- import tempfile
12
- from typing import List, Tuple
13
- import requests
14
- from bs4 import BeautifulSoup
15
 
16
- # Initialize the model and tokenizer
17
  class CustomerSupportChatbot:
18
  def __init__(self):
19
- # Initialize embeddings
20
  self.embeddings = HuggingFaceEmbeddings(
21
- model_name="sentence-transformers/all-MiniLM-L6-v2"
 
22
  )
23
 
24
- # Initialize the base language model
25
- model_name = "microsoft/DialoGPT-medium" # You can change this to a different model
26
- self.tokenizer = AutoTokenizer.from_pretrained(model_name)
27
- self.model = AutoModelForCausalLM.from_pretrained(model_name)
28
-
29
- # Create text generation pipeline
30
- self.pipe = pipeline(
31
  "text-generation",
32
- model=self.model,
33
- tokenizer=self.tokenizer,
34
- max_length=512,
 
35
  temperature=0.7,
36
- top_p=0.9,
37
- repetition_penalty=1.1
38
  )
39
 
40
- # Initialize HuggingFace pipeline for LangChain
41
- self.llm = HuggingFacePipeline(pipeline=self.pipe)
42
-
43
  # Initialize vector store
44
  self.vector_store = None
45
- self.qa_chain = None
46
 
47
  # Text splitter
48
  self.text_splitter = RecursiveCharacterTextSplitter(
49
- chunk_size=1000,
50
- chunk_overlap=200
 
51
  )
52
 
53
  def process_documents(self, pdf_files, website_urls) -> str:
54
  """Process PDF files and website URLs to create a vector store"""
55
- documents = []
56
 
57
  # Process PDF files
58
  if pdf_files:
59
  for pdf_file in pdf_files:
60
- with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
61
- tmp_file.write(pdf_file.file.read())
62
- tmp_file.flush()
 
 
 
 
 
63
 
64
- loader = PyPDFLoader(tmp_file.name)
65
- pdf_documents = loader.load()
66
- documents.extend(pdf_documents)
67
-
68
- os.unlink(tmp_file.name)
69
 
70
  # Process websites
71
  if website_urls:
@@ -73,86 +73,111 @@ class CustomerSupportChatbot:
73
  url = url.strip()
74
  if url:
75
  try:
76
- loader = WebBaseLoader(url)
77
- web_documents = loader.load()
78
- documents.extend(web_documents)
 
 
 
 
 
 
 
 
 
 
79
  except Exception as e:
80
  print(f"Error loading {url}: {str(e)}")
81
 
82
- if not documents:
83
  return "No documents processed. Please upload PDFs or provide website URLs."
84
 
85
- # Split documents into chunks
86
- texts = self.text_splitter.split_documents(documents)
87
-
88
- # Create vector store
89
- self.vector_store = FAISS.from_documents(texts, self.embeddings)
90
-
91
- # Create QA chain
92
- self.qa_chain = RetrievalQA.from_chain_type(
93
- llm=self.llm,
94
- chain_type="stuff",
95
- retriever=self.vector_store.as_retriever(search_kwargs={"k": 3}),
96
- return_source_documents=True
97
- )
 
 
98
 
99
- return f"Successfully processed {len(documents)} documents and created knowledge base."
 
 
 
 
 
100
 
101
  def chat(self, message: str, history: List[Tuple[str, str]]) -> str:
102
  """Chat function that uses RAG if available"""
103
 
104
- # If we have a knowledge base, use RAG
105
- if self.qa_chain:
106
- try:
107
- # Get relevant context from the knowledge base
108
- result = self.qa_chain({"query": message})
109
-
110
- # Format the response with context
111
- response = result["result"]
112
-
113
- # Add source information if available
114
- if "source_documents" in result and result["source_documents"]:
115
- sources = set()
116
- for doc in result["source_documents"]:
117
- if hasattr(doc, 'metadata') and 'source' in doc.metadata:
118
- sources.add(doc.metadata['source'])
119
-
120
- if sources:
121
- response += "\n\nSources: " + ", ".join(list(sources)[:3])
122
-
123
- return response
124
 
125
- except Exception as e:
126
- print(f"Error using RAG: {str(e)}")
127
- # Fall back to basic chat if RAG fails
128
-
129
- # Basic chat without RAG
130
- # Format conversation history for the model
131
- conversation = ""
132
- for user_msg, bot_msg in history[-5:]: # Use last 5 exchanges
133
- conversation += f"User: {user_msg}\nBot: {bot_msg}\n"
134
-
135
- conversation += f"User: {message}\nBot:"
136
-
137
- # Generate response
138
- response = self.pipe(conversation, max_length=len(conversation) + 100)[0]['generated_text']
139
-
140
- # Extract only the bot's response
141
- bot_response = response.split("Bot:")[-1].strip()
142
 
143
- return bot_response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
  # Initialize the chatbot
146
  chatbot = CustomerSupportChatbot()
147
 
148
  # Create the Gradio interface
149
  def create_interface():
150
- with gr.Blocks(title="Customer Support Chatbot with RAG") as demo:
151
- gr.Markdown("# Customer Support Chatbot with RAG")
152
  gr.Markdown("Upload PDFs and/or provide website URLs to create a knowledge base for the chatbot.")
153
 
154
  with gr.Row():
155
  with gr.Column(scale=1):
 
156
  pdf_upload = gr.File(
157
  label="Upload PDF files",
158
  file_count="multiple",
@@ -166,20 +191,40 @@ def create_interface():
166
  )
167
 
168
  process_btn = gr.Button("Process Documents", variant="primary")
169
- status_text = gr.Textbox(label="Status", interactive=False)
170
 
171
  with gr.Column(scale=2):
172
- chatbot_interface = gr.Chatbot(label="Customer Support Chat")
 
 
 
 
 
173
  msg_input = gr.Textbox(
174
  label="Message",
175
  placeholder="Ask a question...",
176
- lines=2
 
177
  )
178
 
179
  with gr.Row():
180
  submit_btn = gr.Button("Send", variant="primary")
181
  clear_btn = gr.Button("Clear Chat")
182
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  # Event handlers
184
  def process_documents(pdf_files, website_urls):
185
  return chatbot.process_documents(pdf_files, website_urls)
@@ -211,20 +256,10 @@ def create_interface():
211
  )
212
 
213
  clear_btn.click(
214
- fn=lambda: None,
215
- outputs=chatbot_interface
216
- )
217
-
218
- # Add example questions
219
- gr.Examples(
220
- examples=[
221
- "What are your customer support hours?",
222
- "How can I track my order?",
223
- "What is the return policy?",
224
- "How do I contact customer service?",
225
- "What payment methods do you accept?"
226
- ],
227
- inputs=msg_input
228
  )
229
 
230
  return demo
 
1
  import gradio as gr
2
+ import os
3
+ import tempfile
4
+ from typing import List, Tuple
5
+ import requests
6
+ from bs4 import BeautifulSoup
7
+ import json
8
+
9
+ # Instead of using torch/transformers directly, use HuggingFace's Inference API
10
+ from transformers import pipeline
11
  from langchain.embeddings import HuggingFaceEmbeddings
12
  from langchain.vectorstores import FAISS
13
  from langchain.text_splitter import RecursiveCharacterTextSplitter
14
  from langchain.document_loaders import PyPDFLoader, WebBaseLoader
15
  from langchain.chains import RetrievalQA
16
  from langchain.llms import HuggingFacePipeline
17
+ from langchain.schema import Document
 
 
 
 
18
 
19
+ # Initialize the chatbot class
20
  class CustomerSupportChatbot:
21
  def __init__(self):
22
+ # Use a lighter embedding model
23
  self.embeddings = HuggingFaceEmbeddings(
24
+ model_name="all-MiniLM-L6-v2",
25
+ model_kwargs={'device': 'cpu'}
26
  )
27
 
28
+ # Use a simpler model for chat
29
+ self.chat_pipeline = pipeline(
 
 
 
 
 
30
  "text-generation",
31
+ model="microsoft/DialoGPT-small", # Using smaller model
32
+ device_map="auto",
33
+ torch_dtype="auto",
34
+ max_new_tokens=100,
35
  temperature=0.7,
36
+ pad_token_id=50256
 
37
  )
38
 
 
 
 
39
  # Initialize vector store
40
  self.vector_store = None
41
+ self.documents = []
42
 
43
  # Text splitter
44
  self.text_splitter = RecursiveCharacterTextSplitter(
45
+ chunk_size=500,
46
+ chunk_overlap=50,
47
+ length_function=len,
48
  )
49
 
50
  def process_documents(self, pdf_files, website_urls) -> str:
51
  """Process PDF files and website URLs to create a vector store"""
52
+ self.documents = []
53
 
54
  # Process PDF files
55
  if pdf_files:
56
  for pdf_file in pdf_files:
57
+ try:
58
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
59
+ tmp_file.write(pdf_file.read())
60
+ tmp_file.flush()
61
+
62
+ loader = PyPDFLoader(tmp_file.name)
63
+ pdf_documents = loader.load()
64
+ self.documents.extend(pdf_documents)
65
 
66
+ os.unlink(tmp_file.name)
67
+ except Exception as e:
68
+ print(f"Error processing PDF: {str(e)}")
 
 
69
 
70
  # Process websites
71
  if website_urls:
 
73
  url = url.strip()
74
  if url:
75
  try:
76
+ # Simple web scraping
77
+ response = requests.get(url, timeout=10)
78
+ soup = BeautifulSoup(response.content, 'html.parser')
79
+
80
+ # Extract text content
81
+ text = soup.get_text(separator=' ', strip=True)
82
+
83
+ # Create a document
84
+ doc = Document(
85
+ page_content=text,
86
+ metadata={"source": url}
87
+ )
88
+ self.documents.append(doc)
89
  except Exception as e:
90
  print(f"Error loading {url}: {str(e)}")
91
 
92
+ if not self.documents:
93
  return "No documents processed. Please upload PDFs or provide website URLs."
94
 
95
+ try:
96
+ # Split documents into chunks
97
+ texts = self.text_splitter.split_documents(self.documents)
98
+
99
+ # Create vector store
100
+ self.vector_store = FAISS.from_documents(texts, self.embeddings)
101
+
102
+ return f"Successfully processed {len(self.documents)} documents into {len(texts)} chunks."
103
+ except Exception as e:
104
+ return f"Error creating vector store: {str(e)}"
105
+
106
+ def search_documents(self, query: str, k: int = 3) -> List[str]:
107
+ """Search for relevant documents"""
108
+ if not self.vector_store:
109
+ return []
110
 
111
+ try:
112
+ docs = self.vector_store.similarity_search(query, k=k)
113
+ return [doc.page_content for doc in docs]
114
+ except Exception as e:
115
+ print(f"Error searching documents: {str(e)}")
116
+ return []
117
 
118
  def chat(self, message: str, history: List[Tuple[str, str]]) -> str:
119
  """Chat function that uses RAG if available"""
120
 
121
+ # Search for relevant context
122
+ if self.vector_store:
123
+ relevant_docs = self.search_documents(message)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
+ if relevant_docs:
126
+ # Create context from relevant documents
127
+ context = "\n\n".join(relevant_docs[:2]) # Use top 2 documents
128
+
129
+ # Create a prompt with context
130
+ prompt = f"""Based on the following context, please answer the customer's question:
131
+
132
+ Context:
133
+ {context}
134
+
135
+ Customer Question: {message}
136
+
137
+ Answer: """
138
+ else:
139
+ prompt = f"Customer Question: {message}\nAnswer: "
140
+ else:
141
+ prompt = f"Customer Question: {message}\nAnswer: "
142
 
143
+ try:
144
+ # Generate response
145
+ response = self.chat_pipeline(
146
+ prompt,
147
+ max_new_tokens=100,
148
+ do_sample=True,
149
+ temperature=0.7,
150
+ top_p=0.9,
151
+ num_return_sequences=1
152
+ )[0]['generated_text']
153
+
154
+ # Extract just the answer part
155
+ if "Answer: " in response:
156
+ answer = response.split("Answer: ")[-1].strip()
157
+ else:
158
+ answer = response.strip()
159
+
160
+ # Clean up the response
161
+ answer = answer.split("\n")[0].strip() # Take first line only
162
+
163
+ return answer if answer else "I'm here to help! Could you please rephrase your question?"
164
+
165
+ except Exception as e:
166
+ print(f"Error generating response: {str(e)}")
167
+ return "I'm sorry, I encountered an error. Could you please try again?"
168
 
169
# Initialize the chatbot — module-level singleton shared by all Gradio
# event handlers below.
# NOTE(review): constructing this at import time downloads/loads both the
# embedding model and the chat model; presumably intentional for a hosted
# Space, but confirm it is acceptable for local startup latency.
chatbot = CustomerSupportChatbot()
171
 
172
  # Create the Gradio interface
173
  def create_interface():
174
+ with gr.Blocks(title="Customer Support Chatbot with RAG", theme=gr.themes.Soft()) as demo:
175
+ gr.Markdown("# 🤖 Customer Support Chatbot with RAG")
176
  gr.Markdown("Upload PDFs and/or provide website URLs to create a knowledge base for the chatbot.")
177
 
178
  with gr.Row():
179
  with gr.Column(scale=1):
180
+ gr.Markdown("### 📁 Document Upload")
181
  pdf_upload = gr.File(
182
  label="Upload PDF files",
183
  file_count="multiple",
 
191
  )
192
 
193
  process_btn = gr.Button("Process Documents", variant="primary")
194
+ status_text = gr.Textbox(label="Status", interactive=False, show_label=True)
195
 
196
  with gr.Column(scale=2):
197
+ gr.Markdown("### 💬 Chat")
198
+ chatbot_interface = gr.Chatbot(
199
+ label="Customer Support Chat",
200
+ height=400,
201
+ show_label=True
202
+ )
203
  msg_input = gr.Textbox(
204
  label="Message",
205
  placeholder="Ask a question...",
206
+ lines=2,
207
+ show_label=True
208
  )
209
 
210
  with gr.Row():
211
  submit_btn = gr.Button("Send", variant="primary")
212
  clear_btn = gr.Button("Clear Chat")
213
 
214
+ # Example questions section
215
+ gr.Markdown("### 💡 Example Questions")
216
+ gr.Examples(
217
+ examples=[
218
+ "What are your customer support hours?",
219
+ "How can I track my order?",
220
+ "What is the return policy?",
221
+ "How do I contact customer service?",
222
+ "What payment methods do you accept?"
223
+ ],
224
+ inputs=msg_input,
225
+ label="Click on any example to try it:"
226
+ )
227
+
228
  # Event handlers
229
  def process_documents(pdf_files, website_urls):
230
  return chatbot.process_documents(pdf_files, website_urls)
 
256
  )
257
 
258
  clear_btn.click(
259
+ lambda: None,
260
+ None,
261
+ chatbot_interface,
262
+ queue=False
 
 
 
 
 
 
 
 
 
 
263
  )
264
 
265
  return demo