Spaces:

prernajeet01
/

RAG_based_Equinix_chatbot

Sleeping

App Files Community

prernajeet01 committed on Mar 13, 2025

Commit

7adefa6

verified ·

1 Parent(s): 0a64af5

Update app.py

Browse files

Files changed (1) hide show

app.py +84 -24

app.py CHANGED Viewed

@@ -2,9 +2,9 @@ import os
 import gradio as gr
 import google.generativeai as genai
 from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
-from langchain.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.vectorstores import FAISS
 from langchain.prompts import PromptTemplate
 from langchain.chains import LLMChain
@@ -19,35 +19,77 @@ genai.configure(api_key=google_api_key)
 # Load PDF and create vector store
 def initialize_retriever():
-    # Load PDF
-    pdf_path = "Team1.pdf"
-    loader = PyPDFLoader(pdf_path)
-    documents = loader.load()
-    # Split text into chunks
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=10)
-    text_chunks = text_splitter.split_documents(documents)
-    # Generate embeddings
-    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
-    # Store embeddings in FAISS index
-    vectorstore = FAISS.from_documents(text_chunks, embeddings)
-    return vectorstore.as_retriever(search_kwargs={"k": 4})
 # Initialize LLM
-llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
 # RAG query function
 def rag_query(query, retriever):
-    # Retrieve relevant documents
-    docs = retriever.get_relevant_documents(query)
-    # Create context from retrieved documents
-    context = "\n".join([doc.page_content for doc in docs])
-    prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer directly and concisely:"
     try:
         response = llm.invoke(prompt)
         return response.content
     except Exception as e:
@@ -55,6 +97,9 @@ def rag_query(query, retriever):
 # General query function
 def general_query(query):
     try:
         # Define the prompt
         prompt = PromptTemplate.from_template("Answer the following query: {query}")
@@ -67,11 +112,18 @@ def general_query(query):
         return response
     except Exception as e:
-        return f"Error: {str(e)}"
 # Query router function
 def query_router(query, method, retriever):
     if method == "Team Query":
         return rag_query(query, retriever)
     elif method == "General Query":
         return general_query(query)
@@ -80,6 +132,7 @@ def query_router(query, method, retriever):
 # Main function to create and launch the Gradio interface
 def main():
     # Initialize retriever
     retriever = initialize_retriever()
     # Custom CSS for styling
@@ -95,9 +148,16 @@ def main():
     }
     """
     # Create Gradio UI
     with gr.Blocks(css=custom_css) as ui:
-        gr.Image("equinix-sign.jpg", elem_id="logo", show_label=False, height=100, width=200)
         # Title & Description
         gr.Markdown("<h1 style='text-align: center; color: black;'>Equinix Chatbot for Automation Team</h1>")
@@ -122,7 +182,7 @@ def main():
         )
     # Launch UI
-    ui.launch(share=True)
 if __name__ == "__main__":
     main()

 import gradio as gr
 import google.generativeai as genai
 from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
+from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import FAISS
 from langchain.prompts import PromptTemplate
 from langchain.chains import LLMChain
 # Load PDF and create vector store
 def initialize_retriever():
+    try:
+        # Get current directory
+        current_dir = os.getcwd()
+        print(f"Current working directory: {current_dir}")
+        # List files in current directory for debugging
+        print(f"Files in directory: {os.listdir(current_dir)}")
+        # Use absolute path for the PDF
+        pdf_path = os.path.join(current_dir, "Team1.pdf")
+        print(f"Attempting to load PDF from: {pdf_path}")
+        # Check if file exists
+        if not os.path.exists(pdf_path):
+            raise FileNotFoundError(f"The file {pdf_path} does not exist")
+        # Load PDF
+        loader = PyPDFLoader(pdf_path)
+        documents = loader.load()
+        print(f"Successfully loaded {len(documents)} pages from the PDF")
+        # Split text into chunks
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=10)
+        text_chunks = text_splitter.split_documents(documents)
+        print(f"Split into {len(text_chunks)} text chunks")
+        # Generate embeddings
+        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+        # Store embeddings in FAISS index
+        vectorstore = FAISS.from_documents(text_chunks, embeddings)
+        print("Successfully created vector store")
+        return vectorstore.as_retriever(search_kwargs={"k": 4})
+    except Exception as e:
+        print(f"Error in initialize_retriever: {str(e)}")
+        # Return a dummy retriever for graceful failure
+        class DummyRetriever:
+            def get_relevant_documents(self, query):
+                return []
+        print("Returning dummy retriever due to error")
+        return DummyRetriever()
 # Initialize LLM
+def get_llm():
+    try:
+        return ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
+    except Exception as e:
+        print(f"Error initializing LLM: {str(e)}")
+        return None
+llm = get_llm()
 # RAG query function
 def rag_query(query, retriever):
+    if retriever is None:
+        return "Error: Could not initialize document retriever. Please check if Team1.pdf exists."
     try:
+        # Retrieve relevant documents
+        docs = retriever.get_relevant_documents(query)
+        if not docs:
+            return "No relevant information found in the document. Try a general query instead."
+        # Create context from retrieved documents
+        context = "\n".join([doc.page_content for doc in docs])
+        prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer directly and concisely:"
         response = llm.invoke(prompt)
         return response.content
     except Exception as e:
 # General query function
 def general_query(query):
+    if llm is None:
+        return "Error: Could not initialize language model. Please check your API key."
     try:
         # Define the prompt
         prompt = PromptTemplate.from_template("Answer the following query: {query}")
         return response
     except Exception as e:
+        return f"Error in general query: {str(e)}"
+# Function to handle the case when no PDF is found
+def file_not_found_message():
+    return ("The Team1.pdf file could not be found. Team Query mode will not work properly. "
+            "Please ensure the PDF is correctly uploaded to the Hugging Face Space.")
 # Query router function
 def query_router(query, method, retriever):
     if method == "Team Query":
+        if isinstance(retriever, type) or retriever is None:
+            return file_not_found_message()
         return rag_query(query, retriever)
     elif method == "General Query":
         return general_query(query)
 # Main function to create and launch the Gradio interface
 def main():
     # Initialize retriever
+    print("Initializing retriever...")
     retriever = initialize_retriever()
     # Custom CSS for styling
     }
     """
+    logo_path = "equinix-sign.jpg"
+    logo_exists = os.path.exists(logo_path)
     # Create Gradio UI
     with gr.Blocks(css=custom_css) as ui:
+        if logo_exists:
+            gr.Image(logo_path, elem_id="logo", show_label=False, height=100, width=200)
+        else:
+            gr.Markdown("<h2 style='text-align: center;'>Equinix</h2>")
+            print(f"Warning: Logo file {logo_path} not found")
         # Title & Description
         gr.Markdown("<h1 style='text-align: center; color: black;'>Equinix Chatbot for Automation Team</h1>")
         )
     # Launch UI
+    ui.launch()
 if __name__ == "__main__":
     main()