Spaces:

MusaR
/

rag-chatbot

Sleeping

App Files Files Community

MusaR committed on Jun 23, 2025

Commit

dcc1b8b

verified ·

1 Parent(s): 9b86fc8

Update app.py

Browse files

Files changed (1) hide show

app.py +90 -115

app.py CHANGED Viewed

@@ -1,38 +1,33 @@
-# app.py (DEBUGGING VERSION)
 print("--- Python script starting ---")
-import streamlit as st
 import os
-import langchain
-langchain.debug = True
 os.environ['TOKENIZERS_PARALLELISM'] = 'false'
-os.environ['HF_HOME'] = '/app/huggingface_cache' # For transformers and datasets
 os.environ['TRANSFORMERS_CACHE'] = '/app/huggingface_cache/transformers'
 os.environ['SENTENCE_TRANSFORMERS_HOME'] = '/app/huggingface_cache/sentence_transformers'
-# Create the directory if it doesn't exist, with permissions
 if not os.path.exists('/app/huggingface_cache'):
     os.makedirs('/app/huggingface_cache', exist_ok=True)
 from dotenv import load_dotenv
 from pinecone import Pinecone
-# --- Standard Imports ---
 from langchain_pinecone import PineconeVectorStore
 from langchain_community.embeddings import SentenceTransformerEmbeddings
 from langchain_groq import ChatGroq
-from langchain_core.prompts import PromptTemplate
 from langchain_core.runnables import RunnablePassthrough
-from langchain_core.output_parsers import PydanticOutputParser
-from pydantic import BaseModel, Field
 from langchain.retrievers import ContextualCompressionRetriever
 from langchain.retrievers.document_compressors import CohereRerank
 print("--- All imports successful ---")
-# We wrap the ENTIRE app in a try/except block to catch any startup error
 try:
-    # --- Load Environment Variables ---
     print("Step 1: Loading environment variables...")
     load_dotenv()
     PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
@@ -41,47 +36,23 @@ try:
     INDEX_NAME = "rag-chatbot"
     print("Step 1: SUCCESS")
-    # --- Page Configuration ---
-    st.set_page_config(page_title="Production RAG System", page_icon="🚀", layout="wide")
-    st.title("🚀 Production-Grade RAG System")
-    # --- Pydantic Model ---
-    class StructuredAnswer(BaseModel):  # Pydantic schema handed to PydanticOutputParser as the target shape of the LLM answer
-        summary: str = Field(description="A concise summary.")
-        key_points: list[str] = Field(description="A list of key bullet points.")
-        confidence_score: float = Field(description="A 0.0 to 1.0 confidence score.")  # the 0.0-1.0 range is only stated in the description; no validator is declared here
-    # --- Caching and Initialization ---
     @st.cache_resource
     def initialize_services():  # Builds and returns (reranking_retriever, llm); wrapped in st.cache_resource by the decorator above
         print("Step 2: Entering initialize_services function...")
         if not all([PINECONE_API_KEY, GROQ_API_KEY, COHERE_API_KEY]):  # any key that is None or empty fails the check
             raise ValueError("An API key is missing!")  # raised inside the enclosing try, so the outer `except Exception` reports it
-        print("Step 2a: Initializing embedding model...")
         embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
-        print("Step 2a: SUCCESS")
-        print("Step 2b: Initializing Pinecone client...")
         pinecone = Pinecone(api_key=PINECONE_API_KEY)
-        host = "https://rag-chatbot-sg8t88c.svc.aped-4627-b74a.pinecone.io"
         index = pinecone.Index(host=host)  # the index is addressed by the hard-coded host URL, not by INDEX_NAME
-        print("Step 2b: SUCCESS")
-        print("Step 2c: Creating PineconeVectorStore object...")
         vectorstore = PineconeVectorStore(index=index, embedding=embeddings)
-        print("Step 2c: SUCCESS")
-        print("Step 2d: Initializing Cohere Re-ranker...")
-        base_retriever = vectorstore.as_retriever(search_kwargs={'k': 20})  # first-stage retriever configured with k=20
-        compressor = CohereRerank(cohere_api_key=COHERE_API_KEY, top_n=5, model="rerank-english-v3.0")  # reranker configured with top_n=5
         reranking_retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=base_retriever)  # reranker applied on top of the base retriever
-        print("Step 2d: SUCCESS")
-        print("Step 2e: Initializing Groq LLM...")
-        llm = ChatGroq(temperature=0, model_name="llama3-70b-8192", api_key=GROQ_API_KEY)
-        print("Step 2e: SUCCESS")
         print("Step 2: All services initialized successfully.")
         return reranking_retriever, llm
@@ -89,92 +60,96 @@ try:
     retriever, llm = initialize_services()
     print("Step 3: SUCCESS, services are loaded.")
-    # --- RAG Chain Definition ---
     print("Step 4: Defining RAG chain...")
-    pydantic_parser = PydanticOutputParser(pydantic_object=StructuredAnswer)
-    format_instructions = pydantic_parser.get_format_instructions()
-    template = """
-    You are a world-class analysis engine. Your task is to provide a structured, factual answer based *only* on the following context.
-    Synthesize the information from all context snippets. Do not use any outside knowledge.
     Context:
     {context}
-    Question:
-    {question}
-    Follow these formatting instructions precisely:
-    {format_instructions}
     """
-    prompt = PromptTemplate(
-        template=template,
-        input_variables=["context", "question"],
-        partial_variables={"format_instructions": format_instructions}
-    )
-    # --- NEW: Break down the chain for debugging ---
-    def retrieve_and_rerank(input_dict):  # Debug helper: run the retriever on input_dict['question'] and log what came back
-        print(f"--- RAG DEBUG: Retrieving for question: {input_dict['question']} ---")
-        docs = retriever.invoke(input_dict['question'])
-        print(f"--- RAG DEBUG: Retrieved {len(docs)} docs after reranking ---")
-        for i, doc in enumerate(docs):
-            print(f"    Doc {i} (source: {doc.metadata.get('source', 'N/A')}, page: {doc.metadata.get('page', 'N/A')}): {doc.page_content[:100]}...")  # logs only the first 100 characters of each document
-        return {"context": docs, "question": input_dict['question']}  # NOTE(review): the rag_chain shown in this hunk does not reference this helper
-    def format_prompt(input_dict):  # Debug helper: join the retrieved documents into one context string and fill the prompt
-        print(f"--- RAG DEBUG: Formatting prompt with context ---")
-        # Build the context string by hand so it can be logged before it reaches the LLM
-        context_str = "\n\n---\n\n".join([doc.page_content for doc in input_dict['context']])
-        print(f"--- RAG DEBUG: Context fed to LLM: {context_str[:500]}... ---") # Logs only the first 500 characters of the context
-        return prompt.invoke({"context": context_str, "question": input_dict['question']})
-    def call_llm(formatted_prompt):  # Debug helper: invoke the LLM on an already formatted prompt and log the raw result
-        print(f"--- RAG DEBUG: Calling LLM ---")
-        llm_output = llm.invoke(formatted_prompt)
-        print(f"--- RAG DEBUG: Raw LLM Output: {llm_output} ---") # Logs the full, unparsed LLM result
-        return llm_output
-    def parse_output(llm_output_str):  # Debug helper: parse the LLM output with pydantic_parser, falling back to a placeholder answer
-        print(f"--- RAG DEBUG: Attempting to parse LLM output with Pydantic ---")
-        try:
-            parsed = pydantic_parser.invoke(llm_output_str)
-            print(f"--- RAG DEBUG: Pydantic parsing successful ---")
-            return parsed
-        except Exception as e_parse:  # e_parse itself is not used; the traceback is printed instead
-            print(f"!!!!!!!!!! PYDANTIC PARSING ERROR !!!!!!!!!!")
-            print(f"Raw LLM Output that failed to parse: {llm_output_str}")
-            print(traceback.format_exc())  # NOTE(review): the only visible `import traceback` is inside the outer except handler; confirm it is in scope here
-            # Fallback: swallow the parse failure and return a placeholder StructuredAnswer instead of raising
-            return StructuredAnswer(summary="LLM output parsing failed. See logs.", key_points=[], confidence_score=0.0)
     rag_chain = (
-        {"context": retriever, "question": RunnablePassthrough()}
         | prompt
         | llm
-        | pydantic_parser
     )
     print("Step 4: SUCCESS")
-    # --- UI Rendering ---
-    print("Step 5: Starting to render Streamlit UI...")
-    st.success("System is ready. Ask your question below.")
-    query = st.text_input("Enter your question:", key="query_input")
-    if query:
-        with st.spinner("Processing..."):
-            structured_answer = rag_chain.invoke(query)
-            st.write("### Answer")
-            # ... rest of UI ...
-    print("Step 5: SUCCESS, UI is rendered.")
 except Exception as e:
-    # If ANY error happens during startup, it will be printed here
-    print(f"!!!!!!!!!! A FATAL ERROR OCCURRED !!!!!!!!!!")
     import traceback
     print(traceback.format_exc())
     st.error(f"A fatal error occurred during startup. Please check the container logs. Error: {e}")

+%%writefile app.py
 print("--- Python script starting ---")
 import os
 os.environ['TOKENIZERS_PARALLELISM'] = 'false'
+os.environ['HF_HOME'] = '/app/huggingface_cache'
 os.environ['TRANSFORMERS_CACHE'] = '/app/huggingface_cache/transformers'
 os.environ['SENTENCE_TRANSFORMERS_HOME'] = '/app/huggingface_cache/sentence_transformers'
 if not os.path.exists('/app/huggingface_cache'):
     os.makedirs('/app/huggingface_cache', exist_ok=True)
+import langchain
+langchain.debug = False # Turn off verbose RAG chain logging for production
+import streamlit as st
 from dotenv import load_dotenv
 from pinecone import Pinecone
 from langchain_pinecone import PineconeVectorStore
 from langchain_community.embeddings import SentenceTransformerEmbeddings
 from langchain_groq import ChatGroq
+from langchain_core.prompts import ChatPromptTemplate # Use ChatPromptTemplate
 from langchain_core.runnables import RunnablePassthrough
+from langchain_core.output_parsers import StrOutputParser # Simpler string output
 from langchain.retrievers import ContextualCompressionRetriever
 from langchain.retrievers.document_compressors import CohereRerank
 print("--- All imports successful ---")
 try:
     print("Step 1: Loading environment variables...")
     load_dotenv()
     PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
     INDEX_NAME = "rag-chatbot"
     print("Step 1: SUCCESS")
+    st.set_page_config(page_title="Advanced RAG Chatbot", page_icon="🚀", layout="wide")
+    st.title("🚀 Production-Grade RAG Chatbot")
     @st.cache_resource
     def initialize_services():  # Builds and returns (reranking_retriever, llm); wrapped in st.cache_resource by the decorator above
         print("Step 2: Entering initialize_services function...")
         if not all([PINECONE_API_KEY, GROQ_API_KEY, COHERE_API_KEY]):  # GROQ/COHERE keys are assigned outside the visible hunk - presumably via os.getenv like PINECONE_API_KEY
             raise ValueError("An API key is missing!")  # raised inside the enclosing try, so the outer `except Exception` reports it
         embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
         pinecone = Pinecone(api_key=PINECONE_API_KEY)
+        host = "https://rag-chatbot-sg8t88c.svc.aped-4627-b74a.pinecone.io" # Hard-coded index host URL; INDEX_NAME is not used in this function
         index = pinecone.Index(host=host)
         vectorstore = PineconeVectorStore(index=index, embedding=embeddings)
+        base_retriever = vectorstore.as_retriever(search_kwargs={'k': 10}) # First-stage retriever configured with k=10 (the removed version used 20)
+        compressor = CohereRerank(cohere_api_key=COHERE_API_KEY, top_n=3, model="rerank-english-02") # top_n=3; NOTE(review): the removed version used "rerank-english-v3.0" - confirm "rerank-english-02" is a valid Cohere model id
         reranking_retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=base_retriever)  # reranker applied on top of the base retriever
+        llm = ChatGroq(temperature=0.1, model_name="llama3-70b-8192", api_key=GROQ_API_KEY)  # temperature raised from the removed version's 0
         print("Step 2: All services initialized successfully.")
         return reranking_retriever, llm
     retriever, llm = initialize_services()
     print("Step 3: SUCCESS, services are loaded.")
+    # --- NEW RAG CHAIN with simpler output and source handling ---
     print("Step 4: Defining RAG chain...")
+    # System prompt to guide the LLM for chat-like, sourced answers
+    system_prompt = """You are a helpful AI assistant that answers questions based ONLY on the provided context.
+    Your answer should be concise and directly address the question.
+    After your answer, list the numbers of the sources you used, like this: [1][2].
+    Do not make up information. If the answer is not in the context, say "I cannot answer this based on the provided documents."
     Context:
     {context}
     """
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", system_prompt),
+        ("human", "{question}")
+    ])
+    def format_docs_with_numbers(docs):  # Render the retrieved docs as one string of "Source [n]:" sections for the prompt's {context} slot
+        # Number each document starting at 1 so the model can cite it as [n]
+        # Cap the text taken from each document so the combined context stays bounded
+        MAX_DOC_LENGTH = 1500 # Max characters kept from each document's page_content
+        numbered_docs = []
+        for i, doc in enumerate(docs):
+            content = doc.page_content
+            if len(content) > MAX_DOC_LENGTH:
+                content = content[:MAX_DOC_LENGTH] + "..."  # truncated text is marked with a trailing ellipsis
+            numbered_docs.append(f"Source [{i+1}]:\n{content}")
+        return "\n\n".join(numbered_docs)  # sections are separated by a blank line; an empty docs list yields ""
     rag_chain = (
+        {"context": retriever | format_docs_with_numbers, "question": RunnablePassthrough()}
         | prompt
         | llm
+        | StrOutputParser()
     )
     print("Step 4: SUCCESS")
+    # --- Seed the chat history with a greeting the first time "messages" is absent from session state ---
+    if "messages" not in st.session_state:
+        st.session_state.messages = [{"role": "assistant", "content": "Hello! I'm ready to answer questions about your documents."}]
+    # Replay every stored message so the conversation is redrawn on each script run
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
+    # Handle a newly submitted question (the walrus binds it only when the input is non-empty/truthy)
+    if user_query := st.chat_input("Ask a question about your documents"):
+        st.session_state.messages.append({"role": "user", "content": user_query})
+        with st.chat_message("user"):
+            st.markdown(user_query)
+        with st.chat_message("assistant"):
+            with st.spinner("Thinking..."):
+                try:
+                    print(f"--- UI DEBUG: Invoking RAG chain with query: {user_query} ---")
+                    answer = rag_chain.invoke(user_query)  # the chain ends in StrOutputParser, so the answer is rendered as text
+                    print(f"--- UI DEBUG: Raw LLM Answer: {answer} ---")
+                    st.markdown(answer) # Show the answer as returned by the chain
+                    # The retriever is invoked a second time for the same query purely to render the source list;
+                    # the documents fetched inside rag_chain are not reused here.
+                    with st.expander("Sources"):
+                        source_docs = retriever.invoke(user_query)
+                        if source_docs:
+                            for i, doc in enumerate(source_docs):
+                                source_filename = os.path.basename(doc.metadata.get('source', 'Unknown'))  # show only the file name, not the full path
+                                page_number = doc.metadata.get('page', 'N/A')
+                                st.markdown(f"**[{i+1}] Source:** `{source_filename}` (Page: {page_number})")  # 1-based, matching the "Source [n]" numbering used in the prompt context
+                                st.markdown(f"> {doc.page_content[:300]}...") # Quote only the first 300 characters
+                                st.markdown("---")
+                        else:
+                            st.write("No specific sources were retrieved for this part of the answer.")
+                    st.session_state.messages.append({"role": "assistant", "content": answer}) # Only the answer text is stored; the source list is not added to history
+                except Exception as e_invoke:  # per-query failures are reported in the chat instead of reaching the outer startup handler
+                    error_message = f"Error processing your query: {e_invoke}"
+                    print(f"!!!!!!!!!! ERROR DURING RAG CHAIN INVOCATION (UI Level) !!!!!!!!!!")
+                    import traceback
+                    print(traceback.format_exc())
+                    st.error(error_message)
+                    st.session_state.messages.append({"role": "assistant", "content": f"Sorry, I encountered an error: {error_message}"})  # the error is also kept in history so it is replayed on later runs
+    print("--- app.py script finished a run ---")
 except Exception as e:
+    print(f"!!!!!!!!!! A FATAL ERROR OCCURRED DURING STARTUP !!!!!!!!!!")
     import traceback
     print(traceback.format_exc())
     st.error(f"A fatal error occurred during startup. Please check the container logs. Error: {e}")