Spaces:
Sleeping
Sleeping
File size: 10,821 Bytes
b2967c1 71b147b e086a87 dcc1b8b ecb8e7a dcc1b8b 6eb665c b2967c1 6eb665c b2967c1 0ab9683 b2967c1 6eb665c 0ab9683 6eb665c 0ab9683 6eb665c b2967c1 83623aa 6eb665c dcc1b8b 6eb665c 0ab9683 6eb665c dcc1b8b 6eb665c b2967c1 6eb665c b2967c1 dcc1b8b b2967c1 dcc1b8b 9b86fc8 6eb665c b2967c1 6eb665c dcc1b8b 6eb665c 0ab9683 b2967c1 dcc1b8b b2967c1 dcc1b8b 0ab9683 b2967c1 dcc1b8b b2967c1 dcc1b8b b2967c1 dcc1b8b b2967c1 dcc1b8b 0ab9683 b2967c1 0ab9683 b2967c1 0ab9683 b2967c1 0ab9683 b2967c1 0ab9683 b2967c1 dcc1b8b 0ab9683 dcc1b8b b2967c1 dcc1b8b 2f13b5b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 |
import streamlit as st
import os
# Keep HF tokenizers single-threaded and point every Hugging Face cache at a
# writable path inside the container. These variables must be set BEFORE
# transformers / sentence-transformers are imported, since they are read at
# import time.
os.environ['TOKENIZERS_PARALLELISM'] = 'false'
os.environ['HF_HOME'] = '/app/huggingface_cache'
os.environ['TRANSFORMERS_CACHE'] = '/app/huggingface_cache/transformers'
os.environ['SENTENCE_TRANSFORMERS_HOME'] = '/app/huggingface_cache/sentence_transformers'
# makedirs(exist_ok=True) is already a no-op when the directory exists, so the
# previous os.path.exists() pre-check was redundant (and a TOCTOU race).
os.makedirs('/app/huggingface_cache', exist_ok=True)
import langchain
from dotenv import load_dotenv
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank
# --- Startup. Everything below runs inside one big try so any initialization
# failure is surfaced both in the container logs and in the Streamlit UI
# (the matching `except` is at the bottom of the script).
try:
print("Step 1: Loading environment variables...")
# Secrets come from .env / the container environment — never hard-coded.
load_dotenv()
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
GROQ_API_KEY = os.getenv('GROQ_API_KEY')
COHERE_API_KEY = os.getenv('COHERE_API_KEY')
# Name of the pre-built Pinecone index holding the document embeddings.
INDEX_NAME = "rag-chatbot"
print("Step 1: SUCCESS")
# Page chrome + custom CSS for the chat bubbles. The stylesheet is injected
# once via st.markdown(unsafe_allow_html=True); chat rows are later rendered
# as raw HTML using these classes (user messages right-aligned in blue,
# bot messages left-aligned in the dark-theme bubble).
st.set_page_config(page_title="Advanced RAG Chatbot", page_icon="π", layout="wide")
st.markdown("""
<style>
.chat-container {
display: flex;
flex-direction: column;
width: 100%;
margin: auto; /* Can adjust max-width if needed */
}
.chat-row {
display: flex;
align-items: flex-start;
margin-bottom: 1rem;
width: 100%;
}
.chat-bubble {
padding: 0.9rem;
border-radius: 1rem;
max-width: 70%;
word-wrap: break-word;
font-size: 1rem;
line-height: 1.4;
}
.user-row {
justify-content: flex-end;
}
.bot-row {
justify-content: flex-start;
}
.user-bubble {
background-color: #0078D4; /* Streamlit blue */
color: white;
border-bottom-right-radius: 0.2rem;
}
.bot-bubble {
background-color: #262730; /* Streamlit dark theme component background */
color: white;
border-bottom-left-radius: 0.2rem;
border: 1px solid #3c3d49; /* Slight border for bot bubble */
}
.avatar {
font-size: 1.5rem;
width: 40px;
height: 40px;
display: flex;
align-items: center;
justify-content: center;
border-radius: 50%;
background-color: #4A4A4A; /* Neutral avatar background */
}
.user-avatar { margin-left: 0.5rem; }
.bot-avatar { margin-right: 0.5rem; }
</style>
""", unsafe_allow_html=True)
@st.cache_resource
def initialize_services():
    """Build the retrieval and generation backends (cached once per process).

    Wires together: SentenceTransformer embeddings -> Pinecone vector store ->
    base retriever (k=10) -> Cohere reranker (keeps top 3) -> Groq chat model.

    Returns:
        tuple: (ContextualCompressionRetriever, ChatGroq) used by the RAG chain.

    Raises:
        ValueError: if any of the three required API keys is missing.
    """
    print("Step 2: Entering initialize_services function...")
    required_keys = (PINECONE_API_KEY, GROQ_API_KEY, COHERE_API_KEY)
    if not all(required_keys):
        raise ValueError("An API key is missing!")
    # Same embedding model must be used at query time as was used at indexing time.
    embedding_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    pinecone_client = Pinecone(api_key=PINECONE_API_KEY)
    index_host = "https://rag-chatbot-sg8t88c.svc.aped-4627-b74a.pinecone.io"
    pinecone_index = pinecone_client.Index(host=index_host)
    doc_store = PineconeVectorStore(index=pinecone_index, embedding=embedding_model)
    # Over-fetch 10 candidates, then let the Cohere reranker keep the best 3.
    candidate_retriever = doc_store.as_retriever(search_kwargs={'k': 10})
    reranker = CohereRerank(cohere_api_key=COHERE_API_KEY, top_n=3, model="rerank-english-v3.0")
    final_retriever = ContextualCompressionRetriever(base_compressor=reranker, base_retriever=candidate_retriever)
    # Low temperature keeps answers grounded in the supplied context.
    chat_model = ChatGroq(temperature=0.1, model_name="llama3-70b-8192", api_key=GROQ_API_KEY)
    print("Step 2: All services initialized successfully.")
    return final_retriever, chat_model
print("Step 3: Calling initialize_services...")
# Cached by @st.cache_resource, so the heavy clients are built once per process.
retriever, llm = initialize_services()
print("Step 3: SUCCESS, services are loaded.")
# --- RAG CHAIN
print("Step 4: Defining RAG chain...")
# System prompt: constrains the model to the retrieved context only and asks it
# to cite sources by number so answers can be mapped back to documents.
system_prompt = """You are a helpful AI assistant that answers questions based ONLY on the provided context.
Your answer should be concise and directly address the question.
After your answer, list the numbers of the sources you used, like this: [1][2].
Do not make up information. If the answer is not in the context, say "I cannot answer this based on the provided documents."
Context (Sources are numbered starting from 1):
{context}
"""
# {context} is filled by the retriever pipeline; {question} by the user input.
prompt_template = ChatPromptTemplate.from_messages([
("system", system_prompt),
("human", "{question}")
])
def format_docs_with_numbers(docs):
    """Render retrieved documents as one numbered context string.

    Each document becomes a "Source [n]:" section (1-based, matching the
    citation format requested in the system prompt), with its page_content
    clipped to 1200 characters plus "..." when longer. Sections are joined
    by blank lines; an empty docs list yields an empty string.
    """
    limit = 1200  # keeps the prompt within a manageable context size

    def _clip(text):
        # Truncate long documents so one source cannot dominate the prompt.
        return text if len(text) <= limit else text[:limit] + "..."

    return "\n\n".join(
        f"Source [{number}]:\n{_clip(document.page_content)}"
        for number, document in enumerate(docs, 1)
    )
# LCEL pipeline: the reranking retriever fetches documents for the query and
# they are formatted into the numbered {context}; {question} passes through
# unchanged; the filled prompt goes to the Groq LLM and the reply is parsed
# to a plain string. `.stream()` on this chain yields answer chunks.
rag_chain = (
{"context": retriever | RunnableLambda(format_docs_with_numbers), "question": RunnablePassthrough()}
| prompt_template
| llm
| StrOutputParser()
)
print("Step 4: SUCCESS")
# --- Streamlit Chat UI
st.title("π¬ Document Chatbot Interface")
# Seed the per-session chat history once; each entry carries role, content,
# and the source documents shown under assistant answers.
if "messages" not in st.session_state:
st.session_state.messages = [{"role": "assistant", "content": "Hello! I'm ready to answer questions about your documents.", "sources": []}]
# Re-render the entire history on every Streamlit rerun using the custom
# HTML bubble classes injected earlier.
st.markdown('<div class="chat-container">', unsafe_allow_html=True)
for message in st.session_state.messages:
if message["role"] == "user":
st.markdown(f'<div class="chat-row user-row"><div class="chat-bubble user-bubble">{message["content"]}</div><div class="avatar user-avatar">π€</div></div>', unsafe_allow_html=True)
else: # Assistant
st.markdown(f'<div class="chat-row bot-row"><div class="avatar bot-avatar">π€</div><div class="chat-bubble bot-bubble">{message["content"]}</div></div>', unsafe_allow_html=True)
if message.get("sources"): # Check if sources exist for this message
with st.expander("Sources Referenced in this Answer"):
for i, doc_info in enumerate(message["sources"]):
st.markdown(f"**[{i+1}] Source:** `{doc_info['filename']}` (Page: {doc_info['page']})")
st.markdown(f"> {doc_info['content_snippet'][:300]}...") # Show snippet
st.markdown("---")
st.markdown('</div>', unsafe_allow_html=True)
# --- Input handling: runs once per submission (walrus binds the typed text).
if user_query := st.chat_input("Ask a question about your documents"):
# Add user message to history and display it immediately
st.session_state.messages.append({"role": "user", "content": user_query})
st.markdown(f'<div class="chat-row user-row"><div class="chat-bubble user-bubble">{user_query}</div><div class="avatar user-avatar">π€</div></div>', unsafe_allow_html=True)
with st.chat_message("assistant", avatar="π€"): # Use Streamlit's chat_message context for the spinner and bot response area
message_placeholder = st.empty() # Create a placeholder for the streaming response
full_response = ""
with st.spinner("Thinking..."):
try:
print(f"--- UI DEBUG: Invoking RAG chain with query: {user_query} ---")
assistant_response_content = ""
# Stream tokens from the chain, re-rendering the bubble on each chunk.
for chunk in rag_chain.stream(user_query):
assistant_response_content += chunk
message_placeholder.markdown(f'<div class="chat-bubble bot-bubble">{assistant_response_content}β</div>', unsafe_allow_html=True) # Typing effect
message_placeholder.markdown(f'<div class="chat-bubble bot-bubble">{assistant_response_content}</div>', unsafe_allow_html=True) # Final response
print(f"--- UI DEBUG: Full LLM Answer: {assistant_response_content} ---")
# NOTE(review): this re-runs retrieval after the answer is generated, so the
# sources displayed may differ from the documents the chain actually used
# for this answer — confirm whether that divergence is acceptable.
retrieved_docs_for_display = retriever.invoke(user_query)
sources_info_for_display = []
if retrieved_docs_for_display:
for doc in retrieved_docs_for_display:
sources_info_for_display.append({
"filename": os.path.basename(doc.metadata.get('source', 'Unknown')),
"page": doc.metadata.get('page', 'N/A'),
"content_snippet": doc.page_content
})
# Persist the answer plus its sources so history reruns can re-show them.
st.session_state.messages.append({
"role": "assistant",
"content": assistant_response_content,
"sources": sources_info_for_display
})
if sources_info_for_display:
with st.expander("Sources for the latest answer"):
for i, doc_info in enumerate(sources_info_for_display):
st.markdown(f"**[{i+1}] Source:** `{doc_info['filename']}` (Page: {doc_info['page']})")
st.markdown(f"> {doc_info['content_snippet'][:300]}...")
st.markdown("---")
except Exception as e_invoke:
# Per-query failures are shown inline and recorded in history so the
# app keeps running; the full traceback goes to the container logs.
error_message = f"Error processing your query: {e_invoke}"
print(f"!!!!!!!!!! ERROR DURING RAG CHAIN INVOCATION (UI Level) !!!!!!!!!!")
import traceback
print(traceback.format_exc())
message_placeholder.error(error_message)
st.session_state.messages.append({"role": "assistant", "content": f"Sorry, I encountered an error: {error_message}", "sources": []})
print("--- app.py script finished a run ---")
# --- Fatal-error path for the file-wide startup try-block: dump the full
# traceback to the container logs and show a friendly error in the UI.
except Exception as e:
print(f"!!!!!!!!!! A FATAL ERROR OCCURRED DURING STARTUP !!!!!!!!!!")
import traceback
print(traceback.format_exc())
st.error(f"A fatal error occurred during startup. Please check the container logs. Error: {e}")