Spaces:
Sleeping
Sleeping
File size: 3,935 Bytes
ba900f0 8bb0a69 91c6bea ba900f0 91c6bea 40ca01e 986437f ce64b19 ba900f0 986437f ba900f0 7bc82ac 91c6bea ba900f0 986437f ba900f0 40ca01e ba900f0 986437f ba900f0 91c6bea 986437f 91c6bea 986437f 91c6bea 986437f 7bc82ac 986437f 91c6bea ba900f0 91c6bea 986437f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import os
import streamlit as st
from src.indexing.document_processing import DocumentProcessor
from src.indexing.vectore_store import VectorStoreManager
from src.tools_retrieval.retriever import RetrieverManager
from src.workflow import RAGWorkflow
from src.utils import (
logger,
determine_top_k,
determine_reranking_top_n
)
# Enable LangSmith tracing for the LangChain pipeline.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Only propagate the API key when it is actually set: assigning None into
# os.environ raises "TypeError: str expected, not NoneType" and would crash
# the app at import time on machines without the key configured.
_langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
if _langchain_api_key:
    os.environ["LANGCHAIN_API_KEY"] = _langchain_api_key

# Directory where user-uploaded documents are persisted before indexing.
UPLOAD_FOLDER = "uploads/"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
# Seed session state with the app's persistent slots so later code can
# read them unconditionally. Each key is only created on the first run.
_SESSION_DEFAULTS = {
    "messages": [],
    "retriever": None,
    "vector_store": None,
    "workflow": None,
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default

# Page chrome: wide layout with a book icon in the browser tab.
st.set_page_config(
    page_title="RAG Chatbot",
    layout="wide",
    page_icon="📘",
)
st.title("Agentic RAG Chatbot")
def process_document_upload(file_obj):
    """Persist an uploaded file into UPLOAD_FOLDER and return its path.

    Args:
        file_obj: Uploaded file object exposing ``name`` and ``getbuffer()``
            (e.g. a Streamlit ``UploadedFile``).

    Returns:
        str: Filesystem path of the saved copy.
    """
    # basename() strips any directory components from the client-supplied
    # filename, preventing a crafted name (e.g. "../../x") from writing
    # outside UPLOAD_FOLDER.
    safe_name = os.path.basename(file_obj.name)
    file_path = os.path.join(UPLOAD_FOLDER, safe_name)
    with open(file_path, "wb") as f:
        f.write(file_obj.getbuffer())
    return file_path
# Sidebar: document upload and one-shot indexing pipeline.
# On "Process Document" the file is saved, chunked, embedded into a vector
# store, wrapped in a retriever tool, and compiled into the RAG workflow;
# all three artifacts are cached in session state for the chat loop below.
with st.sidebar:
    st.header("Upload")
    uploaded_file = st.file_uploader("Upload Document", type=["pdf", "xlsx", "docx", "txt"])
    process_button = st.button("Process Document")
    if uploaded_file and process_button:
        with st.spinner("Processing Document..."):
            try:
                # Persist the upload to disk so the loader can read it by path.
                file_path = process_document_upload(uploaded_file)
                doc_processor = DocumentProcessor()
                # NOTE(review): load_and_split_pdf is called for every accepted
                # type (pdf/xlsx/docx/txt) — presumably it dispatches on the
                # extension internally; confirm in DocumentProcessor.
                chunks = doc_processor.load_and_split_pdf(file_path)
                doc_processor = DocumentProcessor()
                chunks = doc_processor.load_and_split_pdf(file_path)
                vector_store_manager = VectorStoreManager()
                vector_store = vector_store_manager.index_documents(chunks)
                st.session_state.vector_store = vector_store
                st.success("Document processed and indexed successfully!")
                # Retrieval breadth scales with document size: k from chunk
                # count, then the reranker keeps the top_n of those k.
                top_k = determine_top_k(len(chunks))
                top_n = determine_reranking_top_n(top_k)
                retriever_manager = RetrieverManager(vector_store)
                retriever_tool = retriever_manager.create_retriever(
                    documents=chunks,
                    top_n=top_n,
                    k=top_k
                )
                st.session_state.retriever = retriever_tool
                st.success("Retriever tool created successfully!")
                # Compile the agentic workflow around the retriever tool;
                # the chat loop below invokes this compiled graph.
                rag_workflow = RAGWorkflow(retriever_tool)
                workflow = rag_workflow.compile()
                st.session_state.workflow = workflow
            except Exception as e:
                # Surface any pipeline failure both to the log and the UI.
                logger.error(f"Error processing document: {e}")
                st.error(f"Error processing document: {e}")
# Replay the conversation history accumulated in session state.
for past in st.session_state.messages:
    with st.chat_message(past["role"]):
        st.markdown(past["content"])

# Handle a new user turn: echo it, run the compiled workflow, render the
# assistant reply, and append both turns to the history.
user_input = st.chat_input("Ask a question about your document")
if user_input:
    st.session_state.messages.append({"role": "user", "content": user_input})
    with st.chat_message("user"):
        st.markdown(user_input)
    with st.chat_message("assistant"):
        if st.session_state.workflow is None:
            # No document has been processed yet, so there is nothing to query.
            final_response = "Please upload a document first."
        else:
            try:
                with st.spinner("Thinking..."):
                    payload = {"messages": [("user", user_input)]}
                    result = st.session_state.workflow.invoke(payload)
                    # The workflow's last message carries the final answer.
                    final_response = result["messages"][-1].content
            except Exception as e:
                logger.error(f"Error invoking workflow: {e}")
                final_response = f"An error occurred while processing your request: {e}"
        st.markdown(final_response)
        st.session_state.messages.append({"role": "assistant", "content": final_response})
# Reset the visible conversation; indexed document and workflow are kept.
clear_requested = st.sidebar.button("Clear Chat")
if clear_requested:
    st.session_state.messages = []