# Streamlit entry point for the Agentic RAG Chatbot.
| import os | |
| import streamlit as st | |
| from src.indexing.document_processing import DocumentProcessor | |
| from src.indexing.vectore_store import VectorStoreManager | |
| from src.tools_retrieval.retriever import RetrieverManager | |
| from src.workflow import RAGWorkflow | |
| from src.utils import ( | |
| logger, | |
| determine_top_k, | |
| determine_reranking_top_n | |
| ) | |
# Enable LangSmith tracing for the LangChain/LangGraph calls made below.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Bug fix: os.getenv() returns None when the variable is unset, and assigning
# None into os.environ raises TypeError at startup. Only propagate the key
# when it is actually present in the environment.
_langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
if _langchain_api_key:
    os.environ["LANGCHAIN_API_KEY"] = _langchain_api_key

# Directory where uploaded documents are persisted before indexing.
UPLOAD_FOLDER = "uploads/"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
# Seed the per-session Streamlit state with the objects the app builds
# lazily: chat transcript, retriever tool, vector store, compiled workflow.
_SESSION_DEFAULTS = {
    "messages": [],
    "retriever": None,
    "vector_store": None,
    "workflow": None,
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
# Page chrome: wide layout, book icon, and the main app title.
st.set_page_config(
    page_icon="📘",
    page_title="RAG Chatbot",
    layout="wide",
)
st.title("Agentic RAG Chatbot")
def process_document_upload(file_obj, upload_dir=None):
    """Persist an uploaded file to disk and return its filesystem path.

    Args:
        file_obj: Streamlit ``UploadedFile``-like object exposing ``.name``
            and ``.getbuffer()``.
        upload_dir: Destination directory. Defaults to the module-level
            ``UPLOAD_FOLDER``; added as a backward-compatible keyword so the
            helper is reusable and testable in isolation.

    Returns:
        str: Path of the saved file inside ``upload_dir``.
    """
    if upload_dir is None:
        upload_dir = UPLOAD_FOLDER
    # basename() guards against a client-supplied name containing path
    # separators (e.g. "../../evil") escaping the upload directory.
    safe_name = os.path.basename(file_obj.name)
    file_path = os.path.join(upload_dir, safe_name)
    with open(file_path, "wb") as f:
        f.write(file_obj.getbuffer())
    return file_path
# Sidebar: document upload controls. (`uploaded_file` / `process_button`
# remain visible at module level — `with` introduces no scope.)
with st.sidebar:
    st.header("Upload")
    uploaded_file = st.file_uploader(
        "Upload Document",
        type=["pdf", "xlsx", "docx", "txt"],
    )
    process_button = st.button("Process Document")
# Build the indexing + retrieval pipeline on demand:
# save upload -> chunk -> embed/index -> retriever tool -> compiled workflow.
if uploaded_file and process_button:
    with st.spinner("Processing Document..."):
        try:
            saved_path = process_document_upload(uploaded_file)
            # NOTE(review): load_and_split_pdf is also fed xlsx/docx/txt
            # uploads — presumably it dispatches on extension; confirm.
            doc_chunks = DocumentProcessor().load_and_split_pdf(saved_path)

            store = VectorStoreManager().index_documents(doc_chunks)
            st.session_state.vector_store = store
            st.success("Document processed and indexed successfully!")

            # Retrieval depth scales with the size of the chunked corpus.
            k = determine_top_k(len(doc_chunks))
            n = determine_reranking_top_n(k)
            tool = RetrieverManager(store).create_retriever(
                documents=doc_chunks,
                top_n=n,
                k=k,
            )
            st.session_state.retriever = tool
            st.success("Retriever tool created successfully!")

            st.session_state.workflow = RAGWorkflow(tool).compile()
        except Exception as e:
            logger.error(f"Error processing document: {e}")
            st.error(f"Error processing document: {e}")
# Replay the stored transcript so the conversation survives Streamlit reruns.
for entry in st.session_state.messages:
    with st.chat_message(entry["role"]):
        st.markdown(entry["content"])
# Handle one chat turn: echo the user message, run the RAG workflow (if a
# document has been processed), and persist both sides of the exchange.
if prompt := st.chat_input("Ask a question about your document"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        if st.session_state.workflow is None:
            final_response = "Please upload a document first."
        else:
            try:
                with st.spinner("Thinking..."):
                    result = st.session_state.workflow.invoke(
                        {"messages": [("user", prompt)]}
                    )
                    # The compiled graph returns a message list; the last
                    # entry is the assistant's answer.
                    final_response = result["messages"][-1].content
            except Exception as e:
                logger.error(f"Error invoking workflow: {e}")
                final_response = (
                    f"An error occurred while processing your request: {e}"
                )
        st.markdown(final_response)
        st.session_state.messages.append(
            {"role": "assistant", "content": final_response}
        )
# Sidebar control to wipe the conversation. This button renders *after* the
# history loop above, so without an explicit rerun the just-cleared messages
# would still be visible until the next interaction; st.rerun() restarts the
# script immediately so the cleared transcript is reflected at once.
if st.sidebar.button("Clear Chat"):
    st.session_state.messages = []
    st.rerun()