File size: 3,935 Bytes
ba900f0
8bb0a69
91c6bea
ba900f0
91c6bea
 
40ca01e
 
 
 
 
986437f
ce64b19
 
 
ba900f0
 
 
986437f
ba900f0
 
 
 
 
 
7bc82ac
 
91c6bea
ba900f0
 
 
 
 
 
 
986437f
 
 
 
 
 
ba900f0
40ca01e
 
 
ba900f0
986437f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba900f0
91c6bea
 
 
 
 
 
 
 
986437f
 
91c6bea
986437f
 
91c6bea
986437f
 
 
7bc82ac
 
986437f
 
 
 
91c6bea
 
ba900f0
91c6bea
986437f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os
import streamlit as st
from src.indexing.document_processing import DocumentProcessor
from src.indexing.vectore_store import VectorStoreManager
from src.tools_retrieval.retriever import RetrieverManager
from src.workflow import RAGWorkflow
from src.utils import (
    logger,
    determine_top_k,
    determine_reranking_top_n
)

# Enable LangChain tracing only when an API key is actually configured.
# os.environ values must be strings, so copying os.getenv(...) straight back
# into os.environ raised TypeError at startup whenever LANGCHAIN_API_KEY was
# unset (and was a no-op when it was set).
if os.getenv("LANGCHAIN_API_KEY") is not None:
    os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Directory that receives uploaded documents; created at startup if missing.
UPLOAD_FOLDER = "uploads/"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Seed the per-session defaults; keys that are already present are left
# untouched.
for _state_key, _initial_value in (
    ("messages", []),
    ("retriever", None),
    ("vector_store", None),
    ("workflow", None),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# Page chrome: browser tab title/icon, wide layout, and the visible heading.
st.set_page_config(page_title="RAG Chatbot", page_icon="📘", layout="wide")
st.title("Agentic RAG Chatbot")

def process_document_upload(file_obj):
    """Write an uploaded file into ``UPLOAD_FOLDER`` and return its path.

    Args:
        file_obj: Uploaded file object exposing a ``name`` attribute and a
            ``getbuffer()`` method that yields the file's bytes.

    Returns:
        The path of the written file: ``UPLOAD_FOLDER`` joined with the base
        name of ``file_obj.name``.

    Raises:
        ValueError: If ``file_obj.name`` has no usable file-name component
            (empty, ``.`` or ``..``).
    """
    # The name is supplied by the client, so keep only its final component:
    # directory parts, "..", or an absolute path must not be able to move the
    # write outside UPLOAD_FOLDER. Backslashes are normalised first so that
    # Windows-style separators are stripped on POSIX hosts as well.
    safe_name = os.path.basename(str(file_obj.name).replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {file_obj.name!r}")

    file_path = os.path.join(UPLOAD_FOLDER, safe_name)
    with open(file_path, "wb") as f:
        f.write(file_obj.getbuffer())
    return file_path

# Sidebar: upload a document, then build the vector store, retriever tool and
# compiled workflow from it, storing each in session state.
with st.sidebar:
    st.header("Upload")
    uploaded_file = st.file_uploader(
        "Upload Document",
        type=["pdf", "xlsx", "docx", "txt"],
    )
    process_button = st.button("Process Document")

    # Only run the pipeline when a file is present and the button was pressed.
    if uploaded_file and process_button:
        with st.spinner("Processing Document..."):
            try:
                file_path = process_document_upload(uploaded_file)

                # NOTE(review): every accepted extension is routed through
                # load_and_split_pdf — confirm it handles xlsx/docx/txt.
                chunks = DocumentProcessor().load_and_split_pdf(file_path)

                vector_store = VectorStoreManager().index_documents(chunks)
                st.session_state["vector_store"] = vector_store
                st.success("Document processed and indexed successfully!")

                # Retrieval sizes are derived from the number of chunks.
                top_k = determine_top_k(len(chunks))
                top_n = determine_reranking_top_n(top_k)

                retriever_tool = RetrieverManager(vector_store).create_retriever(
                    documents=chunks,
                    top_n=top_n,
                    k=top_k,
                )
                st.session_state["retriever"] = retriever_tool
                st.success("Retriever tool created successfully!")

                st.session_state["workflow"] = RAGWorkflow(retriever_tool).compile()
            except Exception as e:
                # Report the failure both to the log and in the sidebar.
                failure_text = f"Error processing document: {e}"
                logger.error(failure_text)
                st.error(failure_text)


# Replay the stored conversation so it is drawn again on this run.
for past_message in st.session_state.messages:
    role, content = past_message["role"], past_message["content"]
    with st.chat_message(role):
        st.markdown(content)

# Handle a newly submitted question, if any.
prompt = st.chat_input("Ask a question about your document")
if prompt:
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        active_workflow = st.session_state.workflow
        if active_workflow is not None:
            try:
                with st.spinner("Thinking..."):
                    result = active_workflow.invoke({"messages": [("user", prompt)]})
                    # The reply is the content of the last message returned.
                    final_response = result["messages"][-1].content
            except Exception as e:
                logger.error(f"Error invoking workflow: {e}")
                final_response = f"An error occurred while processing your request: {e}"
        else:
            # No workflow has been stored in session state yet.
            final_response = "Please upload a document first."

        st.markdown(final_response)
        st.session_state.messages.append({"role": "assistant", "content": final_response})

# Sidebar button that empties the stored conversation.
if st.sidebar.button("Clear Chat"):
    st.session_state.messages = []
    # The history above was already rendered earlier in this run, so without
    # a rerun the cleared messages would stay on screen until the next
    # interaction. Fall back to experimental_rerun on older Streamlit
    # releases that do not provide st.rerun.
    _rerun = getattr(st, "rerun", None) or getattr(st, "experimental_rerun", None)
    if _rerun is not None:
        _rerun()