import streamlit as st from rag_pipelline import ( extract_text_from_pdf, split_text_into_chunks, create_vector_store, create_rag_agent, get_answer ) # Page Config----- st.set_page_config( page_title = "PDF Chatbot- using RAG", page_icon = "๐Ÿ“„", layout = "wide" ) # Header----- st.markdown("### ๐Ÿ“„ PDF Chatbot - RAG + Gemini") st.markdown("Powered by Langchain and Gemini 2.5 Flash") st.divider() # Session State ----- if "agent" not in st.session_state: st.session_state.agent = None if "chat_history" not in st.session_state: st.session_state.chat_history = [] if "display_messages" not in st.session_state: st.session_state.display_messages = [] if "pdf_processed" not in st.session_state: st.session_state.pdf_processed = False if "pdf_name" not in st.session_state: st.session_state.pdf_name = "" # Sidebar ---- with st.sidebar: st.header("โš™๏ธ Stack Info") st.markdown("**Framework:** Langchain 1.2.10") st.markdown("**LLM:** Gemini 2.5 Flash") st.markdown("**Embeddings:** Google embedding-001") st.markdown("**Vector Store:** FAISS") st.divider() st.header("๐Ÿ“ Upload or Select PDF") # Upload a PDF uploaded_file = st.file_uploader( "Upload a PDF", type = ["pdf"], help = "Max 10 MB ยท Max 50 pages ยท Must have selectable text (not scanned)" ) # Select a sample PDF sample_pdf = st.selectbox( "Or pick a sample PDF:", ["None", "Attention is All You Need", "2025 ICC Champions Trophy-Wikipedia.pdf"] ) # Ensure only one PDF is uploaded at a time chosen_file , chosen_name = None,"" if uploaded_file is not None: chosen_file = uploaded_file chosen_name = uploaded_file.name elif sample_pdf != "None": sample_map = { "Attention is All You Need": "src/sample_pdf/Attention_is_all_you_need.pdf", "2025 ICC Champions Trophy-Wikipedia.pdf":"src/sample_pdf/2025_ICC_Champions_Trophy-Wikipedia.pdf", } # Using a variable and closing after use sample_path = sample_map.get(sample_pdf) if sample_path: try: chosen_file = open(sample_path, "rb") chosen_name = sample_pdf st.info(f"๐Ÿ“„ Using sample file: {chosen_name}") except FileNotFoundError: st.error(f"โŒ Sample file not found: {sample_path}") chosen_file = None if chosen_file is not None: if st.button("Process PDF", type = "primary", use_container_width = True): with st.spinner("Step 1/4 - Extracting raw text"): raw_text = extract_text_from_pdf(chosen_file) # Close sample file after reading to avoid resource leak if sample_pdf != "None" and hasattr(chosen_file, "close"): chosen_file.close() if not raw_text.strip(): st.error("โŒ No text found, please check your PDF and confirm its text selectable") else: with st.spinner("Step 2/4 - Splitting text into chunks"): chunks = split_text_into_chunks(raw_text) with st.spinner("Step 3/4 - Creating embedding and vector store"): vector_store = create_vector_store(chunks) with st.spinner("Step 4/4 - Creating RAG Agent"): st.session_state.agent = create_rag_agent(vector_store) st.session_state.pdf_processed = True st.session_state.pdf_name = chosen_name st.session_state.chat_history = [] st.session_state.display_messages = [] st.success(f"โœ… Ready! {len(chunks)} chunks indexed") if st.session_state.pdf_processed: st.divider() st.success(f" Active :\n{st.session_state.pdf_name}") st.caption(f"Messages so far:{len(st.session_state.display_messages)}") if st.button("Clear & Reset", use_container_width= True): st.session_state.agent = None st.session_state.chat_history = [] st.session_state.display_messages = [] st.session_state.pdf_processed = False st.session_state.pdf_name = "" st.rerun() # Main Area ----- if not st.session_state.pdf_processed: st.markdown("### How to use") col1, col2, col3 = st.columns(3) with col1: st.markdown("Step 1 - Upload or select the PDF from sidebar") with col2: st.markdown("Step 2 - Click Process PDF") with col3: st.markdown("Step 3 - Ask your questions in the chat box") st.divider() else: st.markdown(f"### Chatting with {st.session_state.pdf_name}") # Display all previous messages for msg in st.session_state.display_messages: with st.chat_message(msg["role"]): st.write(msg["content"]) # Show source chunks for assistant messages if msg["role"] == "assistant" and msg.get("sources"): with st.expander(" PDF Chunks used to generate this answer"): for i, doc in enumerate(msg["sources"]): st.markdown(f"**Chunk {i+1}:**") st.markdown(f"> {doc.page_content[:400]}...") st.divider() #Chat Input if st.session_state.pdf_processed: user_question = st.chat_input(f"Ask Something about {st.session_state.pdf_name}...") if user_question: # Show user message with st.chat_message("user"): st.write(user_question) # Store in both histories st.session_state.chat_history.append({ "role":"user", "content":user_question }) st.session_state.display_messages.append({ "role": "user", "content": user_question }) # Get answer from agent with st.chat_message("assistant"): with st.spinner("Agent is searching PDF and thinking"): answer, source_docs = get_answer( st.session_state.agent, user_question, st.session_state.chat_history[:-1] # history without current question ) st.write(answer) if source_docs: with st.expander(" PDF chunks used to generate this answer"): for i, doc in enumerate(source_docs): st.markdown(f"**Chunk {i+1}:**") st.markdown(f"> {doc.page_content[:400]}...") #Store assistant response st.session_state.chat_history.append({ "role":"assistant", "content" : answer }) st.session_state.display_messages.append({ "role": "assistant", "content": answer, "sources":source_docs })