"""Streamlit RAG (retrieval-augmented generation) app.

Upload PDFs, index their text in an in-memory Chroma vector store, and answer
questions about them with a Groq-hosted Llama 3.3 model.

Pipeline: PDF text extraction -> chunking -> sentence-transformer embeddings
-> Chroma similarity search -> LLM answer grounded in retrieved context.
"""

import streamlit as st
from PyPDF2 import PdfReader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
import os

# Page config
st.set_page_config(page_title="RAG Document Q&A", page_icon="📚", layout="wide")

# Title
st.title("📚 RAG Document Q&A System")
st.markdown("Upload PDFs and ask questions about them!")

# Sidebar for API key
with st.sidebar:
    st.header("⚙️ Configuration")
    api_key = st.text_input("Enter Groq API Key:", type="password")
    st.markdown("[Get free API key from Groq](https://console.groq.com/)")
    st.markdown("---")
    st.markdown("### About")
    st.markdown("This RAG system uses:")
    st.markdown("- 🤖 Groq (Llama 3.3)")
    st.markdown("- 🔍 Vector Search")
    st.markdown("- 📄 PDF Processing")

# Initialize session state (persists across Streamlit reruns)
if 'vectorstore' not in st.session_state:
    st.session_state.vectorstore = None
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []


def _extract_pdf_text(files) -> str:
    """Return the concatenated text of every page of every uploaded PDF.

    PyPDF2's ``extract_text()`` can return ``None`` for pages with no
    extractable text (e.g. scanned images); those are treated as empty
    strings instead of raising ``TypeError`` on concatenation. Pages are
    joined with newlines so words don't fuse across page boundaries, and
    ``str.join`` avoids quadratic ``+=`` accumulation.
    """
    pages = []
    for pdf_file in files:
        reader = PdfReader(pdf_file)
        for page in reader.pages:
            pages.append(page.extract_text() or "")
    return "\n".join(pages)


def _build_vectorstore(chunks):
    """Embed *chunks* with a MiniLM sentence-transformer and index them in Chroma."""
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return Chroma.from_texts(texts=chunks, embedding=embeddings)


# Main area
col1, col2 = st.columns([1, 2])

with col1:
    st.header("📤 Upload Documents")
    uploaded_files = st.file_uploader(
        "Upload PDF files",
        type=['pdf'],
        accept_multiple_files=True
    )

    if uploaded_files and api_key:
        if st.button("🔄 Process Documents", type="primary"):
            with st.spinner("Processing PDFs..."):
                try:
                    all_text = _extract_pdf_text(uploaded_files)

                    # Split into overlapping chunks for retrieval.
                    text_splitter = RecursiveCharacterTextSplitter(
                        chunk_size=1000,
                        chunk_overlap=200
                    )
                    chunks = text_splitter.split_text(all_text)

                    if not chunks:
                        # Guard: Chroma.from_texts fails confusingly on an
                        # empty corpus (e.g. all pages were scanned images).
                        st.warning("No extractable text found in the uploaded PDF(s).")
                    else:
                        st.session_state.vectorstore = _build_vectorstore(chunks)
                        st.success(f"✅ Processed {len(uploaded_files)} PDF(s) into {len(chunks)} chunks!")
                except Exception as e:
                    st.error(f"Error: {str(e)}")

with col2:
    st.header("💬 Ask Questions")

    if st.session_state.vectorstore and api_key:
        # Question input
        question = st.text_input("Ask a question about your documents:")

        if question:
            with st.spinner("Thinking..."):
                try:
                    # Setup LLM (ChatGroq reads the key from the environment)
                    os.environ["GROQ_API_KEY"] = api_key
                    llm = ChatGroq(
                        model="llama-3.3-70b-versatile",
                        temperature=0
                    )

                    # Retrieve the 3 most similar chunks as grounding context
                    docs = st.session_state.vectorstore.similarity_search(question, k=3)
                    context = "\n\n".join(doc.page_content for doc in docs)

                    # Create prompt
                    prompt = f"""Answer based only on this context:

{context}

Question: {question}

Answer:"""

                    # Get answer
                    answer = llm.invoke(prompt)

                    # Display answer
                    st.markdown("### 💡 Answer")
                    st.markdown(answer.content)

                    # Show sources
                    with st.expander("📚 View Sources"):
                        for i, doc in enumerate(docs, 1):
                            st.markdown(f"**Source {i}:**")
                            st.text(doc.page_content[:300] + "...")
                            st.markdown("---")

                    # Add to history
                    st.session_state.chat_history.append({
                        "question": question,
                        "answer": answer.content
                    })
                except Exception as e:
                    st.error(f"Error: {str(e)}")

        # Show chat history (most recent of the last 5 first)
        if st.session_state.chat_history:
            st.markdown("### 📜 Chat History")
            for i, chat in enumerate(reversed(st.session_state.chat_history[-5:]), 1):
                with st.expander(f"Q{i}: {chat['question'][:50]}..."):
                    st.markdown(f"**Q:** {chat['question']}")
                    st.markdown(f"**A:** {chat['answer']}")
    else:
        st.info("👈 Upload PDFs and enter API key to get started!")

# Footer
st.markdown("---")
st.markdown("Built with Streamlit, LangChain, and Groq 🚀")