# RAG Document Q&A — Streamlit app (recovered from a Hugging Face Spaces page scrape)
import os

import streamlit as st
from PyPDF2 import PdfReader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq

# Page config — must be the first Streamlit call in the script.
# NOTE(review): emoji restored from mojibake in the scraped source — confirm original glyphs.
st.set_page_config(page_title="RAG Document Q&A", page_icon="📚", layout="wide")

# Title
st.title("📚 RAG Document Q&A System")
st.markdown("Upload PDFs and ask questions about them!")
| # Sidebar for API key | |
| with st.sidebar: | |
| st.header("βοΈ Configuration") | |
| api_key = st.text_input("Enter Groq API Key:", type="password") | |
| st.markdown("[Get free API key from Groq](https://console.groq.com/)") | |
| st.markdown("---") | |
| st.markdown("### About") | |
| st.markdown("This RAG system uses:") | |
| st.markdown("- π€ Groq (Llama 3.3)") | |
| st.markdown("- π Vector Search") | |
| st.markdown("- π PDF Processing") | |
# Initialize session state so values survive Streamlit reruns.
if 'vectorstore' not in st.session_state:
    st.session_state.vectorstore = None  # set after documents are processed
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []  # list of {"question": ..., "answer": ...}

# Main area: narrow upload column (col1), wide Q&A column (col2).
col1, col2 = st.columns([1, 2])
# Left column: upload PDFs, extract their text, chunk it, and build the
# in-memory Chroma vector store kept in session state.
with col1:
    st.header("📤 Upload Documents")
    uploaded_files = st.file_uploader(
        "Upload PDF files",
        type=['pdf'],
        accept_multiple_files=True
    )
    if uploaded_files and api_key:
        if st.button("🚀 Process Documents", type="primary"):
            with st.spinner("Processing PDFs..."):
                try:
                    # Extract text from all PDFs
                    all_text = ""
                    for pdf_file in uploaded_files:
                        pdf_reader = PdfReader(pdf_file)
                        for page in pdf_reader.pages:
                            # extract_text() can return None (e.g. image-only
                            # pages); guard so concatenation never raises.
                            all_text += page.extract_text() or ""
                    # Split into overlapping chunks for retrieval
                    text_splitter = RecursiveCharacterTextSplitter(
                        chunk_size=1000,
                        chunk_overlap=200
                    )
                    chunks = text_splitter.split_text(all_text)
                    # Create embeddings and vector store
                    embeddings = HuggingFaceEmbeddings(
                        model_name="sentence-transformers/all-MiniLM-L6-v2"
                    )
                    st.session_state.vectorstore = Chroma.from_texts(
                        texts=chunks,
                        embedding=embeddings
                    )
                    st.success(f"✅ Processed {len(uploaded_files)} PDF(s) into {len(chunks)} chunks!")
                except Exception as e:
                    # Surface any processing failure in the UI instead of crashing the app.
                    st.error(f"Error: {str(e)}")
# Right column: answer questions against the processed documents.
# Retrieval: top-3 similar chunks from the vector store are stuffed into a
# prompt and sent to Groq's Llama 3.3 model.
with col2:
    st.header("💬 Ask Questions")
    if st.session_state.vectorstore and api_key:
        # Question input
        question = st.text_input("Ask a question about your documents:")
        if question:
            with st.spinner("Thinking..."):
                try:
                    # Setup LLM (ChatGroq reads the key from the environment)
                    os.environ["GROQ_API_KEY"] = api_key
                    llm = ChatGroq(
                        model="llama-3.3-70b-versatile",
                        temperature=0  # deterministic answers
                    )
                    # Get relevant docs
                    docs = st.session_state.vectorstore.similarity_search(question, k=3)
                    context = "\n\n".join([doc.page_content for doc in docs])
                    # Create prompt
                    prompt = f"""Answer based only on this context:
{context}
Question: {question}
Answer:"""
                    # Get answer
                    answer = llm.invoke(prompt)
                    # Display answer
                    st.markdown("### 💡 Answer")
                    st.markdown(answer.content)
                    # Show sources
                    with st.expander("📄 View Sources"):
                        for i, doc in enumerate(docs, 1):
                            st.markdown(f"**Source {i}:**")
                            st.text(doc.page_content[:300] + "...")
                            st.markdown("---")
                    # Add to history
                    st.session_state.chat_history.append({
                        "question": question,
                        "answer": answer.content
                    })
                except Exception as e:
                    st.error(f"Error: {str(e)}")
        # Show chat history (most recent first, last 5 entries)
        if st.session_state.chat_history:
            st.markdown("### 📜 Chat History")
            for i, chat in enumerate(reversed(st.session_state.chat_history[-5:]), 1):
                with st.expander(f"Q{i}: {chat['question'][:50]}..."):
                    st.markdown(f"**Q:** {chat['question']}")
                    st.markdown(f"**A:** {chat['answer']}")
    else:
        st.info("👈 Upload PDFs and enter API key to get started!")
# Footer
st.markdown("---")
st.markdown("Built with Streamlit, LangChain, and Groq 🚀")