# RAG-document_QA / app.py
# Uploaded by mshabir ("Upload 3 files", commit e9f434c, verified)
import streamlit as st
from PyPDF2 import PdfReader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
import os
# Page config: browser-tab title/icon and wide layout for the two-column UI below
st.set_page_config(page_title="RAG Document Q&A", page_icon="πŸ“š", layout="wide")
# Title and one-line usage hint shown at the top of the page
st.title("πŸ“š RAG Document Q&A System")
st.markdown("Upload PDFs and ask questions about them!")
# Sidebar: Groq API-key entry plus a short "about" section.
# (Indentation restored — the pasted copy had lost all block structure.)
with st.sidebar:
    st.header("βš™οΈ Configuration")
    # Key is held only in this session's variables (password-masked input);
    # it is consumed later when the ChatGroq client is created.
    api_key = st.text_input("Enter Groq API Key:", type="password")
    st.markdown("[Get free API key from Groq](https://console.groq.com/)")
    st.markdown("---")
    st.markdown("### About")
    st.markdown("This RAG system uses:")
    st.markdown("- πŸ€– Groq (Llama 3.3)")
    st.markdown("- πŸ” Vector Search")
    st.markdown("- πŸ“„ PDF Processing")
# Initialize session state so the vector store and chat history survive
# Streamlit's script re-runs (every widget interaction re-executes the file).
# (Indentation restored — the pasted copy had lost all block structure.)
if 'vectorstore' not in st.session_state:
    st.session_state.vectorstore = None  # set once documents are processed
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []  # list of {"question", "answer"} dicts
# Main area: narrow left column (upload, weight 1) beside a wider right column (Q&A, weight 2)
col1, col2 = st.columns([1, 2])
# Left column: upload PDFs, extract text, chunk it, and build the vector store.
# (Indentation restored — the pasted copy had lost all block structure.)
with col1:
    st.header("πŸ“€ Upload Documents")
    uploaded_files = st.file_uploader(
        "Upload PDF files",
        type=['pdf'],
        accept_multiple_files=True
    )
    # Processing requires both at least one file and an API key from the sidebar
    if uploaded_files and api_key:
        if st.button("πŸ”„ Process Documents", type="primary"):
            with st.spinner("Processing PDFs..."):
                try:
                    # Extract text from every page of every uploaded PDF
                    all_text = ""
                    for pdf_file in uploaded_files:
                        pdf_reader = PdfReader(pdf_file)
                        for page in pdf_reader.pages:
                            # extract_text() may return None (e.g. image-only
                            # pages); coerce to "" so concatenation never raises
                            all_text += page.extract_text() or ""
                    # Split into overlapping chunks for retrieval
                    text_splitter = RecursiveCharacterTextSplitter(
                        chunk_size=1000,
                        chunk_overlap=200
                    )
                    chunks = text_splitter.split_text(all_text)
                    if not chunks:
                        # Nothing extractable (scanned/image-only PDFs) —
                        # warn instead of passing an empty list to Chroma
                        st.warning("No extractable text found in the uploaded PDF(s).")
                    else:
                        # Embed chunks and build an in-memory Chroma store,
                        # persisted across re-runs via session state
                        embeddings = HuggingFaceEmbeddings(
                            model_name="sentence-transformers/all-MiniLM-L6-v2"
                        )
                        st.session_state.vectorstore = Chroma.from_texts(
                            texts=chunks,
                            embedding=embeddings
                        )
                        st.success(f"βœ… Processed {len(uploaded_files)} PDF(s) into {len(chunks)} chunks!")
                except Exception as e:
                    # Surface any processing failure in the UI instead of crashing
                    st.error(f"Error: {str(e)}")
# Right column: ask questions against the processed documents (RAG loop:
# retrieve top-k chunks, stuff them into a prompt, ask the Groq LLM).
# (Indentation restored — the pasted copy had lost all block structure.)
with col2:
    st.header("πŸ’¬ Ask Questions")
    if st.session_state.vectorstore and api_key:
        # Question input — any non-empty value triggers a retrieval + LLM call
        question = st.text_input("Ask a question about your documents:")
        if question:
            with st.spinner("Thinking..."):
                try:
                    # Setup LLM (ChatGroq reads the key from the environment)
                    os.environ["GROQ_API_KEY"] = api_key
                    llm = ChatGroq(
                        model="llama-3.3-70b-versatile",
                        temperature=0  # deterministic answers
                    )
                    # Retrieve the 3 most similar chunks as context
                    docs = st.session_state.vectorstore.similarity_search(question, k=3)
                    context = "\n\n".join([doc.page_content for doc in docs])
                    # Stuff context + question into a single grounded prompt
                    prompt = f"""Answer based only on this context:
{context}
Question: {question}
Answer:"""
                    # Get answer
                    answer = llm.invoke(prompt)
                    # Display answer
                    st.markdown("### πŸ’‘ Answer")
                    st.markdown(answer.content)
                    # Show the retrieved source chunks (first 300 chars each)
                    with st.expander("πŸ“š View Sources"):
                        for i, doc in enumerate(docs, 1):
                            st.markdown(f"**Source {i}:**")
                            # Only append an ellipsis when the chunk was truncated
                            snippet = doc.page_content[:300]
                            if len(doc.page_content) > 300:
                                snippet += "..."
                            st.text(snippet)
                            st.markdown("---")
                    # Add to history (persisted in session state)
                    st.session_state.chat_history.append({
                        "question": question,
                        "answer": answer.content
                    })
                except Exception as e:
                    # Surface API/retrieval failures in the UI instead of crashing
                    st.error(f"Error: {str(e)}")
        # Show the last 5 Q&A pairs, newest first
        if st.session_state.chat_history:
            st.markdown("### πŸ“œ Chat History")
            for i, chat in enumerate(reversed(st.session_state.chat_history[-5:]), 1):
                with st.expander(f"Q{i}: {chat['question'][:50]}..."):
                    st.markdown(f"**Q:** {chat['question']}")
                    st.markdown(f"**A:** {chat['answer']}")
    else:
        # No vector store and/or no API key yet — point the user at the sidebar
        st.info("πŸ‘ˆ Upload PDFs and enter API key to get started!")
# Footer: horizontal rule plus attribution line
st.markdown("---")
st.markdown("Built with Streamlit, LangChain, and Groq πŸš€")