# RAG-Application / app.py
import streamlit as st
import PyPDF2
import io
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import pickle
import os
import re
from typing import List, Tuple
import warnings
warnings.filterwarnings("ignore")
# Page config
st.set_page_config(
    page_title="RAG PDF Chat Application",
    page_icon="📚",
    layout="wide"
)
class RAGSystem:
    def __init__(self):
        self.embedding_model = None
        self.llm_pipeline = None
        self.index = None
        self.chunks = []
        self.embeddings = None
    @st.cache_resource
    def load_embedding_model(_self):
        """Load sentence transformer model"""
        try:
            model = SentenceTransformer('all-MiniLM-L6-v2')
            return model
        except Exception as e:
            st.error(f"Error loading embedding model: {str(e)}")
            return None
    @st.cache_resource
    def load_llm_model(_self):
        """Load Hugging Face LLM"""
        try:
            # Better models for Q&A tasks - choose one based on your system
            # Option 1: Google's Flan-T5 (Best for Q&A, lightweight)
            model_name = "google/flan-t5-base"  # 250M parameters
            # Option 2: For more powerful responses (if you have good hardware)
            # model_name = "google/flan-t5-large"  # 780M parameters
            # Option 3: Microsoft's DialoGPT (conversational)
            # model_name = "microsoft/DialoGPT-small"  # 117M parameters
            # Option 4: Facebook's BART (good for summarization + Q&A)
            # model_name = "facebook/bart-base"

            # Load tokenizer and pipeline
            if "flan-t5" in model_name:
                # Text-to-text generation for Flan-T5
                pipeline_obj = pipeline(
                    "text2text-generation",
                    model=model_name,
                    max_length=512,
                    temperature=0.7,
                    do_sample=True,
                    device=0 if torch.cuda.is_available() else -1
                )
            else:
                # Text generation for other models
                tokenizer = AutoTokenizer.from_pretrained(model_name)
                if tokenizer.pad_token is None:
                    tokenizer.pad_token = tokenizer.eos_token
                pipeline_obj = pipeline(
                    "text-generation",
                    model=model_name,
                    tokenizer=tokenizer,
                    max_length=512,
                    temperature=0.7,
                    do_sample=True,
                    device=0 if torch.cuda.is_available() else -1
                )
            return pipeline_obj
        except Exception as e:
            st.error(f"Error loading LLM: {str(e)}")
            return None
    def extract_text_from_pdf(self, pdf_file) -> str:
        """Extract text from uploaded PDF"""
        try:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            text = ""
            for page in pdf_reader.pages:
                text += page.extract_text() + "\n"
            return text
        except Exception as e:
            st.error(f"Error extracting text from PDF: {str(e)}")
            return ""
    def chunk_text(self, text: str, chunk_size: int = 500, overlap: int = 50) -> List[str]:
        """Split text into overlapping chunks"""
        # Clean the text
        text = re.sub(r'\s+', ' ', text.strip())
        # Split into sentences
        sentences = re.split(r'[.!?]+', text)
        chunks = []
        current_chunk = ""
        for sentence in sentences:
            sentence = sentence.strip()
            if not sentence:
                continue
            # If adding this sentence would exceed chunk size, save current chunk
            if len(current_chunk) + len(sentence) > chunk_size and current_chunk:
                chunks.append(current_chunk.strip())
                # Start new chunk with overlap
                words = current_chunk.split()
                overlap_text = ' '.join(words[-overlap:]) if len(words) > overlap else current_chunk
                current_chunk = overlap_text + " " + sentence
            else:
                current_chunk += " " + sentence if current_chunk else sentence
        # Add the last chunk
        if current_chunk.strip():
            chunks.append(current_chunk.strip())
        return chunks
    def create_embeddings(self, chunks: List[str]) -> np.ndarray:
        """Generate embeddings for text chunks"""
        if self.embedding_model is None:
            self.embedding_model = self.load_embedding_model()
        if self.embedding_model is None:
            return None
        try:
            embeddings = self.embedding_model.encode(chunks, show_progress_bar=True)
            return embeddings
        except Exception as e:
            st.error(f"Error creating embeddings: {str(e)}")
            return None
    def create_vector_store(self, embeddings: np.ndarray):
        """Create FAISS vector store"""
        try:
            dimension = embeddings.shape[1]
            index = faiss.IndexFlatIP(dimension)  # Inner product similarity
            # Normalize embeddings for cosine similarity
            faiss.normalize_L2(embeddings)
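            # normalize_L2 modifies the array in place, so the stored embeddings end up unit length.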
            index.add(embeddings.astype('float32'))
            return index
        except Exception as e:
            st.error(f"Error creating vector store: {str(e)}")
            return None
    def search_similar_chunks(self, query: str, k: int = 3) -> List[Tuple[str, float]]:
        """Search for similar chunks using vector similarity"""
        if self.embedding_model is None or self.index is None:
            return []
        try:
            # Generate query embedding
            query_embedding = self.embedding_model.encode([query])
            faiss.normalize_L2(query_embedding)
            # Search in vector store
            scores, indices = self.index.search(query_embedding.astype('float32'), k)
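            # Results come back sorted by decreasing inner product, which equals cosine
            # similarity here because both chunk and query vectors are L2-normalized.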
            results = []
            for idx, score in zip(indices[0], scores[0]):
                # FAISS pads with -1 when fewer than k results are available
                if 0 <= idx < len(self.chunks):
                    results.append((self.chunks[idx], float(score)))
            return results
        except Exception as e:
            st.error(f"Error searching chunks: {str(e)}")
            return []
    def generate_answer(self, query: str, context_chunks: List[str]) -> str:
        """Generate answer using LLM with context"""
        if self.llm_pipeline is None:
            self.llm_pipeline = self.load_llm_model()
        if self.llm_pipeline is None:
            return "Sorry, the LLM model is not available."
        try:
            # Combine context
            context = "\n".join(context_chunks[:2])  # Use top 2 chunks to avoid token limit
            # Different prompts for different model types
            model_name = getattr(self.llm_pipeline.model, 'name_or_path', 'unknown')
            if "flan-t5" in model_name.lower():
                # For Flan-T5 (text2text-generation)
                prompt = f"Answer the question based on the context.\n\nContext: {context}\n\nQuestion: {query}\n\nAnswer:"
                response = self.llm_pipeline(
                    prompt,
                    max_length=200,
                    num_return_sequences=1,
                    temperature=0.7,
                    do_sample=True
                )
                answer = response[0]['generated_text'].strip()
            else:
                # For GPT-style models (text-generation)
                prompt = f"""Based on the following context, answer the question:
Context: {context}
Question: {query}
Answer:"""
                response = self.llm_pipeline(
                    prompt,
                    # Word count is only a rough proxy for the token-based max_length
                    max_length=len(prompt.split()) + 100,
                    num_return_sequences=1,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=self.llm_pipeline.tokenizer.eos_token_id
                )
                # Extract the generated answer (drop the echoed prompt)
                generated_text = response[0]['generated_text']
                answer = generated_text[len(prompt):].strip()
            return answer if answer else "I couldn't find a specific answer in the provided context."
        except Exception as e:
            st.error(f"Error generating answer: {str(e)}")
            return "Sorry, I encountered an error while generating the answer."
# Initialize RAG system
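# st.cache_resource keeps a single RAGSystem instance alive across reruns (and shares it across
# sessions), so the chunks and FAISS index built in the sidebar persist between interactions.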
@st.cache_resource
def get_rag_system():
    return RAGSystem()
# Main app
def main():
    st.title("RAG PDF Chat Application")
    st.markdown("Upload a PDF and chat with its contents using AI!")

    # Initialize RAG system
    rag = get_rag_system()

    # Sidebar for PDF upload and processing
    with st.sidebar:
        st.header("Document Processing")
        uploaded_file = st.file_uploader(
            "Upload a PDF file",
            type=['pdf'],
            help="Upload a PDF document to create embeddings and chat with it"
        )
        if uploaded_file is not None:
            st.success(f"Uploaded: {uploaded_file.name}")
            if st.button("Process PDF", type="primary"):
                with st.spinner("Processing PDF... This may take a few minutes"):
                    # Extract text
                    st.info("Extracting text from PDF...")
                    text = rag.extract_text_from_pdf(uploaded_file)
                    if text:
                        st.success(f"Extracted {len(text)} characters")
                        # Chunk text
                        st.info("Splitting text into chunks...")
                        rag.chunks = rag.chunk_text(text)
                        st.success(f"Created {len(rag.chunks)} chunks")
                        # Create embeddings
                        st.info("Generating embeddings...")
                        rag.embeddings = rag.create_embeddings(rag.chunks)
                        if rag.embeddings is not None:
                            st.success(f"Generated embeddings: {rag.embeddings.shape}")
                            # Create vector store
                            st.info("Creating vector store...")
                            rag.index = rag.create_vector_store(rag.embeddings)
                            if rag.index is not None:
                                st.success("PDF processed successfully!")
                                st.session_state['pdf_processed'] = True
                            else:
                                st.error("Failed to create vector store")
                        else:
                            st.error("Failed to generate embeddings")
                    else:
                        st.error("Failed to extract text from PDF")
        # Display processing status
        if 'pdf_processed' in st.session_state:
            st.success("PDF Ready for Chat!")

        # Model info
        st.header("Model Information")
        st.info("""
**Embedding Model**: all-MiniLM-L6-v2 (384 dim)

**LLM Model**: google/flan-t5-base (250M params)

**Vector Store**: FAISS with cosine similarity

**Alternative Models Available:**
- google/flan-t5-large (better quality)
- microsoft/DialoGPT-small (conversational)
- facebook/bart-base (summarization focus)
""")
    # Main chat interface
    if 'pdf_processed' in st.session_state and st.session_state['pdf_processed']:
        st.header("Chat with your PDF")

        # Initialize chat history
        if 'messages' not in st.session_state:
            st.session_state.messages = []

        # Display chat history
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])
                if "sources" in message:
                    with st.expander("View Sources"):
                        for i, source in enumerate(message["sources"], 1):
                            st.markdown(f"**Source {i}:**")
                            st.text(source)

        # Chat input
        if prompt := st.chat_input("Ask a question about your PDF..."):
            # Add user message
            st.session_state.messages.append({"role": "user", "content": prompt})
            with st.chat_message("user"):
                st.markdown(prompt)

            # Generate response
            with st.chat_message("assistant"):
                with st.spinner("Searching and generating answer..."):
                    # Search for relevant chunks
                    similar_chunks = rag.search_similar_chunks(prompt, k=3)
                    if similar_chunks:
                        # Extract context
                        context_chunks = [chunk for chunk, score in similar_chunks]
                        # Generate answer
                        answer = rag.generate_answer(prompt, context_chunks)
                        st.markdown(answer)
                        # Show sources
                        with st.expander("View Sources"):
                            for i, (chunk, score) in enumerate(similar_chunks, 1):
                                st.markdown(f"**Source {i} (Similarity: {score:.3f}):**")
                                st.text(chunk[:500] + "..." if len(chunk) > 500 else chunk)
                        # Add assistant message with sources
                        st.session_state.messages.append({
                            "role": "assistant",
                            "content": answer,
                            "sources": context_chunks
                        })
                    else:
                        error_msg = "Sorry, I couldn't find relevant information to answer your question."
                        st.markdown(error_msg)
                        st.session_state.messages.append({"role": "assistant", "content": error_msg})
    else:
        # Instructions when no PDF is processed
        st.header("Getting Started")
        st.markdown("""
### Welcome to the RAG PDF Chat Application!

**Steps to use:**
1. 📄 Upload a PDF file using the sidebar
2. 🔄 Click "Process PDF" to create embeddings
3. 💬 Start chatting with your document!

**Features:**
- 🧠 AI-powered document understanding
- 🔍 Semantic search through your PDF
- 📚 Source citations for transparency
- ⚡ Fast vector-based retrieval

**Note:** First time loading may take a few minutes to download models.
""")
if __name__ == "__main__":
    main()