# RAG-Application / app.py
import streamlit as st
import PyPDF2
import io
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import pickle
import os
import re
from typing import List, Tuple
import warnings
warnings.filterwarnings("ignore")
# Page config
st.set_page_config(
    page_title="RAG PDF Chat Application",
    page_icon="📚",
    layout="wide"
)
class RAGSystem:
    def __init__(self):
        self.embedding_model = None
        self.llm_pipeline = None
        self.index = None
        self.chunks = []
        self.embeddings = None
    @st.cache_resource
    def load_embedding_model(_self):
        """Load sentence transformer model"""
        try:
            model = SentenceTransformer('all-MiniLM-L6-v2')
            return model
        except Exception as e:
            st.error(f"Error loading embedding model: {str(e)}")
            return None
    @st.cache_resource
    def load_llm_model(_self):
        """Load Hugging Face LLM"""
        try:
            # Better models for Q&A tasks - choose one based on your system
            # Option 1: Google's Flan-T5 (Best for Q&A, lightweight)
            model_name = "google/flan-t5-base"  # 250M parameters
            # Option 2: For more powerful responses (if you have good hardware)
            # model_name = "google/flan-t5-large"  # 780M parameters
            # Option 3: Microsoft's DialoGPT (conversational)
            # model_name = "microsoft/DialoGPT-small"  # 117M parameters
            # Option 4: Facebook's BART (good for summarization + Q&A)
            # model_name = "facebook/bart-base"

            # Load tokenizer and pipeline
            if "flan-t5" in model_name:
                # Text-to-text generation for Flan-T5
                pipeline_obj = pipeline(
                    "text2text-generation",
                    model=model_name,
                    max_length=512,
                    temperature=0.7,
                    do_sample=True,
                    device=0 if torch.cuda.is_available() else -1
                )
            else:
                # Text generation for other models
                tokenizer = AutoTokenizer.from_pretrained(model_name)
                if tokenizer.pad_token is None:
                    tokenizer.pad_token = tokenizer.eos_token
                pipeline_obj = pipeline(
                    "text-generation",
                    model=model_name,
                    tokenizer=tokenizer,
                    max_length=512,
                    temperature=0.7,
                    do_sample=True,
                    device=0 if torch.cuda.is_available() else -1
                )
            return pipeline_obj
        except Exception as e:
            st.error(f"Error loading LLM: {str(e)}")
            return None
    def extract_text_from_pdf(self, pdf_file) -> str:
        """Extract text from uploaded PDF"""
        try:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            text = ""
            for page in pdf_reader.pages:
                text += page.extract_text() + "\n"
            return text
        except Exception as e:
            st.error(f"Error extracting text from PDF: {str(e)}")
            return ""
    def chunk_text(self, text: str, chunk_size: int = 500, overlap: int = 50) -> List[str]:
        """Split text into overlapping chunks"""
        # Clean the text
        text = re.sub(r'\s+', ' ', text.strip())
        # Split into sentences
        sentences = re.split(r'[.!?]+', text)
        chunks = []
        current_chunk = ""
        for sentence in sentences:
            sentence = sentence.strip()
            if not sentence:
                continue
            # If adding this sentence would exceed chunk size, save current chunk
            if len(current_chunk) + len(sentence) > chunk_size and current_chunk:
                chunks.append(current_chunk.strip())
                # Start new chunk with overlap
                words = current_chunk.split()
                overlap_text = ' '.join(words[-overlap:]) if len(words) > overlap else current_chunk
                current_chunk = overlap_text + " " + sentence
            else:
                current_chunk += " " + sentence if current_chunk else sentence
        # Add the last chunk
        if current_chunk.strip():
            chunks.append(current_chunk.strip())
        return chunks
    def create_embeddings(self, chunks: List[str]) -> np.ndarray:
        """Generate embeddings for text chunks"""
        if self.embedding_model is None:
            self.embedding_model = self.load_embedding_model()
        if self.embedding_model is None:
            return None
        try:
            embeddings = self.embedding_model.encode(chunks, show_progress_bar=True)
            return embeddings
        except Exception as e:
            st.error(f"Error creating embeddings: {str(e)}")
            return None
    def create_vector_store(self, embeddings: np.ndarray):
        """Create FAISS vector store"""
        try:
            dimension = embeddings.shape[1]
            index = faiss.IndexFlatIP(dimension)  # Inner product similarity
            # Normalize embeddings for cosine similarity
            faiss.normalize_L2(embeddings)
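            # normalize_L2 modifies the array in place, so the stored embeddings end up unit length.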
            index.add(embeddings.astype('float32'))
            return index
        except Exception as e:
            st.error(f"Error creating vector store: {str(e)}")
            return None
    def search_similar_chunks(self, query: str, k: int = 3) -> List[Tuple[str, float]]:
        """Search for similar chunks using vector similarity"""
        if self.embedding_model is None or self.index is None:
            return []
        try:
            # Generate query embedding
            query_embedding = self.embedding_model.encode([query])
            faiss.normalize_L2(query_embedding)
            # Search in vector store
            scores, indices = self.index.search(query_embedding.astype('float32'), k)
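            # Results come back sorted by decreasing inner product, which equals cosine
            # similarity here because both chunk and query vectors are L2-normalized.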
            results = []
            for idx, score in zip(indices[0], scores[0]):
                # FAISS pads with -1 when fewer than k results are available
                if 0 <= idx < len(self.chunks):
                    results.append((self.chunks[idx], float(score)))
            return results
        except Exception as e:
            st.error(f"Error searching chunks: {str(e)}")
            return []
    def generate_answer(self, query: str, context_chunks: List[str]) -> str:
        """Generate answer using LLM with context"""
        if self.llm_pipeline is None:
            self.llm_pipeline = self.load_llm_model()
        if self.llm_pipeline is None:
            return "Sorry, the LLM model is not available."
        try:
            # Combine context
            context = "\n".join(context_chunks[:2])  # Use top 2 chunks to avoid token limit
            # Different prompts for different model types
            model_name = getattr(self.llm_pipeline.model, 'name_or_path', 'unknown')
            if "flan-t5" in model_name.lower():
                # For Flan-T5 (text2text-generation)
                prompt = f"Answer the question based on the context.\n\nContext: {context}\n\nQuestion: {query}\n\nAnswer:"
                response = self.llm_pipeline(
                    prompt,
                    max_length=200,
                    num_return_sequences=1,
                    temperature=0.7,
                    do_sample=True
                )
                answer = response[0]['generated_text'].strip()
            else:
                # For GPT-style models (text-generation)
                prompt = f"""Based on the following context, answer the question:
Context: {context}
Question: {query}
Answer:"""
                response = self.llm_pipeline(
                    prompt,
                    # Word count is only a rough proxy for the token-based max_length
                    max_length=len(prompt.split()) + 100,
                    num_return_sequences=1,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=self.llm_pipeline.tokenizer.eos_token_id
                )
                # Extract the generated answer (drop the echoed prompt)
                generated_text = response[0]['generated_text']
                answer = generated_text[len(prompt):].strip()
            return answer if answer else "I couldn't find a specific answer in the provided context."
        except Exception as e:
            st.error(f"Error generating answer: {str(e)}")
            return "Sorry, I encountered an error while generating the answer."
# Initialize RAG system
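# st.cache_resource keeps a single RAGSystem instance alive across reruns (and shares it across
# sessions), so the chunks and FAISS index built in the sidebar persist between interactions.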
@st.cache_resource
def get_rag_system():
    return RAGSystem()
# Main app
def main():
    st.title("RAG PDF Chat Application")
    st.markdown("Upload a PDF and chat with its contents using AI!")

    # Initialize RAG system
    rag = get_rag_system()

    # Sidebar for PDF upload and processing
    with st.sidebar:
        st.header("Document Processing")
        uploaded_file = st.file_uploader(
            "Upload a PDF file",
            type=['pdf'],
            help="Upload a PDF document to create embeddings and chat with it"
        )
        if uploaded_file is not None:
            st.success(f"Uploaded: {uploaded_file.name}")
            if st.button("Process PDF", type="primary"):
                with st.spinner("Processing PDF... This may take a few minutes"):
                    # Extract text
                    st.info("Extracting text from PDF...")
                    text = rag.extract_text_from_pdf(uploaded_file)
                    if text:
                        st.success(f"Extracted {len(text)} characters")
                        # Chunk text
                        st.info("Splitting text into chunks...")
                        rag.chunks = rag.chunk_text(text)
                        st.success(f"Created {len(rag.chunks)} chunks")
                        # Create embeddings
                        st.info("Generating embeddings...")
                        rag.embeddings = rag.create_embeddings(rag.chunks)
                        if rag.embeddings is not None:
                            st.success(f"Generated embeddings: {rag.embeddings.shape}")
                            # Create vector store
                            st.info("Creating vector store...")
                            rag.index = rag.create_vector_store(rag.embeddings)
                            if rag.index is not None:
                                st.success("PDF processed successfully!")
                                st.session_state['pdf_processed'] = True
                            else:
                                st.error("Failed to create vector store")
                        else:
                            st.error("Failed to generate embeddings")
                    else:
                        st.error("Failed to extract text from PDF")
        # Display processing status
        if 'pdf_processed' in st.session_state:
            st.success("PDF Ready for Chat!")

        # Model info
        st.header("Model Information")
        st.info("""
**Embedding Model**: all-MiniLM-L6-v2 (384 dim)

**LLM Model**: google/flan-t5-base (250M params)

**Vector Store**: FAISS with cosine similarity

**Alternative Models Available:**
- google/flan-t5-large (better quality)
- microsoft/DialoGPT-small (conversational)
- facebook/bart-base (summarization focus)
""")
    # Main chat interface
    if 'pdf_processed' in st.session_state and st.session_state['pdf_processed']:
        st.header("Chat with your PDF")

        # Initialize chat history
        if 'messages' not in st.session_state:
            st.session_state.messages = []

        # Display chat history
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])
                if "sources" in message:
                    with st.expander("View Sources"):
                        for i, source in enumerate(message["sources"], 1):
                            st.markdown(f"**Source {i}:**")
                            st.text(source)

        # Chat input
        if prompt := st.chat_input("Ask a question about your PDF..."):
            # Add user message
            st.session_state.messages.append({"role": "user", "content": prompt})
            with st.chat_message("user"):
                st.markdown(prompt)

            # Generate response
            with st.chat_message("assistant"):
                with st.spinner("Searching and generating answer..."):
                    # Search for relevant chunks
                    similar_chunks = rag.search_similar_chunks(prompt, k=3)
                    if similar_chunks:
                        # Extract context
                        context_chunks = [chunk for chunk, score in similar_chunks]
                        # Generate answer
                        answer = rag.generate_answer(prompt, context_chunks)
                        st.markdown(answer)
                        # Show sources
                        with st.expander("View Sources"):
                            for i, (chunk, score) in enumerate(similar_chunks, 1):
                                st.markdown(f"**Source {i} (Similarity: {score:.3f}):**")
                                st.text(chunk[:500] + "..." if len(chunk) > 500 else chunk)
                        # Add assistant message with sources
                        st.session_state.messages.append({
                            "role": "assistant",
                            "content": answer,
                            "sources": context_chunks
                        })
                    else:
                        error_msg = "Sorry, I couldn't find relevant information to answer your question."
                        st.markdown(error_msg)
                        st.session_state.messages.append({"role": "assistant", "content": error_msg})
    else:
        # Instructions when no PDF is processed
        st.header("Getting Started")
        st.markdown("""
### Welcome to the RAG PDF Chat Application!

**Steps to use:**
1. 📄 Upload a PDF file using the sidebar
2. 🔄 Click "Process PDF" to create embeddings
3. 💬 Start chatting with your document!

**Features:**
- 🧠 AI-powered document understanding
- 🔍 Semantic search through your PDF
- 📚 Source citations for transparency
- ⚡ Fast vector-based retrieval

**Note:** First time loading may take a few minutes to download models.
""")
if __name__ == "__main__":
    main()