import streamlit as st
import PyPDF2
import io
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import pickle
import os
import re
from typing import List, Tuple
import warnings
warnings.filterwarnings("ignore")

# Page config
st.set_page_config(
    page_title="RAG PDF Chat Application",
    page_icon="📚",
    layout="wide"
)

class RAGSystem:
    def __init__(self):
        self.embedding_model = None
        self.llm_pipeline = None
        self.index = None
        self.chunks = []
        self.embeddings = None
        
    @st.cache_resource
    def load_embedding_model(_self):
        """Load sentence transformer model"""
        try:
            model = SentenceTransformer('all-MiniLM-L6-v2')
            return model
        except Exception as e:
            st.error(f"Error loading embedding model: {str(e)}")
            return None
    
    @st.cache_resource
    def load_llm_model(_self):
        """Load Hugging Face LLM"""
        try:
            # Better models for Q&A tasks - choose one based on your system
            
            # Option 1: Google's Flan-T5 (Best for Q&A, lightweight)
            model_name = "google/flan-t5-base"  # 250M parameters
            
            # Option 2: For more powerful responses (if you have good hardware)
            # model_name = "google/flan-t5-large"  # 780M parameters
            
            # Option 3: Microsoft's DialoGPT (conversational)
            # model_name = "microsoft/DialoGPT-small"  # 117M parameters
            
            # Option 4: Facebook's BART (good for summarization + Q&A)
            # model_name = "facebook/bart-base"
            
            # Load tokenizer and pipeline
            if "flan-t5" in model_name:
                # Text-to-text generation for Flan-T5
                pipeline_obj = pipeline(
                    "text2text-generation",
                    model=model_name,
                    max_length=512,
                    temperature=0.7,
                    do_sample=True,
                    device=0 if torch.cuda.is_available() else -1
                )
            else:
                # Text generation for other models
                tokenizer = AutoTokenizer.from_pretrained(model_name)
                if tokenizer.pad_token is None:
                    tokenizer.pad_token = tokenizer.eos_token
                    
                pipeline_obj = pipeline(
                    "text-generation",
                    model=model_name,
                    tokenizer=tokenizer,
                    max_length=512,
                    temperature=0.7,
                    do_sample=True,
                    device=0 if torch.cuda.is_available() else -1
                )
            return pipeline_obj
        except Exception as e:
            st.error(f"Error loading LLM: {str(e)}")
            return None
    
    def extract_text_from_pdf(self, pdf_file) -> str:
        """Extract text from uploaded PDF"""
        try:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            text = ""
            for page in pdf_reader.pages:
                text += page.extract_text() + "\n"
            return text
        except Exception as e:
            st.error(f"Error extracting text from PDF: {str(e)}")
            return ""
    
    def chunk_text(self, text: str, chunk_size: int = 500, overlap: int = 50) -> List[str]:
        """Split text into overlapping chunks"""
        # Clean the text
        text = re.sub(r'\s+', ' ', text.strip())
        
        # Split into sentences
        sentences = re.split(r'[.!?]+', text)
        
        chunks = []
        current_chunk = ""
        
        for sentence in sentences:
            sentence = sentence.strip()
            if not sentence:
                continue
                
            # If adding this sentence would exceed chunk size, save current chunk
            if len(current_chunk) + len(sentence) > chunk_size and current_chunk:
                chunks.append(current_chunk.strip())
                # Start new chunk with overlap
                words = current_chunk.split()
                overlap_text = ' '.join(words[-overlap:]) if len(words) > overlap else current_chunk
                current_chunk = overlap_text + " " + sentence
            else:
                current_chunk += " " + sentence if current_chunk else sentence
        
        # Add the last chunk
        if current_chunk.strip():
            chunks.append(current_chunk.strip())
        
        return chunks
    
    def create_embeddings(self, chunks: List[str]) -> np.ndarray:
        """Generate embeddings for text chunks"""
        if self.embedding_model is None:
            self.embedding_model = self.load_embedding_model()
        
        if self.embedding_model is None:
            return None
            
        try:
            embeddings = self.embedding_model.encode(chunks, show_progress_bar=True)
            return embeddings
        except Exception as e:
            st.error(f"Error creating embeddings: {str(e)}")
            return None
    
    def create_vector_store(self, embeddings: np.ndarray):
        """Create FAISS vector store"""
        try:
            dimension = embeddings.shape[1]
            index = faiss.IndexFlatIP(dimension)  # Inner product similarity
            
            # Normalize embeddings for cosine similarity
            faiss.normalize_L2(embeddings)
            index.add(embeddings.astype('float32'))
            
            return index
        except Exception as e:
            st.error(f"Error creating vector store: {str(e)}")
            return None
    
    def search_similar_chunks(self, query: str, k: int = 3) -> List[Tuple[str, float]]:
        """Search for similar chunks using vector similarity"""
        if self.embedding_model is None or self.index is None:
            return []
        
        try:
            # Generate query embedding
            query_embedding = self.embedding_model.encode([query])
            faiss.normalize_L2(query_embedding)
            
            # Search in vector store
            scores, indices = self.index.search(query_embedding.astype('float32'), k)
            
            results = []
            for idx, score in zip(indices[0], scores[0]):
                if idx < len(self.chunks):
                    results.append((self.chunks[idx], float(score)))
            
            return results
        except Exception as e:
            st.error(f"Error searching chunks: {str(e)}")
            return []
    
    def generate_answer(self, query: str, context_chunks: List[str]) -> str:
        """Generate answer using LLM with context"""
        if self.llm_pipeline is None:
            self.llm_pipeline = self.load_llm_model()
        
        if self.llm_pipeline is None:
            return "Sorry, LLM model is not available."
        
        try:
            # Combine context
            context = "\n".join(context_chunks[:2])  # Use top 2 chunks to avoid token limit
            
            # Different prompts for different model types
            model_name = getattr(self.llm_pipeline.model, 'name_or_path', 'unknown')
            
            if "flan-t5" in model_name.lower():
                # For Flan-T5 (text2text-generation)
                prompt = f"Answer the question based on the context.\n\nContext: {context}\n\nQuestion: {query}\n\nAnswer:"
                
                response = self.llm_pipeline(
                    prompt,
                    max_length=200,
                    num_return_sequences=1,
                    temperature=0.7,
                    do_sample=True
                )
                answer = response[0]['generated_text'].strip()
                
            else:
                # For GPT-style models (text-generation)
                prompt = f"""Based on the following context, answer the question:

Context: {context}

Question: {query}

Answer:"""
                
                response = self.llm_pipeline(
                    prompt,
                    max_length=len(prompt.split()) + 100,
                    num_return_sequences=1,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=self.llm_pipeline.tokenizer.eos_token_id
                )
                
                # Extract the generated answer
                generated_text = response[0]['generated_text']
                answer = generated_text[len(prompt):].strip()
            
            return answer if answer else "I couldn't find a specific answer in the provided context."
            
        except Exception as e:
            st.error(f"Error generating answer: {str(e)}")
            return "Sorry, I encountered an error while generating the answer."

# Initialize RAG system
@st.cache_resource
def get_rag_system():
    return RAGSystem()

# Main app
def main():
    st.title("RAG PDF Chat Application")
    st.markdown("Upload a PDF and chat with its contents using AI!")
    
    # Initialize RAG system
    rag = get_rag_system()
    
    # Sidebar for PDF upload and processing
    with st.sidebar:
        st.header("Document Processing")
        
        uploaded_file = st.file_uploader(
            "Upload a PDF file", 
            type=['pdf'],
            help="Upload a PDF document to create embeddings and chat with it"
        )
        
        if uploaded_file is not None:
            st.success(f"Uploaded: {uploaded_file.name}")
            
            if st.button("Process PDF", type="primary"):
                with st.spinner("Processing PDF... This may take a few minutes"):
                    
                    # Extract text
                    st.info("Extracting text from PDF...")
                    text = rag.extract_text_from_pdf(uploaded_file)
                    
                    if text:
                        st.success(f"Extracted {len(text)} characters")
                        
                        # Chunk text
                        st.info("Splitting text into chunks...")
                        rag.chunks = rag.chunk_text(text)
                        st.success(f"Created {len(rag.chunks)} chunks")
                        
                        # Create embeddings
                        st.info("Generating embeddings...")
                        rag.embeddings = rag.create_embeddings(rag.chunks)
                        
                        if rag.embeddings is not None:
                            st.success(f"Generated embeddings: {rag.embeddings.shape}")
                            
                            # Create vector store
                            st.info("Creating vector store...")
                            rag.index = rag.create_vector_store(rag.embeddings)
                            
                            if rag.index is not None:
                                st.success("PDF processed successfully!")
                                st.session_state['pdf_processed'] = True
                            else:
                                st.error("Failed to create vector store")
                        else:
                            st.error("Failed to generate embeddings")
                    else:
                        st.error("Failed to extract text from PDF")
        
        # Display processing status
        if 'pdf_processed' in st.session_state:
            st.success("PDF Ready for Chat!")
        
        # Model info
        st.header("Model Information")
        st.info("""
        **Embedding Model**: all-MiniLM-L6-v2 (384 dim)
        **LLM Model**: google/flan-t5-base (250M params)
        **Vector Store**: FAISS with cosine similarity
        
        **Alternative Models Available:**
        - google/flan-t5-large (better quality)
        - microsoft/DialoGPT-small (conversational)
        - facebook/bart-base (summarization focus)
        """)
    
    # Main chat interface
    if 'pdf_processed' in st.session_state and st.session_state['pdf_processed']:
        st.header("Chat with your PDF")
        
        # Initialize chat history
        if 'messages' not in st.session_state:
            st.session_state.messages = []
        
        # Display chat history
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])
                if "sources" in message:
                    with st.expander("View Sources"):
                        for i, source in enumerate(message["sources"], 1):
                            st.markdown(f"**Source {i}:**")
                            st.text(source)
        
        # Chat input
        if prompt := st.chat_input("Ask a question about your PDF..."):
            # Add user message
            st.session_state.messages.append({"role": "user", "content": prompt})
            
            with st.chat_message("user"):
                st.markdown(prompt)
            
            # Generate response
            with st.chat_message("assistant"):
                with st.spinner("Searching and generating answer..."):
                    
                    # Search for relevant chunks
                    similar_chunks = rag.search_similar_chunks(prompt, k=3)
                    
                    if similar_chunks:
                        # Extract context
                        context_chunks = [chunk for chunk, score in similar_chunks]
                        
                        # Generate answer
                        answer = rag.generate_answer(prompt, context_chunks)
                        
                        st.markdown(answer)
                        
                        # Show sources
                        with st.expander("View Sources"):
                            for i, (chunk, score) in enumerate(similar_chunks, 1):
                                st.markdown(f"**Source {i} (Similarity: {score:.3f}):**")
                                st.text(chunk[:500] + "..." if len(chunk) > 500 else chunk)
                        
                        # Add assistant message with sources
                        st.session_state.messages.append({
                            "role": "assistant", 
                            "content": answer,
                            "sources": context_chunks
                        })
                    else:
                        error_msg = "Sorry, I couldn't find relevant information to answer your question."
                        st.markdown(error_msg)
                        st.session_state.messages.append({"role": "assistant", "content": error_msg})
    
    else:
        # Instructions when no PDF is processed
        st.header(" ****Getting Started****")
        st.markdown("""
        ### Welcome to the RAG PDF Chat Application!
        
        **Steps to use:**
        1. 📄 Upload a PDF file using the sidebar
        2. 🔄 Click "Process PDF" to create embeddings
        3. 💬 Start chatting with your document!
        
        **Features:**
        - 🧠 AI-powered document understanding
        - 🔍 Semantic search through your PDF
        - 📚 Source citations for transparency
        - ⚡ Fast vector-based retrieval
        
        **Note:** First time loading may take a few minutes to download models.
        """)

if __name__ == "__main__":
    main()