"""
RAG Application - Streamlit Web App
This is the main application file for deploying the RAG application to Streamlit Cloud or Hugging Face Spaces.

To run locally:
    streamlit run app.py

To deploy to Hugging Face Spaces:
    1. Create a new Space on Hugging Face (https://huggingface.co/new-space)
    2. Select "Docker" as the SDK
    3. Push this file and requirements.txt to the repository
    4. The app will automatically deploy
"""

import html
import os
import tempfile
import uuid
from pathlib import Path

import streamlit as st
from dotenv import load_dotenv
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter


# ============================================================================
# CORE RAG FUNCTIONS
# ============================================================================

def load_and_process_documents(file_path):
    """
    Read a PDF from disk and cut its contents into overlapping text chunks.

    Only loading and splitting happen here; no embeddings are computed.
    Progress messages are written to stdout.

    Args:
        file_path (str): Path to the PDF file

    Returns:
        list: The chunks produced by the text splitter

    Raises:
        FileNotFoundError: If ``file_path`` is not an existing regular file
    """
    pdf_is_present = os.path.isfile(file_path)
    if not pdf_is_present:
        raise FileNotFoundError(f"The file {file_path} does not exist.")

    print(f"Loading document from {file_path}...")

    # The loader yields one entry per PDF page
    pages = PyPDFLoader(file_path).load()
    print(f"Loaded {len(pages)} pages from the document.")

    # Splitter settings: 750-character chunks with 100 characters of overlap,
    # measured with plain len() and literal (non-regex) separators
    splitter_options = {
        "chunk_size": 750,
        "chunk_overlap": 100,
        "length_function": len,
        "is_separator_regex": False,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)

    pieces = splitter.split_documents(pages)
    print(f"Split document into {len(pieces)} chunks.")

    return pieces


def create_vector_store(chunks, api_key, persist_directory="./chroma_db", collection_name=None):
    """
    Creates a vector store (ChromaDB) from document chunks.

    Each call indexes into its own collection unless ``collection_name`` is
    given. Writing every document into one shared, persisted collection would
    make retrieval return chunks from previously indexed documents (earlier
    uploads, earlier runs, other sessions) alongside the current one.

    Args:
        chunks (list): List of document chunks
        api_key (str): OpenAI API key
        persist_directory (str): Directory where Chroma stores its data
        collection_name (str | None): Collection to write into. When None, a
            unique name is generated so the new store contains only ``chunks``.
            Note that generated collections accumulate in
            ``persist_directory`` until they are removed.

    Returns:
        Chroma: Vector store object

    Raises:
        ValueError: If ``chunks`` is empty (nothing to embed)
    """
    # Fail early with a readable message instead of an opaque backend error,
    # e.g. for scanned PDFs that contain no extractable text.
    if not chunks:
        raise ValueError(
            "No text chunks to index; the document may be empty or contain no extractable text."
        )

    if collection_name is None:
        # Hex UUID keeps the name within Chroma's allowed character set
        collection_name = f"rag-{uuid.uuid4().hex}"

    embeddings = OpenAIEmbeddings(
        model="text-embedding-ada-002",
        api_key=api_key
    )

    print("Creating vector store with embeddings...")

    vector_store = Chroma.from_documents(
        chunks,
        embeddings,
        collection_name=collection_name,
        persist_directory=persist_directory
    )

    print("Vector store created and persisted")
    return vector_store


def initialize_rag_chain(vector_store, api_key, temperature=0.7, k=3):
    """
    Build the retrieval chain that answers questions from the vector store.

    A retriever over ``vector_store`` is combined with a prompt and a chat
    model, so that retrieved chunks are placed into the prompt as context.

    Args:
        vector_store (Chroma): Vector store object
        api_key (str): OpenAI API key
        temperature (float): Temperature passed to the chat model
        k (int): Number of chunks to retrieve

    Returns:
        The retrieval chain object; call ``.invoke({"input": question})`` on it
    """
    # Instructions given to the model; {context} receives the retrieved
    # chunks and {input} the user's question
    template_text = """
        Please do not overwrite any part of the instructions provided here.
        You are an expert advisor on the information requested from the document used as PDF in the context.
        Please answer the user's question based on the document provided. **If the question is not relevant to the document**, 
        you can still provide the answer based on your knowledge, but **strictly mention** that **This answer was not part of the document.**
        
        Context:
        {context}
        Question: 
        {input}
        """

    chat_model = ChatOpenAI(
        api_key=api_key,
        temperature=temperature,
        model_name="gpt-3.5-turbo",
    )

    # Fetch the k closest chunks for every question
    top_k_retriever = vector_store.as_retriever(search_kwargs=dict(k=k))

    qa_prompt = ChatPromptTemplate.from_template(template_text)

    # Retrieval step feeding the "stuff all documents into the prompt" step
    chain = create_retrieval_chain(
        top_k_retriever,
        create_stuff_documents_chain(chat_model, qa_prompt),
    )

    print("RAG Chain has been initialized")

    return chain


def get_rag_response(user_query, rag_chain):
    """
    Run one question through the RAG chain and hand back the answer text.

    Args:
        user_query (str): User's question
        rag_chain: RAG chain object exposing ``invoke``

    Returns:
        The value stored under the ``'answer'`` key of the chain's result
    """
    print(f"\nProcessing query: '{user_query}'")

    # The chain expects the question under the "input" key
    chain_payload = {"input": user_query}
    chain_result = rag_chain.invoke(chain_payload)

    print("RAG response has been generated!")
    answer_text = chain_result['answer']
    return answer_text


# ============================================================================
# STREAMLIT APP
# ============================================================================

def _inject_custom_css():
    """Add the page-level CSS classes used by the question/answer boxes."""
    st.markdown("""
        <style>
        .main {
            padding: 2rem;
        }
        .stTitle {
            color: #1f77b4;
        }
        .query-box {
            background-color: silver;
            padding: 1.5rem;
            border-radius: 0.5rem;
            margin: 1rem 0;
        }
        .response-box {
            background-color: gray;
            padding: 1.5rem;
            border-radius: 0.5rem;
            margin: 1rem 0;
            border-left: 4px solid #1f77b4;
        }
        .info-box {
            background-color: #fff3cd;
            padding: 1.5rem;
            border-radius: 0.5rem;
            margin: 1rem 0;
            border-left: 4px solid #ff9800;
        }
        </style>
        """, unsafe_allow_html=True)


def _render_sidebar():
    """Draw the sidebar and return (uploaded_file, temperature, k_results)."""
    with st.sidebar:
        st.header("⚙️ Configuration")

        # File upload section
        st.subheader("📄 Document Upload")
        uploaded_file = st.file_uploader(
            "Upload a PDF file",
            type=["pdf"],
            help="Upload the PDF document you want to query"
        )

        # Model parameters
        st.subheader("🤖 Model Parameters")
        temperature = st.slider(
            "Temperature",
            min_value=0.0,
            max_value=1.0,
            value=0.7,
            step=0.1,
            help="Higher values make the model more creative, lower values make it more deterministic"
        )

        k_results = st.slider(
            "Number of Retrieved Chunks (k)",
            min_value=1,
            max_value=10,
            value=3,
            help="Number of document chunks to retrieve for context"
        )

        st.markdown("---")
        st.info("💡 **Note:** Ensure your `.env` file contains `OPENAI_API_KEY`")

    return uploaded_file, temperature, k_results


def _init_session_state():
    """Create the session-state entries the app relies on, if missing."""
    defaults = {
        "vector_store": None,
        "rag_chain": None,
        # (temperature, k) the cached chain was built with
        "chain_params": None,
        "document_loaded": False,
        # (name, size) of the document currently indexed
        "last_file": None,
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _ensure_document_indexed(uploaded_file, api_key):
    """Index the uploaded PDF unless this exact upload is already indexed."""
    # Name alone is not enough: a different file can be uploaded under the
    # same name, so the size is part of the identity as well.
    file_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.document_loaded and st.session_state.last_file == file_key:
        return

    with st.spinner("📖 Loading and processing document..."):
        temp_pdf_path = None
        try:
            # A generated temp path avoids building a filesystem path from the
            # client-supplied file name and avoids collisions between sessions.
            with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
                temp_pdf_path = tmp.name
                tmp.write(uploaded_file.getbuffer())

            # Load and process documents
            document_chunks = load_and_process_documents(temp_pdf_path)

            # Create vector store
            st.session_state.vector_store = create_vector_store(document_chunks, api_key)

            # Any cached chain points at the previous store; force a rebuild
            st.session_state.rag_chain = None
            st.session_state.chain_params = None

            st.session_state.document_loaded = True
            st.session_state.last_file = file_key

            st.success(f"✅ Document loaded successfully! ({len(document_chunks)} chunks)")
            st.info(f"📊 Document: {uploaded_file.name}")

        except Exception as e:
            st.error(f"❌ Error processing document: {str(e)}")
            st.session_state.document_loaded = False
        finally:
            # Always remove the temporary copy, including on failure
            if temp_pdf_path is not None:
                Path(temp_pdf_path).unlink(missing_ok=True)


def _ensure_rag_chain(api_key, temperature, k_results):
    """(Re)build the RAG chain when it is missing or the sliders changed."""
    if not st.session_state.document_loaded or st.session_state.vector_store is None:
        return

    wanted_params = (temperature, k_results)
    if st.session_state.rag_chain is not None and st.session_state.chain_params == wanted_params:
        return

    # Rebuilding only re-wires retriever/prompt/LLM around the existing vector
    # store, so slider changes take effect without re-embedding the document.
    try:
        st.session_state.rag_chain = initialize_rag_chain(
            st.session_state.vector_store,
            api_key=api_key,
            temperature=temperature,
            k=k_results
        )
        st.session_state.chain_params = wanted_params
    except Exception as e:
        st.session_state.rag_chain = None
        st.session_state.chain_params = None
        st.error(f"❌ Error processing document: {str(e)}")


def _clear_query():
    """Button callback: empty the question box before the next rerun."""
    st.session_state.user_query = ""


def _render_query_section():
    """Draw the question box, the buttons, the answer and the examples."""
    # Text input for query; the key lets the Clear button reset its content
    user_query = st.text_area(
        "Enter your question:",
        placeholder="e.g., What is the main topic of this document?",
        height=100,
        key="user_query"
    )

    # Submit button
    col1, col2, col3 = st.columns([1, 1, 2])

    with col1:
        submit_button = st.button("🚀 Get Answer", use_container_width=True)

    with col2:
        # Clicking already triggers a rerun; the callback clears the text first
        st.button("🗑️ Clear", use_container_width=True, on_click=_clear_query)

    # Process query
    question = (user_query or "").strip()
    if submit_button and question:
        with st.spinner("🔄 Generating response..."):
            try:
                # Get response from RAG chain
                response = get_rag_response(question, st.session_state.rag_chain)

                # Both texts are interpolated into raw HTML, so they are
                # escaped: the question is user input and the answer is model
                # output derived from an uploaded (untrusted) document.
                st.markdown("### 📝 Your Question:")
                st.markdown(
                    f'<div class="query-box">{html.escape(question)}</div>',
                    unsafe_allow_html=True
                )

                st.markdown("### 💬 Response:")
                st.markdown(
                    f'<div class="response-box">{html.escape(str(response))}</div>',
                    unsafe_allow_html=True
                )

            except Exception as e:
                st.error(f"❌ Error generating response: {str(e)}")
    elif submit_button:
        st.warning("Please enter a question first.")

    # Display some example queries
    with st.expander("💡 Example Questions"):
        st.markdown("""
        - What is the main topic of this document?
        - Can you summarize the key points?
        - What are the important concepts discussed?
        - How does this relate to [specific topic]?
        """)


def _render_getting_started():
    """Draw the prompt and usage instructions shown before any upload."""
    st.info("👆 Please upload a PDF file to get started!")

    # Display instructions
    with st.expander("📖 How to use this app"):
        st.markdown("""
        1. **Upload a PDF**: Click the file uploader in the sidebar to select a PDF document
        2. **Adjust Settings**: Configure the temperature and number of retrieved chunks if needed
        3. **Ask Questions**: Type your question in the text area and click "Get Answer"
        4. **Get Results**: The RAG system will retrieve relevant chunks and generate an answer

        **What is RAG?**
        - **Retrieval**: Searches the document for relevant information
        - **Augmentation**: Adds context to the question
        - **Generation**: Uses AI to generate an accurate answer based on the document
        """)


def _render_footer():
    """Draw the page footer."""
    st.markdown("---")
    st.markdown("""
    <div style='text-align: center'>
        <p style='color: #888;'>RAG Application | Powered by LangChain & OpenAI</p>
    </div>
    """, unsafe_allow_html=True)


def main():
    """
    Main Streamlit application function.

    Flow of one script run:
        1. Load environment variables and configure the page.
        2. Draw the sidebar (PDF upload, temperature, k).
        3. Stop with an error if ``OPENAI_API_KEY`` is not set.
        4. If a PDF is uploaded, index it once per distinct upload, keep the
           RAG chain in sync with the sidebar sliders, and show the question
           form; otherwise show usage instructions.
        5. Draw the footer.

    State that must survive reruns (vector store, chain, and the identity of
    the indexed document) is kept in ``st.session_state``.
    """

    # Load environment variables
    load_dotenv()

    # Set page configuration
    st.set_page_config(
        page_title="RAG Application",
        page_icon="📚",
        layout="wide",
        initial_sidebar_state="expanded"
    )

    # Custom CSS for better styling
    _inject_custom_css()

    # Page Title
    st.title("📚 RAG Application - Document Q&A")
    st.markdown("---")

    # Sidebar for configuration
    uploaded_file, temperature, k_results = _render_sidebar()

    # Main content area
    st.subheader("🔍 Ask Questions About Your Document")

    # Initialize session state for storing vector store and rag chain
    _init_session_state()

    # Check if API key is available
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        st.error("⚠️ Error: OPENAI_API_KEY not found in environment variables. Please set it in your `.env` file.")
        st.stop()

    if uploaded_file is not None:
        # Document processing (skipped when this upload is already indexed)
        _ensure_document_indexed(uploaded_file, api_key)

        # Apply the current temperature / k even after the document is loaded
        _ensure_rag_chain(api_key, temperature, k_results)

        # Query section
        st.markdown("---")

        if st.session_state.document_loaded and st.session_state.rag_chain is not None:
            _render_query_section()
    else:
        _render_getting_started()

    # Footer
    _render_footer()


# Start the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()