"""Document Analyzer — a Streamlit app that lets users upload PDF/TXT/DOCX
files and chat with them.

Pipeline: files are saved under ``data/``, chunked, embedded with OpenAI
embeddings, and indexed into a per-session ChromaDB under ``vectorstores/``.
Questions are answered by a hybrid (vector + BM25) ensemble retriever feeding
a Groq-hosted Llama model.
"""

import os
import shutil
import time
import uuid
from typing import List

import streamlit as st
from dotenv import load_dotenv
from langchain.retrievers import EnsembleRetriever
from langchain_chroma import Chroma
from langchain_community.document_loaders import Docx2txtLoader, PyMuPDFLoader, TextLoader
from langchain_community.retrievers import BM25Retriever
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_groq import ChatGroq
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

load_dotenv()

# Chunking/retrieval parameters shared by the indexing pass and the BM25
# rebuild — kept as module constants so the two code paths cannot drift.
CHUNK_SIZE = 1500
CHUNK_OVERLAP = 400
RETRIEVER_K = 3
EMBEDDING_MODEL = "text-embedding-3-small"
EMBEDDING_DIMENSIONS = 512

# Set page configuration
st.set_page_config(page_title="Document Analyzer", layout="wide")
st.title("📚 Document Analyzer")

# Add instructions in an expander.
# NOTE: the leading emoji was mojibake ("â„šī¸") in the original source —
# repaired to the intended "ℹ️".
with st.expander("ℹ️ Click here to view instructions"):
    st.markdown("""
    - Upload files by clicking on "Browse Files"
    - Avoid interrupting when file/files are under processing, this interrupts the execution and you would have to refresh the page to run the webapp again
    - You can add more files anytime, just avoid adding/removing files when it's processing the uploaded documents
    - The processing will trigger whenever you make any changes to the files
    """)

# Initialize session states
if 'initialized' not in st.session_state:
    st.session_state.initialized = False
if 'processing' not in st.session_state:
    st.session_state.processing = False
if 'chat_enabled' not in st.session_state:
    st.session_state.chat_enabled = False
if 'session_id' not in st.session_state:
    # Generate a unique session ID using UUID so concurrent sessions get
    # isolated vector stores.
    st.session_state.session_id = str(uuid.uuid4())[:8]


def get_chroma_directory():
    """Return (and lazily create the parent of) this session's ChromaDB path."""
    base_dir = "vectorstores"
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)
    return os.path.join(base_dir, f"chroma_db_{st.session_state.session_id}")


def cleanup_chroma_db():
    """Delete the current session's ChromaDB directory, if present."""
    try:
        chroma_dir = get_chroma_directory()
        if os.path.exists(chroma_dir):
            shutil.rmtree(chroma_dir)
    except Exception as e:
        print(f"Error cleaning up ChromaDB: {str(e)}")  # Log error internally


def cleanup_old_vectorstores():
    """Remove vector stores (from any session) older than 24 hours."""
    try:
        base_dir = "vectorstores"
        if not os.path.exists(base_dir):
            return
        current_time = time.time()
        one_day_in_seconds = 24 * 60 * 60
        # Inspect every per-session directory under vectorstores.
        for dir_name in os.listdir(base_dir):
            dir_path = os.path.join(base_dir, dir_name)
            if os.path.isdir(dir_path):
                # Use the directory's last modification time as its age.
                last_modified = os.path.getmtime(dir_path)
                if current_time - last_modified > one_day_in_seconds:
                    shutil.rmtree(dir_path)
    except Exception as e:
        print(f"Error cleaning up old vector stores: {str(e)}")  # Log error internally


if not st.session_state.initialized:
    # First run (or page refresh): purge stale stores and start clean.
    cleanup_old_vectorstores()
    if os.path.exists("data"):
        shutil.rmtree("data")
    os.makedirs("data")
    # BUG FIX: the original only called makedirs when "vectorstores" already
    # existed (a no-op guard); create it unconditionally instead.
    os.makedirs("vectorstores", exist_ok=True)
    st.session_state.uploaded_files = {}
    st.session_state.previous_files = set()
    st.session_state.initialized = True


def save_uploaded_file(uploaded_file):
    """Persist an uploaded file into ``data/``.

    Returns the saved path, or None when the write failed.
    """
    try:
        file_path = os.path.join("data", uploaded_file.name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getvalue())
        # Verify file was saved before reporting success.
        if os.path.exists(file_path):
            return file_path
        print(f"File not saved: {file_path}")  # Log error internally
        return None
    except Exception as e:
        print(f"Error saving file: {str(e)}")  # Log error internally
        return None


def _load_documents(uploaded_files_dict) -> List[Document]:
    """Load every tracked file from disk using the loader for its extension.

    Files that have vanished from disk are skipped (logged), and unknown
    extensions are ignored — the uploader only admits pdf/txt/docx anyway.
    """
    loader_by_ext = {".pdf": PyMuPDFLoader, ".txt": TextLoader, ".docx": Docx2txtLoader}
    docs: List[Document] = []
    for filename, file_info in uploaded_files_dict.items():
        file_path = file_info["path"]
        if not os.path.exists(file_path):
            print(f"File not found: {file_path}")  # Log error internally
            continue
        loader_cls = loader_by_ext.get(os.path.splitext(filename)[1])
        if loader_cls is not None:
            docs.extend(loader_cls(file_path).load())
    return docs


def process_documents(uploaded_files_dict):
    """Chunk, embed, and index the uploaded documents into this session's ChromaDB.

    Shows progress/success/warning banners in the UI and toggles
    ``st.session_state.chat_enabled``. Returns True on success, False otherwise.
    """
    warning_placeholder = st.empty()
    # NOTE: the emoji below was mojibake ("âš ī¸") in the original — repaired.
    warning_placeholder.warning("⚠️ Document processing in progress. Please wait before adding or removing files.")
    success_placeholder = st.empty()
    try:
        with st.spinner('Processing documents...'):
            # Drop any previous index before rebuilding from scratch.
            cleanup_chroma_db()

            docs = _load_documents(uploaded_files_dict)
            if not docs:
                st.warning("Unable to process the documents. Please try again.")
                return False

            # Split documents into overlapping chunks for retrieval.
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=CHUNK_SIZE,
                chunk_overlap=CHUNK_OVERLAP,
                length_function=len,
            )
            chunks = text_splitter.split_documents(docs)

            embed_func = OpenAIEmbeddings(model=EMBEDDING_MODEL, dimensions=EMBEDDING_DIMENSIONS)
            try:
                # Build and persist the vector store; only the on-disk side
                # effect matters, so the return value is deliberately unused.
                Chroma.from_documents(
                    collection_name="collection",
                    documents=chunks,
                    embedding=embed_func,
                    persist_directory=get_chroma_directory(),
                )
                st.session_state.chat_enabled = True
                success_placeholder.success('Documents processed successfully!')
                time.sleep(2)  # Show success message for 2 seconds
                success_placeholder.empty()
                return True
            except Exception as e:
                print(f"ChromaDB error: {str(e)}")  # Log error internally
                st.warning("Unable to process documents at the moment. Please try again.")
                st.session_state.chat_enabled = False
                return False
    except Exception as e:
        print(f"Processing error: {str(e)}")  # Log error internally
        st.warning("Unable to process documents at the moment. Please try again.")
        st.session_state.chat_enabled = False
        return False
    finally:
        warning_placeholder.empty()


def doc2str(docs):
    """Join retrieved documents' text into one context string for the prompt."""
    return "\n\n".join(doc.page_content for doc in docs)


def run_chatbot(retriever, llm):
    """Render the chat UI and answer questions with the retriever + LLM chain."""
    prompt = ChatPromptTemplate.from_template("""
    You are an assistant for question-answering tasks.
    Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, just say that you don't know.
    {context}
    Don't start revealing context in your responses until its asked.
    First look at the question and then think if the context is needed to answer this or its a normal question, once you have judged then only answer the question.
    When there is no context, just respond on your own knowledge as a normal assistant.
    Answer the following question: {question}""")

    # Retrieval-augmented QA chain: question -> context lookup -> prompt -> LLM.
    qa_chain = (
        RunnablePassthrough.assign(
            context=lambda input: doc2str(retriever.invoke(input["question"]))
        )
        | prompt
        | llm
        | StrOutputParser()
    )

    # Initialize chat history on first use.
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Replay the conversation so far.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Handle a new question, if any.
    if question := st.chat_input("Ask a question about your documents"):
        st.session_state.messages.append({"role": "user", "content": question})
        with st.chat_message("user"):
            st.markdown(question)

        # Spinner lives outside the assistant chat bubble.
        with st.spinner("Thinking..."):
            try:
                response = qa_chain.invoke({"question": question})
                with st.chat_message("assistant"):
                    st.markdown(response)
                st.session_state.messages.append({"role": "assistant", "content": response})
            except Exception as e:
                print(f"Chat error: {str(e)}")  # Log error internally
                with st.chat_message("assistant"):
                    error_msg = "I'm having trouble processing your question. Please try asking something else."
                    st.markdown(error_msg)
                    st.session_state.messages.append({"role": "assistant", "content": error_msg})


def _reset_chat_state():
    """Mark the app busy and discard chat history ahead of a re-index."""
    st.session_state.processing = True
    st.session_state.chat_enabled = False
    st.session_state.pop("messages", None)


def process_and_chat():
    """Main UI flow: sync uploads with disk, (re)index on change, then chat."""
    with st.container():
        uploaded_files = st.file_uploader(
            "Upload your documents",
            type=["pdf", "txt", "docx"],
            accept_multiple_files=True,
            key="file_uploader",
            label_visibility="collapsed" if st.session_state.processing else "visible",
        )

    current_uploaded_filenames = {file.name for file in uploaded_files} if uploaded_files else set()

    # Handle files the user removed from the uploader.
    files_to_remove = set(st.session_state.uploaded_files.keys()) - current_uploaded_filenames
    if files_to_remove:
        _reset_chat_state()
        # The index is stale once any file is gone.
        cleanup_chroma_db()
        for file_name in files_to_remove:
            if file_name in st.session_state.uploaded_files:
                # Delete the on-disk copy, then forget the entry.
                file_path = st.session_state.uploaded_files[file_name]["path"]
                if os.path.exists(file_path):
                    os.remove(file_path)
                del st.session_state.uploaded_files[file_name]

    # Handle newly uploaded files (the original also tracked an unused
    # ``files_added`` flag here — change detection below relies solely on
    # comparing filename sets, so the flag has been dropped).
    if uploaded_files:
        for file in uploaded_files:
            if file.name not in st.session_state.uploaded_files:
                _reset_chat_state()
                file_path = save_uploaded_file(file)
                if file_path:
                    # Track only files that were actually written to disk.
                    st.session_state.uploaded_files[file.name] = {
                        "path": file_path,
                        "type": file.type,
                    }

    # If the file set changed (added or removed), reset chat and reprocess.
    current_files = set(st.session_state.uploaded_files.keys())
    if current_files != st.session_state.previous_files or files_to_remove:
        st.session_state.previous_files = current_files
        if current_files:
            if process_documents(st.session_state.uploaded_files):
                st.session_state.chat_enabled = True
            st.session_state.processing = False
        else:
            st.warning('Please upload a file to continue')
            st.session_state.processing = False

    # With files indexed and chat enabled, build the retrieval stack and chat.
    if current_files and st.session_state.chat_enabled:
        try:
            llm = ChatGroq(
                temperature=0,
                model_name="llama-3.3-70b-versatile",
                groq_api_key=os.getenv("GROQ_API_KEY"),
                max_tokens=8000,
            )

            # Reopen the persisted vector store for this session.
            embed_func = OpenAIEmbeddings(model=EMBEDDING_MODEL, dimensions=EMBEDDING_DIMENSIONS)
            vectorstore = Chroma(
                collection_name="collection",
                embedding_function=embed_func,
                persist_directory=get_chroma_directory(),
            )
            vectorstore_retriever = vectorstore.as_retriever(
                search_kwargs={"k": RETRIEVER_K}
            )

            # BM25 has no persistence, so rebuild the keyword index from the
            # same chunking parameters used at embed time.
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=CHUNK_SIZE,
                chunk_overlap=CHUNK_OVERLAP,
                length_function=len,
            )
            chunks = text_splitter.split_documents(_load_documents(st.session_state.uploaded_files))
            keyword_retriever = BM25Retriever.from_documents(chunks)
            keyword_retriever.k = RETRIEVER_K

            # Hybrid retrieval: equal-weight blend of semantic and keyword hits.
            ensemble_retriever = EnsembleRetriever(
                retrievers=[vectorstore_retriever, keyword_retriever],
                weights=[0.5, 0.5],
            )

            run_chatbot(ensemble_retriever, llm)
        except Exception as e:
            print(f"Chat interface error: {str(e)}")  # Log error internally
            st.warning("Please try uploading your documents again.")
            st.session_state.chat_enabled = False
            # Clear the previous files to force reprocessing on the next run.
            st.session_state.previous_files = set()
            st.session_state.pop("messages", None)


# Call the main function
process_and_chat()