import gradio as gr
from llama_index.core import VectorStoreIndex, Document  # Imports VectorStoreIndex for semantic search and Document class for text storage
from llama_index.llms.openai import OpenAI  # Imports OpenAI language model integration from LlamaIndex
from llama_index.core import Settings  # Imports Settings to configure global LlamaIndex parameters
import os  # Imports OS module for interacting with the operating system (file paths, environment variables)
import pdfplumber  # Imports pdfplumber library for extracting text from PDF files
from docx import Document as DocxDocument  # Imports Document class from python-docx, renamed to avoid conflict with LlamaIndex's Document
import json  # Imports JSON module for parsing and creating JSON data
from datetime import datetime  # Imports datetime class for working with dates and times
import hashlib  # Imports hashlib for creating hash functions (MD5, SHA, etc.) for data integrity/unique identifiers

# Module-level state shared by the handlers below.
# NOTE(review): these names live at module scope, so every caller of the
# handlers in this process reads and writes the same objects — confirm that
# is acceptable if more than one person uses the app at a time.
chat_engine = None  # LlamaIndex chat engine; stays None until load_data() builds one
conversation_history = []  # Current chat as {"role", "content"} dicts, appended by chat_with_docs()
current_user_id = None  # Value of get_user_id() for the API key most recently seen by load_data()/chat_with_docs()

# Function to generate user ID from API key
def get_user_id(api_key):
    """Derive a short, stable identifier from an API key.

    Args:
        api_key: The key string; may be None or empty.

    Returns:
        None when ``api_key`` is falsy, otherwise the first 16 hexadecimal
        characters of the SHA-256 digest of the encoded key.
    """
    if api_key:
        digest = hashlib.sha256(api_key.encode())
        # Only a prefix of the digest is kept so the ID stays filename-friendly.
        return digest.hexdigest()[:16]
    return None

# Function to get user-specific filename
def get_user_file(api_key):
    """Return the name of the per-user conversation file.

    Args:
        api_key: The key string used to derive the user ID.

    Returns:
        ``"conversations_<user id>.json"``, or None when ``get_user_id``
        yields no ID for the given key.
    """
    uid = get_user_id(api_key)
    return f"conversations_{uid}.json" if uid else None

# Function to read PDF files
def read_pdf(file_path):
    """Extract the text of every page of a PDF.

    Args:
        file_path: Path of the PDF file to open with pdfplumber.

    Returns:
        The text of all pages in order, each page followed by a newline.
        Pages that yield no text contribute just the newline.
    """
    with pdfplumber.open(file_path) as pdf:
        # extract_text() can come back empty (and, in some pdfplumber
        # versions, as None) for pages without a text layer; `or ''` keeps
        # one such page from aborting the whole document with a TypeError.
        return ''.join((page.extract_text() or '') + '\n' for page in pdf.pages)

# Function to read DOCX files
def read_docx(file_path):
    """Extract the paragraph text of a .docx file.

    Args:
        file_path: Path of the Word document to load with python-docx.

    Returns:
        The text of every paragraph in ``doc.paragraphs`` order, each followed
        by a newline.
    """
    document = DocxDocument(file_path)
    return ''.join(para.text + '\n' for para in document.paragraphs)

# Function to load and index documents
def load_data(files, api_key):
    """Extract text from the uploaded files, index it, and build the chat engine.

    On success the module-level ``chat_engine`` is replaced by an engine built
    from the new documents and ``current_user_id`` is updated.

    Args:
        files: Uploaded files, either objects exposing their path as ``.name``
            or plain path strings. Only ``.pdf`` and ``.docx`` (matched
            case-insensitively) are read; anything else is ignored.
        api_key: OpenAI API key used for the LLM and exported as
            ``OPENAI_API_KEY``.

    Returns:
        A status message for display: a success notice, a prompt for missing
        input, or ``"Error loading documents: ..."`` if anything raised.
    """
    global chat_engine, current_user_id

    if not api_key:
        return "Please provide your OpenAI API key first."

    if not files:
        return "Please upload files to proceed."

    try:
        current_user_id = get_user_id(api_key)

        docs = []
        for file in files:
            # Accept both upload objects (path in .name) and bare path strings.
            path = str(getattr(file, "name", file))
            lowered = path.lower()  # so ".PDF" / ".DOCX" are not silently skipped
            if lowered.endswith('.pdf'):
                text = read_pdf(path)
            elif lowered.endswith('.docx'):
                text = read_docx(path)
            else:
                continue
            # A document with no extractable text adds nothing to the index.
            if text and text.strip():
                docs.append(Document(text=text))

        if not docs:
            # Without this guard an empty index would be built and the user
            # would be told loading succeeded.
            return "No readable text found. Please upload PDF or DOCX files that contain text."

        # Exported as well as passed to the LLM below, so other OpenAI-backed
        # components that read the environment see the same key.
        os.environ["OPENAI_API_KEY"] = api_key

        # NOTE(review): confirm the chosen model accepts a non-default temperature.
        Settings.llm = OpenAI(
            model="gpt-5-nano",
            temperature=0.5,
            api_key=api_key,
            system_prompt="You are a helpful AI assistant that answers questions based on the provided documents. Always base your answers on the content of the uploaded documents. If the answer cannot be found in the documents, clearly state that. Be accurate, concise, and cite specific information from the documents when possible."
        )

        index = VectorStoreIndex.from_documents(docs)
        chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True)

        return "Documents loaded and indexed successfully! You can now start chatting."
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return f"Error loading documents: {str(e)}"

# Function to handle chat
def chat_with_docs(message, history, api_key):
    """Answer one user message with the chat engine and extend the chat history.

    Args:
        message: The text the user submitted.
        history: The chat so far as a list of ``{"role", "content"}`` dicts;
            None is treated as an empty chat.
        api_key: OpenAI API key; required before anything else happens.

    Returns:
        A new history list: the given history followed by the user message and
        the assistant's answer (or an explanatory/error message). A blank
        message returns the history unchanged. The input list is never mutated.
    """
    global chat_engine, conversation_history, current_user_id

    # `history + [...]` would raise TypeError on None, so normalise first.
    history = history or []

    if not api_key:
        return history + [{"role": "assistant", "content": "Please enter your OpenAI API key first."}]

    # Nothing to ask: skip the engine call and keep the transcript clean.
    if not message or not message.strip():
        return history

    current_user_id = get_user_id(api_key)

    def with_reply(reply_text):
        # Fresh dicts on every call so nothing is shared with conversation_history.
        return history + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": reply_text},
        ]

    if chat_engine is None:
        return with_reply("Please upload and load documents first before asking questions.")

    try:
        response = chat_engine.chat(message)
        answer = response.response
    except Exception as e:
        # UI boundary: surface the failure in the chat instead of raising.
        return with_reply(f"Error: {str(e)}")

    # Only successful exchanges are recorded for save_conversation().
    conversation_history.append({"role": "user", "content": message})
    conversation_history.append({"role": "assistant", "content": answer})

    return with_reply(answer)

# Function to save conversation (user-specific)
def save_conversation(api_key):
    """Append the current conversation to the caller's conversation file.

    One JSON object per call is written on its own line, holding a
    ``"timestamp"`` string and the ``"messages"`` list.

    Args:
        api_key: Key used to pick the target file via ``get_user_file``.

    Returns:
        A status message: success, a prompt for a missing key, a notice that
        there is nothing to save, or ``"Error saving conversation: ..."``.
    """
    if not api_key:
        return "Please enter your OpenAI API key first."
    if not conversation_history:
        return "No conversation to save."

    try:
        target = get_user_file(api_key)
        record = {
            "timestamp": datetime.now().strftime("%Y-%m-%d_%H-%M-%S"),
            "messages": conversation_history,
        }
        # Append mode: earlier saved conversations stay in the file.
        with open(target, "a") as out:
            json.dump(record, out)
            out.write("\n")
    except Exception as e:
        return f"Error saving conversation: {str(e)}"
    return "Conversation saved successfully!"

# Function to delete all conversations (user-specific)
def delete_all_conversations(api_key):
    """Remove the caller's conversation file from disk.

    Args:
        api_key: Key used to pick the target file via ``get_user_file``.

    Returns:
        A status message: success, a prompt for a missing key, a notice that
        no file exists, or ``"Error deleting conversations: ..."``.
    """
    if not api_key:
        return "Please enter your OpenAI API key first."

    try:
        target = get_user_file(api_key)
        if not os.path.exists(target):
            return "No conversations to delete."
        os.remove(target)
    except Exception as e:
        return f"Error deleting conversations: {str(e)}"
    return "All your conversations deleted successfully!"

# Function to load previous conversations (user-specific)
def load_conversations(api_key):
    """Return the caller's saved conversations as one display-ready string.

    The file named by ``get_user_file(api_key)`` is read as one JSON record
    per line. Blank lines are skipped. A record may be a dict with
    ``"timestamp"`` and ``"messages"`` or a bare list of messages.

    Args:
        api_key: Key used to pick the file via ``get_user_file``.

    Returns:
        The formatted conversations, or a status message when the key is
        missing, no file or no records exist, or reading/parsing failed
        (``"Error loading conversations: ..."``).
    """
    if not api_key:
        return "Please enter your OpenAI API key first to view your conversations."

    user_file = get_user_file(api_key)

    try:
        with open(user_file, "r") as f:
            # A blank line is not a record; passing it to json.loads would
            # raise and hide every valid conversation in the file.
            conversations = [json.loads(line) for line in f if line.strip()]

        conv_text = "".join(
            _format_conversation(number, conv)
            for number, conv in enumerate(conversations, start=1)
        )
        return conv_text if conv_text else "No previous conversations found."
    except FileNotFoundError:
        return "No previous conversations found for your account."
    except Exception as e:
        # UI boundary: report the failure in the textbox instead of raising.
        return f"Error loading conversations: {str(e)}"


def _format_conversation(number, conv):
    """Render one parsed record as a header, a timestamp line, and its messages."""
    if isinstance(conv, dict):
        timestamp = conv.get("timestamp", "Unknown time")
        messages = conv.get("messages")
        if messages is None:
            # No "messages" key: show the record itself if it looks like a
            # single message, rather than iterating over its key names.
            messages = [conv] if ("role" in conv or "content" in conv) else []
    else:
        # Presumably an older file layout where a line was just the message
        # list — TODO confirm; either way it must not crash the listing.
        timestamp = "Unknown time"
        messages = conv if isinstance(conv, list) else []

    separator = '=' * 50
    pieces = [
        f"\n{separator}\nConversation {number}\n{separator}\n",
        f"Timestamp: {timestamp}\n\n",
    ]
    for message in messages:
        if not isinstance(message, dict):
            continue  # skip entries that are not role/content mappings
        role = message.get('role', 'unknown')
        content = message.get('content', '')
        # str() guards against a non-string role such as null in the file.
        pieces.append(f"{str(role).upper()}: {content}\n\n")
    return "".join(pieces)

# Function to clear current conversation
def clear_conversation():
    """Start a fresh chat.

    Rebinds the module-level ``conversation_history`` to a new empty list.

    Returns:
        An empty list, which the caller uses as the new chat display value.
    """
    global conversation_history
    conversation_history = list()
    return list()

# Create Gradio interface
# Build the UI. Components are created in the order they should appear, and
# the event handlers at the bottom connect them to the functions defined above.
with gr.Blocks(title="Chat with Documents 💬 📚", theme=gr.themes.Ocean()) as demo:
    gr.Markdown("# Chat with Documents 💬 📚")
    gr.Markdown("Upload PDF or DOCX files and chat with them using AI!")
    # NOTE(review): saved files are named after a hash of the API key, but the
    # in-memory transcript and chat engine are module-level globals — confirm
    # this notice still holds when several people use the same running app.
    gr.Markdown("**Privacy Notice:** Your conversations are private and tied to your API key. Only you can see your saved conversations.")

    # --- Setup row: API key, file upload, and the "Load Documents" action ---
    with gr.Row():
        with gr.Column(scale=2):  # the only column in this row
            api_key_input = gr.Textbox(
                label="OpenAI API Key",
                type="password",  # password-type textbox
                placeholder="Enter your OpenAI API key here..."
            )

            file_upload = gr.File(
                label="Upload PDF or DOCX files",
                file_count="multiple",
                file_types=[".pdf", ".docx"]  # the two formats load_data() knows how to read
            )

            load_btn = gr.Button("Load Documents", variant="primary")
            load_status = gr.Textbox(label="Status", interactive=False)  # shows load_data()'s return message

            load_btn.click(
                fn=load_data,
                inputs=[file_upload, api_key_input],  # order matches load_data(files, api_key)
                outputs=load_status
            )

    # --- Main row: chat area (scale=3) beside the saved-conversations sidebar (scale=1) ---
    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                label="Chat",
                height=400
            )
            msg = gr.Textbox(
                label="Your Question",
                placeholder="Ask a question about your documents..."
            )

            with gr.Row():
                submit_btn = gr.Button("Send", variant="primary")
                clear_btn = gr.Button("Clear Chat")

            with gr.Row():
                save_btn = gr.Button("Save Conversation")
                save_status = gr.Textbox(label="Save Status", interactive=False)  # shows save_conversation()'s return message

        with gr.Column(scale=1):
            gr.Markdown("### Your Previous Conversations")
            load_convs_btn = gr.Button("Load Your Conversations")
            convs_display = gr.Textbox(
                label="Conversation History",
                lines=20,
                interactive=False
            )
            delete_all_btn = gr.Button("Delete All Your Conversations", variant="stop")
            delete_status = gr.Textbox(label="Delete Status", interactive=False)  # shows delete_all_conversations()'s return message

    # --- Event handlers ---
    # The Send button and the textbox's submit event run the same two steps:
    # call chat_with_docs, then write "" into the question box.
    submit_btn.click(
        fn=chat_with_docs,
        inputs=[msg, chatbot, api_key_input],  # order matches chat_with_docs(message, history, api_key)
        outputs=chatbot
    ).then(
        lambda: "",  # chained step: empties the question box
        outputs=msg
    )

    msg.submit(
        fn=chat_with_docs,
        inputs=[msg, chatbot, api_key_input],
        outputs=chatbot
    ).then(
        lambda: "",  # chained step: empties the question box
        outputs=msg
    )

    clear_btn.click(
        fn=clear_conversation,  # resets conversation_history and returns [] for the chat display
        outputs=chatbot
    )

    save_btn.click(
        fn=save_conversation,
        inputs=[api_key_input],  # the key selects which per-user file is written
        outputs=save_status
    )

    load_convs_btn.click(
        fn=load_conversations,
        inputs=[api_key_input],  # the key selects which per-user file is read
        outputs=convs_display
    )

    delete_all_btn.click(
        fn=delete_all_conversations,
        inputs=[api_key_input],  # the key selects which per-user file is removed
        outputs=delete_status
    )

if __name__ == "__main__":  # Only start the app when this file is run as a script, not when it is imported
    demo.launch()  # Launch the Blocks app with Gradio's default launch options