Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from llama_index.core import VectorStoreIndex, Document # Imports VectorStoreIndex for semantic search and Document class for text storage | |
| from llama_index.llms.openai import OpenAI # Imports OpenAI language model integration from LlamaIndex | |
| from llama_index.core import Settings # Imports Settings to configure global LlamaIndex parameters | |
| import os # Imports OS module for interacting with the operating system (file paths, environment variables) | |
| import pdfplumber # Imports pdfplumber library for extracting text from PDF files | |
| from docx import Document as DocxDocument # Imports Document class from python-docx, renamed to avoid conflict with LlamaIndex's Document | |
| import json # Imports JSON module for parsing and creating JSON data | |
| from datetime import datetime # Imports datetime class for working with dates and times | |
| import hashlib # Imports hashlib for creating hash functions (MD5, SHA, etc.) for data integrity/unique identifiers | |
| # Global variables | |
# Module-level session state, read and written by the handlers below.
# NOTE(review): these are plain module globals, so every caller in this
# process shares the same three objects.
chat_engine = None  # LlamaIndex chat engine; stays None until load_data() builds one
conversation_history = []  # {"role": ..., "content": ...} dicts for the current chat
current_user_id = None  # short hash of the most recently supplied API key
| # Function to generate user ID from API key | |
def get_user_id(api_key):
    """Derive a short, stable identifier from an API key.

    Args:
        api_key: The API key string, or a falsy value when none was given.

    Returns:
        The first 16 hex characters of the key's SHA-256 digest, or ``None``
        when ``api_key`` is empty or ``None``.
    """
    if api_key:
        digest = hashlib.sha256(api_key.encode())
        return digest.hexdigest()[:16]
    return None
| # Function to get user-specific filename | |
def get_user_file(api_key):
    """Return the name of the JSON file that stores this key's conversations.

    The name embeds the identifier produced by ``get_user_id``. ``None`` is
    returned when no identifier can be derived (empty or missing key).
    """
    owner = get_user_id(api_key)
    return f"conversations_{owner}.json" if owner else None
| # Function to read PDF files | |
def read_pdf(file_path):
    """Extract the text of every page of a PDF file.

    Args:
        file_path: Path of the PDF to open with pdfplumber.

    Returns:
        The page texts concatenated in page order, each followed by a
        newline. A page without extractable text contributes only the newline.
    """
    with pdfplumber.open(file_path) as pdf:
        # extract_text() may hand back None instead of "" for a page with no
        # text layer (behaviour depends on the pdfplumber release); the
        # `or ""` keeps one image-only page from aborting the whole load
        # with a TypeError.
        return "".join((page.extract_text() or "") + "\n" for page in pdf.pages)
| # Function to read DOCX files | |
def read_docx(file_path):
    """Return the text of a .docx file, one paragraph per line.

    Only ``doc.paragraphs`` is read; every paragraph's text is followed by a
    newline, so the result ends with a newline whenever there is at least one
    paragraph.
    """
    paragraphs = DocxDocument(file_path).paragraphs
    return "".join(para.text + "\n" for para in paragraphs)
| # Function to load and index documents | |
def load_data(files, api_key):
    """Read the uploaded documents, index them, and build the chat engine.

    On success the module-level ``chat_engine`` is replaced and
    ``current_user_id`` is updated.

    Args:
        files: Uploaded files. Each item is either a path string or an object
            that exposes the path as ``.name``.
        api_key: OpenAI API key used to configure the LLM.

    Returns:
        A status message for the UI. Failures are reported as a message
        rather than raised.
    """
    global chat_engine, current_user_id

    if not api_key:
        return "Please provide your OpenAI API key first."
    if not files:
        return "Please upload files to proceed."

    try:
        docs = []
        for file in files:
            # Accept both upload objects (path in .name) and bare path strings.
            path = getattr(file, "name", file)
            # Compare the extension case-insensitively so "REPORT.PDF" is not
            # silently ignored.
            extension = os.path.splitext(path)[1].lower()
            if extension == ".pdf":
                text = read_pdf(path)
            elif extension == ".docx":
                text = read_docx(path)
            else:
                continue
            # Skip files that produced no text; they add nothing to the index.
            if text and text.strip():
                docs.append(Document(text=text))

        if not docs:
            # Without this check an empty index would be built and reported
            # as a success, and every later question would go unanswered.
            return (
                "No readable text was found in the uploaded files. "
                "Please upload PDF or DOCX files that contain text."
            )

        current_user_id = get_user_id(api_key)

        # NOTE(review): this is process-wide. Presumably other OpenAI-backed
        # components (e.g. the default embedding model used while indexing)
        # pick the key up from the environment -- confirm before removing.
        os.environ["OPENAI_API_KEY"] = api_key
        Settings.llm = OpenAI(
            model="gpt-5-nano",
            temperature=0.5,
            api_key=api_key,
            system_prompt="You are a helpful AI assistant that answers questions based on the provided documents. Always base your answers on the content of the uploaded documents. If the answer cannot be found in the documents, clearly state that. Be accurate, concise, and cite specific information from the documents when possible.",
        )

        index = VectorStoreIndex.from_documents(docs)
        chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True)
        return "Documents loaded and indexed successfully! You can now start chatting."
    except Exception as e:
        # UI boundary: surface the problem in the status box instead of crashing.
        return f"Error loading documents: {str(e)}"
| # Function to handle chat | |
def chat_with_docs(message, history, api_key):
    """Answer one user message and return the updated chat transcript.

    Args:
        message: The text the user typed.
        history: The transcript currently shown in the chat widget, as a list
            of ``{"role": ..., "content": ...}`` dicts. ``None`` is treated as
            an empty transcript.
        api_key: The user's OpenAI API key.

    Returns:
        A new list: ``history`` followed by the messages produced by this
        call. ``history`` itself is never mutated. Errors from the chat engine
        are reported as an assistant message instead of being raised.
    """
    global chat_engine, conversation_history, current_user_id

    # Copy so the caller's list is untouched and a missing transcript (None)
    # does not break the concatenations below.
    history = list(history or [])

    if not api_key:
        return history + [{"role": "assistant", "content": "Please enter your OpenAI API key first."}]

    # Nothing to ask: leave the transcript as it is rather than sending an
    # empty prompt to the engine and recording an empty turn.
    if not message or not message.strip():
        return history

    current_user_id = get_user_id(api_key)
    user_turn = {"role": "user", "content": message}

    if chat_engine is None:
        return history + [
            user_turn,
            {"role": "assistant", "content": "Please upload and load documents first before asking questions."},
        ]

    try:
        response = chat_engine.chat(message)
        answer = response.response
        # Record separate dict objects so the saved transcript and the
        # displayed one never alias each other.
        conversation_history.append(dict(user_turn))
        conversation_history.append({"role": "assistant", "content": answer})
        return history + [user_turn, {"role": "assistant", "content": answer}]
    except Exception as e:
        # UI boundary: show the failure in the chat instead of crashing.
        return history + [user_turn, {"role": "assistant", "content": f"Error: {str(e)}"}]
| # Function to save conversation (user-specific) | |
def save_conversation(api_key):
    """Append the current conversation to the caller's conversation file.

    One JSON object per call is written on its own line, holding a
    ``timestamp`` string and the ``messages`` list taken from the
    module-level ``conversation_history``.

    Args:
        api_key: API key that selects the target file via ``get_user_file``.

    Returns:
        A status message; I/O or serialization errors are reported in the
        message rather than raised.
    """
    global conversation_history

    if not api_key:
        return "Please enter your OpenAI API key first."
    if not conversation_history:
        return "No conversation to save."

    try:
        target = get_user_file(api_key)
        record = {
            "timestamp": datetime.now().strftime("%Y-%m-%d_%H-%M-%S"),
            "messages": conversation_history,
        }
        # Append mode: earlier saved conversations stay in the file.
        with open(target, "a") as out:
            json.dump(record, out)
            out.write("\n")
        return "Conversation saved successfully!"
    except Exception as err:
        return f"Error saving conversation: {str(err)}"
| # Function to delete all conversations (user-specific) | |
def delete_all_conversations(api_key):
    """Delete the conversation file that belongs to ``api_key``.

    Returns:
        A status message saying whether the file was removed, did not exist,
        or could not be removed (with the error text).
    """
    if not api_key:
        return "Please enter your OpenAI API key first."

    try:
        target = get_user_file(api_key)
        if not os.path.exists(target):
            return "No conversations to delete."
        os.remove(target)
        return "All your conversations deleted successfully!"
    except Exception as err:
        return f"Error deleting conversations: {str(err)}"
| # Function to load previous conversations (user-specific) | |
def load_conversations(api_key):
    """Return the caller's saved conversations as display text.

    The file named by ``get_user_file`` is read as JSON Lines: one saved
    conversation per non-blank line.

    Args:
        api_key: API key that selects which conversation file to read.

    Returns:
        The formatted conversations, or a message explaining that there are
        none or that reading failed.
    """
    if not api_key:
        return "Please enter your OpenAI API key first to view your conversations."

    user_file = get_user_file(api_key)
    if not os.path.exists(user_file):
        return "No previous conversations found for your account."

    try:
        with open(user_file, "r", encoding="utf-8") as f:
            # Skip blank lines so a stray newline does not make the whole
            # history unreadable.
            conversations = [json.loads(line) for line in f if line.strip()]

        divider = "=" * 50
        parts = []
        for number, conv in enumerate(conversations, start=1):
            if isinstance(conv, dict):
                timestamp = conv.get("timestamp", "Unknown time")
                messages = conv.get("messages")
                if messages is None:
                    # Record without a "messages" key: treat the record itself
                    # as a single message if it looks like one, instead of
                    # iterating over its keys.
                    messages = [conv] if "content" in conv else []
            else:
                # Record stored as a bare list of messages.
                timestamp = "Unknown time"
                messages = conv if isinstance(conv, list) else []

            parts.append(f"\n{divider}\nConversation {number}\n{divider}\n")
            parts.append(f"Timestamp: {timestamp}\n\n")
            for message in messages:
                if not isinstance(message, dict):
                    continue
                role = str(message.get('role', 'unknown'))
                content = message.get('content', '')
                parts.append(f"{role.upper()}: {content}\n\n")

        conv_text = "".join(parts)
        return conv_text if conv_text else "No previous conversations found."
    except Exception as e:
        # UI boundary: report unreadable or corrupt files as text.
        return f"Error loading conversations: {str(e)}"
| # Function to clear current conversation | |
def clear_conversation():
    """Start a fresh chat: drop the stored transcript and the engine's memory.

    Returns:
        An empty list, which the UI uses to blank the chat display.
    """
    global conversation_history

    conversation_history = []
    # The chat engine keeps its own record of earlier turns and uses it to
    # rewrite follow-up questions; without a reset, a "cleared" chat would
    # still be influenced by the previous conversation.
    if chat_engine is not None:
        chat_engine.reset()
    return []
| # Create Gradio interface | |
# Gradio interface.
# NOTE(review): container nesting below is reconstructed from the `with`
# statements; confirm the layout against the running app.
# NOTE(review): the emoji in the title/heading were restored from garbled
# bytes; the second one could not be recovered exactly, so a document emoji
# is used.
with gr.Blocks(title="Chat with Documents 💬 📄", theme=gr.themes.Ocean()) as demo:
    gr.Markdown("# Chat with Documents 💬 📄")
    gr.Markdown("Upload PDF or DOCX files and chat with them using AI!")
    # NOTE(review): the in-memory chat state lives in module globals, so this
    # promise only holds if a single person uses the process -- confirm.
    gr.Markdown("**Privacy Notice:** Your conversations are private and tied to your API key. Only you can see your saved conversations.")

    # --- Setup: API key and document upload ---
    with gr.Row():
        with gr.Column(scale=2):
            api_key_input = gr.Textbox(
                label="OpenAI API Key",
                type="password",
                placeholder="Enter your OpenAI API key here...",
            )
            file_upload = gr.File(
                label="Upload PDF or DOCX files",
                file_count="multiple",
                file_types=[".pdf", ".docx"],
            )
            load_btn = gr.Button("Load Documents", variant="primary")
            load_status = gr.Textbox(label="Status", interactive=False)

    load_btn.click(
        fn=load_data,
        inputs=[file_upload, api_key_input],
        outputs=load_status,
    )

    # --- Chat area (left) and saved conversations (right) ---
    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                label="Chat",
                height=400,
            )
            msg = gr.Textbox(
                label="Your Question",
                placeholder="Ask a question about your documents...",
            )
            with gr.Row():
                submit_btn = gr.Button("Send", variant="primary")
                clear_btn = gr.Button("Clear Chat")
            with gr.Row():
                save_btn = gr.Button("Save Conversation")
                save_status = gr.Textbox(label="Save Status", interactive=False)

        with gr.Column(scale=1):
            gr.Markdown("### Your Previous Conversations")
            load_convs_btn = gr.Button("Load Your Conversations")
            convs_display = gr.Textbox(
                label="Conversation History",
                lines=20,
                interactive=False,
            )
            delete_all_btn = gr.Button("Delete All Your Conversations", variant="stop")
            delete_status = gr.Textbox(label="Delete Status", interactive=False)

    # --- Event handlers ---
    # The Send button and pressing Enter in the question box do the same
    # thing: run the chat, then empty the question box.
    for send_trigger in (submit_btn.click, msg.submit):
        send_trigger(
            fn=chat_with_docs,
            inputs=[msg, chatbot, api_key_input],
            outputs=chatbot,
        ).then(
            lambda: "",
            outputs=msg,
        )

    clear_btn.click(
        fn=clear_conversation,
        outputs=chatbot,
    )
    save_btn.click(
        fn=save_conversation,
        inputs=[api_key_input],
        outputs=save_status,
    )
    load_convs_btn.click(
        fn=load_conversations,
        inputs=[api_key_input],
        outputs=convs_display,
    )
    delete_all_btn.click(
        fn=delete_all_conversations,
        inputs=[api_key_input],
        outputs=delete_status,
    )

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()