CHATDOC-MEMORY / app.py
shukdev3's picture
Update app.py
123fc8d verified
Raw
History Blame Contribute Delete
22.6 kB
import gradio as gr
from llama_index.core import VectorStoreIndex, Document # Imports VectorStoreIndex for semantic search and Document class for text storage
from llama_index.llms.openai import OpenAI # Imports OpenAI language model integration from LlamaIndex
from llama_index.core import Settings # Imports Settings to configure global LlamaIndex parameters
import os # Imports OS module for interacting with the operating system (file paths, environment variables)
import pdfplumber # Imports pdfplumber library for extracting text from PDF files
from docx import Document as DocxDocument # Imports Document class from python-docx, renamed to avoid conflict with LlamaIndex's Document
import json # Imports JSON module for parsing and creating JSON data
from datetime import datetime # Imports datetime class for working with dates and times
import hashlib # Imports hashlib for creating hash functions (MD5, SHA, etc.) for data integrity/unique identifiers
# Module-level session state. These names live for the lifetime of the process
# and are shared by every handler below (they are not per-browser-session).
chat_engine = None  # LlamaIndex chat engine; remains None until load_data() builds one
conversation_history = []  # {"role", "content"} dicts appended by chat_with_docs(), written out by save_conversation()
current_user_id = None  # truncated SHA-256 id of the API key most recently passed to load_data()/chat_with_docs()
def get_user_id(api_key):
    """Derive a short, stable user identifier from an API key.

    Args:
        api_key: The key to fingerprint; may be None or empty.

    Returns:
        The first 16 hex characters of the key's SHA-256 digest, or None when
        no key was supplied.
    """
    if api_key:
        digest = hashlib.sha256(api_key.encode()).hexdigest()
        # Only a prefix is kept: enough to tell users apart in a filename
        # without storing the key itself.
        return digest[:16]
    return None
def get_user_file(api_key):
    """Return the name of the conversation file that belongs to an API key.

    Args:
        api_key: The key identifying the user; may be None or empty.

    Returns:
        ``"conversations_<user id>.json"`` (a relative filename), or None when
        no user id can be derived from the key.
    """
    owner = get_user_id(api_key)
    return f"conversations_{owner}.json" if owner else None
def read_pdf(file_path):
    """Extract the text of every page of a PDF.

    Args:
        file_path: Path of the PDF file, opened with pdfplumber.

    Returns:
        The text of all pages concatenated in page order, each page followed
        by a newline. A page with no extractable text contributes only the
        newline.
    """
    with pdfplumber.open(file_path) as pdf:
        # extract_text() can come back as None for a page without a text layer
        # (e.g. a scanned image) in some pdfplumber versions; coalesce to ""
        # so a single such page cannot abort the whole document with a TypeError.
        page_texts = [(page.extract_text() or "") + "\n" for page in pdf.pages]
    return "".join(page_texts)
def read_docx(file_path):
    """Extract the paragraph text of a .docx file.

    Args:
        file_path: Path of the Word document to load.

    Returns:
        The text of every paragraph in document order, each followed by a
        newline.
    """
    document = DocxDocument(file_path)
    return "".join(f"{para.text}\n" for para in document.paragraphs)
def load_data(files, api_key):
    """Read the uploaded documents, index them and build the global chat engine.

    Args:
        files: Uploaded files from the file widget. Each item may be an object
            exposing its path as ``.name`` or a plain path string.
        api_key: OpenAI API key; also exported as ``OPENAI_API_KEY``.

    Returns:
        A status message for the UI. Failures are reported as text rather than
        raised, so the handler never crashes the interface.
    """
    global chat_engine, current_user_id
    if not api_key:
        return "Please provide your OpenAI API key first."
    if not files:
        return "Please upload files to proceed."

    # Classify the uploads up front. Matching is case-insensitive so that
    # "REPORT.PDF" is not silently ignored, and plain path strings are accepted
    # alongside objects carrying the path in ``.name``.
    supported = []
    for file in files:
        path = str(getattr(file, "name", file))
        lowered = path.lower()
        for suffix in (".pdf", ".docx"):
            if lowered.endswith(suffix):
                supported.append((path, suffix))
                break
    if not supported:
        # Without this guard an empty index would be built and reported as success.
        return "No supported files found. Please upload PDF or DOCX files."

    try:
        current_user_id = get_user_id(api_key)

        docs = []
        for path, suffix in supported:
            text = read_pdf(path) if suffix == ".pdf" else read_docx(path)
            docs.append(Document(text=text))

        # Exported as well as passed explicitly below, so that library code
        # which reads the key from the environment can find it.
        os.environ["OPENAI_API_KEY"] = api_key
        Settings.llm = OpenAI(
            model="gpt-5-nano",
            # NOTE(review): confirm this model accepts a non-default temperature.
            temperature=0.5,
            api_key=api_key,
            system_prompt=(
                "You are a helpful AI assistant that answers questions based on the provided documents. "
                "Always base your answers on the content of the uploaded documents. "
                "If the answer cannot be found in the documents, clearly state that. "
                "Be accurate, concise, and cite specific information from the documents when possible."
            ),
        )

        index = VectorStoreIndex.from_documents(docs)
        # condense_question rewrites each follow-up using the prior turns
        # before querying the index.
        chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True)
        return "Documents loaded and indexed successfully! You can now start chatting."
    except Exception as e:
        # UI boundary: surface the failure to the user instead of raising.
        return f"Error loading documents: {str(e)}"
def chat_with_docs(message, history, api_key):
    """Answer one user message against the loaded documents.

    Args:
        message: The question typed by the user.
        history: The chatbot's current list of ``{"role", "content"}`` dicts;
            None is treated as an empty history.
        api_key: OpenAI API key identifying the current user.

    Returns:
        The history extended with the new user/assistant exchange. When the
        key is missing only an assistant notice is appended; a blank message
        leaves the history unchanged.
    """
    global chat_engine, conversation_history, current_user_id
    # Tolerate a missing history instead of failing on ``None + list``.
    history = history or []

    if not api_key:
        return history + [{"role": "assistant", "content": "Please enter your OpenAI API key first."}]

    # An accidental Enter on an empty box should not cost an LLM round trip.
    if not message or not message.strip():
        return history

    current_user_id = get_user_id(api_key)

    if chat_engine is None:
        reply = "Please upload and load documents first before asking questions."
    else:
        try:
            response = chat_engine.chat(message)
            reply = response.response
            # Only successful exchanges are kept for save_conversation().
            conversation_history.append({"role": "user", "content": message})
            conversation_history.append({"role": "assistant", "content": reply})
        except Exception as e:
            # UI boundary: show the failure in the chat instead of raising.
            reply = f"Error: {str(e)}"

    return history + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": reply},
    ]
def save_conversation(api_key):
    """Append the current conversation to the user's conversation file.

    Each save adds one JSON object on its own line, holding a timestamp and
    the messages collected so far in ``conversation_history``.

    Args:
        api_key: OpenAI API key; selects the user-specific file.

    Returns:
        A status message for the UI (success, nothing to save, missing key,
        or the error text of a failed write).
    """
    global conversation_history
    if not api_key:
        return "Please enter your OpenAI API key first."
    if not conversation_history:
        return "No conversation to save."

    try:
        target_path = get_user_file(api_key)
        saved_at = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        # Append mode keeps earlier saves; one record per line.
        with open(target_path, "a") as out:
            record = {"timestamp": saved_at, "messages": conversation_history}
            json.dump(record, out)
            out.write("\n")
    except Exception as err:
        return f"Error saving conversation: {str(err)}"
    return "Conversation saved successfully!"
def delete_all_conversations(api_key):
    """Delete the conversation file that belongs to an API key.

    Args:
        api_key: OpenAI API key; selects the user-specific file.

    Returns:
        A status message for the UI: success, nothing to delete, missing key,
        or the error text of a failed deletion.
    """
    if not api_key:
        return "Please enter your OpenAI API key first."

    try:
        target_path = get_user_file(api_key)
        if not os.path.exists(target_path):
            return "No conversations to delete."
        os.remove(target_path)
        return "All your conversations deleted successfully!"
    except Exception as err:
        return f"Error deleting conversations: {str(err)}"
def load_conversations(api_key):
    """Read the user's saved conversations and format them for display.

    Args:
        api_key: OpenAI API key; selects the user-specific file.

    Returns:
        The formatted conversations, or a message explaining that the key is
        missing, nothing has been saved, or the file could not be read.
    """
    if not api_key:
        return "Please enter your OpenAI API key first to view your conversations."

    user_file = get_user_file(api_key)
    if not os.path.exists(user_file):
        return "No previous conversations found for your account."

    try:
        with open(user_file, "r") as f:
            # One JSON record per line; blank lines are skipped so a stray
            # newline cannot make the whole file unreadable.
            conversations = [json.loads(line) for line in f if line.strip()]
        conv_text = _format_conversations(conversations)
        return conv_text if conv_text else "No previous conversations found."
    except Exception as e:
        # UI boundary: report unreadable or malformed files as text.
        return f"Error loading conversations: {str(e)}"


def _format_conversations(conversations):
    """Render parsed conversation records as one display string."""
    divider = "=" * 50
    parts = []
    for number, conv in enumerate(conversations, start=1):
        if isinstance(conv, dict):
            timestamp = conv.get("timestamp", "Unknown time")
            messages = conv.get("messages", [])
        else:
            # A record that is itself a bare list of messages carries no
            # timestamp; show its messages rather than failing on ``.get``.
            timestamp = "Unknown time"
            messages = conv
        parts.append(f"\n{divider}\nConversation {number}\n{divider}\n")
        parts.append(f"Timestamp: {timestamp}\n\n")
        for message in messages:
            role = message.get('role', 'unknown')
            content = message.get('content', '')
            parts.append(f"{role.upper()}: {content}\n\n")
    return "".join(parts)
def clear_conversation():
    """Start a fresh chat: drop the collected messages and the engine's memory.

    Returns:
        An empty list, which clears the chatbot display.
    """
    global chat_engine, conversation_history
    conversation_history = []
    if chat_engine is not None:
        # The chat engine keeps its own record of earlier turns and uses it to
        # rewrite follow-up questions; without a reset, a "cleared" chat would
        # still be answered in the context of the old conversation.
        chat_engine.reset()
    return []
# Gradio interface: layout first, event wiring at the end of the block.
with gr.Blocks(title="Chat with Documents 💬 📚", theme=gr.themes.Ocean()) as demo:
    gr.Markdown("# Chat with Documents 💬 📚")
    gr.Markdown("Upload PDF or DOCX files and chat with them using AI!")
    # NOTE(review): saved files are keyed by API key, but the in-memory chat
    # state is held in module-level globals -- confirm this notice holds when
    # several users share one running instance.
    gr.Markdown("**Privacy Notice:** Your conversations are private and tied to your API key. Only you can see your saved conversations.")

    # Setup row: API key, document upload and indexing status.
    with gr.Row():
        with gr.Column(scale=2):
            api_key_input = gr.Textbox(
                label="OpenAI API Key",
                type="password",  # mask the key while typing
                placeholder="Enter your OpenAI API key here...",
            )
            file_upload = gr.File(
                label="Upload PDF or DOCX files",
                file_count="multiple",
                file_types=[".pdf", ".docx"],
            )
            load_btn = gr.Button("Load Documents", variant="primary")
            load_status = gr.Textbox(label="Status", interactive=False)
            load_btn.click(
                fn=load_data,
                inputs=[file_upload, api_key_input],
                outputs=load_status,
            )

    # Main row: chat area (scale=3) next to the saved-conversations sidebar (scale=1).
    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                label="Chat",
                height=400,
            )
            msg = gr.Textbox(
                label="Your Question",
                placeholder="Ask a question about your documents...",
            )
            with gr.Row():
                submit_btn = gr.Button("Send", variant="primary")
                clear_btn = gr.Button("Clear Chat")
            with gr.Row():
                save_btn = gr.Button("Save Conversation")
                save_status = gr.Textbox(label="Save Status", interactive=False)
        with gr.Column(scale=1):
            gr.Markdown("### Your Previous Conversations")
            load_convs_btn = gr.Button("Load Your Conversations")
            convs_display = gr.Textbox(
                label="Conversation History",
                lines=20,
                interactive=False,
            )
            delete_all_btn = gr.Button("Delete All Your Conversations", variant="stop")
            delete_status = gr.Textbox(label="Delete Status", interactive=False)

    # Event handlers.
    # The Send button and pressing Enter in the question box behave the same:
    # run the chat, then empty the question box.
    for send_trigger in (submit_btn.click, msg.submit):
        send_trigger(
            fn=chat_with_docs,
            inputs=[msg, chatbot, api_key_input],
            outputs=chatbot,
        ).then(
            lambda: "",
            outputs=msg,
        )
    clear_btn.click(
        fn=clear_conversation,
        outputs=chatbot,
    )
    save_btn.click(
        fn=save_conversation,
        inputs=[api_key_input],
        outputs=save_status,
    )
    load_convs_btn.click(
        fn=load_conversations,
        inputs=[api_key_input],
        outputs=convs_display,
    )
    delete_all_btn.click(
        fn=delete_all_conversations,
        inputs=[api_key_input],
        outputs=delete_status,
    )
if __name__ == "__main__":  # start the app only when this file is run as a script, not when imported
    demo.launch()  # launch the Gradio app built in the Blocks context above