Spaces:

shukdev3
/

CHATDOC-MEMORY

Sleeping

App Files Files Community

CHATDOC-MEMORY / app.py

shukdev3

Update app.py

123fc8d verified 6 months ago

Raw

History Blame Contribute Delete

22.6 kB

	import gradio as gr
	from llama_index.core import VectorStoreIndex, Document # Imports VectorStoreIndex for semantic search and Document class for text storage
	from llama_index.llms.openai import OpenAI # Imports OpenAI language model integration from LlamaIndex
	from llama_index.core import Settings # Imports Settings to configure global LlamaIndex parameters
	import os # Imports OS module for interacting with the operating system (file paths, environment variables)
	import pdfplumber # Imports pdfplumber library for extracting text from PDF files
	from docx import Document as DocxDocument # Imports Document class from python-docx, renamed to avoid conflict with LlamaIndex's Document
	import json # Imports JSON module for parsing and creating JSON data
	from datetime import datetime # Imports datetime class for working with dates and times
	import hashlib # Imports hashlib for creating hash functions (MD5, SHA, etc.) for data integrity/unique identifiers

	# Global variables
	# NOTE: these are module-level, so one value of each is shared by every
	# caller in this Python process; they are not scoped per API key.
	chat_engine = None # Chat engine built by load_data(); stays None until documents have been indexed
	conversation_history = [] # Messages ({"role", "content"} dicts) appended by chat_with_docs() and written out by save_conversation()
	current_user_id = None # Hashed identifier from get_user_id(); set by load_data() and chat_with_docs()

	# Function to generate user ID from API key
	def get_user_id(api_key):
	    """Derive a short, stable user identifier from an API key.

	    Args:
	        api_key: The key as a string; may be ``None`` or empty.

	    Returns:
	        The first 16 hex characters of the SHA-256 digest of the key, or
	        ``None`` when the key is falsy.
	    """
	    if api_key:
	        digest = hashlib.sha256(api_key.encode())
	        return digest.hexdigest()[:16]
	    return None

	# Function to get user-specific filename
	def get_user_file(api_key):
	    """Return the conversation-log filename for the owner of ``api_key``.

	    The name has the form ``conversations_<user id>.json`` where the user
	    id comes from ``get_user_id``. Returns ``None`` when no user id can be
	    derived (i.e. the key is missing or empty).
	    """
	    uid = get_user_id(api_key)
	    return f"conversations_{uid}.json" if uid else None

	# Function to read PDF files
	def read_pdf(file_path):
	    """Extract the text of every page of a PDF.

	    Args:
	        file_path: Path of the PDF file to open with pdfplumber.

	    Returns:
	        The text of all pages concatenated in page order, each page
	        followed by a newline. Pages that yield no text contribute only
	        the newline.
	    """
	    with pdfplumber.open(file_path) as pdf:
	        # extract_text() can return None for a page without extractable
	        # text (e.g. a scanned image) in some pdfplumber releases; fall
	        # back to "" so one such page does not raise TypeError.
	        return "".join(
	            (page.extract_text() or "") + "\n" for page in pdf.pages
	        )

	# Function to read DOCX files
	def read_docx(file_path):
	    """Extract the paragraph text of a .docx file.

	    Args:
	        file_path: Path of the Word document to load.

	    Returns:
	        The text of each entry in the document's ``paragraphs``
	        collection, in order, each followed by a newline.
	    """
	    paragraphs = DocxDocument(file_path).paragraphs
	    return "".join(f"{para.text}\n" for para in paragraphs)

	# Function to load and index documents
	def load_data(files, api_key):
	    """Index the uploaded documents and build the global chat engine.

	    Args:
	        files: Uploaded files; each entry is either a path string or an
	            object whose ``name`` attribute is the path.
	        api_key: OpenAI API key used for the LLM.

	    Returns:
	        A status message for display: a prompt when the key or the files
	        are missing, a notice when none of the uploads is a PDF/DOCX, a
	        success message, or ``"Error loading documents: ..."`` when any
	        step raises.

	    Side effects:
	        Rebinds the module-level ``chat_engine`` and ``current_user_id``,
	        sets ``Settings.llm`` and writes ``OPENAI_API_KEY`` into
	        ``os.environ``.
	    """
	    global chat_engine, current_user_id

	    if not api_key:
	        return "Please provide your OpenAI API key first."

	    if not files:
	        return "Please upload files to proceed."

	    try:
	        docs = _collect_documents(files)
	        if not docs:
	            # Nothing readable was uploaded; do not build an empty index.
	            return "No supported documents found. Please upload PDF or DOCX files."

	        current_user_id = get_user_id(api_key)

	        # NOTE(review): this is process-wide, so the most recent caller's
	        # key is visible to everything in the process. Presumably it is
	        # needed by components that are not handed api_key explicitly
	        # (e.g. the embedding step of the index) - confirm before removing.
	        os.environ["OPENAI_API_KEY"] = api_key

	        Settings.llm = OpenAI(
	            model="gpt-5-nano",
	            temperature=0.5,
	            api_key=api_key,
	            system_prompt="You are a helpful AI assistant that answers questions based on the provided documents. Always base your answers on the content of the uploaded documents. If the answer cannot be found in the documents, clearly state that. Be accurate, concise, and cite specific information from the documents when possible.",
	        )

	        index = VectorStoreIndex.from_documents(docs)
	        chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True)

	        return "Documents loaded and indexed successfully! You can now start chatting."
	    except Exception as e:
	        # UI boundary: report the failure as text instead of raising.
	        return f"Error loading documents: {str(e)}"


	def _collect_documents(files):
	    """Read each PDF/DOCX upload and wrap its text in a ``Document``."""
	    docs = []
	    for file in files:
	        # Accept both objects exposing ``.name`` and plain path strings.
	        path = getattr(file, "name", file)
	        lowered = str(path).lower()  # match ".PDF" / ".Docx" as well
	        if lowered.endswith(".pdf"):
	            docs.append(Document(text=read_pdf(path)))
	        elif lowered.endswith(".docx"):
	            docs.append(Document(text=read_docx(path)))
	        # Any other extension is skipped.
	    return docs

	# Function to handle chat
	def chat_with_docs(message, history, api_key):
	    """Answer one user message against the loaded documents.

	    Args:
	        message: The user's question.
	        history: Messages shown so far as ``{"role", "content"}`` dicts;
	            ``None`` is treated as an empty history.
	        api_key: OpenAI API key; only used here to derive the user id.

	    Returns:
	        A new history list. It is the old history plus: a key prompt when
	        ``api_key`` is missing; nothing when the message is blank; or the
	        user message followed by the assistant's answer, a "load documents
	        first" notice, or an ``"Error: ..."`` text.

	    Side effects:
	        Rebinds the module-level ``current_user_id`` and, on a successful
	        answer, appends the exchange to ``conversation_history``.
	    """
	    global chat_engine, conversation_history, current_user_id

	    # Guard against a missing history so the concatenations below cannot
	    # raise TypeError.
	    history = history or []

	    if not api_key:
	        return history + [{"role": "assistant", "content": "Please enter your OpenAI API key first."}]

	    # A blank message carries no question; skip the LLM round trip.
	    if not message or not message.strip():
	        return history

	    current_user_id = get_user_id(api_key)

	    if chat_engine is None:
	        return history + _exchange(
	            message,
	            "Please upload and load documents first before asking questions.",
	        )

	    try:
	        response = chat_engine.chat(message)
	        answer = response.response
	        conversation_history.extend(_exchange(message, answer))
	        return history + _exchange(message, answer)
	    except Exception as e:
	        # UI boundary: show the failure in the chat instead of raising.
	        return history + _exchange(message, f"Error: {str(e)}")


	def _exchange(question, answer):
	    """Build the user/assistant message pair for one chat turn."""
	    return [
	        {"role": "user", "content": question},
	        {"role": "assistant", "content": answer},
	    ]

	# Function to save conversation (user-specific)
	def save_conversation(api_key):
	    """Append the current conversation to the caller's log file.

	    One JSON object per line is written to the file named by
	    ``get_user_file(api_key)``; the object holds a ``"timestamp"`` string
	    (``YYYY-MM-DD_HH-MM-SS``, local time) and the ``"messages"`` list taken
	    from the module-level ``conversation_history``.

	    Args:
	        api_key: OpenAI API key identifying whose file to append to.

	    Returns:
	        A status message: a key prompt, "No conversation to save.", a
	        success message, or ``"Error saving conversation: ..."``.
	    """
	    global conversation_history

	    if not api_key:
	        return "Please enter your OpenAI API key first."
	    if not conversation_history:
	        return "No conversation to save."

	    try:
	        target_path = get_user_file(api_key)
	        record = {
	            "timestamp": datetime.now().strftime("%Y-%m-%d_%H-%M-%S"),
	            "messages": conversation_history,
	        }
	        # Append mode keeps earlier saves; each save becomes one line.
	        with open(target_path, "a") as out:
	            json.dump(record, out)
	            out.write("\n")
	    except Exception as err:
	        return f"Error saving conversation: {str(err)}"
	    return "Conversation saved successfully!"

	# Function to delete all conversations (user-specific)
	def delete_all_conversations(api_key):
	    """Delete the caller's conversation log file, if there is one.

	    Args:
	        api_key: OpenAI API key identifying whose file to remove.

	    Returns:
	        A status message: a key prompt, "No conversations to delete." when
	        the file does not exist, a success message after removal, or
	        ``"Error deleting conversations: ..."`` when removal raises.
	    """
	    if not api_key:
	        return "Please enter your OpenAI API key first."

	    try:
	        target_path = get_user_file(api_key)
	        if not os.path.exists(target_path):
	            return "No conversations to delete."
	        os.remove(target_path)
	    except Exception as err:
	        return f"Error deleting conversations: {str(err)}"
	    return "All your conversations deleted successfully!"

	# Function to load previous conversations (user-specific)
	def load_conversations(api_key):
	    """Return the caller's saved conversations as display text.

	    Reads the file named by ``get_user_file(api_key)``, which holds one
	    JSON record per line, and renders every record with a numbered header,
	    its timestamp and its messages.

	    Args:
	        api_key: OpenAI API key identifying whose file to read.

	    Returns:
	        The formatted text, a key prompt, a "no conversations" message
	        when the file is missing or empty, or
	        ``"Error loading conversations: ..."`` when reading or parsing
	        fails.
	    """
	    if not api_key:
	        return "Please enter your OpenAI API key first to view your conversations."

	    user_file = get_user_file(api_key)
	    if not os.path.exists(user_file):
	        return "No previous conversations found for your account."

	    try:
	        with open(user_file, "r") as f:
	            # Skip blank lines so a stray newline does not fail parsing.
	            conversations = [json.loads(line) for line in f if line.strip()]
	        conv_text = _format_conversations(conversations)
	    except Exception as e:
	        return f"Error loading conversations: {str(e)}"
	    return conv_text if conv_text else "No previous conversations found."


	def _format_conversations(conversations):
	    """Render parsed conversation records as one display string."""
	    separator = "=" * 50
	    parts = []
	    for number, conv in enumerate(conversations, start=1):
	        if isinstance(conv, dict):
	            timestamp = conv.get("timestamp", "Unknown time")
	            messages = conv.get("messages", [])
	        else:
	            # NOTE(review): presumably an older save format stored a bare
	            # list of messages per line - confirm against existing files.
	            timestamp = "Unknown time"
	            messages = conv

	        parts.append(f"\n{separator}\nConversation {number}\n{separator}\n")
	        parts.append(f"Timestamp: {timestamp}\n\n")
	        for message in messages:
	            role = message.get('role', 'unknown')
	            content = message.get('content', '')
	            parts.append(f"{role.upper()}: {content}\n\n")
	    return "".join(parts)

	# Function to clear current conversation
	def clear_conversation():
	    """Reset the current chat session.

	    Rebinds the module-level ``conversation_history`` to a new empty list
	    and returns a separate empty list for the chat display.
	    """
	    global conversation_history
	    conversation_history = list()
	    return list()

	# Create Gradio interface
	# Gradio interface.
	# NOTE(review): the container nesting below was reconstructed from a source
	# whose indentation was lost - confirm the layout against the running app.
	with gr.Blocks(title="Chat with Documents 💬 📚", theme=gr.themes.Ocean()) as demo:
	    gr.Markdown("# Chat with Documents 💬 📚")
	    gr.Markdown("Upload PDF or DOCX files and chat with them using AI!")
	    # NOTE(review): chat_engine and conversation_history are module-level
	    # globals, so this statement only holds for the saved per-key files -
	    # verify before relying on it.
	    gr.Markdown("**Privacy Notice:** Your conversations are private and tied to your API key. Only you can see your saved conversations.")

	    # --- Setup: API key, upload and indexing ---
	    with gr.Row():
	        with gr.Column(scale=2):
	            api_key_input = gr.Textbox(
	                label="OpenAI API Key",
	                type="password",  # mask the key while typing
	                placeholder="Enter your OpenAI API key here...",
	            )

	            file_upload = gr.File(
	                label="Upload PDF or DOCX files",
	                file_count="multiple",
	                file_types=[".pdf", ".docx"],
	            )

	            load_btn = gr.Button("Load Documents", variant="primary")
	            load_status = gr.Textbox(label="Status", interactive=False)

	            load_btn.click(
	                fn=load_data,
	                inputs=[file_upload, api_key_input],
	                outputs=load_status,
	            )

	    # --- Chat area (left) and saved-conversation sidebar (right) ---
	    with gr.Row():
	        with gr.Column(scale=3):
	            chatbot = gr.Chatbot(
	                label="Chat",
	                height=400,
	            )
	            msg = gr.Textbox(
	                label="Your Question",
	                placeholder="Ask a question about your documents...",
	            )

	            with gr.Row():
	                submit_btn = gr.Button("Send", variant="primary")
	                clear_btn = gr.Button("Clear Chat")

	            with gr.Row():
	                save_btn = gr.Button("Save Conversation")
	                save_status = gr.Textbox(label="Save Status", interactive=False)

	        with gr.Column(scale=1):
	            gr.Markdown("### Your Previous Conversations")
	            load_convs_btn = gr.Button("Load Your Conversations")
	            convs_display = gr.Textbox(
	                label="Conversation History",
	                lines=20,
	                interactive=False,
	            )
	            delete_all_btn = gr.Button("Delete All Your Conversations", variant="stop")
	            delete_status = gr.Textbox(label="Delete Status", interactive=False)

	    # --- Event handlers ---
	    # The Send button and pressing Enter in the textbox do the same thing:
	    # run the chat, then clear the input box.
	    for send_event in (submit_btn.click, msg.submit):
	        send_event(
	            fn=chat_with_docs,
	            inputs=[msg, chatbot, api_key_input],
	            outputs=chatbot,
	        ).then(
	            lambda: "",
	            outputs=msg,
	        )

	    clear_btn.click(
	        fn=clear_conversation,
	        outputs=chatbot,
	    )

	    save_btn.click(
	        fn=save_conversation,
	        inputs=[api_key_input],
	        outputs=save_status,
	    )

	    load_convs_btn.click(
	        fn=load_conversations,
	        inputs=[api_key_input],
	        outputs=convs_display,
	    )

	    delete_all_btn.click(
	        fn=delete_all_conversations,
	        inputs=[api_key_input],
	        outputs=delete_status,
	    )

	if __name__ == "__main__":
	    # Start the Gradio server only when this file is run as a script,
	    # not when it is imported as a module.
	    demo.launch()