# realsanjay's picture
# Upload 7 files
# bd094f3 verified
import streamlit as st
import os
from dotenv import load_dotenv
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
import PyPDF2
import tempfile
import time
from tenacity import retry, stop_after_attempt, wait_exponential
# Load environment variables from a local .env file (if present)
load_dotenv()

# Initialize the Mistral client with an increased timeout.
# Fail fast with a readable message instead of an opaque KeyError
# when the API key is missing from the environment.
api_key = os.environ.get("MISTRAL_API_KEY")
if not api_key:
    st.error("MISTRAL_API_KEY environment variable is not set.")
    st.stop()
client = MistralClient(
    api_key=api_key,
    timeout=90  # Increase default timeout to 90 seconds
)
# Set page configuration (must be the first st.* call in the script).
# NOTE(review): the icon was mojibake ("๐Ÿ“š" = UTF-8 bytes of the books
# emoji mis-decoded); restored to the intended character.
st.set_page_config(
    page_title="Mistral AI Document Chat",
    page_icon="📚",
    layout="wide"
)
# Add custom CSS: light-grey backgrounds for text inputs/areas and a
# green progress bar.
st.markdown("""
<style>
.stTextInput > div > div > input {
    background-color: #f0f2f6;
}
.stTextArea > div > div > textarea {
    background-color: #f0f2f6;
}
.stProgress > div > div {
    background-color: #00ff00;
}
</style>
""", unsafe_allow_html=True)

# Initialize session state so values survive Streamlit reruns:
# messages         -> chat history as {"role", "content"} dicts
# document_content -> raw text extracted from the uploaded PDF
# notes            -> AI-generated notes for the document
if "messages" not in st.session_state:
    st.session_state.messages = []
if "document_content" not in st.session_state:
    st.session_state.document_content = None
if "notes" not in st.session_state:
    st.session_state.notes = None
def extract_text_from_pdf(uploaded_file, progress_bar=None):
    """Extract the text of every page of a PDF into one string.

    Args:
        uploaded_file: File-like object accepted by PyPDF2.PdfReader
            (e.g. a Streamlit UploadedFile).
        progress_bar: Optional st.progress widget updated per page.

    Returns:
        The concatenated page texts, one "\\n" after each page.
    """
    pdf_reader = PyPDF2.PdfReader(uploaded_file)
    total_pages = len(pdf_reader.pages)
    text = ""
    for i, page in enumerate(pdf_reader.pages):
        # extract_text() can return None (e.g. image-only pages);
        # coalesce to "" so concatenation never raises TypeError.
        text += (page.extract_text() or "") + "\n"
        if progress_bar:
            progress = (i + 1) / total_pages
            progress_bar.progress(progress, f"Extracting page {i + 1}/{total_pages}")
    return text
def chunk_text(text, max_chunk_size=4000): # Reduced chunk size for better reliability
"""Split text into smaller chunks with overlap."""
words = text.split()
chunks = []
current_chunk = []
current_size = 0
overlap_size = 200 # Number of words to overlap between chunks
for word in words:
word_size = len(word) + 1
if current_size + word_size > max_chunk_size and current_chunk:
chunk_text = ' '.join(current_chunk)
chunks.append(chunk_text)
# Keep last few words for overlap
current_chunk = current_chunk[-overlap_size:] if len(current_chunk) > overlap_size else current_chunk
current_size = sum(len(word) + 1 for word in current_chunk)
current_chunk.append(word)
current_size += word_size
if current_chunk:
chunks.append(' '.join(current_chunk))
return chunks
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def call_mistral_with_retry(messages):
    """Send *messages* to the Mistral chat API, retrying up to 3 times
    with exponential backoff (4-10s) on any exception."""
    try:
        response = client.chat(
            model="mistral-medium",
            messages=messages
        )
    except Exception as exc:
        # Surface the failure in the UI, then re-raise so tenacity retries.
        st.warning(f"API call failed, retrying... ({str(exc)})")
        raise
    return response
def generate_notes(text):
    """Generate structured notes for *text* using the Mistral chat API.

    The text is chunked (see chunk_text), notes are generated per chunk
    with progress shown in the UI, then multi-chunk notes are summarized
    into a final document.

    Returns:
        The notes string, or None if no notes could be produced / on error.
    """
    try:
        # Split text into chunks if it's too long
        chunks = chunk_text(text)
        all_notes = []

        # Progress-tracking widgets, removed again before returning
        progress_bar = st.progress(0)
        status_text = st.empty()
        total_chunks = len(chunks)

        for i, chunk in enumerate(chunks):
            status_text.text(f"Processing part {i + 1} of {total_chunks}")
            try:
                chunk_prompt = f"Part {i+1}/{total_chunks}: Create concise but comprehensive notes from this text section:\n\n{chunk}"
                response = call_mistral_with_retry([
                    ChatMessage(
                        role="system",
                        content="You are an expert at creating clear, concise notes. Focus on key points and main ideas. Use bullet points and clear formatting."
                    ),
                    ChatMessage(
                        role="user",
                        content=chunk_prompt
                    )
                ])
                all_notes.append(response.choices[0].message.content)
                progress_bar.progress((i + 1) / total_chunks)
            except Exception as e:
                st.error(f"Error processing chunk {i + 1}: {str(e)}")
                if i > 0:  # If we have some notes, continue with what we have
                    st.warning("Continuing with partial notes...")
                    break
                else:
                    # First chunk failed: nothing useful yet.
                    # Bare raise preserves the original traceback
                    # (the original "raise e" re-raised from here).
                    raise

        # Combine per-chunk notes into a final summary when needed
        if len(all_notes) > 1:
            status_text.text("Combining all notes...")
            # Hoisted out of the try so the fallback below can never hit
            # an unbound combined_notes.
            combined_notes = "\n\n".join(all_notes)
            try:
                # Split combined notes if too large for one summarization call
                summary_chunks = chunk_text(combined_notes, max_chunk_size=6000)
                final_notes = []
                for i, summary_chunk in enumerate(summary_chunks):
                    status_text.text(f"Summarizing part {i + 1} of {len(summary_chunks)}")
                    response = call_mistral_with_retry([
                        ChatMessage(
                            role="system",
                            content="You are an expert at summarizing and organizing notes. Create a clear, well-structured summary that maintains key information while eliminating redundancy."
                        ),
                        ChatMessage(
                            role="user",
                            content=f"Summarize this section of notes:\n\n{summary_chunk}"
                        )
                    ])
                    final_notes.append(response.choices[0].message.content)
                result = "\n\n".join(final_notes)
            except Exception:
                # Best-effort fallback: keep the unsummarized concatenation
                st.warning("Error during final summarization. Using concatenated notes instead.")
                result = combined_notes
        else:
            result = all_notes[0] if all_notes else None

        # Clean up progress indicators
        progress_bar.empty()
        status_text.empty()
        return result
    except Exception as e:
        st.error(f"Error generating notes: {str(e)}")
        return None
# Title
# NOTE(review): the title's leading emoji was mojibake ("๐Ÿ“š"); restored
# to the intended books emoji.
st.title("📚 Mistral AI Document Chat Assistant")
st.markdown("---")

# File upload section
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    try:
        # Extract text from PDF (re-runs on every Streamlit rerun while a
        # file is uploaded; extraction is local and relatively cheap)
        text = extract_text_from_pdf(uploaded_file)
        # Store the extracted text for the chat section below
        st.session_state.document_content = text
        # Generate and store notes on demand
        if st.button("Generate Notes"):
            with st.spinner("Generating notes... This may take a moment for large documents."):
                notes = generate_notes(text)
                if notes:
                    st.session_state.notes = notes
                    st.success("Notes generated successfully!")
    except Exception as e:
        st.error(f"Error processing file: {str(e)}")

# Display notes if available
if st.session_state.notes:
    st.markdown("### Generated Notes")
    st.markdown(st.session_state.notes)
    st.markdown("---")
# Chat interface
st.markdown("### Chat with your Document")

# Replay the stored conversation so it survives Streamlit reruns
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Chat input; walrus binds the submitted prompt (None/"" when no input)
if prompt := st.chat_input("Ask questions about your document..."):
    if st.session_state.document_content is None:
        st.warning("Please upload a document first!")
    else:
        # Record and echo the user's message
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            try:
                # Only the first 4000 chars of the document are sent as
                # context to keep the request within model limits
                doc_excerpt = st.session_state.document_content[:4000]  # Reduced context size
                context = f"""Document excerpt: {doc_excerpt}...
Generated notes: {st.session_state.notes if st.session_state.notes else 'No notes generated yet'}
Please answer the following question about the document: {prompt}"""
                response = call_mistral_with_retry([
                    ChatMessage(
                        role="system",
                        content="You are an expert at analyzing documents and answering questions about their content. Provide detailed, accurate answers based on the document content and notes provided."
                    ),
                    ChatMessage(role="user", content=context)
                ])
                assistant_response = response.choices[0].message.content
                message_placeholder.markdown(assistant_response)
                # Persist the reply so it is replayed on the next rerun
                st.session_state.messages.append(
                    {"role": "assistant", "content": assistant_response}
                )
            except Exception as e:
                message_placeholder.error(f"Error: {str(e)}")
# Sidebar: static help text plus a reset button
with st.sidebar:
    st.title("About")
    st.markdown("""
This is a document analysis and chat interface powered by Mistral AI.
### Features:
- Upload PDF files
- Generate comprehensive notes
- Chat about document content
- Real-time AI responses
### How to use:
1. Upload your PDF document
2. Generate notes (optional)
3. Ask questions about the content
4. Get AI-powered responses
""")
    # Clear chat and document button: wipe all session state, then rerun
    # so the UI immediately reflects the empty state
    if st.button("Clear All"):
        st.session_state.messages = []
        st.session_state.document_content = None
        st.session_state.notes = None
        st.rerun()