# realsanjay's picture
# Upload 7 files
# bd094f3 verified
import streamlit as st
import os
from dotenv import load_dotenv
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
import PyPDF2
import tempfile
import time
from tenacity import retry, stop_after_attempt, wait_exponential
# Load environment variables from a local .env file (if present)
load_dotenv()

# Initialize the Mistral client with an increased timeout.
# Fail fast with a readable message instead of an opaque KeyError
# when the API key is missing from the environment.
api_key = os.environ.get("MISTRAL_API_KEY")
if not api_key:
    st.error("MISTRAL_API_KEY environment variable is not set.")
    st.stop()
client = MistralClient(
    api_key=api_key,
    timeout=90  # Increase default timeout to 90 seconds
)
# Set page configuration (must be the first st.* call in the script).
# NOTE(review): the icon was mojibake ("๐Ÿ“š" = UTF-8 bytes of the books
# emoji mis-decoded); restored to the intended character.
st.set_page_config(
    page_title="Mistral AI Document Chat",
    page_icon="📚",
    layout="wide"
)
# Add custom CSS: light-grey backgrounds for text inputs/areas and a
# green progress bar.
st.markdown("""
<style>
.stTextInput > div > div > input {
    background-color: #f0f2f6;
}
.stTextArea > div > div > textarea {
    background-color: #f0f2f6;
}
.stProgress > div > div {
    background-color: #00ff00;
}
</style>
""", unsafe_allow_html=True)

# Initialize session state so values survive Streamlit reruns:
# messages         -> chat history as {"role", "content"} dicts
# document_content -> raw text extracted from the uploaded PDF
# notes            -> AI-generated notes for the document
if "messages" not in st.session_state:
    st.session_state.messages = []
if "document_content" not in st.session_state:
    st.session_state.document_content = None
if "notes" not in st.session_state:
    st.session_state.notes = None
def extract_text_from_pdf(uploaded_file, progress_bar=None):
    """Extract the text of every page of a PDF into one string.

    Args:
        uploaded_file: File-like object accepted by PyPDF2.PdfReader
            (e.g. a Streamlit UploadedFile).
        progress_bar: Optional st.progress widget updated per page.

    Returns:
        The concatenated page texts, one "\\n" after each page.
    """
    pdf_reader = PyPDF2.PdfReader(uploaded_file)
    total_pages = len(pdf_reader.pages)
    text = ""
    for i, page in enumerate(pdf_reader.pages):
        # extract_text() can return None (e.g. image-only pages);
        # coalesce to "" so concatenation never raises TypeError.
        text += (page.extract_text() or "") + "\n"
        if progress_bar:
            progress = (i + 1) / total_pages
            progress_bar.progress(progress, f"Extracting page {i + 1}/{total_pages}")
    return text
def chunk_text(text, max_chunk_size=4000): # Reduced chunk size for better reliability
"""Split text into smaller chunks with overlap."""
words = text.split()
chunks = []
current_chunk = []
current_size = 0
overlap_size = 200 # Number of words to overlap between chunks
for word in words:
word_size = len(word) + 1
if current_size + word_size > max_chunk_size and current_chunk:
chunk_text = ' '.join(current_chunk)
chunks.append(chunk_text)
# Keep last few words for overlap
current_chunk = current_chunk[-overlap_size:] if len(current_chunk) > overlap_size else current_chunk
current_size = sum(len(word) + 1 for word in current_chunk)
current_chunk.append(word)
current_size += word_size
if current_chunk:
chunks.append(' '.join(current_chunk))
return chunks
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def call_mistral_with_retry(messages):
    """Send *messages* to the Mistral chat API, retrying up to 3 times
    with exponential backoff (4-10s) on any exception."""
    try:
        response = client.chat(
            model="mistral-medium",
            messages=messages
        )
    except Exception as exc:
        # Surface the failure in the UI, then re-raise so tenacity retries.
        st.warning(f"API call failed, retrying... ({str(exc)})")
        raise
    return response
def generate_notes(text):
    """Generate structured notes for *text* using the Mistral chat API.

    The text is chunked (see chunk_text), notes are generated per chunk
    with progress shown in the UI, then multi-chunk notes are summarized
    into a final document.

    Returns:
        The notes string, or None if no notes could be produced / on error.
    """
    try:
        # Split text into chunks if it's too long
        chunks = chunk_text(text)
        all_notes = []

        # Progress-tracking widgets, removed again before returning
        progress_bar = st.progress(0)
        status_text = st.empty()
        total_chunks = len(chunks)

        for i, chunk in enumerate(chunks):
            status_text.text(f"Processing part {i + 1} of {total_chunks}")
            try:
                chunk_prompt = f"Part {i+1}/{total_chunks}: Create concise but comprehensive notes from this text section:\n\n{chunk}"
                response = call_mistral_with_retry([
                    ChatMessage(
                        role="system",
                        content="You are an expert at creating clear, concise notes. Focus on key points and main ideas. Use bullet points and clear formatting."
                    ),
                    ChatMessage(
                        role="user",
                        content=chunk_prompt
                    )
                ])
                all_notes.append(response.choices[0].message.content)
                progress_bar.progress((i + 1) / total_chunks)
            except Exception as e:
                st.error(f"Error processing chunk {i + 1}: {str(e)}")
                if i > 0:  # If we have some notes, continue with what we have
                    st.warning("Continuing with partial notes...")
                    break
                else:
                    # First chunk failed: nothing useful yet.
                    # Bare raise preserves the original traceback
                    # (the original "raise e" re-raised from here).
                    raise

        # Combine per-chunk notes into a final summary when needed
        if len(all_notes) > 1:
            status_text.text("Combining all notes...")
            # Hoisted out of the try so the fallback below can never hit
            # an unbound combined_notes.
            combined_notes = "\n\n".join(all_notes)
            try:
                # Split combined notes if too large for one summarization call
                summary_chunks = chunk_text(combined_notes, max_chunk_size=6000)
                final_notes = []
                for i, summary_chunk in enumerate(summary_chunks):
                    status_text.text(f"Summarizing part {i + 1} of {len(summary_chunks)}")
                    response = call_mistral_with_retry([
                        ChatMessage(
                            role="system",
                            content="You are an expert at summarizing and organizing notes. Create a clear, well-structured summary that maintains key information while eliminating redundancy."
                        ),
                        ChatMessage(
                            role="user",
                            content=f"Summarize this section of notes:\n\n{summary_chunk}"
                        )
                    ])
                    final_notes.append(response.choices[0].message.content)
                result = "\n\n".join(final_notes)
            except Exception:
                # Best-effort fallback: keep the unsummarized concatenation
                st.warning("Error during final summarization. Using concatenated notes instead.")
                result = combined_notes
        else:
            result = all_notes[0] if all_notes else None

        # Clean up progress indicators
        progress_bar.empty()
        status_text.empty()
        return result
    except Exception as e:
        st.error(f"Error generating notes: {str(e)}")
        return None
# Title
# NOTE(review): the title's leading emoji was mojibake ("๐Ÿ“š"); restored
# to the intended books emoji.
st.title("📚 Mistral AI Document Chat Assistant")
st.markdown("---")

# File upload section
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    try:
        # Extract text from PDF (re-runs on every Streamlit rerun while a
        # file is uploaded; extraction is local and relatively cheap)
        text = extract_text_from_pdf(uploaded_file)
        # Store the extracted text for the chat section below
        st.session_state.document_content = text
        # Generate and store notes on demand
        if st.button("Generate Notes"):
            with st.spinner("Generating notes... This may take a moment for large documents."):
                notes = generate_notes(text)
                if notes:
                    st.session_state.notes = notes
                    st.success("Notes generated successfully!")
    except Exception as e:
        st.error(f"Error processing file: {str(e)}")

# Display notes if available
if st.session_state.notes:
    st.markdown("### Generated Notes")
    st.markdown(st.session_state.notes)
    st.markdown("---")
# Chat interface
st.markdown("### Chat with your Document")

# Replay the stored conversation so it survives Streamlit reruns
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Chat input; walrus binds the submitted prompt (None/"" when no input)
if prompt := st.chat_input("Ask questions about your document..."):
    if st.session_state.document_content is None:
        st.warning("Please upload a document first!")
    else:
        # Record and echo the user's message
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            try:
                # Only the first 4000 chars of the document are sent as
                # context to keep the request within model limits
                doc_excerpt = st.session_state.document_content[:4000]  # Reduced context size
                context = f"""Document excerpt: {doc_excerpt}...
Generated notes: {st.session_state.notes if st.session_state.notes else 'No notes generated yet'}
Please answer the following question about the document: {prompt}"""
                response = call_mistral_with_retry([
                    ChatMessage(
                        role="system",
                        content="You are an expert at analyzing documents and answering questions about their content. Provide detailed, accurate answers based on the document content and notes provided."
                    ),
                    ChatMessage(role="user", content=context)
                ])
                assistant_response = response.choices[0].message.content
                message_placeholder.markdown(assistant_response)
                # Persist the reply so it is replayed on the next rerun
                st.session_state.messages.append(
                    {"role": "assistant", "content": assistant_response}
                )
            except Exception as e:
                message_placeholder.error(f"Error: {str(e)}")
# Sidebar: static help text plus a reset button
with st.sidebar:
    st.title("About")
    st.markdown("""
This is a document analysis and chat interface powered by Mistral AI.
### Features:
- Upload PDF files
- Generate comprehensive notes
- Chat about document content
- Real-time AI responses
### How to use:
1. Upload your PDF document
2. Generate notes (optional)
3. Ask questions about the content
4. Get AI-powered responses
""")
    # Clear chat and document button: wipe all session state, then rerun
    # so the UI immediately reflects the empty state
    if st.button("Clear All"):
        st.session_state.messages = []
        st.session_state.document_content = None
        st.session_state.notes = None
        st.rerun()