"""Streamlit app: upload a PDF, generate AI notes, and chat about the document
using the Mistral AI chat API (with retry/backoff via tenacity)."""

import streamlit as st
import os
from dotenv import load_dotenv
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
import PyPDF2
import tempfile
import time
from tenacity import retry, stop_after_attempt, wait_exponential

# Load environment variables (expects MISTRAL_API_KEY in .env or the environment)
load_dotenv()

# Initialize Mistral client with increased timeout
client = MistralClient(
    api_key=os.environ["MISTRAL_API_KEY"],
    timeout=90,  # Increase default timeout to 90 seconds
)

# Set page configuration
st.set_page_config(
    page_title="Mistral AI Document Chat",
    page_icon="📚",
    layout="wide",
)

# Add custom CSS (currently an empty placeholder block)
st.markdown("""
""", unsafe_allow_html=True)

# Initialize session states used across reruns
if "messages" not in st.session_state:
    st.session_state.messages = []          # chat history: [{"role": ..., "content": ...}]
if "document_content" not in st.session_state:
    st.session_state.document_content = None  # raw extracted PDF text
if "notes" not in st.session_state:
    st.session_state.notes = None             # AI-generated notes, once produced


def extract_text_from_pdf(uploaded_file, progress_bar=None):
    """Extract text from every page of an uploaded PDF.

    Args:
        uploaded_file: file-like object containing the PDF.
        progress_bar: optional Streamlit progress bar updated per page.

    Returns:
        The concatenated page text, one newline after each page.
    """
    pdf_reader = PyPDF2.PdfReader(uploaded_file)
    total_pages = len(pdf_reader.pages)
    text = ""
    for i, page in enumerate(pdf_reader.pages):
        # extract_text() can return None for image-only/scanned pages;
        # fall back to "" so the concatenation never raises TypeError.
        text += (page.extract_text() or "") + "\n"
        if progress_bar:
            progress = (i + 1) / total_pages
            progress_bar.progress(progress, f"Extracting page {i + 1}/{total_pages}")
    return text


def chunk_text(text, max_chunk_size=4000):  # Reduced chunk size for better reliability
    """Split text into chunks of at most ~max_chunk_size characters.

    Consecutive chunks overlap by up to ``overlap_size`` words so that
    context is not lost at chunk boundaries.

    Args:
        text: the full document text.
        max_chunk_size: soft cap on chunk length in characters.

    Returns:
        List of chunk strings (empty list for empty/whitespace-only input).
    """
    words = text.split()
    chunks = []
    current_chunk = []
    current_size = 0
    overlap_size = 200  # Number of words to overlap between chunks

    for word in words:
        word_size = len(word) + 1  # +1 accounts for the joining space
        if current_size + word_size > max_chunk_size and current_chunk:
            # NOTE: do not shadow the function name with a local here.
            chunks.append(' '.join(current_chunk))
            # Keep the last `overlap_size` words for overlap; slicing already
            # returns the whole list when it is shorter than the overlap.
            current_chunk = current_chunk[-overlap_size:]
            current_size = sum(len(w) + 1 for w in current_chunk)
        current_chunk.append(word)
        current_size += word_size

    if current_chunk:
        chunks.append(' '.join(current_chunk))
    return chunks


@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def call_mistral_with_retry(messages):
    """Call Mistral API with retry logic.

    Retries up to 3 times with exponential backoff (4-10s) on any failure.

    Args:
        messages: list of ChatMessage objects.

    Returns:
        The chat completion response.
    """
    try:
        return client.chat(
            model="mistral-medium",
            messages=messages,
        )
    except Exception as e:
        st.warning(f"API call failed, retrying... ({str(e)})")
        # Bare raise preserves the original traceback for tenacity to handle.
        raise


def generate_notes(text):
    """Generate comprehensive notes for `text` via chunked Mistral calls.

    Long documents are split into chunks; each chunk is summarized
    separately, then the partial notes are combined and re-summarized.

    Args:
        text: the document text to summarize.

    Returns:
        The final notes string, or None on failure / empty input.
    """
    try:
        # Split text into chunks if it's too long
        chunks = chunk_text(text)
        all_notes = []

        # Create progress tracking widgets
        progress_bar = st.progress(0)
        status_text = st.empty()
        total_chunks = len(chunks)

        for i, chunk in enumerate(chunks):
            status_text.text(f"Processing part {i + 1} of {total_chunks}")
            try:
                chunk_prompt = f"Part {i+1}/{total_chunks}: Create concise but comprehensive notes from this text section:\n\n{chunk}"
                response = call_mistral_with_retry([
                    ChatMessage(
                        role="system",
                        content="You are an expert at creating clear, concise notes. Focus on key points and main ideas. Use bullet points and clear formatting."
                    ),
                    ChatMessage(
                        role="user",
                        content=chunk_prompt
                    )
                ])
                all_notes.append(response.choices[0].message.content)
                progress_bar.progress((i + 1) / total_chunks)
            except Exception as e:
                st.error(f"Error processing chunk {i + 1}: {str(e)}")
                if i > 0:
                    # If we have some notes, continue with what we have
                    st.warning("Continuing with partial notes...")
                    break
                else:
                    # Nothing produced yet — propagate with the original traceback.
                    raise

        # Combine notes with progress tracking
        if len(all_notes) > 1:
            status_text.text("Combining all notes...")
            # Join outside the try so the fallback below can never hit a
            # NameError on `combined_notes`.
            combined_notes = "\n\n".join(all_notes)
            try:
                # Split combined notes if too large
                summary_chunks = chunk_text(combined_notes, max_chunk_size=6000)
                final_notes = []
                for i, summary_chunk in enumerate(summary_chunks):
                    status_text.text(f"Summarizing part {i + 1} of {len(summary_chunks)}")
                    response = call_mistral_with_retry([
                        ChatMessage(
                            role="system",
                            content="You are an expert at summarizing and organizing notes. Create a clear, well-structured summary that maintains key information while eliminating redundancy."
                        ),
                        ChatMessage(
                            role="user",
                            content=f"Summarize this section of notes:\n\n{summary_chunk}"
                        )
                    ])
                    final_notes.append(response.choices[0].message.content)
                result = "\n\n".join(final_notes)
            except Exception as e:
                st.warning("Error during final summarization. Using concatenated notes instead.")
                result = combined_notes
        else:
            result = all_notes[0] if all_notes else None

        # Clean up progress indicators
        progress_bar.empty()
        status_text.empty()

        return result
    except Exception as e:
        st.error(f"Error generating notes: {str(e)}")
        return None


# Title
st.title("📚 Mistral AI Document Chat Assistant")
st.markdown("---")

# File upload section
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    try:
        # Extract text from PDF
        text = extract_text_from_pdf(uploaded_file)

        # Store the extracted text
        st.session_state.document_content = text

        # Generate and store notes
        if st.button("Generate Notes"):
            with st.spinner("Generating notes... This may take a moment for large documents."):
                notes = generate_notes(text)
                if notes:
                    st.session_state.notes = notes
                    st.success("Notes generated successfully!")
    except Exception as e:
        st.error(f"Error processing file: {str(e)}")

# Display notes if available
if st.session_state.notes:
    st.markdown("### Generated Notes")
    st.markdown(st.session_state.notes)
    st.markdown("---")

# Chat interface
st.markdown("### Chat with your Document")

# Display chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Chat input
if prompt := st.chat_input("Ask questions about your document..."):
    if st.session_state.document_content is None:
        st.warning("Please upload a document first!")
    else:
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            try:
                doc_excerpt = st.session_state.document_content[:4000]  # Reduced context size
                context = f"""Document excerpt: {doc_excerpt}...

Generated notes: {st.session_state.notes if st.session_state.notes else 'No notes generated yet'}

Please answer the following question about the document: {prompt}"""

                response = call_mistral_with_retry([
                    ChatMessage(
                        role="system",
                        content="You are an expert at analyzing documents and answering questions about their content. Provide detailed, accurate answers based on the document content and notes provided."
                    ),
                    ChatMessage(role="user", content=context)
                ])

                assistant_response = response.choices[0].message.content
                message_placeholder.markdown(assistant_response)
                st.session_state.messages.append(
                    {"role": "assistant", "content": assistant_response}
                )
            except Exception as e:
                message_placeholder.error(f"Error: {str(e)}")

# Sidebar
with st.sidebar:
    st.title("About")
    st.markdown("""
    This is a document analysis and chat interface powered by Mistral AI.

    ### Features:
    - Upload PDF files
    - Generate comprehensive notes
    - Chat about document content
    - Real-time AI responses

    ### How to use:
    1. Upload your PDF document
    2. Generate notes (optional)
    3. Ask questions about the content
    4. Get AI-powered responses
    """)

    # Clear chat and document button
    if st.button("Clear All"):
        st.session_state.messages = []
        st.session_state.document_content = None
        st.session_state.notes = None
        st.rerun()