# Scrape artifact from the hosting page (Hugging Face Spaces status banner),
# preserved as comments so the file parses as Python:
# Spaces:
# Runtime error
# Runtime error
import streamlit as st
import os
from dotenv import load_dotenv
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
import PyPDF2
import tempfile
import time
from tenacity import retry, stop_after_attempt, wait_exponential

# Load environment variables
load_dotenv()

# Initialize Mistral client with increased timeout
client = MistralClient(
    api_key=os.environ["MISTRAL_API_KEY"],  # KeyError here if the env var is unset
    timeout=90  # Increase default timeout to 90 seconds
)

# Set page configuration
st.set_page_config(
    page_title="Mistral AI Document Chat",
    page_icon="๐",
    layout="wide"
)

# Add custom CSS to restyle the text inputs and progress bar
st.markdown("""
<style>
.stTextInput > div > div > input {
    background-color: #f0f2f6;
}
.stTextArea > div > div > textarea {
    background-color: #f0f2f6;
}
.stProgress > div > div {
    background-color: #00ff00;
}
</style>
""", unsafe_allow_html=True)

# Initialize session states
# messages: chat history as {"role", "content"} dicts
# document_content: raw text extracted from the uploaded PDF
# notes: AI-generated notes for the document
if "messages" not in st.session_state:
    st.session_state.messages = []
if "document_content" not in st.session_state:
    st.session_state.document_content = None
if "notes" not in st.session_state:
    st.session_state.notes = None
def extract_text_from_pdf(uploaded_file, progress_bar=None):
    """Extract the text of every page of an uploaded PDF.

    Args:
        uploaded_file: File-like object containing the PDF (e.g. a Streamlit
            UploadedFile).
        progress_bar: Optional Streamlit progress bar, updated once per page.

    Returns:
        A single string with each page's text followed by a newline.
    """
    pdf_reader = PyPDF2.PdfReader(uploaded_file)
    total_pages = len(pdf_reader.pages)
    text = ""
    for i, page in enumerate(pdf_reader.pages):
        # extract_text() can yield None/"" for image-only pages in some
        # PyPDF2 versions; guard so the concatenation never raises TypeError.
        text += (page.extract_text() or "") + "\n"
        if progress_bar:
            progress = (i + 1) / total_pages
            progress_bar.progress(progress, f"Extracting page {i + 1}/{total_pages}")
    return text
def chunk_text(text, max_chunk_size=4000):  # Reduced chunk size for better reliability
    """Split text into word-based chunks of at most ~max_chunk_size characters.

    Consecutive chunks share a trailing-word overlap so that context spanning
    a chunk boundary is not lost.

    Args:
        text: The text to split.
        max_chunk_size: Approximate maximum characters per chunk.

    Returns:
        A list of chunk strings (empty list for empty/whitespace-only text).
    """
    words = text.split()
    chunks = []
    current_chunk = []
    current_size = 0
    overlap_size = 200  # Number of words to overlap between chunks

    for word in words:
        word_size = len(word) + 1  # +1 accounts for the joining space
        if current_size + word_size > max_chunk_size and current_chunk:
            # NOTE: use a distinct local name; the original shadowed the
            # function name `chunk_text` with this variable.
            chunks.append(' '.join(current_chunk))
            # Keep the last `overlap_size` words for overlap. A plain slice
            # already handles lists shorter than overlap_size.
            current_chunk = current_chunk[-overlap_size:]
            current_size = sum(len(w) + 1 for w in current_chunk)
            if current_size + word_size > max_chunk_size:
                # The overlap alone still exceeds the limit (pathologically
                # long words). Drop it, otherwise the loop would emit
                # ever-growing near-duplicate chunks over the size cap.
                current_chunk = []
                current_size = 0
        current_chunk.append(word)
        current_size += word_size

    if current_chunk:
        chunks.append(' '.join(current_chunk))
    return chunks
def call_mistral_with_retry(messages, max_attempts=3):
    """Call the Mistral chat API, retrying transient failures.

    The original version only *claimed* retry logic (tenacity is imported at
    the top of the file but was never applied): it warned and re-raised on the
    first failure. This version retries up to ``max_attempts`` times with
    exponential backoff, re-raising the last error when attempts are exhausted.

    Args:
        messages: List of ChatMessage objects to send.
        max_attempts: Total number of attempts before giving up (default 3).

    Returns:
        The chat completion response from the client.

    Raises:
        Exception: The last error raised by the client after all attempts fail.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            return client.chat(
                model="mistral-medium",
                messages=messages
            )
        except Exception as e:
            if attempt == max_attempts:
                raise
            st.warning(f"API call failed, retrying... ({str(e)})")
            # Exponential backoff: 2s, 4s, ... capped at 30s.
            time.sleep(min(2 ** attempt, 30))
def generate_notes(text):
    """Generate structured notes for *text* via the Mistral API.

    The text is split into chunks, each chunk is summarized independently,
    and when more than one chunk exists the per-chunk notes are combined and
    re-summarized. Progress is reported through Streamlit widgets.

    Args:
        text: The full document text.

    Returns:
        The notes string, or None when nothing could be generated.
    """
    try:
        # Split text into chunks if it's too long
        chunks = chunk_text(text)
        all_notes = []

        # Create progress tracking
        progress_bar = st.progress(0)
        status_text = st.empty()
        total_chunks = len(chunks)

        for i, chunk in enumerate(chunks):
            status_text.text(f"Processing part {i + 1} of {total_chunks}")
            try:
                chunk_prompt = f"Part {i+1}/{total_chunks}: Create concise but comprehensive notes from this text section:\n\n{chunk}"
                response = call_mistral_with_retry([
                    ChatMessage(
                        role="system",
                        content="You are an expert at creating clear, concise notes. Focus on key points and main ideas. Use bullet points and clear formatting."
                    ),
                    ChatMessage(
                        role="user",
                        content=chunk_prompt
                    )
                ])
                all_notes.append(response.choices[0].message.content)
                progress_bar.progress((i + 1) / total_chunks)
            except Exception as e:
                st.error(f"Error processing chunk {i + 1}: {str(e)}")
                if i > 0:  # If we have some notes, continue with what we have
                    st.warning("Continuing with partial notes...")
                    break
                else:
                    # No usable notes yet; bare `raise` preserves the traceback
                    # (the original re-raised with `raise e`).
                    raise

        # Combine notes with progress tracking
        if len(all_notes) > 1:
            status_text.text("Combining all notes...")
            # Join BEFORE the try block: the except fallback below references
            # combined_notes, which previously could be unbound (NameError)
            # if the join itself had not yet executed.
            combined_notes = "\n\n".join(all_notes)
            try:
                # Split combined notes if too large
                summary_chunks = chunk_text(combined_notes, max_chunk_size=6000)
                final_notes = []
                for i, summary_chunk in enumerate(summary_chunks):
                    status_text.text(f"Summarizing part {i + 1} of {len(summary_chunks)}")
                    response = call_mistral_with_retry([
                        ChatMessage(
                            role="system",
                            content="You are an expert at summarizing and organizing notes. Create a clear, well-structured summary that maintains key information while eliminating redundancy."
                        ),
                        ChatMessage(
                            role="user",
                            content=f"Summarize this section of notes:\n\n{summary_chunk}"
                        )
                    ])
                    final_notes.append(response.choices[0].message.content)
                result = "\n\n".join(final_notes)
            except Exception:
                # Best-effort fallback: serve the raw concatenated notes.
                st.warning("Error during final summarization. Using concatenated notes instead.")
                result = combined_notes
        else:
            result = all_notes[0] if all_notes else None

        # Clean up progress indicators
        progress_bar.empty()
        status_text.empty()
        return result
    except Exception as e:
        st.error(f"Error generating notes: {str(e)}")
        return None
# Title
st.title("๐ Mistral AI Document Chat Assistant")
st.markdown("---")

# File upload section
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    try:
        # Extract text from PDF (re-runs on every Streamlit rerun while the
        # file stays uploaded)
        text = extract_text_from_pdf(uploaded_file)
        # Store the extracted text
        st.session_state.document_content = text
        # Generate and store notes
        if st.button("Generate Notes"):
            with st.spinner("Generating notes... This may take a moment for large documents."):
                notes = generate_notes(text)
                if notes:
                    st.session_state.notes = notes
                    st.success("Notes generated successfully!")
    except Exception as e:
        st.error(f"Error processing file: {str(e)}")

# Display notes if available
if st.session_state.notes:
    st.markdown("### Generated Notes")
    st.markdown(st.session_state.notes)
    st.markdown("---")
# Chat interface
st.markdown("### Chat with your Document")

# Display chat messages (replayed from session state on every rerun)
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Chat input
if prompt := st.chat_input("Ask questions about your document..."):
    if st.session_state.document_content is None:
        st.warning("Please upload a document first!")
    else:
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            try:
                doc_excerpt = st.session_state.document_content[:4000]  # Reduced context size
                # Single user message carrying document excerpt, notes, and
                # the question together.
                context = f"""Document excerpt: {doc_excerpt}...
Generated notes: {st.session_state.notes if st.session_state.notes else 'No notes generated yet'}
Please answer the following question about the document: {prompt}"""
                response = call_mistral_with_retry([
                    ChatMessage(
                        role="system",
                        content="You are an expert at analyzing documents and answering questions about their content. Provide detailed, accurate answers based on the document content and notes provided."
                    ),
                    ChatMessage(role="user", content=context)
                ])
                assistant_response = response.choices[0].message.content
                message_placeholder.markdown(assistant_response)
                st.session_state.messages.append(
                    {"role": "assistant", "content": assistant_response}
                )
            except Exception as e:
                message_placeholder.error(f"Error: {str(e)}")
# Sidebar: static help text plus a reset button
with st.sidebar:
    st.title("About")
    st.markdown("""
    This is a document analysis and chat interface powered by Mistral AI.
    ### Features:
    - Upload PDF files
    - Generate comprehensive notes
    - Chat about document content
    - Real-time AI responses
    ### How to use:
    1. Upload your PDF document
    2. Generate notes (optional)
    3. Ask questions about the content
    4. Get AI-powered responses
    """)
    # Clear chat and document button: wipes all session state and reruns
    if st.button("Clear All"):
        st.session_state.messages = []
        st.session_state.document_content = None
        st.session_state.notes = None
        st.rerun()