"""Streamlit front end for DocMind AI, a multi-file RAG chat application.

The script renders three areas:

* a sidebar listing the documents known to the RAG engine, with upload,
  sample-load, remove, clear-chat and reset controls;
* a row of summary statistics;
* the chat transcript and chat input.

All document and retrieval work is delegated to ``rag_engine.RAGEngine``;
this module only drives the UI and keeps the chat transcript in
``st.session_state``.
"""
import hashlib
import html
import os
import time
from pathlib import Path

import streamlit as st
from streamlit import config

# ─── Page Config ──────────────────────────────────────────────────────────────
st.set_page_config(
    page_title="DocMind AI – Multimodal RAG",
    page_icon="🧠",
    layout="wide",
    initial_sidebar_state="expanded",
)

# NOTE(review): these two options switch off Streamlit's CORS and XSRF
# protections. They are kept because the deployment may depend on them, but
# they weaken the app's security posture — confirm they are really required.
config.set_option("server.enableCORS", False)
config.set_option("server.enableXsrfProtection", False)

MAX_FILES = 5

# ─── Lookup tables ────────────────────────────────────────────────────────────
# suffix -> (css class, badge label)
_BADGES = {
    ".pdf": ("pdf", "PDF"),
    ".txt": ("text", "TXT"),
    ".docx": ("docx", "DOCX"),
    ".doc": ("docx", "DOC"),
    ".csv": ("csv", "CSV"),
    ".xlsx": ("excel", "XLSX"),
    ".xls": ("excel", "XLS"),
    ".jpg": ("image", "IMAGE"),
    ".jpeg": ("image", "IMAGE"),
    ".png": ("image", "IMAGE"),
    ".webp": ("image", "IMAGE"),
}

# suffix -> emoji shown next to a document
_EMOJIS = {
    ".pdf": "📄",
    ".txt": "📄",
    ".docx": "📝",
    ".doc": "📝",
    ".csv": "📊",
    ".xlsx": "📊",
    ".xls": "📊",
    ".jpg": "🖼️",
    ".jpeg": "🖼️",
    ".png": "🖼️",
    ".webp": "🖼️",
}

_IMAGE_INGEST_MSG = "🖼️ Processing image (OCR + captioning)..."

# suffix -> spinner text shown while an uploaded file is ingested
_INGEST_MESSAGES = {
    ".pdf": "Reading PDF...",
    ".txt": "Reading text...",
    ".docx": "Reading Word doc...",
    ".doc": "Reading Word doc...",  # accepted by the uploader, so give it a message too
    ".csv": "Parsing CSV...",
    ".xlsx": "Parsing Excel...",
    ".xls": "Parsing Excel...",
    ".jpg": _IMAGE_INGEST_MSG,
    ".jpeg": _IMAGE_INGEST_MSG,
    ".png": _IMAGE_INGEST_MSG,
    ".webp": _IMAGE_INGEST_MSG,
}

_IMAGE_PLACEHOLDER = "Ask me what I see in this image..."

# suffix -> chat-input placeholder used when exactly one document is loaded
_PLACEHOLDERS = {
    ".pdf": "Ask anything about this PDF...",
    ".txt": "Ask anything about this text...",
    ".docx": "Ask anything about this document...",
    ".doc": "Ask anything about this document...",
    ".csv": "Ask about the data, columns, or statistics...",
    ".xlsx": "Ask about the spreadsheet data...",
    ".xls": "Ask about the spreadsheet data...",
    ".jpg": _IMAGE_PLACEHOLDER,
    ".jpeg": _IMAGE_PLACEHOLDER,
    ".png": _IMAGE_PLACEHOLDER,
    ".webp": _IMAGE_PLACEHOLDER,
}

_UPLOAD_TYPES = [
    "pdf", "txt", "docx", "doc", "csv", "xlsx", "xls",
    "jpg", "jpeg", "png", "webp",
]

# ─── CSS ──────────────────────────────────────────────────────────────────────
# NOTE(review): this literal contains only whitespace — presumably a stylesheet
# belongs here; confirm against the original source.
st.markdown(""" """, unsafe_allow_html=True)


# ─── Cache RAG engine ─────────────────────────────────────────────────────────
@st.cache_resource(show_spinner=False)
def load_rag_engine():
    """Create the RAG engine once and reuse it via ``st.cache_resource``.

    The import is deferred so the engine module is only loaded on first use.
    """
    from rag_engine import RAGEngine

    return RAGEngine()


# ─── Session state ────────────────────────────────────────────────────────────
defaults = {
    "messages": [],
    "processed_files": {},  # {filename: md5_hash}
}
for k, v in defaults.items():
    if k not in st.session_state:
        st.session_state[k] = v


def file_type_badge(suffix: str) -> str:
    """Return the badge text for a file suffix such as ``".pdf"``.

    Unknown suffixes fall back to the upper-cased suffix itself.
    """
    # NOTE(review): the CSS class is looked up but not used in the returned
    # string — presumably markup that referenced it is missing; confirm.
    cls, label = _BADGES.get(suffix, ("text", suffix.upper()))
    return f'{label}'


def type_emoji(suffix: str) -> str:
    """Return the emoji for a file suffix, defaulting to the page emoji."""
    return _EMOJIS.get(suffix, "📄")


def _esc(text: object) -> str:
    """Escape ``&``, ``<`` and ``>`` so *text* is safe inside raw HTML markdown.

    Document names, chat prompts, answers and source labels all originate
    outside this script and are rendered with ``unsafe_allow_html=True``.
    """
    return html.escape(str(text), quote=False)


def _stat_card(value: object, label: str) -> str:
    """Build the markdown for one summary statistic (value above label)."""
    return "\n\n" f"\n\n{value}\n\n" f"\n\n{label}\n\n"


# ─── Load RAG engine & get document state ─────────────────────────────────────
rag = load_rag_engine()
documents = rag.get_documents()  # [{name, type, chunk_count}]
doc_loaded = len(documents) > 0
total_chunks = rag.get_total_chunks()
file_count = rag.get_file_count()

# ─── Sidebar ──────────────────────────────────────────────────────────────────
with st.sidebar:
    st.markdown('\n\n🧠 DocMind AI\n\n', unsafe_allow_html=True)
    st.markdown('\n\nMultimodal RAG · Multi-File · Memory\n\n', unsafe_allow_html=True)
    st.markdown("---")

    # ── Document List ─────────────────────────────────────────────────────────
    if documents:
        mem_count = rag.get_memory_count()
        st.markdown(
            f'✓ Ready '
            f'{file_count}/{MAX_FILES} files',
            unsafe_allow_html=True,
        )
        st.markdown(
            f'\n\n'
            f'{total_chunks} total chunks · {mem_count} exchanges in memory\n\n',
            unsafe_allow_html=True,
        )
        st.markdown("")

        # Show each document with a remove button
        for doc in documents:
            col_doc, col_rm = st.columns([5, 1])
            with col_doc:
                badge = file_type_badge(doc["type"])
                emoji = type_emoji(doc["type"])
                st.markdown(
                    f'\n\n'
                    f'{badge} {_esc(doc["name"])}'
                    f'\n'
                    f'{emoji} {doc["chunk_count"]} chunks'
                    f'\n\n',
                    unsafe_allow_html=True,
                )
            with col_rm:
                st.markdown('\n\n', unsafe_allow_html=True)
                if st.button("❌", key=f"rm_{doc['name']}", help=f"Remove {doc['name']}"):
                    rag.remove_file(doc["name"])
                    # Remove from processed_files tracking
                    st.session_state.processed_files = {
                        k: v
                        for k, v in st.session_state.processed_files.items()
                        if k != doc["name"]
                    }
                    st.rerun()
    else:
        st.markdown('○ No documents loaded', unsafe_allow_html=True)

    st.markdown("---")

    # ── Upload Area ───────────────────────────────────────────────────────────
    st.markdown(
        '\n\n'
        'Upload Document\n\n',
        unsafe_allow_html=True,
    )
    st.markdown(
        '\n\n'
        'PDF · TXT · DOCX · CSV · XLSX · JPG · PNG\n\n',
        unsafe_allow_html=True,
    )

    if file_count >= MAX_FILES:
        st.warning(f"Maximum {MAX_FILES} files reached. Remove a file to upload more.")
        uploaded_file = None
    else:
        uploaded_file = st.file_uploader(
            "Upload",
            type=_UPLOAD_TYPES,
            label_visibility="collapsed",
        )

    if uploaded_file:
        file_hash = hashlib.md5(uploaded_file.read()).hexdigest()
        uploaded_file.seek(0)  # rewind so the engine reads from the start

        # Check if this exact file (by hash) was already processed
        already_processed = file_hash in st.session_state.processed_files.values()

        if not already_processed:
            suffix = Path(uploaded_file.name).suffix.lower()
            type_msg = _INGEST_MESSAGES.get(suffix, "Processing...")

            with st.spinner(type_msg):
                # Only the ingest call is guarded; the success path (including
                # st.rerun) lives in `else` so it can never be caught below.
                try:
                    chunks = rag.ingest_file(uploaded_file)
                except ValueError as e:
                    st.error(str(e))
                except Exception as e:
                    st.error(f"Failed to process file: {e}")
                else:
                    st.session_state.processed_files[uploaded_file.name] = file_hash
                    st.success(f"✓ Indexed {chunks} chunks from {uploaded_file.name}!")
                    st.rerun()

    st.markdown("---")

    # ── Sample doc ────────────────────────────────────────────────────────────
    st.markdown(
        '\n\n'
        'Or try a sample\n\n',
        unsafe_allow_html=True,
    )
    if st.button("📥 Load Sample: AI Report", use_container_width=True):
        if file_count >= MAX_FILES:
            st.error(f"Maximum {MAX_FILES} files reached. Remove a file first.")
        else:
            with st.spinner("Downloading sample..."):
                # The download is inside the try as well: a failed fetch should
                # show an error in the sidebar, not an uncaught traceback.
                try:
                    from data_downloader import download_sample_doc

                    path, name = download_sample_doc()
                    chunks = rag.ingest_path(path, name)
                except ValueError as e:
                    st.error(str(e))
                except Exception as e:
                    st.error(f"Failed to load sample: {e}")
                else:
                    st.session_state.processed_files[name] = "sample"
                    st.success(f"✓ {chunks} chunks loaded!")
                    st.rerun()

    st.markdown("---")

    # ── Action buttons ────────────────────────────────────────────────────────
    col_a, col_b = st.columns(2)
    with col_a:
        if st.button("🗑️ Clear Chat", use_container_width=True):
            st.session_state.messages = []
            rag.clear_memory()
            st.rerun()
    with col_b:
        if st.button("🔄 Reset All", use_container_width=True):
            rag.reset()
            st.session_state.messages = []
            st.session_state.processed_files = {}
            st.rerun()

    st.markdown("---")
    st.markdown("""

Stack
🔗 LangChain · ChromaDB
🤗 MiniLM Embeddings
🦙 Llama-3 / Mistral-7B
🖼️ BLIP + VLM Captioning
💬 Conversation Memory
📁 Up to 5 files simultaneously
🌊 Streamlit + FastAPI

""", unsafe_allow_html=True)

# ─── Main Area ────────────────────────────────────────────────────────────────
st.markdown('\n\nDocMind AI\n\n', unsafe_allow_html=True)
st.markdown(
    '\n\n'
    'PDF · Word · CSV · Excel · Images — Upload up to 5 files. Ask anything. Remembers your conversation.'
    '\n\n',
    unsafe_allow_html=True,
)

# ── Stats ─────────────────────────────────────────────────────────────────────
c1, c2, c3, c4 = st.columns(4)
with c1:
    st.markdown(_stat_card(total_chunks or "—", "Chunks Indexed"), unsafe_allow_html=True)
with c2:
    st.markdown(_stat_card(f"{file_count}/{MAX_FILES}", "Files Loaded"), unsafe_allow_html=True)
with c3:
    # Each question adds one user and one assistant message.
    st.markdown(
        _stat_card(len(st.session_state.messages) // 2, "Questions Asked"),
        unsafe_allow_html=True,
    )
with c4:
    st.markdown(_stat_card(rag.get_memory_count(), "Memory Window"), unsafe_allow_html=True)

st.markdown("\n", unsafe_allow_html=True)

# ─── Chat history ─────────────────────────────────────────────────────────────
if not st.session_state.messages:
    if doc_loaded:
        # Show loaded files summary
        file_names = ", ".join(_esc(d["name"]) for d in documents)
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # emoji row is stable between reruns.
        emojis = " ".join(dict.fromkeys(type_emoji(d["type"]) for d in documents))
        st.markdown(f"""

{emojis}

{file_count} document{'s' if file_count > 1 else ''} ready!

Ask anything about {file_names}

I'll remember your conversation — ask follow-up questions naturally. {'You can also upload more files (up to 5).' if file_count < MAX_FILES else ''}

""", unsafe_allow_html=True)
    else:
        st.markdown("""

🧠

Multimodal RAG — Upload up to 5 files

📄 PDF · 📝 Word · 📊 CSV/Excel · 🖼️ Images

Upload in the sidebar or load the sample AI report to get started.
You can upload multiple files and ask questions across all of them.

""", unsafe_allow_html=True)
else:
    for msg in st.session_state.messages:
        # Chat text comes from the user or the model; escape before it is
        # rendered with unsafe_allow_html=True.
        content = _esc(msg["content"])

        if msg["role"] == "user":
            st.markdown(f"""

You

{content}

""", unsafe_allow_html=True)
            continue

        mem = msg.get("memory_count", 0)
        mem_badge = f'💬 {mem} in memory' if mem > 0 else ""
        sources_html = ""
        if msg.get("sources"):
            pills = "".join(f'📎 {_esc(s)}' for s in msg["sources"])
            sources_html = f'\n\n{pills}\n\n'
        st.markdown(f"""

DocMind AI {mem_badge}

{content} {sources_html}

""", unsafe_allow_html=True)

# ─── Chat Input ───────────────────────────────────────────────────────────────
st.markdown("\n", unsafe_allow_html=True)

if not doc_loaded:
    st.chat_input("Upload a document first...", disabled=True)
else:
    # Build a placeholder based on the loaded file type(s)
    if file_count == 1:
        doc_type = documents[0]["type"]
        placeholder = _PLACEHOLDERS.get(doc_type, "Ask anything about your document...")
    else:
        placeholder = f"Ask anything about your {file_count} documents..."

    if prompt := st.chat_input(placeholder):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.spinner("🔍 Retrieving & generating..."):
            answer, sources = rag.query(prompt)
        mem_count = rag.get_memory_count()
        st.session_state.messages.append({
            "role": "assistant",
            "content": answer,
            "sources": sources,
            "memory_count": mem_count,
        })
        st.rerun()