import base64
import html
import os
import re
from datetime import datetime

import fitz
import streamlit as st
from werkzeug.utils import secure_filename

from src.gpp import GPP, GPPConfig
from src.qa import AnswerGenerator
| |
|
| | |
# Browser-tab title and icon, plus the wide page layout.
_PAGE_SETTINGS = {
    "page_title": "Document Intelligence",
    "page_icon": "🤖",
    "layout": "wide",
}
st.set_page_config(**_PAGE_SETTINGS)
| |
|
| | |
# Seed the per-session keys the rest of the script reads. The mapping is
# rebuilt on every script run, so the list defaults are never shared.
_SESSION_DEFAULTS = {
    "chat_history": [],      # list of {"role": ..., "content": ...} dicts
    "parsed_info": None,     # result of the last successful parse, if any
    "selected_chunks": [],   # evidence chunks for the latest answer
}
for _state_key, _initial_value in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
| |
|
| | |
# Dark-theme stylesheet for the chat UI, injected once per script run.
# The markup classes used further down (chat-window, chat-history,
# message-row, message-bubble, ...) are all defined here.
_CUSTOM_CSS = """
<style>
/* Main app background */
.stApp {
background-color: #121212; /* Dark background */
color: #EAEAEA; /* Light text */
}

/* Ensure all text in the main content area is light */
.st-emotion-cache-16txtl3,
.st-emotion-cache-16txtl3 h1,
.st-emotion-cache-16txtl3 h2,
.st-emotion-cache-16txtl3 h3 {
color: #EAEAEA;
}

/* Sidebar adjustments */
.st-emotion-cache-16txtl3 {
padding-top: 2rem;
}

/* Main chat window container */
.chat-window {
height: 75vh;
background: #1E1E1E; /* Slightly lighter dark for chat window */
border-radius: 10px;
box-shadow: 0 4px 8px rgba(0,0,0,0.4);
display: flex;
flex-direction: column;
overflow: hidden;
}

/* Chat message history */
.chat-history {
flex-grow: 1;
overflow-y: auto;
padding: 20px;
display: flex;
flex-direction: column;
gap: 15px;
}

/* General message styling */
.message-row {
display: flex;
align-items: flex-end;
gap: 10px;
}

/* Assistant message alignment */
.assistant-row {
justify-content: flex-start;
}

/* User message alignment */
.user-row {
justify-content: flex-end;
}

/* Avatar styling */
.avatar {
width: 40px;
height: 40px;
border-radius: 50%;
display: flex;
align-items: center;
justify-content: center;
font-size: 20px;
background-color: #3A3B3C; /* Dark gray for avatar */
color: white;
}

/* Chat bubble styling */
.message-bubble {
max-width: 70%;
padding: 10px 15px;
border-radius: 18px;
overflow-wrap: break-word;
color: #EAEAEA; /* Light text for all bubbles */
}

.message-bubble p {
margin: 0;
}

/* Assistant bubble color */
.assistant-bubble {
background-color: #3A3B3C; /* Dark gray for assistant */
}

/* User bubble color */
.user-bubble {
background-color: #0084FF;
color: white; /* White text for user bubble */
}

/* Chat input container */
.chat-input-container {
padding: 15px 20px;
background: #1E1E1E; /* Match chat window background */
border-top: 1px solid #3A3B3C;
}

/* Input field styling */
.stTextInput>div>div>input {
border-radius: 18px;
border: 1px solid #555;
background-color: #3A3B3C; /* Dark input field */
color: #EAEAEA; /* Light text in input */
padding: 10px 15px;
}

/* Button styling */
.stButton>button {
border-radius: 18px;
border: none;
background-color: #0084FF;
color: white;
height: 42px;
}

/* Hide the default "Get Answer" header for a cleaner look */
.st-emotion-cache-16txtl3 > h1 {
display: none;
}

/* Empty chat placeholder */
.empty-chat-placeholder {
flex-grow: 1;
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
color: #A0A0A0; /* Lighter gray for placeholder text */
}

.empty-chat-placeholder .icon {
font-size: 50px;
margin-bottom: 10px;
}

</style>
"""

# NOTE(review): the `st-emotion-cache-*` selectors are generated class names;
# presumably they change between Streamlit releases - confirm after upgrades.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
| |
|
| | |
# Sidebar: branding, upload/parse controls, document preview, chat controls.
with st.sidebar:
    st.image("https://img.icons8.com/ios-filled/50/4A90E2/document.png", width=40)
    st.title("Document Intelligence")
    st.caption(f"Last updated: {datetime.now().strftime('%Y-%m-%d %H:%M')}")

    with st.expander("How It Works", expanded=True):
        st.markdown("1. **Upload & Parse**: Select your PDF to begin.\n2. **Ask Questions**: Use the chat to query your document.\n3. **Get Answers**: The AI provides instant, evidence-backed responses.")

    st.markdown("---")

    st.subheader("Upload Document")
    uploaded_file = st.file_uploader("Select a PDF", type=["pdf"], help="Upload a PDF file to analyze")

    if uploaded_file:
        # secure_filename() can strip a name down to "", which would make the
        # write target below a directory; fall back to a fixed name instead.
        filename = secure_filename(uploaded_file.name) or "document.pdf"
        # Collection names are restricted to letters, digits, "_" and "-".
        collection_name = re.sub(r'[^a-zA-Z0-9_-]', '_', os.path.splitext(filename)[0])

        if st.button("Parse Document", use_container_width=True, key="parse_button"):
            output_dir = os.path.join("./parsed", filename)
            pdf_path = os.path.join(output_dir, filename)

            with st.spinner("Processing document..."):
                try:
                    # Saving the upload is inside the try so that disk errors
                    # are reported like any other processing failure.
                    os.makedirs(output_dir, exist_ok=True)
                    with open(pdf_path, "wb") as f:
                        f.write(uploaded_file.getbuffer())

                    gpp = GPP(GPPConfig())
                    parsed_info = gpp.run(pdf_path, output_dir, collection_name)
                    st.session_state.parsed_info = parsed_info
                    # A new document invalidates the previous conversation.
                    st.session_state.chat_history = []
                    st.session_state.selected_chunks = []
                    st.success("Document ready!")
                except Exception as e:
                    st.error(f"Processing failed: {e}")
                    st.session_state.parsed_info = None

    if st.session_state.parsed_info:
        st.markdown("---")
        st.subheader("Document Preview")
        parsed = st.session_state.parsed_info

        layout_pdf = parsed.get("layout_pdf")
        if layout_pdf and os.path.exists(layout_pdf):
            with st.expander("View Layout PDF", expanded=False):
                # NOTE(review): this links to a server-side file path; confirm
                # that the browser can actually open it in your deployment.
                st.markdown(f"[Open in new tab]({layout_pdf})")
                thumb_width = 500
                href = html.escape(str(layout_pdf), quote=True)
                thumb_links = []
                # The context manager closes the PDF once thumbnails are built.
                with fitz.open(layout_pdf) as doc:
                    for page_no, page in enumerate(doc, start=1):
                        # Uniform zoom so each thumbnail is thumb_width wide.
                        zoom = thumb_width / page.rect.width
                        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
                        b64 = base64.b64encode(pix.tobytes("png")).decode("utf-8")
                        thumb_links.append(
                            f"<a href='{href}#page={page_no}' target='_blank' style='display:inline-block;margin-right:8px;'>"
                            f"<img src='data:image/png;base64,{b64}' width='{thumb_width}' style='border:1px solid #ccc;border-radius:4px;box-shadow:0 1px 2px #0001;'/></a>"
                        )
                # Emit the scroll container and its thumbnails in ONE markdown
                # call: separate calls become separate elements, so a <div>
                # opened in one call cannot wrap content from the next.
                st.markdown(
                    "<div style='overflow-x: auto; white-space: nowrap; border: 1px solid #eee; border-radius: 8px; padding: 8px; background: #fafbfc; max-width: 100%;'>"
                    + "".join(thumb_links)
                    + "</div>",
                    unsafe_allow_html=True,
                )

        md_path = parsed.get("md_path")
        if md_path and os.path.exists(md_path):
            preview_limit = 3000
            try:
                with open(md_path, 'r', encoding='utf-8') as md_file:
                    # One extra character tells us whether the text was cut.
                    md_head = md_file.read(preview_limit + 1)
            except (OSError, UnicodeError) as e:
                st.warning(f"Could not preview content: {e}")
            else:
                # The text comes from the uploaded document, so escape it
                # before placing it inside raw HTML.
                preview = html.escape(md_head[:preview_limit])
                ellipsis = '...' if len(md_head) > preview_limit else ''
                with st.expander("Content Preview", expanded=False):
                    st.markdown(f"<pre style='font-size:12px;max-height:300px;overflow-y:auto'>{preview}{ellipsis}</pre>", unsafe_allow_html=True)

    st.markdown("---")
    st.subheader("Chat Controls")
    if st.button("Clear Chat", use_container_width=True):
        st.session_state.chat_history = []
        st.session_state.selected_chunks = []
        st.rerun()
| |
|
| | |
def _message_html(message):
    """Return the HTML for one chat bubble.

    ``message`` is a chat-history entry with ``"role"`` and ``"content"`` keys.
    The content is HTML-escaped because it is user- or model-supplied text
    rendered with ``unsafe_allow_html=True``; newlines become ``<br>`` so that
    multi-line text stays inside the bubble. The markup is returned as a single
    line so markdown never treats part of it as an indented code block.
    """
    text = html.escape(str(message["content"])).replace("\n", "<br>")
    if message["role"] == "user":
        return (
            '<div class="message-row user-row">'
            f'<div class="message-bubble user-bubble"><p>{text}</p></div>'
            '</div>'
        )
    return (
        '<div class="message-row assistant-row">'
        '<div class="avatar">🤖</div>'
        f'<div class="message-bubble assistant-bubble"><p>{text}</p></div>'
        '</div>'
    )


# Two-column page body: chat on the left, supporting evidence on the right.
main_col, evidence_col = st.columns([2, 1])

with main_col:
    if not st.session_state.parsed_info:
        st.info("Please upload and parse a document to start the chat.")
    else:
        # NOTE(review): each st.markdown call is rendered as its own element,
        # so this wrapper presumably cannot enclose the widgets created below
        # - confirm visually before relying on the .chat-window styling.
        st.markdown("<div class='chat-window'>", unsafe_allow_html=True)

        # Build the whole history as one HTML block so that the
        # .chat-history container really wraps its messages.
        if st.session_state.chat_history:
            history_body = "".join(
                _message_html(message) for message in st.session_state.chat_history
            )
        else:
            history_body = (
                "<div class='empty-chat-placeholder'>"
                '<span class="icon">🤖</span>'
                "<h3>Ask me anything about your document!</h3>"
                "</div>"
            )
        st.markdown(f"<div class='chat-history'>{history_body}</div>", unsafe_allow_html=True)

        st.markdown("<div class='chat-input-container'>", unsafe_allow_html=True)
        input_col, button_col = st.columns([4, 1])
        with input_col:
            question = st.text_input("Ask a question...", key="question_input", label_visibility="collapsed")
        with button_col:
            send_button = st.button("Send", use_container_width=True)

        st.markdown("</div>", unsafe_allow_html=True)
        st.markdown("</div>", unsafe_allow_html=True)

        if send_button and question:
            st.session_state.chat_history.append({"role": "user", "content": question})

            try:
                with st.spinner("Thinking..."):
                    generator = AnswerGenerator(st.session_state.parsed_info['collection_name'])
                    answer, supporting_chunks = generator.answer(question)
            except Exception as e:
                # Drop the unanswered question so the history stays paired,
                # and report the failure instead of crashing the script.
                st.session_state.chat_history.pop()
                st.error(f"Could not generate an answer: {e}")
            else:
                st.session_state.chat_history.append({"role": "assistant", "content": answer})
                st.session_state.selected_chunks = supporting_chunks
                # Rerun outside the try so the new messages are rendered and
                # the rerun signal is never caught by the handler above.
                st.rerun()
| |
|
| | |
# Right-hand column: the chunks that supported the most recent answer.
with evidence_col:
    if st.session_state.parsed_info:
        st.markdown("### Supporting Evidence")

        if not st.session_state.selected_chunks:
            st.info("Evidence chunks will appear here after you ask a question.")
        else:
            images_dir = st.session_state.parsed_info.get('images_dir')
            for idx, chunk in enumerate(st.session_state.selected_chunks, start=1):
                with st.expander(f"Evidence Chunk #{idx}", expanded=True):
                    st.markdown(chunk.get('narration', 'No narration available'))
                    if 'table_structure' in chunk:
                        st.dataframe(chunk['table_structure'], use_container_width=True)
                    for blk in chunk.get('blocks', []):
                        # NOTE(review): image blocks are matched on
                        # type == 'img_path'; confirm against the parser output.
                        if blk.get('type') != 'img_path' or not images_dir:
                            continue
                        rel_path = blk.get('img_path')
                        if not rel_path:
                            # Without a file name the join below would yield
                            # the images directory itself.
                            continue
                        img_path = os.path.join(images_dir, rel_path)
                        # isfile (not exists) so directories are never passed
                        # to st.image.
                        if os.path.isfile(img_path):
                            # NOTE(review): use_column_width is deprecated in
                            # recent Streamlit releases; switch once the
                            # minimum supported version is confirmed.
                            st.image(img_path, use_column_width=True)
| |
|
| | |
def handle_error(func):
    """Call ``func`` and show any exception in the Streamlit UI.

    Args:
        func: A zero-argument callable.

    Returns:
        Whatever ``func`` returns, or ``None`` if it raised. On failure an
        error message and a refresh hint are shown via ``st.error`` and
        ``st.info`` and the exception is not re-raised.
    """
    try:
        return func()
    except Exception as e:
        st.error(f"An unexpected error occurred: {e}")
        st.info("Please refresh the page and try again.")
        return None


# The wrapper is invoked with a no-op callable, so this call has no effect.
handle_error(lambda: None)