Spaces:

akryldigital
/

audit_assistant

Running

App Files Files Community

Ara Yeroyan committed on Nov 3, 2025

Commit

264ca84

2 Parent(s): 763a8b9 21eb407

Merge branch 'main' of https://huggingface.co/spaces/akryldigital/audit_assistant

Browse files

Files changed (4) hide show

.gitignore +112 -0
Dockerfile +1 -0
app.py +158 -18
src/config/paths.py +59 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,112 @@

+# ==========================================
+# PYTHON
+# ==========================================
+__pycache__/
+*.py[cod]
+*.pyo
+*.pyd
+*$py.class
+# Virtual environments
+.venv/
+venv/
+env/
+ENV/
+.conda/
+.venv*/
+# Compiled extension modules / shared libraries
+*.so
+*.dll
+*.dylib
+# Logs and debug
+*.log
+*.out
+*.err
+logs/
+debug/
+*.sqlite3
+# ==========================================
+# BUILD / PACKAGING
+# ==========================================
+build/
+dist/
+*.egg-info/
+.eggs/
+pip-wheel-metadata/
+.wheels/
+# ==========================================
+# JUPYTER / NOTEBOOKS
+# ==========================================
+.ipynb_checkpoints/
+*.ipynb_convert/
+# ==========================================
+# DATA / MODELS / CACHE
+# ==========================================
+data/
+datasets/
+.cache/
+*.ckpt
+*.h5
+*.hdf5
+*.tflite
+*.onnx
+*.pth
+*.pt
+*.joblib
+*.pkl
+*.pickle
+*.npz
+*.npy
+outputs/
+artifacts/
+checkpoints/
+runs/
+wandb/
+mlruns/
+lightning_logs/
+# Hugging Face
+huggingface/
+# NOTE: git does not expand "~" in ignore patterns. The entries below only
+# match a literal "~" directory at the repository root (e.g. one created by
+# code that forgot to expand the home directory); the real per-user caches
+# live outside the repository and need no ignore rule.
+~/.cache/huggingface/
+~/.cache/torch/
+~/.cache/datasets/
+~/.cache/transformers/
+# ==========================================
+# EDITORS / TOOLS
+# ==========================================
+.vscode/
+.idea/
+*.swp
+*.swo
+*.bak
+.DS_Store
+Thumbs.db
+# ==========================================
+# ENV FILES / CREDENTIALS
+# ==========================================
+.env
+.env.*
+*.env.local
+secrets.*
+config.json
+token.json
+# ==========================================
+# TESTS / TEMP FILES
+# ==========================================
+__tests__/
+.tox/
+.coverage
+# pytest writes its cache to ".pytest_cache/"; the un-dotted name is kept
+# for backward compatibility. ".cache/" is already ignored in the
+# DATA / MODELS / CACHE section above.
+.pytest_cache/
+pytest_cache/
+tmp/
+temp/
+*.tmp
+*.temp

Dockerfile CHANGED Viewed

@@ -59,6 +59,7 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
     CMD curl --fail http://localhost:8501/_stcore/health || exit 1
 #temp development commands
 # RUN mkdir /app/conversations && chmod -R 777 conversations
 # RUN mkdir /app/feedback && chmod -R 777 feedback

     CMD curl --fail http://localhost:8501/_stcore/health || exit 1
 #temp development commands
+RUN pip3 install plotly
 # RUN mkdir /app/conversations && chmod -R 777 conversations
 # RUN mkdir /app/feedback && chmod -R 777 feedback

app.py CHANGED Viewed

@@ -483,7 +483,11 @@ def display_chunk_statistics_table(stats: Dict[str, Any], title: str = "Retrieva
         return
     # Wrap in styled container
     st.markdown('<div class="retrieval-distribution-container">', unsafe_allow_html=True)
     st.subheader(f"📊 {title}")
@@ -618,6 +622,40 @@ def main():
     # Sidebar for filters
     with st.sidebar:
         st.markdown("### 🔍 Search Filters")
         st.markdown("Select filters to narrow down your search. Leave empty to search all data.")
@@ -952,6 +990,44 @@ def main():
                 st.info("No documents were retrieved for the last query.")
         else:
             st.info("No documents have been retrieved yet. Start a conversation to see retrieved documents here.")
     # Feedback Dashboard Section
     st.markdown("---")
@@ -1153,25 +1229,89 @@ def main():
                     # Scroll to conversation - this is handled by the auto-scroll at bottom
                     pass
-    # Display retrieval history stats
-    if st.session_state.rag_retrieval_history:
-        st.markdown("---")
-        st.markdown("#### 📊 Retrieval History")
-        with st.expander(f"View {len(st.session_state.rag_retrieval_history)} retrieval entries", expanded=False):
-            for idx, entry in enumerate(st.session_state.rag_retrieval_history, 1):
-                st.markdown(f"**Retrieval #{idx}**")
-                # Display the actual RAG query
-                rag_query_expansion = entry.get("rag_query_expansion", "No query available")
-                st.code(rag_query_expansion, language="text")
-                # Display summary stats
-                st.json({
-                    "conversation_length": len(entry.get("conversation_up_to", [])),
-                    "documents_retrieved": len(entry.get("docs_retrieved", []))
-                })
-                st.markdown("---")
     # Example Questions Section
     st.markdown("---")

         return
     # Wrap in styled container
+<<<<<<< HEAD
     st.markdown('<div class="retrieval-distribution-container">', unsafe_allow_html=True)
+=======
+    # st.markdown('<div class="retrieval-distribution-container">', unsafe_allow_html=True)
+>>>>>>> 21eb407535b7e67a7dc3ea192c84831c0ae680d3
     st.subheader(f"📊 {title}")
     # Sidebar for filters
     with st.sidebar:
+        # Instructions section (collapsible)
+        with st.expander("📖 How to Use", expanded=False):
+            st.markdown("""
+            #### 🎯 Using Filters
+            1. **Select filters** from the sidebar to narrow your search:
+            2. **Leave filters empty** to search across all data
+            3. **Type your question** in the chat and click "Send"
+            4. **Choose sample questions from the bottom of the page**
+            #### 💡 Tips
+            - Use specific questions for better results
+            - Combine multiple filters for precise searches
+            - Check the "Retrieved Documents" tab to get various insights
+            #### 💬 Feedback Section
+            - Rate your experience (1-5 stars)
+            - Provide optional text feedback
+            - Located at the bottom of the page
+            #### ⚠️ Important
+            **When finished, please close the browser window** to free up computational resources.
+            ---
+            For more detailed help, see the example questions at the bottom of the page.
+            """)
         st.markdown("### 🔍 Search Filters")
         st.markdown("Select filters to narrow down your search. Leave empty to search all data.")
                 st.info("No documents were retrieved for the last query.")
         else:
             st.info("No documents have been retrieved yet. Start a conversation to see retrieved documents here.")
+        # Display retrieval history stats
+        st.markdown("---")
+        if st.session_state.rag_retrieval_history:
+            st.markdown("#### 📊 Retrieval History")
+            st.markdown(f"This conversation has **{len(st.session_state.rag_retrieval_history)}** retrieval entries.")
+            with st.expander(f"View {len(st.session_state.rag_retrieval_history)} retrieval entries", expanded=False):
+                for idx, entry in enumerate(st.session_state.rag_retrieval_history, 1):
+                    with st.expander(f"Entry {idx}: {entry.get('rag_query_expansion', 'N/A')[:50]}...", expanded=False):
+                        st.markdown(f"**Query:** {entry.get('rag_query_expansion', 'N/A')}")
+                        st.markdown(f"**Documents Retrieved:** {len(entry.get('docs_retrieved', []))}")
+                        # Show conversation up to this point
+                        conversation = entry.get('conversation_up_to', [])
+                        if conversation:
+                            st.markdown("**Conversation Context:**")
+                            for msg in conversation[-3:]:  # Show last 3 messages
+                                role = msg.get('type', 'unknown')
+                                content = msg.get('content', '')[:200] + "..." if len(msg.get('content', '')) > 200 else msg.get('content', '')
+                                if role == 'human':
+                                    st.markdown(f"- **You:** {content}")
+                                elif role == 'ai':
+                                    st.markdown(f"- **Bot:** {content}")
+                        # Show retrieved documents summary
+                        docs = entry.get('docs_retrieved', [])
+                        if docs:
+                            st.markdown("**Retrieved Documents:**")
+                            for doc_idx, doc in enumerate(docs[:5], 1):  # Show first 5
+                                doc_meta = doc.get('metadata', {})
+                                filename = doc_meta.get('filename', 'Unknown')[:50]
+                                st.markdown(f"{doc_idx}. {filename}")
+                            if len(docs) > 5:
+                                st.markdown(f"... and {len(docs) - 5} more documents")
+        else:
+            st.markdown("---")
+            st.info("📊 Retrieval history will appear here after you start asking questions.")
     # Feedback Dashboard Section
     st.markdown("---")
                     # Scroll to conversation - this is handled by the auto-scroll at bottom
                     pass
+    # Example Questions Section
+    st.markdown("---")
+    st.markdown(
+        "<h3 class='example-questions-header'>💡 Example Questions</h3>",
+        unsafe_allow_html=True
+    )
+    st.markdown(
+        "<p class='example-questions-description'>Click on any question below to use it, or modify the editable examples:</p>",
+        unsafe_allow_html=True
+    )
+    # Initialize example question state
+    if 'custom_question_1' not in st.session_state:
+        st.session_state.custom_question_1 = "How were administrative costs managed in the PDM implementation, and what issues arose with budget execution regarding staff salaries?"
+    if 'custom_question_2' not in st.session_state:
+        st.session_state.custom_question_2 = "What did the National Coordinator say about the release of funds for PDM administrative costs in the letter dated 29th September 2022 and how did the funding received affect the activities of the PDCs and PDM SACCOs in the FY 2022/23?"
+    # Question 1: Filename insights (fixed, clickable)
+    st.markdown("#### 📄 Question 1: List insights from a specific file")
+    col1, col2 = st.columns([3, 1])
+    with col1:
+        example_q1 = "List couple of insights from the filename."
+        st.markdown(f"**Example:** `{example_q1}`")
+        st.info("💡 **Filter to apply:** Select a Filename from the sidebar panel before asking this question.")
+    with col2:
+        if st.button("📋 Use This Question", key="use_example_1", use_container_width=True):
+            st.session_state.pending_question = example_q1
+            st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
+            st.rerun()
+    st.markdown("---")
+    # Questions 2 & 3: Editable examples (collapsible, side by side)
+    with st.expander("#### ✏️ Customizable Questions (Edit and use)", expanded=False):
+        # Place questions side by side
+        col1, col2 = st.columns(2)
+        # Question 2
+        with col1:
+            st.markdown("**Question 2:**")
+            custom_q1 = st.text_area(
+                "Edit question 2:",
+                value=st.session_state.custom_question_1,
+                height=100,
+                key="edit_question_2",
+                help="Modify this question to fit your needs, then click 'Use This Question'",
+                label_visibility="collapsed"
+            )
+            if st.button("📋 Use Question 2", key="use_custom_1", use_container_width=True):
+                if custom_q1.strip():
+                    st.session_state.pending_question = custom_q1.strip()
+                    st.session_state.custom_question_1 = custom_q1.strip()
+                    st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
+                    st.rerun()
+                else:
+                    st.warning("Please enter a question first!")
+            st.caption("💡 Tip: Add specific details like dates, names, or amounts to get more precise answers")
+            st.info("💡 **Filter to apply:** Select District(s) and Year(s) from sidebar panel")
+        # Question 3
+        with col2:
+            st.markdown("**Question 3:**")
+            custom_q2 = st.text_area(
+                "Edit question 3:",
+                value=st.session_state.custom_question_2,
+                height=100,
+                key="edit_question_3",
+                help="Modify this question to fit your needs, then click 'Use This Question'",
+                label_visibility="collapsed"
+            )
+            if st.button("📋 Use Question 3", key="use_custom_2", use_container_width=True):
+                if custom_q2.strip():
+                    st.session_state.pending_question = custom_q2.strip()
+                    st.session_state.custom_question_2 = custom_q2.strip()
+                    st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
+                    st.rerun()
+                else:
+                    st.warning("Please enter a question first!")
+            st.caption("💡 Tip: Use specific terms from the documents (e.g., 'PDM', 'SACCOs', 'FY 2022/23')")
+    # Store selected question for next render (handled in input section above)
+    # This ensures the question populates the input field correctly
     # Example Questions Section
     st.markdown("---")

src/config/paths.py ADDED Viewed

	@@ -0,0 +1,59 @@

+"""
+Path configuration for local vs deployed environments.
+This module resolves the directories the application uses in two modes:
+deployed (HF Spaces / Docker, rooted at /app) and local development
+(rooted at the project checkout found from the current working directory).
+Every public name below is defined in both modes; cache paths are None
+locally so that libraries fall back to their own default locations.
+"""
+import os
+from pathlib import Path
+def _find_project_root(start):
+    """Return the nearest directory at or above *start* containing src/config.
+    Falls back to *start* itself when no ancestor matches, so launching the
+    app from outside the checkout never resolves to the filesystem root.
+    """
+    for candidate in (start, *start.parents):
+        if (candidate / "src" / "config").exists():
+            return candidate
+    return start
+# Determine if we're in a deployed environment (HF Spaces/Docker) or local.
+# Any one of these signals is enough:
+#   - DEPLOYED=true set explicitly
+#   - the /app directory exists (the deployed project root used below)
+#   - SPACE_ID is set (the helper variable Hugging Face Spaces documents);
+#     the original SPACES_ID spelling is still accepted for compatibility
+#   - /.dockerenv exists (marker file present inside Docker containers)
+IS_DEPLOYED = (
+    os.getenv("DEPLOYED", "false").lower() == "true" or
+    os.path.exists("/app") or
+    os.getenv("SPACE_ID") is not None or
+    os.getenv("SPACES_ID") is not None or
+    os.path.exists("/.dockerenv")
+)
+# PROJECT_DIR: Base directory for application files
+# In deployed: /app, in local: project root found from the working directory
+if IS_DEPLOYED:
+    PROJECT_DIR = Path("/app")
+else:
+    PROJECT_DIR = _find_project_root(Path.cwd())
+# Cache directories - different for local vs deployed
+# Local: None, so libraries use their default user cache locations
+# Deployed: Use PROJECT_DIR/.cache
+if IS_DEPLOYED:
+    CACHE_DIR = PROJECT_DIR / ".cache"
+    HF_CACHE_DIR = CACHE_DIR / "huggingface"
+    STREAMLIT_CACHE_DIR = CACHE_DIR / "streamlit"
+else:
+    # CACHE_DIR is defined here too so that importing it never fails locally.
+    CACHE_DIR = None
+    HF_CACHE_DIR = None  # Will use HF defaults (~/.cache/huggingface)
+    STREAMLIT_CACHE_DIR = None  # Will use Streamlit defaults
+# Application directories
+FEEDBACK_DIR = PROJECT_DIR / "feedback"
+CONVERSATIONS_DIR = PROJECT_DIR / "conversations"
+STREAMLIT_CONFIG_DIR = PROJECT_DIR / ".streamlit"
+# Print the resolved configuration when run as a script
+if __name__ == "__main__":
+    print(f"IS_DEPLOYED: {IS_DEPLOYED}")
+    print(f"PROJECT_DIR: {PROJECT_DIR}")
+    print(f"CACHE_DIR: {CACHE_DIR}")
+    print(f"HF_CACHE_DIR: {HF_CACHE_DIR}")
+    print(f"STREAMLIT_CACHE_DIR: {STREAMLIT_CACHE_DIR}")
+    print(f"FEEDBACK_DIR: {FEEDBACK_DIR}")
+    print(f"CONVERSATIONS_DIR: {CONVERSATIONS_DIR}")
+    print(f"STREAMLIT_CONFIG_DIR: {STREAMLIT_CONFIG_DIR}")