Ara Yeroyan committed on
Commit
264ca84
·
2 Parent(s): 763a8b9 21eb407

Merge branch 'main' of https://huggingface.co/spaces/akryldigital/audit_assistant

Browse files
Files changed (4) hide show
  1. .gitignore +112 -0
  2. Dockerfile +1 -0
  3. app.py +158 -18
  4. src/config/paths.py +59 -0
.gitignore ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ==========================================
2
+ # PYTHON
3
+ # ==========================================
4
+ __pycache__/
5
+ *.py[cod]
6
+ *.pyo
7
+ *.pyd
8
+ *$py.class
9
+
10
+ # Virtual environments
11
+ .venv/
12
+ venv/
13
+ env/
14
+ ENV/
15
+ .conda/
16
+ .venv*/
17
+
18
+ # Byte-compiled / optimized / DLL files
19
+ *.so
20
+ *.dll
21
+ *.dylib
22
+
23
+ # Logs and debug
24
+ *.log
25
+ *.out
26
+ *.err
27
+ logs/
28
+ debug/
29
+ *.sqlite3
30
+
31
+ # ==========================================
32
+ # BUILD / PACKAGING
33
+ # ==========================================
34
+ build/
35
+ dist/
36
+ *.egg-info/
37
+ .eggs/
38
+ pip-wheel-metadata/
39
+ .wheels/
40
+
41
+ # ==========================================
42
+ # JUPYTER / NOTEBOOKS
43
+ # ==========================================
44
+ .ipynb_checkpoints/
45
+ *.ipynb_convert/
46
+
47
+ # ==========================================
48
+ # DATA / MODELS / CACHE
49
+ # ==========================================
50
+ data/
51
+ datasets/
52
+ .cache/
53
+ *.ckpt
54
+ *.h5
55
+ *.hdf5
56
+ *.tflite
57
+ *.onnx
58
+ *.pth
59
+ *.pt
60
+ *.joblib
61
+ *.pkl
62
+ *.pickle
63
+ *.npz
64
+ *.npy
65
+ outputs/
66
+ artifacts/
67
+ checkpoints/
68
+ runs/
69
+ wandb/
70
+ mlruns/
71
+ lightning_logs/
72
+
73
+ # Hugging Face
74
+ huggingface/
75
+ ~/.cache/huggingface/
76
+ ~/.cache/torch/
77
+ ~/.cache/datasets/
78
+ ~/.cache/transformers/
79
+
80
+ # ==========================================
81
+ # EDITORS / TOOLS
82
+ # ==========================================
83
+ .vscode/
84
+ .idea/
85
+ *.swp
86
+ *.swo
87
+ *.bak
88
+ .DS_Store
89
+ Thumbs.db
90
+
91
+ # ==========================================
92
+ # ENV FILES / CREDENTIALS
93
+ # ==========================================
94
+ .env
95
+ .env.*
96
+ *.env.local
97
+ secrets.*
98
+ config.json
99
+ token.json
100
+
101
+ # ==========================================
102
+ # TESTS / TEMP FILES
103
+ # ==========================================
104
+ __tests__/
105
+ .tox/
106
+ .coverage
107
+ .cache/
108
+ pytest_cache/
109
+ tmp/
110
+ temp/
111
+ *.tmp
112
+ *.temp
Dockerfile CHANGED
@@ -59,6 +59,7 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
59
  CMD curl --fail http://localhost:8501/_stcore/health || exit 1
60
 
61
  #temp developement commands
 
62
  # RUN mkdir /app/conversations && chmod -R 777 conversations
63
  # RUN mkdir /app/feedback && chmod -R 777 feedback
64
 
 
59
  CMD curl --fail http://localhost:8501/_stcore/health || exit 1
60
 
61
  #temp developement commands
62
+ RUN pip3 install plotly
63
  # RUN mkdir /app/conversations && chmod -R 777 conversations
64
  # RUN mkdir /app/feedback && chmod -R 777 feedback
65
 
app.py CHANGED
@@ -483,7 +483,11 @@ def display_chunk_statistics_table(stats: Dict[str, Any], title: str = "Retrieva
483
  return
484
 
485
  # Wrap in styled container
 
486
  st.markdown('<div class="retrieval-distribution-container">', unsafe_allow_html=True)
 
 
 
487
 
488
  st.subheader(f"📊 {title}")
489
 
@@ -618,6 +622,40 @@ def main():
618
 
619
  # Sidebar for filters
620
  with st.sidebar:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
621
  st.markdown("### 🔍 Search Filters")
622
  st.markdown("Select filters to narrow down your search. Leave empty to search all data.")
623
 
@@ -952,6 +990,44 @@ def main():
952
  st.info("No documents were retrieved for the last query.")
953
  else:
954
  st.info("No documents have been retrieved yet. Start a conversation to see retrieved documents here.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
955
 
956
  # Feedback Dashboard Section
957
  st.markdown("---")
@@ -1153,25 +1229,89 @@ def main():
1153
  # Scroll to conversation - this is handled by the auto-scroll at bottom
1154
  pass
1155
 
1156
- # Display retrieval history stats
1157
- if st.session_state.rag_retrieval_history:
1158
- st.markdown("---")
1159
- st.markdown("#### 📊 Retrieval History")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1160
 
1161
- with st.expander(f"View {len(st.session_state.rag_retrieval_history)} retrieval entries", expanded=False):
1162
- for idx, entry in enumerate(st.session_state.rag_retrieval_history, 1):
1163
- st.markdown(f"**Retrieval #{idx}**")
1164
-
1165
- # Display the actual RAG query
1166
- rag_query_expansion = entry.get("rag_query_expansion", "No query available")
1167
- st.code(rag_query_expansion, language="text")
1168
-
1169
- # Display summary stats
1170
- st.json({
1171
- "conversation_length": len(entry.get("conversation_up_to", [])),
1172
- "documents_retrieved": len(entry.get("docs_retrieved", []))
1173
- })
1174
- st.markdown("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1175
 
1176
  # Example Questions Section
1177
  st.markdown("---")
 
483
  return
484
 
485
  # Wrap in styled container
486
+ <<<<<<< HEAD
487
  st.markdown('<div class="retrieval-distribution-container">', unsafe_allow_html=True)
488
+ =======
489
+ # st.markdown('<div class="retrieval-distribution-container">', unsafe_allow_html=True)
490
+ >>>>>>> 21eb407535b7e67a7dc3ea192c84831c0ae680d3
491
 
492
  st.subheader(f"📊 {title}")
493
 
 
622
 
623
  # Sidebar for filters
624
  with st.sidebar:
625
+ # Instructions section (collapsible)
626
+ with st.expander("📖 How to Use", expanded=False):
627
+ st.markdown("""
628
+ #### 🎯 Using Filters
629
+
630
+ 1. **Select filters** from the sidebar to narrow your search:
631
+
632
+ 2. **Leave filters empty** to search across all data
633
+
634
+ 3. **Type your question** in the chat and click "Send"
635
+
636
+ 4. **Choose sample questions from the bottom of the page**
637
+
638
+ #### 💡 Tips
639
+
640
+ - Use specific questions for better results
641
+ - Combine multiple filters for precise searches
642
+ - Check the "Retrieved Documents" tab to get various insights
643
+
644
+ #### 💬 Feedback Section
645
+
646
+ - Rate your experience (1-5 stars)
647
+ - Provide optional text feedback
648
+ - Located at the bottom of the page
649
+
650
+ #### ⚠️ Important
651
+
652
+ **When finished, please close the browser window** to free up computational resources.
653
+
654
+ ---
655
+
656
+ For more detailed help, see the example questions at the bottom of the page.
657
+ """)
658
+
659
  st.markdown("### 🔍 Search Filters")
660
  st.markdown("Select filters to narrow down your search. Leave empty to search all data.")
661
 
 
990
  st.info("No documents were retrieved for the last query.")
991
  else:
992
  st.info("No documents have been retrieved yet. Start a conversation to see retrieved documents here.")
993
+
994
+ # Display retrieval history stats
995
+ st.markdown("---")
996
+ if st.session_state.rag_retrieval_history:
997
+ st.markdown("#### 📊 Retrieval History")
998
+ st.markdown(f"This conversation has **{len(st.session_state.rag_retrieval_history)}** retrieval entries.")
999
+
1000
+ with st.expander(f"View {len(st.session_state.rag_retrieval_history)} retrieval entries", expanded=False):
1001
+ for idx, entry in enumerate(st.session_state.rag_retrieval_history, 1):
1002
+ with st.expander(f"Entry {idx}: {entry.get('rag_query_expansion', 'N/A')[:50]}...", expanded=False):
1003
+ st.markdown(f"**Query:** {entry.get('rag_query_expansion', 'N/A')}")
1004
+ st.markdown(f"**Documents Retrieved:** {len(entry.get('docs_retrieved', []))}")
1005
+
1006
+ # Show conversation up to this point
1007
+ conversation = entry.get('conversation_up_to', [])
1008
+ if conversation:
1009
+ st.markdown("**Conversation Context:**")
1010
+ for msg in conversation[-3:]: # Show last 3 messages
1011
+ role = msg.get('type', 'unknown')
1012
+ content = msg.get('content', '')[:200] + "..." if len(msg.get('content', '')) > 200 else msg.get('content', '')
1013
+ if role == 'human':
1014
+ st.markdown(f"- **You:** {content}")
1015
+ elif role == 'ai':
1016
+ st.markdown(f"- **Bot:** {content}")
1017
+
1018
+ # Show retrieved documents summary
1019
+ docs = entry.get('docs_retrieved', [])
1020
+ if docs:
1021
+ st.markdown("**Retrieved Documents:**")
1022
+ for doc_idx, doc in enumerate(docs[:5], 1): # Show first 5
1023
+ doc_meta = doc.get('metadata', {})
1024
+ filename = doc_meta.get('filename', 'Unknown')[:50]
1025
+ st.markdown(f"{doc_idx}. {filename}")
1026
+ if len(docs) > 5:
1027
+ st.markdown(f"... and {len(docs) - 5} more documents")
1028
+ else:
1029
+ st.markdown("---")
1030
+ st.info("📊 Retrieval history will appear here after you start asking questions.")
1031
 
1032
  # Feedback Dashboard Section
1033
  st.markdown("---")
 
1229
  # Scroll to conversation - this is handled by the auto-scroll at bottom
1230
  pass
1231
 
1232
+ # Example Questions Section
1233
+ st.markdown("---")
1234
+ st.markdown(
1235
+ "<h3 class='example-questions-header'>💡 Example Questions</h3>",
1236
+ unsafe_allow_html=True
1237
+ )
1238
+ st.markdown(
1239
+ "<p class='example-questions-description'>Click on any question below to use it, or modify the editable examples:</p>",
1240
+ unsafe_allow_html=True
1241
+ )
1242
+
1243
+ # Initialize example question state
1244
+ if 'custom_question_1' not in st.session_state:
1245
+ st.session_state.custom_question_1 = "How were administrative costs managed in the PDM implementation, and what issues arose with budget execution regarding staff salaries?"
1246
+ if 'custom_question_2' not in st.session_state:
1247
+ st.session_state.custom_question_2 = "What did the National Coordinator say about the release of funds for PDM administrative costs in the letter dated 29th September 2022 and how did the funding received affect the activities of the PDCs and PDM SACCOs in the FY 2022/23?"
1248
+
1249
+ # Question 1: Filename insights (fixed, clickable)
1250
+ st.markdown("#### 📄 Question 1: List insights from a specific file")
1251
+ col1, col2 = st.columns([3, 1])
1252
+ with col1:
1253
+ example_q1 = "List couple of insights from the filename."
1254
+ st.markdown(f"**Example:** `{example_q1}`")
1255
+ st.info("💡 **Filter to apply:** Select a Filename from the sidebar panel before asking this question.")
1256
+ with col2:
1257
+ if st.button("📋 Use This Question", key="use_example_1", use_container_width=True):
1258
+ st.session_state.pending_question = example_q1
1259
+ st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
1260
+ st.rerun()
1261
+
1262
+ st.markdown("---")
1263
+
1264
+ # Questions 2 & 3: Editable examples (collapsible, side by side)
1265
+ with st.expander("#### ✏️ Customizable Questions (Edit and use)", expanded=False):
1266
+ # Place questions side by side
1267
+ col1, col2 = st.columns(2)
1268
 
1269
+ # Question 2
1270
+ with col1:
1271
+ st.markdown("**Question 2:**")
1272
+ custom_q1 = st.text_area(
1273
+ "Edit question 2:",
1274
+ value=st.session_state.custom_question_1,
1275
+ height=100,
1276
+ key="edit_question_2",
1277
+ help="Modify this question to fit your needs, then click 'Use This Question'",
1278
+ label_visibility="collapsed"
1279
+ )
1280
+ if st.button("📋 Use Question 2", key="use_custom_1", use_container_width=True):
1281
+ if custom_q1.strip():
1282
+ st.session_state.pending_question = custom_q1.strip()
1283
+ st.session_state.custom_question_1 = custom_q1.strip()
1284
+ st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
1285
+ st.rerun()
1286
+ else:
1287
+ st.warning("Please enter a question first!")
1288
+ st.caption("💡 Tip: Add specific details like dates, names, or amounts to get more precise answers")
1289
+ st.info("💡 **Filter to apply:** Select District(s) and Year(s) from sidebar panel")
1290
+
1291
+ # Question 3
1292
+ with col2:
1293
+ st.markdown("**Question 3:**")
1294
+ custom_q2 = st.text_area(
1295
+ "Edit question 3:",
1296
+ value=st.session_state.custom_question_2,
1297
+ height=100,
1298
+ key="edit_question_3",
1299
+ help="Modify this question to fit your needs, then click 'Use This Question'",
1300
+ label_visibility="collapsed"
1301
+ )
1302
+ if st.button("📋 Use Question 3", key="use_custom_2", use_container_width=True):
1303
+ if custom_q2.strip():
1304
+ st.session_state.pending_question = custom_q2.strip()
1305
+ st.session_state.custom_question_2 = custom_q2.strip()
1306
+ st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
1307
+ st.rerun()
1308
+ else:
1309
+ st.warning("Please enter a question first!")
1310
+ st.caption("💡 Tip: Use specific terms from the documents (e.g., 'PDM', 'SACCOs', 'FY 2022/23')")
1311
+
1312
+
1313
+ # Store selected question for next render (handled in input section above)
1314
+ # This ensures the question populates the input field correctly
1315
 
1316
  # Example Questions Section
1317
  st.markdown("---")
src/config/paths.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Path configuration for local vs deployed environments.
3
+
4
+ This module handles different paths for local development vs deployed (HF Spaces) environments.
5
+ """
6
+ import os
7
+ from pathlib import Path
8
+
9
+ # Determine if we're in a deployed environment (HF Spaces/Docker) or local
10
+ # Check for environment variable or Docker-like paths
11
+ IS_DEPLOYED = (
12
+ os.getenv("DEPLOYED", "false").lower() == "true" or
13
+ os.path.exists("/app") or
14
+ os.getenv("SPACES_ID") is not None or
15
+ os.path.exists("/.dockerenv")
16
+ )
17
+
18
+ # PROJECT_DIR: Base directory for application files
19
+ # In deployed: /app, in local: current working directory or project root
20
+ if IS_DEPLOYED:
21
+ PROJECT_DIR = Path("/app")
22
+ else:
23
+ # For local development, use current working directory or find project root
24
+ cwd = Path.cwd()
25
+ # Try to find project root (directory containing this src/ folder)
26
+ project_root = cwd
27
+ while project_root != project_root.parent:
28
+ if (project_root / "src" / "config").exists():
29
+ break
30
+ project_root = project_root.parent
31
+ PROJECT_DIR = project_root
32
+
33
+ # Cache directories - different for local vs deployed
34
+ # Local: Use default user cache locations (don't override)
35
+ # Deployed: Use PROJECT_DIR/.cache
36
+ if IS_DEPLOYED:
37
+ CACHE_DIR = PROJECT_DIR / ".cache"
38
+ HF_CACHE_DIR = CACHE_DIR / "huggingface"
39
+ STREAMLIT_CACHE_DIR = CACHE_DIR / "streamlit"
40
+ else:
41
+ # For local, use default user cache (let libraries use their defaults)
42
+ HF_CACHE_DIR = None # Will use HF defaults (~/.cache/huggingface)
43
+ STREAMLIT_CACHE_DIR = None # Will use Streamlit defaults
44
+
45
+ # Application directories
46
+ FEEDBACK_DIR = PROJECT_DIR / "feedback"
47
+ CONVERSATIONS_DIR = PROJECT_DIR / "conversations"
48
+ STREAMLIT_CONFIG_DIR = PROJECT_DIR / ".streamlit"
49
+
50
+ # Log the configuration
51
+ if __name__ == "__main__":
52
+ print(f"IS_DEPLOYED: {IS_DEPLOYED}")
53
+ print(f"PROJECT_DIR: {PROJECT_DIR}")
54
+ print(f"HF_CACHE_DIR: {HF_CACHE_DIR}")
55
+ print(f"FEEDBACK_DIR: {FEEDBACK_DIR}")
56
+ print(f"CONVERSATIONS_DIR: {CONVERSATIONS_DIR}")
57
+
58
+
59
+