Ara Yeroyan committed on
Commit
02d7f4f
·
1 Parent(s): 7c8b783

refactor and add sample questions

Browse files
Files changed (2) hide show
  1. app.py +428 -211
  2. smart_chatbot.py +4 -3
app.py CHANGED
@@ -3,7 +3,29 @@ Intelligent Audit Report Chatbot UI
3
  """
4
 
5
  import os
6
- import sys
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  # ===== CRITICAL: Fix OMP_NUM_THREADS FIRST, before ANY other imports =====
9
  # Some libraries load at import time and will fail if OMP_NUM_THREADS is invalid
@@ -29,42 +51,30 @@ except (ValueError, TypeError):
29
 
30
  # ===== Setup HuggingFace cache directories BEFORE any model imports =====
31
  # CRITICAL: Set these before any imports that might use HuggingFace (like sentence-transformers)
32
- # This ensures models downloaded during Docker build are found at runtime
33
- cache_dir = "/app/.cache/huggingface"
34
- os.environ["HF_HOME"] = cache_dir
35
- os.environ["TRANSFORMERS_CACHE"] = cache_dir
36
- os.environ["HF_DATASETS_CACHE"] = cache_dir
37
- os.environ["HF_HUB_CACHE"] = cache_dir
38
- os.environ["SENTENCE_TRANSFORMERS_HOME"] = cache_dir
39
-
40
- # Ensure cache directory exists (created in Dockerfile, but ensure it's there)
41
- try:
42
- os.makedirs(cache_dir, mode=0o755, exist_ok=True)
43
- except (PermissionError, OSError) as e:
44
- # If we can't create it, log but continue (might already exist from Dockerfile)
45
- # HuggingFace will try to create subdirectories, but we need parent to exist
46
- pass
47
-
48
- import time
49
- import json
50
- import uuid
51
- import logging
52
- from pathlib import Path
53
-
54
- import argparse
55
- import streamlit as st
56
- from langchain_core.messages import HumanMessage, AIMessage
57
-
58
- from multi_agent_chatbot import get_multi_agent_chatbot
59
- from smart_chatbot import get_chatbot as get_smart_chatbot
60
- from src.reporting.feedback_schema import create_feedback_from_dict
61
 
62
  # Configure logging
63
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
64
  logger = logging.getLogger(__name__)
65
 
66
  # Log environment setup for debugging
67
- logger.info(f"πŸ“ HuggingFace cache directory: {os.environ.get('HF_HOME', 'NOT SET')}")
 
 
68
  logger.info(f"πŸ”§ OMP_NUM_THREADS: {os.environ.get('OMP_NUM_THREADS', 'NOT SET')}")
69
 
70
 
@@ -94,6 +104,54 @@ st.markdown("""
94
  margin-bottom: 2rem;
95
  }
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  .session-info {
98
  background-color: #f0f2f6;
99
  padding: 10px;
@@ -218,10 +276,11 @@ def serialize_documents(sources):
218
  @st.cache_data
219
  def load_filter_options():
220
  try:
221
- with open("src/config/filter_options.json", "r") as f:
 
222
  return json.load(f)
223
  except FileNotFoundError:
224
- st.info([x for x in os.listdir() if x.endswith('.json')])
225
  st.error("filter_options.json not found. Please run the metadata analysis script.")
226
  return {"sources": [], "years": [], "districts": [], 'filenames': []}
227
 
@@ -254,16 +313,8 @@ def main():
254
  st.session_state.reset_conversation = False
255
  st.rerun()
256
 
257
- # Header with system indicator
258
- col1, col2 = st.columns([3, 1])
259
- with col1:
260
- st.markdown('<h1 class="main-header">🤖 Intelligent Audit Report Chatbot</h1>', unsafe_allow_html=True)
261
- with col2:
262
- system_type = get_system_type()
263
- if "Multi-Agent" in system_type:
264
- st.success(f"πŸ”§ {system_type}")
265
- else:
266
- st.info(f"πŸ”§ {system_type}")
267
  st.markdown('<p class="subtitle">Ask questions about audit reports. Use the sidebar filters to narrow down your search!</p>', unsafe_allow_html=True)
268
 
269
  # Session info
@@ -375,12 +426,85 @@ def main():
375
  if 'input_counter' not in st.session_state:
376
  st.session_state.input_counter = 0
377
 
 
 
 
 
 
 
 
 
 
 
 
378
  user_input = st.text_input(
379
  "Type your message here...",
380
  placeholder="Ask about budget allocations, expenditures, or audit findings...",
381
- key=f"user_input_{st.session_state.input_counter}",
382
- label_visibility="collapsed"
 
383
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
 
385
  with col2:
386
  send_button = st.button("Send", key="send_button", use_container_width=True)
@@ -389,12 +513,11 @@ def main():
389
  if st.button("πŸ—‘οΈ Clear Chat", key="clear_chat_button"):
390
  st.session_state.reset_conversation = True
391
  # Clear all conversation files
392
- import os
393
- conversations_dir = "conversations"
394
- if os.path.exists(conversations_dir):
395
- for file in os.listdir(conversations_dir):
396
- if file.endswith('.json'):
397
- os.remove(os.path.join(conversations_dir, file))
398
  st.rerun()
399
 
400
  # Handle user input
@@ -487,11 +610,11 @@ def main():
487
  filename = getattr(doc, 'metadata', {}).get('filename', 'Unknown')
488
  unique_filenames.add(filename)
489
 
490
- st.markdown(f"**Found {len(sources)} document chunks from {len(unique_filenames)} unique documents (showing top 10):**")
491
  if len(unique_filenames) < len(sources):
492
  st.info(f"πŸ’‘ **Note**: Each document is split into multiple chunks. You're seeing {len(sources)} chunks from {len(unique_filenames)} documents.")
493
 
494
- for i, doc in enumerate(sources[:10]): # Show top 10
495
  # Get relevance score and ID if available
496
  metadata = getattr(doc, 'metadata', {})
497
  score = metadata.get('reranked_score', metadata.get('original_score', None))
@@ -543,180 +666,187 @@ def main():
543
  if 'feedback_submitted' not in st.session_state:
544
  st.session_state.feedback_submitted = False
545
 
546
- # Feedback form
547
- with st.form("feedback_form", clear_on_submit=False):
548
- col1, col2 = st.columns([1, 1])
549
-
550
- with col1:
551
- feedback_score = st.slider(
552
- "Rate this conversation (1-5)",
553
- min_value=1,
554
- max_value=5,
555
- help="How satisfied are you with the conversation?"
556
- )
557
-
558
- with col2:
559
- is_feedback_about_last_retrieval = st.checkbox(
560
- "Feedback about last retrieval only",
561
- value=True,
562
- help="If checked, feedback applies to the most recent document retrieval"
563
- )
564
-
565
- open_ended_feedback = st.text_area(
566
- "Your feedback (optional)",
567
- placeholder="Tell us what went well or what could be improved...",
568
- height=100
569
- )
570
-
571
- # Disable submit if no score selected
572
- submit_disabled = feedback_score is None
573
-
574
- submitted = st.form_submit_button(
575
- "πŸ“€ Submit Feedback",
576
- use_container_width=True,
577
- disabled=submit_disabled
578
- )
579
-
580
- if submitted and not st.session_state.feedback_submitted:
581
- # Log the feedback data being submitted
582
- print("=" * 80)
583
- print("πŸ”„ FEEDBACK SUBMISSION: Starting...")
584
- print("=" * 80)
585
- st.write("πŸ” **Debug: Feedback Data Being Submitted:**")
586
 
587
- # Create feedback data dictionary
588
- feedback_dict = {
589
- "open_ended_feedback": open_ended_feedback,
590
- "score": feedback_score,
591
- "is_feedback_about_last_retrieval": is_feedback_about_last_retrieval,
592
- "retrieved_data": st.session_state.rag_retrieval_history.copy() if st.session_state.rag_retrieval_history else [],
593
- "conversation_id": st.session_state.conversation_id,
594
- "timestamp": time.time(),
595
- "message_count": len(st.session_state.messages),
596
- "has_retrievals": has_retrievals,
597
- "retrieval_count": len(st.session_state.rag_retrieval_history)
598
- }
599
 
600
- print(f"πŸ“ FEEDBACK SUBMISSION: Score={feedback_score}, Retrievals={len(st.session_state.rag_retrieval_history) if st.session_state.rag_retrieval_history else 0}")
 
 
 
 
 
601
 
602
- # Create UserFeedback dataclass instance
603
- feedback_obj = None # Initialize outside try block
604
- try:
605
- feedback_obj = create_feedback_from_dict(feedback_dict)
606
- print(f"βœ… FEEDBACK SUBMISSION: Feedback object created - ID={feedback_obj.feedback_id}")
607
- st.write(f"βœ… **Feedback Object Created**")
608
- st.write(f"- Feedback ID: {feedback_obj.feedback_id}")
609
- st.write(f"- Score: {feedback_obj.score}/5")
610
- st.write(f"- Has Retrievals: {feedback_obj.has_retrievals}")
611
-
612
- # Convert back to dict for JSON serialization
613
- feedback_data = feedback_obj.to_dict()
614
- except Exception as e:
615
- print(f"❌ FEEDBACK SUBMISSION: Failed to create feedback object: {e}")
616
- st.error(f"Failed to create feedback object: {e}")
617
- feedback_data = feedback_dict
618
-
619
- # Display the data being submitted
620
- st.json(feedback_data)
621
 
622
- # Save feedback to file - use absolute path in /app to ensure writability
623
- feedback_dir = Path("/app/feedback")
624
- try:
625
- # Ensure directory exists with write permissions (777 for compatibility)
626
- feedback_dir.mkdir(parents=True, mode=0o777, exist_ok=True)
627
- except (PermissionError, OSError) as e:
628
- logger.warning(f"Could not create feedback directory at {feedback_dir}: {e}")
629
- # Fallback to relative path
630
- feedback_dir = Path("feedback")
631
- feedback_dir.mkdir(parents=True, mode=0o777, exist_ok=True)
632
 
633
- feedback_file = feedback_dir / f"feedback_{st.session_state.conversation_id}_{int(time.time())}.json"
 
 
 
 
634
 
635
- try:
636
- # Ensure parent directory exists before writing
637
- feedback_file.parent.mkdir(parents=True, mode=0o777, exist_ok=True)
638
-
639
- # Save to local file
640
- print(f"πŸ’Ύ FEEDBACK SAVE: Saving to local file: {feedback_file}")
641
- with open(feedback_file, 'w') as f:
642
- json.dump(feedback_data, f, indent=2, default=str)
643
 
644
- print(f"βœ… FEEDBACK SAVE: Local file saved successfully")
645
- st.success("βœ… Thank you for your feedback! It has been saved locally.")
646
- st.balloons()
 
 
 
 
 
 
 
 
 
647
 
648
- # Save to Snowflake if enabled and credentials available
649
- logger.info("πŸ”„ FEEDBACK SAVE: Starting Snowflake save process...")
650
- logger.info(f"πŸ“Š FEEDBACK SAVE: feedback_obj={'exists' if feedback_obj else 'None'}")
651
 
 
 
652
  try:
653
- import os
654
- snowflake_enabled = os.getenv("SNOWFLAKE_ENABLED", "false").lower() == "true"
655
- logger.info(f"πŸ” SNOWFLAKE CHECK: enabled={snowflake_enabled}")
 
 
 
656
 
657
- if snowflake_enabled:
658
- if feedback_obj:
659
- try:
660
- from src.reporting.snowflake_connector import save_to_snowflake
661
- logger.info("πŸ“€ SNOWFLAKE UI: Attempting to save feedback to Snowflake...")
662
- print("πŸ“€ SNOWFLAKE UI: Attempting to save feedback to Snowflake...") # Also print to terminal
663
-
664
- if save_to_snowflake(feedback_obj):
665
- logger.info("βœ… SNOWFLAKE UI: Successfully saved to Snowflake")
666
- print("βœ… SNOWFLAKE UI: Successfully saved to Snowflake") # Also print to terminal
667
- st.success("βœ… Feedback also saved to Snowflake!")
668
- else:
669
- logger.warning("⚠️ SNOWFLAKE UI: Save failed")
670
- print("⚠️ SNOWFLAKE UI: Save failed") # Also print to terminal
671
- st.warning("⚠️ Snowflake save failed, but local save succeeded")
672
- except Exception as e:
673
- logger.error(f"❌ SNOWFLAKE UI ERROR: {e}")
674
- print(f"❌ SNOWFLAKE UI ERROR: {e}") # Also print to terminal
675
- import traceback
676
- traceback.print_exc() # Print full traceback to terminal
677
- st.warning(f"⚠️ Could not save to Snowflake: {e}")
678
- else:
679
- logger.warning("⚠️ SNOWFLAKE UI: Skipping (feedback object not created)")
680
- print("⚠️ SNOWFLAKE UI: Skipping (feedback object not created)") # Also print to terminal
681
- st.warning("⚠️ Skipping Snowflake save (feedback object not created)")
682
- else:
683
- logger.info("πŸ’‘ SNOWFLAKE UI: Integration disabled")
684
- print("πŸ’‘ SNOWFLAKE UI: Integration disabled") # Also print to terminal
685
- st.info("πŸ’‘ Snowflake integration disabled (set SNOWFLAKE_ENABLED=true to enable)")
686
- except NameError as e:
687
- import traceback
688
- traceback.print_exc()
689
- logger.error(f"❌ NameError in Snowflake save: {e}")
690
- print(f"❌ NameError in Snowflake save: {e}") # Also print to terminal
691
- st.warning(f"⚠️ Snowflake save error: {e}")
692
  except Exception as e:
693
- logger.error(f"❌ Exception in Snowflake save: {type(e).__name__}: {e}")
694
- print(f"❌ Exception in Snowflake save: {type(e).__name__}: {e}") # Also print to terminal
695
- st.warning(f"⚠️ Snowflake save error: {e}")
696
 
697
- # Mark feedback as submitted to prevent resubmission
698
- st.session_state.feedback_submitted = True
699
 
700
- print("=" * 80)
701
- print(f"βœ… FEEDBACK SUBMISSION: Completed successfully")
702
- print("=" * 80)
 
 
 
 
 
 
 
703
 
704
- # Log file location
705
- st.info(f"πŸ“ Feedback saved to: {feedback_file}")
706
 
707
- except Exception as e:
708
- print(f"❌ FEEDBACK SUBMISSION: Error saving feedback: {e}")
709
- print(f"❌ FEEDBACK SUBMISSION: Error type: {type(e).__name__}")
710
- import traceback
711
- traceback.print_exc()
712
- st.error(f"❌ Error saving feedback: {e}")
713
- st.write(f"Debug error: {str(e)}")
714
-
715
- elif st.session_state.feedback_submitted:
716
- st.success("βœ… Feedback already submitted for this conversation!")
717
- if st.button("πŸ”„ Submit New Feedback", key="new_feedback_button"):
718
- st.session_state.feedback_submitted = False
719
- st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
720
 
721
  # Display retrieval history stats
722
  if st.session_state.rag_retrieval_history:
@@ -738,6 +868,93 @@ def main():
738
  })
739
  st.markdown("---")
740
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
741
  # Auto-scroll to bottom
742
  st.markdown("""
743
  <script>
 
3
  """
4
 
5
  import os
6
+
7
+ import time
8
+ import json
9
+ import uuid
10
+ import logging
11
+ import traceback
12
+ from pathlib import Path
13
+
14
+
15
+ import streamlit as st
16
+ from langchain_core.messages import HumanMessage, AIMessage
17
+
18
+ from multi_agent_chatbot import get_multi_agent_chatbot
19
+ from smart_chatbot import get_chatbot as get_smart_chatbot
20
+ from src.reporting.feedback_schema import create_feedback_from_dict
21
+ from src.reporting.snowflake_connector import save_to_snowflake
22
+ from src.config.paths import (
23
+ IS_DEPLOYED,
24
+ PROJECT_DIR,
25
+ HF_CACHE_DIR,
26
+ FEEDBACK_DIR,
27
+ CONVERSATIONS_DIR,
28
+ )
29
 
30
  # ===== CRITICAL: Fix OMP_NUM_THREADS FIRST, before ANY other imports =====
31
  # Some libraries load at import time and will fail if OMP_NUM_THREADS is invalid
 
51
 
52
  # ===== Setup HuggingFace cache directories BEFORE any model imports =====
53
  # CRITICAL: Set these before any imports that might use HuggingFace (like sentence-transformers)
54
+ # Only override cache directories in deployed environment (local uses defaults)
55
+ if IS_DEPLOYED and HF_CACHE_DIR:
56
+ cache_dir = str(HF_CACHE_DIR)
57
+ os.environ["HF_HOME"] = cache_dir
58
+ os.environ["TRANSFORMERS_CACHE"] = cache_dir
59
+ os.environ["HF_DATASETS_CACHE"] = cache_dir
60
+ os.environ["HF_HUB_CACHE"] = cache_dir
61
+ os.environ["SENTENCE_TRANSFORMERS_HOME"] = cache_dir
62
+
63
+ # Ensure cache directory exists (created in Dockerfile, but ensure it's there)
64
+ try:
65
+ os.makedirs(cache_dir, mode=0o755, exist_ok=True)
66
+ except (PermissionError, OSError):
67
+ # If we can't create it, log but continue (might already exist from Dockerfile)
68
+ pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
  # Configure logging
71
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
72
  logger = logging.getLogger(__name__)
73
 
74
  # Log environment setup for debugging
75
+ logger.info(f"🌍 Environment: {'DEPLOYED' if IS_DEPLOYED else 'LOCAL'}")
76
+ logger.info(f"πŸ“ PROJECT_DIR: {PROJECT_DIR}")
77
+ logger.info(f"πŸ“ HuggingFace cache: {os.environ.get('HF_HOME', 'DEFAULT (not overridden)')}")
78
  logger.info(f"πŸ”§ OMP_NUM_THREADS: {os.environ.get('OMP_NUM_THREADS', 'NOT SET')}")
79
 
80
 
 
104
  margin-bottom: 2rem;
105
  }
106
 
107
+ .example-questions-header {
108
+ text-align: center;
109
+ margin-bottom: 1rem;
110
+ }
111
+
112
+ .example-questions-description {
113
+ text-align: center;
114
+ color: #666;
115
+ margin-bottom: 2rem;
116
+ }
117
+
118
+ /* Hide ALL default Streamlit text input help messages about Enter key */
119
+ /* This is the key one - hides "Press Enter to apply" message inside input field */
120
+ div[data-testid="InputInstructions"],
121
+ span[data-testid="InputInstructions"],
122
+ *[data-testid="InputInstructions"] {
123
+ display: none !important;
124
+ visibility: hidden !important;
125
+ opacity: 0 !important;
126
+ height: 0 !important;
127
+ width: 0 !important;
128
+ overflow: hidden !important;
129
+ position: absolute !important;
130
+ left: -9999px !important;
131
+ }
132
+
133
+ /* Also hide other potential locations */
134
+ div[data-testid="stTextInput"] + div > small,
135
+ div[data-testid="stTextInput"] ~ div > small,
136
+ div[data-testid="stTextInputContainer"] + div > small,
137
+ div[data-testid="stTextInputContainer"] ~ div > small,
138
+ div[data-baseweb="input"] + div > small,
139
+ div[data-baseweb="input"] ~ div > small {
140
+ display: none !important;
141
+ visibility: hidden !important;
142
+ opacity: 0 !important;
143
+ height: 0 !important;
144
+ overflow: hidden !important;
145
+ }
146
+
147
+ /* Custom help text for input */
148
+ .input-help-text {
149
+ font-size: 0.85rem;
150
+ color: #666;
151
+ margin-top: 0.25rem;
152
+ text-align: left;
153
+ }
154
+
155
  .session-info {
156
  background-color: #f0f2f6;
157
  padding: 10px;
 
276
  @st.cache_data
277
  def load_filter_options():
278
  try:
279
+ filter_options_path = PROJECT_DIR / "src" / "config" / "filter_options.json"
280
+ with open(filter_options_path, "r") as f:
281
  return json.load(f)
282
  except FileNotFoundError:
283
+ st.info(f"Looking for filter_options.json in: {PROJECT_DIR / 'src' / 'config'}")
284
  st.error("filter_options.json not found. Please run the metadata analysis script.")
285
  return {"sources": [], "years": [], "districts": [], 'filenames': []}
286
 
 
313
  st.session_state.reset_conversation = False
314
  st.rerun()
315
 
316
+ # Header - centered
317
+ st.markdown('<h1 class="main-header">🤖 Intelligent Audit Report Chatbot</h1>', unsafe_allow_html=True)
 
 
 
 
 
 
 
 
318
  st.markdown('<p class="subtitle">Ask questions about audit reports. Use the sidebar filters to narrow down your search!</p>', unsafe_allow_html=True)
319
 
320
  # Session info
 
426
  if 'input_counter' not in st.session_state:
427
  st.session_state.input_counter = 0
428
 
429
+ # Handle pending question from example questions section
430
+ if 'pending_question' in st.session_state and st.session_state.pending_question:
431
+ default_value = st.session_state.pending_question
432
+ # Increment counter to force new input widget
433
+ st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
434
+ del st.session_state.pending_question
435
+ key_suffix = st.session_state.input_counter
436
+ else:
437
+ default_value = ""
438
+ key_suffix = st.session_state.input_counter
439
+
440
  user_input = st.text_input(
441
  "Type your message here...",
442
  placeholder="Ask about budget allocations, expenditures, or audit findings...",
443
+ key=f"user_input_{key_suffix}",
444
+ label_visibility="collapsed",
445
+ value=default_value if default_value else None
446
  )
447
+
448
+ # Use JavaScript to specifically target and hide "Press Enter to apply" message
449
+ st.markdown("""
450
+ <script>
451
+ (function() {
452
+ // Hide InputInstructions element (contains "Press Enter to apply")
453
+ function hideInputInstructions() {
454
+ // Target the specific Streamlit element
455
+ const instructions = document.querySelector('[data-testid="InputInstructions"]');
456
+ if (instructions) {
457
+ instructions.style.display = 'none';
458
+ instructions.style.visibility = 'hidden';
459
+ instructions.style.opacity = '0';
460
+ instructions.style.height = '0';
461
+ instructions.style.width = '0';
462
+ instructions.style.overflow = 'hidden';
463
+ instructions.style.position = 'absolute';
464
+ instructions.style.left = '-9999px';
465
+ }
466
+
467
+ // Also search for any text containing "Press Enter" or "apply" inside input containers
468
+ const allElements = document.querySelectorAll('*');
469
+ allElements.forEach(el => {
470
+ const text = el.textContent || el.innerText || '';
471
+ if ((text.toLowerCase().includes('press enter') ||
472
+ text.toLowerCase().includes('enter to') ||
473
+ text.toLowerCase().includes('to apply')) &&
474
+ (el.tagName === 'SPAN' || el.tagName === 'DIV' || el.tagName === 'SMALL')) {
475
+ const style = window.getComputedStyle(el);
476
+ const fontSize = parseFloat(style.fontSize);
477
+ // Hide if it's small text (likely help text)
478
+ if (fontSize < 14 || el.hasAttribute('data-testid')) {
479
+ el.style.display = 'none';
480
+ el.style.visibility = 'hidden';
481
+ el.style.height = '0';
482
+ el.style.overflow = 'hidden';
483
+ }
484
+ }
485
+ });
486
+ }
487
+
488
+ // Run immediately and after delays to catch dynamic elements
489
+ hideInputInstructions();
490
+ setTimeout(hideInputInstructions, 50);
491
+ setTimeout(hideInputInstructions, 100);
492
+ setTimeout(hideInputInstructions, 500);
493
+
494
+ // Observe for new elements added by Streamlit
495
+ const observer = new MutationObserver(function(mutations) {
496
+ hideInputInstructions();
497
+ });
498
+ observer.observe(document.body, { childList: true, subtree: true, attributes: true });
499
+ })();
500
+ </script>
501
+ """, unsafe_allow_html=True)
502
+
503
+ # # Show custom help text below input - this replaces the default "Press Enter" message
504
+ # st.markdown(
505
+ # "<div class='input-help-text'>πŸ’‘ Press the <strong>Send</strong> button to submit your question</div>",
506
+ # unsafe_allow_html=True
507
+ # )
508
 
509
  with col2:
510
  send_button = st.button("Send", key="send_button", use_container_width=True)
 
513
  if st.button("πŸ—‘οΈ Clear Chat", key="clear_chat_button"):
514
  st.session_state.reset_conversation = True
515
  # Clear all conversation files
516
+ conversations_path = CONVERSATIONS_DIR
517
+ if conversations_path.exists():
518
+ for file in conversations_path.iterdir():
519
+ if file.suffix == '.json':
520
+ file.unlink()
 
521
  st.rerun()
522
 
523
  # Handle user input
 
610
  filename = getattr(doc, 'metadata', {}).get('filename', 'Unknown')
611
  unique_filenames.add(filename)
612
 
613
+ st.markdown(f"**Found {len(sources)} document chunks from {len(unique_filenames)} unique documents (showing top 20):**")
614
  if len(unique_filenames) < len(sources):
615
  st.info(f"πŸ’‘ **Note**: Each document is split into multiple chunks. You're seeing {len(sources)} chunks from {len(unique_filenames)} documents.")
616
 
617
+ for i, doc in enumerate(sources): # Show top 10
618
  # Get relevance score and ID if available
619
  metadata = getattr(doc, 'metadata', {})
620
  score = metadata.get('reranked_score', metadata.get('original_score', None))
 
666
  if 'feedback_submitted' not in st.session_state:
667
  st.session_state.feedback_submitted = False
668
 
669
+ # Feedback form - only show if feedback not already submitted
670
+ if not st.session_state.feedback_submitted:
671
+ with st.form("feedback_form", clear_on_submit=False):
672
+ col1, col2 = st.columns([1, 1])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
673
 
674
+ with col1:
675
+ feedback_score = st.slider(
676
+ "Rate this conversation (1-5)",
677
+ min_value=1,
678
+ max_value=5,
679
+ help="How satisfied are you with the conversation?"
680
+ )
 
 
 
 
 
681
 
682
+ with col2:
683
+ is_feedback_about_last_retrieval = st.checkbox(
684
+ "Feedback about last retrieval only",
685
+ value=True,
686
+ help="If checked, feedback applies to the most recent document retrieval"
687
+ )
688
 
689
+ open_ended_feedback = st.text_area(
690
+ "Your feedback (optional)",
691
+ placeholder="Tell us what went well or what could be improved...",
692
+ height=100
693
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
694
 
695
+ # Disable submit if no score selected
696
+ submit_disabled = feedback_score is None
 
 
 
 
 
 
 
 
697
 
698
+ submitted = st.form_submit_button(
699
+ "πŸ“€ Submit Feedback",
700
+ use_container_width=True,
701
+ disabled=submit_disabled
702
+ )
703
 
704
+ if submitted:
705
+ # Log the feedback data being submitted
706
+ print("=" * 80)
707
+ print("πŸ”„ FEEDBACK SUBMISSION: Starting...")
708
+ print("=" * 80)
709
+ st.write("πŸ” **Debug: Feedback Data Being Submitted:**")
 
 
710
 
711
+ # Create feedback data dictionary
712
+ feedback_dict = {
713
+ "open_ended_feedback": open_ended_feedback,
714
+ "score": feedback_score,
715
+ "is_feedback_about_last_retrieval": is_feedback_about_last_retrieval,
716
+ "retrieved_data": st.session_state.rag_retrieval_history.copy() if st.session_state.rag_retrieval_history else [],
717
+ "conversation_id": st.session_state.conversation_id,
718
+ "timestamp": time.time(),
719
+ "message_count": len(st.session_state.messages),
720
+ "has_retrievals": has_retrievals,
721
+ "retrieval_count": len(st.session_state.rag_retrieval_history)
722
+ }
723
 
724
+ print(f"πŸ“ FEEDBACK SUBMISSION: Score={feedback_score}, Retrievals={len(st.session_state.rag_retrieval_history) if st.session_state.rag_retrieval_history else 0}")
 
 
725
 
726
+ # Create UserFeedback dataclass instance
727
+ feedback_obj = None # Initialize outside try block
728
  try:
729
+ feedback_obj = create_feedback_from_dict(feedback_dict)
730
+ print(f"βœ… FEEDBACK SUBMISSION: Feedback object created - ID={feedback_obj.feedback_id}")
731
+ st.write(f"βœ… **Feedback Object Created**")
732
+ st.write(f"- Feedback ID: {feedback_obj.feedback_id}")
733
+ st.write(f"- Score: {feedback_obj.score}/5")
734
+ st.write(f"- Has Retrievals: {feedback_obj.has_retrievals}")
735
 
736
+ # Convert back to dict for JSON serialization
737
+ feedback_data = feedback_obj.to_dict()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
738
  except Exception as e:
739
+ print(f"❌ FEEDBACK SUBMISSION: Failed to create feedback object: {e}")
740
+ st.error(f"Failed to create feedback object: {e}")
741
+ feedback_data = feedback_dict
742
 
743
+ # Display the data being submitted
744
+ st.json(feedback_data)
745
 
746
+ # Save feedback to file - use PROJECT_DIR to ensure writability
747
+ feedback_dir = FEEDBACK_DIR
748
+ try:
749
+ # Ensure directory exists with write permissions (777 for compatibility)
750
+ feedback_dir.mkdir(parents=True, mode=0o777, exist_ok=True)
751
+ except (PermissionError, OSError) as e:
752
+ logger.warning(f"Could not create feedback directory at {feedback_dir}: {e}")
753
+ # Fallback to relative path
754
+ feedback_dir = Path("feedback")
755
+ feedback_dir.mkdir(parents=True, mode=0o777, exist_ok=True)
756
 
757
+ feedback_file = feedback_dir / f"feedback_{st.session_state.conversation_id}_{int(time.time())}.json"
 
758
 
759
+ try:
760
+ # Ensure parent directory exists before writing
761
+ feedback_file.parent.mkdir(parents=True, mode=0o777, exist_ok=True)
762
+
763
+ # Save to local file
764
+ print(f"πŸ’Ύ FEEDBACK SAVE: Saving to local file: {feedback_file}")
765
+ with open(feedback_file, 'w') as f:
766
+ json.dump(feedback_data, f, indent=2, default=str)
767
+
768
+ print(f"βœ… FEEDBACK SAVE: Local file saved successfully")
769
+ st.success("βœ… Thank you for your feedback! It has been saved locally.")
770
+ st.balloons()
771
+
772
+ # Save to Snowflake if enabled and credentials available
773
+ logger.info("πŸ”„ FEEDBACK SAVE: Starting Snowflake save process...")
774
+ logger.info(f"πŸ“Š FEEDBACK SAVE: feedback_obj={'exists' if feedback_obj else 'None'}")
775
+
776
+ try:
777
+ snowflake_enabled = os.getenv("SNOWFLAKE_ENABLED", "false").lower() == "true"
778
+ logger.info(f"πŸ” SNOWFLAKE CHECK: enabled={snowflake_enabled}")
779
+
780
+ if snowflake_enabled:
781
+ if feedback_obj:
782
+ try:
783
+ logger.info("πŸ“€ SNOWFLAKE UI: Attempting to save feedback to Snowflake...")
784
+ print("πŸ“€ SNOWFLAKE UI: Attempting to save feedback to Snowflake...")
785
+
786
+ if save_to_snowflake(feedback_obj):
787
+ logger.info("βœ… SNOWFLAKE UI: Successfully saved to Snowflake")
788
+ print("βœ… SNOWFLAKE UI: Successfully saved to Snowflake")
789
+ st.success("βœ… Feedback also saved to Snowflake!")
790
+ else:
791
+ logger.warning("⚠️ SNOWFLAKE UI: Save failed")
792
+ print("⚠️ SNOWFLAKE UI: Save failed")
793
+ st.warning("⚠️ Snowflake save failed, but local save succeeded")
794
+ except Exception as e:
795
+ logger.error(f"❌ SNOWFLAKE UI ERROR: {e}")
796
+ print(f"❌ SNOWFLAKE UI ERROR: {e}")
797
+ traceback.print_exc()
798
+ st.warning(f"⚠️ Could not save to Snowflake: {e}")
799
+ else:
800
+ logger.warning("⚠️ SNOWFLAKE UI: Skipping (feedback object not created)")
801
+ print("⚠️ SNOWFLAKE UI: Skipping (feedback object not created)")
802
+ st.warning("⚠️ Skipping Snowflake save (feedback object not created)")
803
+ else:
804
+ logger.info("πŸ’‘ SNOWFLAKE UI: Integration disabled")
805
+ print("πŸ’‘ SNOWFLAKE UI: Integration disabled")
806
+ st.info("πŸ’‘ Snowflake integration disabled (set SNOWFLAKE_ENABLED=true to enable)")
807
+ except NameError as e:
808
+ traceback.print_exc()
809
+ logger.error(f"❌ NameError in Snowflake save: {e}")
810
+ print(f"❌ NameError in Snowflake save: {e}")
811
+ st.warning(f"⚠️ Snowflake save error: {e}")
812
+ except Exception as e:
813
+ logger.error(f"❌ Exception in Snowflake save: {type(e).__name__}: {e}")
814
+ print(f"❌ Exception in Snowflake save: {type(e).__name__}: {e}")
815
+ st.warning(f"⚠️ Snowflake save error: {e}")
816
+
817
+ # Mark feedback as submitted to prevent resubmission
818
+ st.session_state.feedback_submitted = True
819
+
820
+ print("=" * 80)
821
+ print(f"βœ… FEEDBACK SUBMISSION: Completed successfully")
822
+ print("=" * 80)
823
+
824
+ # Log file location
825
+ st.info(f"πŸ“ Feedback saved to: {feedback_file}")
826
+
827
+ except Exception as e:
828
+ print(f"❌ FEEDBACK SUBMISSION: Error saving feedback: {e}")
829
+ print(f"❌ FEEDBACK SUBMISSION: Error type: {type(e).__name__}")
830
+ traceback.print_exc()
831
+ st.error(f"❌ Error saving feedback: {e}")
832
+ st.write(f"Debug error: {str(e)}")
833
+ else:
834
+ # Feedback already submitted - show success message and reset option
835
+ st.success("βœ… Feedback already submitted for this conversation!")
836
+ col1, col2 = st.columns([1, 1])
837
+ with col1:
838
+ if st.button("πŸ”„ Submit New Feedback", key="new_feedback_button", use_container_width=True):
839
+ try:
840
+ st.session_state.feedback_submitted = False
841
+ st.rerun()
842
+ except Exception as e:
843
+ # Handle any Streamlit API exceptions gracefully
844
+ logger.error(f"Error resetting feedback state: {e}")
845
+ st.error(f"Error resetting feedback. Please refresh the page.")
846
+ with col2:
847
+ if st.button("πŸ“‹ View Conversation", key="view_conversation_button", use_container_width=True):
848
+ # Scroll to conversation - this is handled by the auto-scroll at bottom
849
+ pass
850
 
851
  # Display retrieval history stats
852
  if st.session_state.rag_retrieval_history:
 
868
  })
869
  st.markdown("---")
870
 
871
+ # Example Questions Section
872
+ st.markdown("---")
873
+ st.markdown(
874
+ "<h3 class='example-questions-header'>πŸ’‘ Example Questions</h3>",
875
+ unsafe_allow_html=True
876
+ )
877
+ st.markdown(
878
+ "<p class='example-questions-description'>Click on any question below to use it, or modify the editable examples:</p>",
879
+ unsafe_allow_html=True
880
+ )
881
+
882
+ # Initialize example question state
883
+ if 'custom_question_1' not in st.session_state:
884
+ st.session_state.custom_question_1 = "How were administrative costs managed in the PDM implementation, and what issues arose with budget execution regarding staff salaries?"
885
+ if 'custom_question_2' not in st.session_state:
886
+ st.session_state.custom_question_2 = "What did the National Coordinator say about the release of funds for PDM administrative costs in the letter dated 29th September 2022 and how did the funding received affect the activities of the PDCs and PDM SACCOs in the FY 2022/23?"
887
+
888
+ # Question 1: Filename insights (fixed, clickable)
889
+ st.markdown("#### πŸ“„ Question 1: List insights from a specific file")
890
+ col1, col2 = st.columns([3, 1])
891
+ with col1:
892
+ example_q1 = "List couple of insights from the filename."
893
+ st.markdown(f"**Example:** `{example_q1}`")
894
+ st.info("πŸ’‘ **Filter to apply:** Select a Filename from the sidebar panel before asking this question.")
895
+ with col2:
896
+ if st.button("πŸ“‹ Use This Question", key="use_example_1", use_container_width=True):
897
+ st.session_state.pending_question = example_q1
898
+ st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
899
+ st.rerun()
900
+
901
+ st.markdown("---")
902
+
903
+ # Questions 2 & 3: Editable examples
904
+ st.markdown("#### ✏️ Customizable Questions (Edit and use)")
905
+
906
+ # Question 2
907
+ # st.markdown("**Question 2:**")
908
+ custom_q1 = st.text_area(
909
+ "Edit question 2:",
910
+ value=st.session_state.custom_question_1,
911
+ height=80,
912
+ key="edit_question_2",
913
+ help="Modify this question to fit your needs, then click 'Use This Question'"
914
+ )
915
+ col1, col2 = st.columns([1, 4])
916
+ with col1:
917
+ if st.button("πŸ“‹ Use Question 2", key="use_custom_1", use_container_width=True):
918
+ if custom_q1.strip():
919
+ st.session_state.pending_question = custom_q1.strip()
920
+ st.session_state.custom_question_1 = custom_q1.strip()
921
+ st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
922
+ st.rerun()
923
+ else:
924
+ st.warning("Please enter a question first!")
925
+ with col2:
926
+ st.caption("πŸ’‘ Tip: Add specific details like dates, names, or amounts to get more precise answers")
927
+
928
+ st.info("πŸ’‘ **Filter to apply:** Select District(s) and Year(s) sidebar panel before asking this question.")
929
+
930
+ st.markdown("---")
931
+
932
+ # Question 3
933
+ # st.markdown("**Question 3:**")
934
+ custom_q2 = st.text_area(
935
+ "Edit question 3:",
936
+ value=st.session_state.custom_question_2,
937
+ height=80,
938
+ key="edit_question_3",
939
+ help="Modify this question to fit your needs, then click 'Use This Question'"
940
+ )
941
+ col1, col2 = st.columns([1, 4])
942
+ with col1:
943
+ if st.button("πŸ“‹ Use Question 3", key="use_custom_2", use_container_width=True):
944
+ if custom_q2.strip():
945
+ st.session_state.pending_question = custom_q2.strip()
946
+ st.session_state.custom_question_2 = custom_q2.strip()
947
+ st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
948
+ st.rerun()
949
+ else:
950
+ st.warning("Please enter a question first!")
951
+ with col2:
952
+ st.caption("πŸ’‘ Tip: Use specific terms from the documents (e.g., 'PDM', 'SACCOs', 'FY 2022/23')")
953
+
954
+
955
+ # Store selected question for next render (handled in input section above)
956
+ # This ensures the question populates the input field correctly
957
+
958
  # Auto-scroll to bottom
959
  st.markdown("""
960
  <script>
smart_chatbot.py CHANGED
@@ -26,6 +26,7 @@ from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
26
 
27
  from src.pipeline import PipelineManager
28
  from src.config.loader import load_config
 
29
 
30
 
31
  @dataclass
@@ -161,7 +162,7 @@ class IntelligentRAGChatbot:
161
 
162
  # Try to load district whitelist from filter_options.json
163
  try:
164
- fo = Path("filter_options.json")
165
  if fo.exists():
166
  with open(fo) as f:
167
  data = json.load(f)
@@ -174,7 +175,7 @@ class IntelligentRAGChatbot:
174
  except Exception:
175
  self.district_whitelist = self.available_metadata['districts']
176
 
177
- # Enrich whitelist from add_district_metadata.py if available
178
  try:
179
  from add_district_metadata import DistrictMetadataProcessor
180
  proc = DistrictMetadataProcessor()
@@ -195,7 +196,7 @@ class IntelligentRAGChatbot:
195
 
196
  # Get dynamic year list from filter_options.json
197
  try:
198
- fo = Path("filter_options.json")
199
  if fo.exists():
200
  with open(fo) as f:
201
  data = json.load(f)
 
26
 
27
  from src.pipeline import PipelineManager
28
  from src.config.loader import load_config
29
+ from src.config.paths import PROJECT_DIR
30
 
31
 
32
  @dataclass
 
162
 
163
  # Try to load district whitelist from filter_options.json
164
  try:
165
+ fo = PROJECT_DIR / "src" / "config" / "filter_options.json"
166
  if fo.exists():
167
  with open(fo) as f:
168
  data = json.load(f)
 
175
  except Exception:
176
  self.district_whitelist = self.available_metadata['districts']
177
 
178
+ # Enrich whitelist from add_district_metadata.py if available (optional module)
179
  try:
180
  from add_district_metadata import DistrictMetadataProcessor
181
  proc = DistrictMetadataProcessor()
 
196
 
197
  # Get dynamic year list from filter_options.json
198
  try:
199
+ fo = PROJECT_DIR / "src" / "config" / "filter_options.json"
200
  if fo.exists():
201
  with open(fo) as f:
202
  data = json.load(f)