Spaces:
Sleeping
Sleeping
Merge branch 'main' of https://huggingface.co/spaces/akryldigital/audit_assistant
Browse files- app.py +265 -241
- src/reporting/feedback_schema.py +36 -71
- src/reporting/snowflake_connector.py +67 -39
app.py
CHANGED
|
@@ -10,18 +10,19 @@ import uuid
|
|
| 10 |
import logging
|
| 11 |
import traceback
|
| 12 |
from pathlib import Path
|
| 13 |
-
from typing import List, Dict, Any
|
| 14 |
from collections import Counter
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
import streamlit as st
|
| 17 |
-
from langchain_core.messages import HumanMessage, AIMessage
|
| 18 |
import pandas as pd
|
|
|
|
| 19 |
import plotly.express as px
|
|
|
|
| 20 |
|
| 21 |
from multi_agent_chatbot import get_multi_agent_chatbot
|
| 22 |
from smart_chatbot import get_chatbot as get_smart_chatbot
|
| 23 |
-
from src.reporting.feedback_schema import create_feedback_from_dict
|
| 24 |
from src.reporting.snowflake_connector import save_to_snowflake
|
|
|
|
| 25 |
from src.config.paths import (
|
| 26 |
IS_DEPLOYED,
|
| 27 |
PROJECT_DIR,
|
|
@@ -98,6 +99,8 @@ st.markdown("""
|
|
| 98 |
color: #1f77b4;
|
| 99 |
text-align: center;
|
| 100 |
margin-bottom: 1rem;
|
|
|
|
|
|
|
| 101 |
}
|
| 102 |
|
| 103 |
.subtitle {
|
|
@@ -105,54 +108,8 @@ st.markdown("""
|
|
| 105 |
color: #666;
|
| 106 |
text-align: center;
|
| 107 |
margin-bottom: 2rem;
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
.example-questions-header {
|
| 111 |
-
text-align: center;
|
| 112 |
-
margin-bottom: 1rem;
|
| 113 |
-
}
|
| 114 |
-
|
| 115 |
-
.example-questions-description {
|
| 116 |
-
text-align: center;
|
| 117 |
-
color: #666;
|
| 118 |
-
margin-bottom: 2rem;
|
| 119 |
-
}
|
| 120 |
-
|
| 121 |
-
/* Hide ALL default Streamlit text input help messages about Enter key */
|
| 122 |
-
/* This is the key one - hides "Press Enter to apply" message inside input field */
|
| 123 |
-
div[data-testid="InputInstructions"],
|
| 124 |
-
span[data-testid="InputInstructions"],
|
| 125 |
-
*[data-testid="InputInstructions"] {
|
| 126 |
-
display: none !important;
|
| 127 |
-
visibility: hidden !important;
|
| 128 |
-
opacity: 0 !important;
|
| 129 |
-
height: 0 !important;
|
| 130 |
-
width: 0 !important;
|
| 131 |
-
overflow: hidden !important;
|
| 132 |
-
position: absolute !important;
|
| 133 |
-
left: -9999px !important;
|
| 134 |
-
}
|
| 135 |
-
|
| 136 |
-
/* Also hide other potential locations */
|
| 137 |
-
div[data-testid="stTextInput"] + div > small,
|
| 138 |
-
div[data-testid="stTextInput"] ~ div > small,
|
| 139 |
-
div[data-testid="stTextInputContainer"] + div > small,
|
| 140 |
-
div[data-testid="stTextInputContainer"] ~ div > small,
|
| 141 |
-
div[data-baseweb="input"] + div > small,
|
| 142 |
-
div[data-baseweb="input"] ~ div > small {
|
| 143 |
-
display: none !important;
|
| 144 |
-
visibility: hidden !important;
|
| 145 |
-
opacity: 0 !important;
|
| 146 |
-
height: 0 !important;
|
| 147 |
-
overflow: hidden !important;
|
| 148 |
-
}
|
| 149 |
-
|
| 150 |
-
/* Custom help text for input */
|
| 151 |
-
.input-help-text {
|
| 152 |
-
font-size: 0.85rem;
|
| 153 |
-
color: #666;
|
| 154 |
-
margin-top: 0.25rem;
|
| 155 |
-
text-align: left;
|
| 156 |
}
|
| 157 |
|
| 158 |
.session-info {
|
|
@@ -304,6 +261,114 @@ def serialize_documents(sources):
|
|
| 304 |
|
| 305 |
return serialized
|
| 306 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
def extract_chunk_statistics(sources: List[Any]) -> Dict[str, Any]:
|
| 308 |
"""Extract statistics from retrieved chunks."""
|
| 309 |
if not sources:
|
|
@@ -483,11 +548,10 @@ def display_chunk_statistics_table(stats: Dict[str, Any], title: str = "Retrieva
|
|
| 483 |
return
|
| 484 |
|
| 485 |
# Wrap in styled container
|
| 486 |
-
|
| 487 |
st.markdown('<div class="retrieval-distribution-container">', unsafe_allow_html=True)
|
| 488 |
-
=======
|
| 489 |
# st.markdown('<div class="retrieval-distribution-container">', unsafe_allow_html=True)
|
| 490 |
-
|
| 491 |
|
| 492 |
st.subheader(f"π {title}")
|
| 493 |
|
|
@@ -604,7 +668,7 @@ def main():
|
|
| 604 |
st.session_state.reset_conversation = False
|
| 605 |
st.rerun()
|
| 606 |
|
| 607 |
-
# Header -
|
| 608 |
st.markdown('<h1 class="main-header">π€ Intelligent Audit Report Chatbot</h1>', unsafe_allow_html=True)
|
| 609 |
st.markdown('<p class="subtitle">Ask questions about audit reports. Use the sidebar filters to narrow down your search!</p>', unsafe_allow_html=True)
|
| 610 |
|
|
@@ -631,21 +695,15 @@ def main():
|
|
| 631 |
|
| 632 |
2. **Leave filters empty** to search across all data
|
| 633 |
|
| 634 |
-
3. **Type your question** in the chat
|
| 635 |
|
| 636 |
-
4. **
|
| 637 |
|
| 638 |
#### π‘ Tips
|
| 639 |
|
| 640 |
- Use specific questions for better results
|
| 641 |
- Combine multiple filters for precise searches
|
| 642 |
-
- Check the "Retrieved Documents" tab to
|
| 643 |
-
|
| 644 |
-
#### π¬ Feedback Section
|
| 645 |
-
|
| 646 |
-
- Rate your experience (1-5 stars)
|
| 647 |
-
- Provide optional text feedback
|
| 648 |
-
- Located at the bottom of the page
|
| 649 |
|
| 650 |
#### β οΈ Important
|
| 651 |
|
|
@@ -670,13 +728,11 @@ def main():
|
|
| 670 |
help="Choose specific reports to search. When enabled, all other filters are ignored."
|
| 671 |
)
|
| 672 |
st.markdown('</div>', unsafe_allow_html=True)
|
| 673 |
-
|
| 674 |
-
st.markdown('---')
|
| 675 |
|
| 676 |
# Determine if filename filter is active
|
| 677 |
filename_mode = len(selected_filenames) > 0
|
| 678 |
# Sources filter
|
| 679 |
-
|
| 680 |
st.markdown('<div class="filter-title">π Sources</div>', unsafe_allow_html=True)
|
| 681 |
selected_sources = st.multiselect(
|
| 682 |
"Select sources:",
|
|
@@ -771,67 +827,6 @@ def main():
|
|
| 771 |
label_visibility="collapsed",
|
| 772 |
value=default_value if default_value else None
|
| 773 |
)
|
| 774 |
-
|
| 775 |
-
# Use JavaScript to specifically target and hide "Press Enter to apply" message
|
| 776 |
-
st.markdown("""
|
| 777 |
-
<script>
|
| 778 |
-
(function() {
|
| 779 |
-
// Hide InputInstructions element (contains "Press Enter to apply")
|
| 780 |
-
function hideInputInstructions() {
|
| 781 |
-
// Target the specific Streamlit element
|
| 782 |
-
const instructions = document.querySelector('[data-testid="InputInstructions"]');
|
| 783 |
-
if (instructions) {
|
| 784 |
-
instructions.style.display = 'none';
|
| 785 |
-
instructions.style.visibility = 'hidden';
|
| 786 |
-
instructions.style.opacity = '0';
|
| 787 |
-
instructions.style.height = '0';
|
| 788 |
-
instructions.style.width = '0';
|
| 789 |
-
instructions.style.overflow = 'hidden';
|
| 790 |
-
instructions.style.position = 'absolute';
|
| 791 |
-
instructions.style.left = '-9999px';
|
| 792 |
-
}
|
| 793 |
-
|
| 794 |
-
// Also search for any text containing "Press Enter" or "apply" inside input containers
|
| 795 |
-
const allElements = document.querySelectorAll('*');
|
| 796 |
-
allElements.forEach(el => {
|
| 797 |
-
const text = el.textContent || el.innerText || '';
|
| 798 |
-
if ((text.toLowerCase().includes('press enter') ||
|
| 799 |
-
text.toLowerCase().includes('enter to') ||
|
| 800 |
-
text.toLowerCase().includes('to apply')) &&
|
| 801 |
-
(el.tagName === 'SPAN' || el.tagName === 'DIV' || el.tagName === 'SMALL')) {
|
| 802 |
-
const style = window.getComputedStyle(el);
|
| 803 |
-
const fontSize = parseFloat(style.fontSize);
|
| 804 |
-
// Hide if it's small text (likely help text)
|
| 805 |
-
if (fontSize < 14 || el.hasAttribute('data-testid')) {
|
| 806 |
-
el.style.display = 'none';
|
| 807 |
-
el.style.visibility = 'hidden';
|
| 808 |
-
el.style.height = '0';
|
| 809 |
-
el.style.overflow = 'hidden';
|
| 810 |
-
}
|
| 811 |
-
}
|
| 812 |
-
});
|
| 813 |
-
}
|
| 814 |
-
|
| 815 |
-
// Run immediately and after delays to catch dynamic elements
|
| 816 |
-
hideInputInstructions();
|
| 817 |
-
setTimeout(hideInputInstructions, 50);
|
| 818 |
-
setTimeout(hideInputInstructions, 100);
|
| 819 |
-
setTimeout(hideInputInstructions, 500);
|
| 820 |
-
|
| 821 |
-
// Observe for new elements added by Streamlit
|
| 822 |
-
const observer = new MutationObserver(function(mutations) {
|
| 823 |
-
hideInputInstructions();
|
| 824 |
-
});
|
| 825 |
-
observer.observe(document.body, { childList: true, subtree: true, attributes: true });
|
| 826 |
-
})();
|
| 827 |
-
</script>
|
| 828 |
-
""", unsafe_allow_html=True)
|
| 829 |
-
|
| 830 |
-
# # Show custom help text below input - this replaces the default "Press Enter" message
|
| 831 |
-
# st.markdown(
|
| 832 |
-
# "<div class='input-help-text'>π‘ Press the <strong>Send</strong> button to submit your question</div>",
|
| 833 |
-
# unsafe_allow_html=True
|
| 834 |
-
# )
|
| 835 |
|
| 836 |
with col2:
|
| 837 |
send_button = st.button("Send", key="send_button", use_container_width=True)
|
|
@@ -934,8 +929,7 @@ def main():
|
|
| 934 |
# Count unique filenames
|
| 935 |
unique_filenames = set()
|
| 936 |
for doc in sources:
|
| 937 |
-
|
| 938 |
-
filename = metadata.get('filename', 'Unknown')
|
| 939 |
unique_filenames.add(filename)
|
| 940 |
|
| 941 |
st.markdown(f"**Found {len(sources)} document chunks from {len(unique_filenames)} unique documents (showing top 20):**")
|
|
@@ -990,44 +984,6 @@ def main():
|
|
| 990 |
st.info("No documents were retrieved for the last query.")
|
| 991 |
else:
|
| 992 |
st.info("No documents have been retrieved yet. Start a conversation to see retrieved documents here.")
|
| 993 |
-
|
| 994 |
-
# Display retrieval history stats
|
| 995 |
-
st.markdown("---")
|
| 996 |
-
if st.session_state.rag_retrieval_history:
|
| 997 |
-
st.markdown("#### π Retrieval History")
|
| 998 |
-
st.markdown(f"This conversation has **{len(st.session_state.rag_retrieval_history)}** retrieval entries.")
|
| 999 |
-
|
| 1000 |
-
with st.expander(f"View {len(st.session_state.rag_retrieval_history)} retrieval entries", expanded=False):
|
| 1001 |
-
for idx, entry in enumerate(st.session_state.rag_retrieval_history, 1):
|
| 1002 |
-
with st.expander(f"Entry {idx}: {entry.get('rag_query_expansion', 'N/A')[:50]}...", expanded=False):
|
| 1003 |
-
st.markdown(f"**Query:** {entry.get('rag_query_expansion', 'N/A')}")
|
| 1004 |
-
st.markdown(f"**Documents Retrieved:** {len(entry.get('docs_retrieved', []))}")
|
| 1005 |
-
|
| 1006 |
-
# Show conversation up to this point
|
| 1007 |
-
conversation = entry.get('conversation_up_to', [])
|
| 1008 |
-
if conversation:
|
| 1009 |
-
st.markdown("**Conversation Context:**")
|
| 1010 |
-
for msg in conversation[-3:]: # Show last 3 messages
|
| 1011 |
-
role = msg.get('type', 'unknown')
|
| 1012 |
-
content = msg.get('content', '')[:200] + "..." if len(msg.get('content', '')) > 200 else msg.get('content', '')
|
| 1013 |
-
if role == 'human':
|
| 1014 |
-
st.markdown(f"- **You:** {content}")
|
| 1015 |
-
elif role == 'ai':
|
| 1016 |
-
st.markdown(f"- **Bot:** {content}")
|
| 1017 |
-
|
| 1018 |
-
# Show retrieved documents summary
|
| 1019 |
-
docs = entry.get('docs_retrieved', [])
|
| 1020 |
-
if docs:
|
| 1021 |
-
st.markdown("**Retrieved Documents:**")
|
| 1022 |
-
for doc_idx, doc in enumerate(docs[:5], 1): # Show first 5
|
| 1023 |
-
doc_meta = doc.get('metadata', {})
|
| 1024 |
-
filename = doc_meta.get('filename', 'Unknown')[:50]
|
| 1025 |
-
st.markdown(f"{doc_idx}. {filename}")
|
| 1026 |
-
if len(docs) > 5:
|
| 1027 |
-
st.markdown(f"... and {len(docs) - 5} more documents")
|
| 1028 |
-
else:
|
| 1029 |
-
st.markdown("---")
|
| 1030 |
-
st.info("π Retrieval history will appear here after you start asking questions.")
|
| 1031 |
|
| 1032 |
# Feedback Dashboard Section
|
| 1033 |
st.markdown("---")
|
|
@@ -1089,17 +1045,39 @@ def main():
|
|
| 1089 |
print("=" * 80)
|
| 1090 |
st.write("π **Debug: Feedback Data Being Submitted:**")
|
| 1091 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1092 |
# Create feedback data dictionary
|
| 1093 |
feedback_dict = {
|
| 1094 |
"open_ended_feedback": open_ended_feedback,
|
| 1095 |
"score": feedback_score,
|
| 1096 |
"is_feedback_about_last_retrieval": is_feedback_about_last_retrieval,
|
| 1097 |
-
"retrieved_data": st.session_state.rag_retrieval_history.copy() if st.session_state.rag_retrieval_history else [],
|
| 1098 |
"conversation_id": st.session_state.conversation_id,
|
| 1099 |
"timestamp": time.time(),
|
| 1100 |
"message_count": len(st.session_state.messages),
|
| 1101 |
"has_retrievals": has_retrievals,
|
| 1102 |
-
"retrieval_count": len(st.session_state.rag_retrieval_history)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1103 |
}
|
| 1104 |
|
| 1105 |
print(f"π FEEDBACK SUBMISSION: Score={feedback_score}, Retrievals={len(st.session_state.rag_retrieval_history) if st.session_state.rag_retrieval_history else 0}")
|
|
@@ -1141,19 +1119,18 @@ def main():
|
|
| 1141 |
# Ensure parent directory exists before writing
|
| 1142 |
feedback_file.parent.mkdir(parents=True, mode=0o777, exist_ok=True)
|
| 1143 |
|
| 1144 |
-
# Save to local file
|
| 1145 |
print(f"πΎ FEEDBACK SAVE: Saving to local file: {feedback_file}")
|
| 1146 |
with open(feedback_file, 'w') as f:
|
| 1147 |
json.dump(feedback_data, f, indent=2, default=str)
|
| 1148 |
|
| 1149 |
print(f"β
FEEDBACK SAVE: Local file saved successfully")
|
| 1150 |
-
st.success("β
Thank you for your feedback! It has been saved locally.")
|
| 1151 |
-
st.balloons()
|
| 1152 |
|
| 1153 |
# Save to Snowflake if enabled and credentials available
|
| 1154 |
logger.info("π FEEDBACK SAVE: Starting Snowflake save process...")
|
| 1155 |
logger.info(f"π FEEDBACK SAVE: feedback_obj={'exists' if feedback_obj else 'None'}")
|
| 1156 |
|
|
|
|
| 1157 |
try:
|
| 1158 |
snowflake_enabled = os.getenv("SNOWFLAKE_ENABLED", "false").lower() == "true"
|
| 1159 |
logger.info(f"π SNOWFLAKE CHECK: enabled={snowflake_enabled}")
|
|
@@ -1164,36 +1141,39 @@ def main():
|
|
| 1164 |
logger.info("π€ SNOWFLAKE UI: Attempting to save feedback to Snowflake...")
|
| 1165 |
print("π€ SNOWFLAKE UI: Attempting to save feedback to Snowflake...")
|
| 1166 |
|
| 1167 |
-
|
|
|
|
| 1168 |
logger.info("β
SNOWFLAKE UI: Successfully saved to Snowflake")
|
| 1169 |
print("β
SNOWFLAKE UI: Successfully saved to Snowflake")
|
| 1170 |
-
st.success("β
Feedback also saved to Snowflake!")
|
| 1171 |
else:
|
| 1172 |
logger.warning("β οΈ SNOWFLAKE UI: Save failed")
|
| 1173 |
print("β οΈ SNOWFLAKE UI: Save failed")
|
| 1174 |
-
st.warning("β οΈ Snowflake save failed, but local save succeeded")
|
| 1175 |
except Exception as e:
|
| 1176 |
logger.error(f"β SNOWFLAKE UI ERROR: {e}")
|
| 1177 |
print(f"β SNOWFLAKE UI ERROR: {e}")
|
| 1178 |
traceback.print_exc()
|
| 1179 |
-
|
| 1180 |
else:
|
| 1181 |
logger.warning("β οΈ SNOWFLAKE UI: Skipping (feedback object not created)")
|
| 1182 |
print("β οΈ SNOWFLAKE UI: Skipping (feedback object not created)")
|
| 1183 |
-
|
| 1184 |
else:
|
| 1185 |
logger.info("π‘ SNOWFLAKE UI: Integration disabled")
|
| 1186 |
print("π‘ SNOWFLAKE UI: Integration disabled")
|
| 1187 |
-
|
| 1188 |
-
|
| 1189 |
-
|
| 1190 |
-
logger.error(f"β NameError in Snowflake save: {e}")
|
| 1191 |
-
print(f"β NameError in Snowflake save: {e}")
|
| 1192 |
-
st.warning(f"β οΈ Snowflake save error: {e}")
|
| 1193 |
except Exception as e:
|
| 1194 |
logger.error(f"β Exception in Snowflake save: {type(e).__name__}: {e}")
|
| 1195 |
print(f"β Exception in Snowflake save: {type(e).__name__}: {e}")
|
| 1196 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1197 |
|
| 1198 |
# Mark feedback as submitted to prevent resubmission
|
| 1199 |
st.session_state.feedback_submitted = True
|
|
@@ -1229,16 +1209,30 @@ def main():
|
|
| 1229 |
# Scroll to conversation - this is handled by the auto-scroll at bottom
|
| 1230 |
pass
|
| 1231 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1232 |
# Example Questions Section
|
| 1233 |
st.markdown("---")
|
| 1234 |
-
st.markdown(
|
| 1235 |
-
|
| 1236 |
-
unsafe_allow_html=True
|
| 1237 |
-
)
|
| 1238 |
-
st.markdown(
|
| 1239 |
-
"<p class='example-questions-description'>Click on any question below to use it, or modify the editable examples:</p>",
|
| 1240 |
-
unsafe_allow_html=True
|
| 1241 |
-
)
|
| 1242 |
|
| 1243 |
# Initialize example question state
|
| 1244 |
if 'custom_question_1' not in st.session_state:
|
|
@@ -1261,53 +1255,56 @@ def main():
|
|
| 1261 |
|
| 1262 |
st.markdown("---")
|
| 1263 |
|
| 1264 |
-
# Questions 2 & 3: Editable examples
|
| 1265 |
-
|
| 1266 |
-
|
| 1267 |
-
|
| 1268 |
-
|
| 1269 |
-
|
| 1270 |
-
|
| 1271 |
-
|
| 1272 |
-
|
| 1273 |
-
|
| 1274 |
-
|
| 1275 |
-
|
| 1276 |
-
|
| 1277 |
-
|
| 1278 |
-
|
| 1279 |
-
)
|
| 1280 |
-
|
| 1281 |
-
|
| 1282 |
-
|
| 1283 |
-
|
| 1284 |
-
|
| 1285 |
-
|
| 1286 |
-
|
| 1287 |
-
|
| 1288 |
-
|
| 1289 |
-
|
| 1290 |
-
|
| 1291 |
-
|
| 1292 |
-
|
| 1293 |
-
|
| 1294 |
-
|
| 1295 |
-
|
| 1296 |
-
|
| 1297 |
-
|
| 1298 |
-
|
| 1299 |
-
|
| 1300 |
-
|
| 1301 |
-
|
| 1302 |
-
|
| 1303 |
-
|
| 1304 |
-
|
| 1305 |
-
|
| 1306 |
-
|
| 1307 |
-
|
| 1308 |
-
|
| 1309 |
-
|
| 1310 |
-
|
|
|
|
|
|
|
|
|
|
| 1311 |
|
| 1312 |
|
| 1313 |
# Store selected question for next render (handled in input section above)
|
|
@@ -1407,5 +1404,32 @@ def main():
|
|
| 1407 |
</script>
|
| 1408 |
""", unsafe_allow_html=True)
|
| 1409 |
|
|
|
|
| 1410 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1411 |
main()
|
|
|
|
| 10 |
import logging
|
| 11 |
import traceback
|
| 12 |
from pathlib import Path
|
|
|
|
| 13 |
from collections import Counter
|
| 14 |
+
from typing import List, Dict, Any, Optional
|
| 15 |
+
|
| 16 |
|
|
|
|
|
|
|
| 17 |
import pandas as pd
|
| 18 |
+
import streamlit as st
|
| 19 |
import plotly.express as px
|
| 20 |
+
from langchain_core.messages import HumanMessage, AIMessage
|
| 21 |
|
| 22 |
from multi_agent_chatbot import get_multi_agent_chatbot
|
| 23 |
from smart_chatbot import get_chatbot as get_smart_chatbot
|
|
|
|
| 24 |
from src.reporting.snowflake_connector import save_to_snowflake
|
| 25 |
+
from src.reporting.feedback_schema import create_feedback_from_dict
|
| 26 |
from src.config.paths import (
|
| 27 |
IS_DEPLOYED,
|
| 28 |
PROJECT_DIR,
|
|
|
|
| 99 |
color: #1f77b4;
|
| 100 |
text-align: center;
|
| 101 |
margin-bottom: 1rem;
|
| 102 |
+
width: 100%;
|
| 103 |
+
display: block;
|
| 104 |
}
|
| 105 |
|
| 106 |
.subtitle {
|
|
|
|
| 108 |
color: #666;
|
| 109 |
text-align: center;
|
| 110 |
margin-bottom: 2rem;
|
| 111 |
+
width: 100%;
|
| 112 |
+
display: block;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
}
|
| 114 |
|
| 115 |
.session-info {
|
|
|
|
| 261 |
|
| 262 |
return serialized
|
| 263 |
|
| 264 |
+
|
| 265 |
+
def extract_transcript(messages: List[Any]) -> List[Dict[str, str]]:
|
| 266 |
+
"""Extract transcript from messages - only user and bot messages, no extra metadata"""
|
| 267 |
+
transcript = []
|
| 268 |
+
for msg in messages:
|
| 269 |
+
if isinstance(msg, HumanMessage):
|
| 270 |
+
transcript.append({
|
| 271 |
+
"role": "user",
|
| 272 |
+
"content": str(msg.content) if hasattr(msg, 'content') else str(msg)
|
| 273 |
+
})
|
| 274 |
+
elif isinstance(msg, AIMessage):
|
| 275 |
+
transcript.append({
|
| 276 |
+
"role": "assistant",
|
| 277 |
+
"content": str(msg.content) if hasattr(msg, 'content') else str(msg)
|
| 278 |
+
})
|
| 279 |
+
return transcript
|
| 280 |
+
|
| 281 |
+
|
| 282 |
+
def build_retrievals_structure(rag_retrieval_history: List[Dict[str, Any]], messages: List[Any]) -> List[Dict[str, Any]]:
|
| 283 |
+
"""Build retrievals structure from retrieval history"""
|
| 284 |
+
retrievals = []
|
| 285 |
+
|
| 286 |
+
for entry in rag_retrieval_history:
|
| 287 |
+
# Get the user message that triggered this retrieval
|
| 288 |
+
# The entry has conversation_up_to which includes messages up to that point
|
| 289 |
+
conversation_up_to = entry.get("conversation_up_to", [])
|
| 290 |
+
|
| 291 |
+
# Find the last user message in conversation_up_to (this is the trigger)
|
| 292 |
+
user_message_trigger = ""
|
| 293 |
+
for msg_dict in reversed(conversation_up_to):
|
| 294 |
+
if msg_dict.get("type") == "HumanMessage":
|
| 295 |
+
user_message_trigger = msg_dict.get("content", "")
|
| 296 |
+
break
|
| 297 |
+
|
| 298 |
+
# Fallback: if not found in conversation_up_to, get from actual messages
|
| 299 |
+
# This handles edge cases where conversation_up_to might be incomplete
|
| 300 |
+
if not user_message_trigger:
|
| 301 |
+
# Find which retrieval this is (0-indexed)
|
| 302 |
+
retrieval_idx = rag_retrieval_history.index(entry)
|
| 303 |
+
# The user message that triggered this retrieval is at position (retrieval_idx * 2)
|
| 304 |
+
# because each retrieval is preceded by: user message, bot response, user message, ...
|
| 305 |
+
# But we need to account for the fact that the first retrieval happens after the first user message
|
| 306 |
+
user_msgs = [msg for msg in messages if isinstance(msg, HumanMessage)]
|
| 307 |
+
if retrieval_idx < len(user_msgs):
|
| 308 |
+
user_message_trigger = str(user_msgs[retrieval_idx].content)
|
| 309 |
+
elif user_msgs:
|
| 310 |
+
# Fallback to last user message
|
| 311 |
+
user_message_trigger = str(user_msgs[-1].content)
|
| 312 |
+
|
| 313 |
+
# Get retrieved documents and truncate content to 100 chars
|
| 314 |
+
docs_retrieved = entry.get("docs_retrieved", [])
|
| 315 |
+
retrieved_docs = []
|
| 316 |
+
for doc in docs_retrieved:
|
| 317 |
+
doc_copy = doc.copy()
|
| 318 |
+
# Truncate content to 100 characters (keep all other fields)
|
| 319 |
+
if "content" in doc_copy:
|
| 320 |
+
doc_copy["content"] = doc_copy["content"][:100]
|
| 321 |
+
retrieved_docs.append(doc_copy)
|
| 322 |
+
|
| 323 |
+
retrievals.append({
|
| 324 |
+
"retrieved_docs": retrieved_docs,
|
| 325 |
+
"user_message_trigger": user_message_trigger
|
| 326 |
+
})
|
| 327 |
+
|
| 328 |
+
return retrievals
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
def build_feedback_score_related_retrieval_docs(
|
| 332 |
+
is_feedback_about_last_retrieval: bool,
|
| 333 |
+
messages: List[Any],
|
| 334 |
+
rag_retrieval_history: List[Dict[str, Any]]
|
| 335 |
+
) -> Optional[Dict[str, Any]]:
|
| 336 |
+
"""Build feedback_score_related_retrieval_docs structure"""
|
| 337 |
+
if not rag_retrieval_history:
|
| 338 |
+
return None
|
| 339 |
+
|
| 340 |
+
# Get the relevant retrieval entry
|
| 341 |
+
if is_feedback_about_last_retrieval:
|
| 342 |
+
relevant_entry = rag_retrieval_history[-1]
|
| 343 |
+
else:
|
| 344 |
+
# If feedback is about all retrievals, use the last one as default
|
| 345 |
+
relevant_entry = rag_retrieval_history[-1]
|
| 346 |
+
|
| 347 |
+
# Get conversation up to that point
|
| 348 |
+
conversation_up_to = relevant_entry.get("conversation_up_to", [])
|
| 349 |
+
|
| 350 |
+
# Convert to transcript format (role/content)
|
| 351 |
+
conversation_up_to_point = []
|
| 352 |
+
for msg_dict in conversation_up_to:
|
| 353 |
+
if msg_dict.get("type") == "HumanMessage":
|
| 354 |
+
conversation_up_to_point.append({
|
| 355 |
+
"role": "user",
|
| 356 |
+
"content": msg_dict.get("content", "")
|
| 357 |
+
})
|
| 358 |
+
elif msg_dict.get("type") == "AIMessage":
|
| 359 |
+
conversation_up_to_point.append({
|
| 360 |
+
"role": "assistant",
|
| 361 |
+
"content": msg_dict.get("content", "")
|
| 362 |
+
})
|
| 363 |
+
|
| 364 |
+
# Get retrieved docs with full content (not truncated)
|
| 365 |
+
retrieved_docs = relevant_entry.get("docs_retrieved", [])
|
| 366 |
+
|
| 367 |
+
return {
|
| 368 |
+
"conversation_up_to_point": conversation_up_to_point,
|
| 369 |
+
"retrieved_docs": retrieved_docs
|
| 370 |
+
}
|
| 371 |
+
|
| 372 |
def extract_chunk_statistics(sources: List[Any]) -> Dict[str, Any]:
|
| 373 |
"""Extract statistics from retrieved chunks."""
|
| 374 |
if not sources:
|
|
|
|
| 548 |
return
|
| 549 |
|
| 550 |
# Wrap in styled container
|
| 551 |
+
|
| 552 |
st.markdown('<div class="retrieval-distribution-container">', unsafe_allow_html=True)
|
|
|
|
| 553 |
# st.markdown('<div class="retrieval-distribution-container">', unsafe_allow_html=True)
|
| 554 |
+
|
| 555 |
|
| 556 |
st.subheader(f"π {title}")
|
| 557 |
|
|
|
|
| 668 |
st.session_state.reset_conversation = False
|
| 669 |
st.rerun()
|
| 670 |
|
| 671 |
+
# Header - fully center aligned
|
| 672 |
st.markdown('<h1 class="main-header">π€ Intelligent Audit Report Chatbot</h1>', unsafe_allow_html=True)
|
| 673 |
st.markdown('<p class="subtitle">Ask questions about audit reports. Use the sidebar filters to narrow down your search!</p>', unsafe_allow_html=True)
|
| 674 |
|
|
|
|
| 695 |
|
| 696 |
2. **Leave filters empty** to search across all data
|
| 697 |
|
| 698 |
+
3. **Type your question** in the chat input at the bottom
|
| 699 |
|
| 700 |
+
4. **Click "Send"** to submit your question
|
| 701 |
|
| 702 |
#### π‘ Tips
|
| 703 |
|
| 704 |
- Use specific questions for better results
|
| 705 |
- Combine multiple filters for precise searches
|
| 706 |
+
- Check the "Retrieved Documents" tab to see source material
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 707 |
|
| 708 |
#### β οΈ Important
|
| 709 |
|
|
|
|
| 728 |
help="Choose specific reports to search. When enabled, all other filters are ignored."
|
| 729 |
)
|
| 730 |
st.markdown('</div>', unsafe_allow_html=True)
|
|
|
|
|
|
|
| 731 |
|
| 732 |
# Determine if filename filter is active
|
| 733 |
filename_mode = len(selected_filenames) > 0
|
| 734 |
# Sources filter
|
| 735 |
+
st.markdown('<div class="filter-section">', unsafe_allow_html=True)
|
| 736 |
st.markdown('<div class="filter-title">π Sources</div>', unsafe_allow_html=True)
|
| 737 |
selected_sources = st.multiselect(
|
| 738 |
"Select sources:",
|
|
|
|
| 827 |
label_visibility="collapsed",
|
| 828 |
value=default_value if default_value else None
|
| 829 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 830 |
|
| 831 |
with col2:
|
| 832 |
send_button = st.button("Send", key="send_button", use_container_width=True)
|
|
|
|
| 929 |
# Count unique filenames
|
| 930 |
unique_filenames = set()
|
| 931 |
for doc in sources:
|
| 932 |
+
filename = getattr(doc, 'metadata', {}).get('filename', 'Unknown')
|
|
|
|
| 933 |
unique_filenames.add(filename)
|
| 934 |
|
| 935 |
st.markdown(f"**Found {len(sources)} document chunks from {len(unique_filenames)} unique documents (showing top 20):**")
|
|
|
|
| 984 |
st.info("No documents were retrieved for the last query.")
|
| 985 |
else:
|
| 986 |
st.info("No documents have been retrieved yet. Start a conversation to see retrieved documents here.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 987 |
|
| 988 |
# Feedback Dashboard Section
|
| 989 |
st.markdown("---")
|
|
|
|
| 1045 |
print("=" * 80)
|
| 1046 |
st.write("π **Debug: Feedback Data Being Submitted:**")
|
| 1047 |
|
| 1048 |
+
# Extract transcript from messages
|
| 1049 |
+
transcript = extract_transcript(st.session_state.messages)
|
| 1050 |
+
|
| 1051 |
+
# Build retrievals structure
|
| 1052 |
+
retrievals = build_retrievals_structure(
|
| 1053 |
+
st.session_state.rag_retrieval_history.copy() if st.session_state.rag_retrieval_history else [],
|
| 1054 |
+
st.session_state.messages
|
| 1055 |
+
)
|
| 1056 |
+
|
| 1057 |
+
# Build feedback_score_related_retrieval_docs
|
| 1058 |
+
feedback_score_related_retrieval_docs = build_feedback_score_related_retrieval_docs(
|
| 1059 |
+
is_feedback_about_last_retrieval,
|
| 1060 |
+
st.session_state.messages,
|
| 1061 |
+
st.session_state.rag_retrieval_history.copy() if st.session_state.rag_retrieval_history else []
|
| 1062 |
+
)
|
| 1063 |
+
|
| 1064 |
+
# Preserve old retrieved_data format for backward compatibility
|
| 1065 |
+
retrieved_data_old_format = st.session_state.rag_retrieval_history.copy() if st.session_state.rag_retrieval_history else []
|
| 1066 |
+
|
| 1067 |
# Create feedback data dictionary
|
| 1068 |
feedback_dict = {
|
| 1069 |
"open_ended_feedback": open_ended_feedback,
|
| 1070 |
"score": feedback_score,
|
| 1071 |
"is_feedback_about_last_retrieval": is_feedback_about_last_retrieval,
|
|
|
|
| 1072 |
"conversation_id": st.session_state.conversation_id,
|
| 1073 |
"timestamp": time.time(),
|
| 1074 |
"message_count": len(st.session_state.messages),
|
| 1075 |
"has_retrievals": has_retrievals,
|
| 1076 |
+
"retrieval_count": len(st.session_state.rag_retrieval_history) if st.session_state.rag_retrieval_history else 0,
|
| 1077 |
+
"transcript": transcript,
|
| 1078 |
+
"retrievals": retrievals,
|
| 1079 |
+
"feedback_score_related_retrieval_docs": feedback_score_related_retrieval_docs,
|
| 1080 |
+
"retrieved_data": retrieved_data_old_format # Preserved old column
|
| 1081 |
}
|
| 1082 |
|
| 1083 |
print(f"π FEEDBACK SUBMISSION: Score={feedback_score}, Retrievals={len(st.session_state.rag_retrieval_history) if st.session_state.rag_retrieval_history else 0}")
|
|
|
|
| 1119 |
# Ensure parent directory exists before writing
|
| 1120 |
feedback_file.parent.mkdir(parents=True, mode=0o777, exist_ok=True)
|
| 1121 |
|
| 1122 |
+
# Save to local file first
|
| 1123 |
print(f"πΎ FEEDBACK SAVE: Saving to local file: {feedback_file}")
|
| 1124 |
with open(feedback_file, 'w') as f:
|
| 1125 |
json.dump(feedback_data, f, indent=2, default=str)
|
| 1126 |
|
| 1127 |
print(f"β
FEEDBACK SAVE: Local file saved successfully")
|
|
|
|
|
|
|
| 1128 |
|
| 1129 |
# Save to Snowflake if enabled and credentials available
|
| 1130 |
logger.info("π FEEDBACK SAVE: Starting Snowflake save process...")
|
| 1131 |
logger.info(f"π FEEDBACK SAVE: feedback_obj={'exists' if feedback_obj else 'None'}")
|
| 1132 |
|
| 1133 |
+
snowflake_success = False
|
| 1134 |
try:
|
| 1135 |
snowflake_enabled = os.getenv("SNOWFLAKE_ENABLED", "false").lower() == "true"
|
| 1136 |
logger.info(f"π SNOWFLAKE CHECK: enabled={snowflake_enabled}")
|
|
|
|
| 1141 |
logger.info("π€ SNOWFLAKE UI: Attempting to save feedback to Snowflake...")
|
| 1142 |
print("π€ SNOWFLAKE UI: Attempting to save feedback to Snowflake...")
|
| 1143 |
|
| 1144 |
+
snowflake_success = save_to_snowflake(feedback_obj)
|
| 1145 |
+
if snowflake_success:
|
| 1146 |
logger.info("β
SNOWFLAKE UI: Successfully saved to Snowflake")
|
| 1147 |
print("β
SNOWFLAKE UI: Successfully saved to Snowflake")
|
|
|
|
| 1148 |
else:
|
| 1149 |
logger.warning("β οΈ SNOWFLAKE UI: Save failed")
|
| 1150 |
print("β οΈ SNOWFLAKE UI: Save failed")
|
|
|
|
| 1151 |
except Exception as e:
|
| 1152 |
logger.error(f"β SNOWFLAKE UI ERROR: {e}")
|
| 1153 |
print(f"β SNOWFLAKE UI ERROR: {e}")
|
| 1154 |
traceback.print_exc()
|
| 1155 |
+
snowflake_success = False
|
| 1156 |
else:
|
| 1157 |
logger.warning("β οΈ SNOWFLAKE UI: Skipping (feedback object not created)")
|
| 1158 |
print("β οΈ SNOWFLAKE UI: Skipping (feedback object not created)")
|
| 1159 |
+
snowflake_success = False
|
| 1160 |
else:
|
| 1161 |
logger.info("π‘ SNOWFLAKE UI: Integration disabled")
|
| 1162 |
print("π‘ SNOWFLAKE UI: Integration disabled")
|
| 1163 |
+
# If Snowflake is disabled, consider it successful (local save only)
|
| 1164 |
+
snowflake_success = True
|
| 1165 |
+
|
|
|
|
|
|
|
|
|
|
| 1166 |
except Exception as e:
|
| 1167 |
logger.error(f"β Exception in Snowflake save: {type(e).__name__}: {e}")
|
| 1168 |
print(f"β Exception in Snowflake save: {type(e).__name__}: {e}")
|
| 1169 |
+
snowflake_success = False
|
| 1170 |
+
|
| 1171 |
+
# Only show success if Snowflake save succeeded (or if Snowflake is disabled)
|
| 1172 |
+
if snowflake_success:
|
| 1173 |
+
st.success("β
Thank you for your feedback! It has been saved successfully.")
|
| 1174 |
+
st.balloons()
|
| 1175 |
+
else:
|
| 1176 |
+
st.warning("β οΈ Feedback saved locally, but Snowflake save failed. Please check logs.")
|
| 1177 |
|
| 1178 |
# Mark feedback as submitted to prevent resubmission
|
| 1179 |
st.session_state.feedback_submitted = True
|
|
|
|
| 1209 |
# Scroll to conversation - this is handled by the auto-scroll at bottom
|
| 1210 |
pass
|
| 1211 |
|
| 1212 |
+
# Display retrieval history stats
|
| 1213 |
+
if st.session_state.rag_retrieval_history:
|
| 1214 |
+
st.markdown("---")
|
| 1215 |
+
st.markdown("#### π Retrieval History")
|
| 1216 |
+
|
| 1217 |
+
with st.expander(f"View {len(st.session_state.rag_retrieval_history)} retrieval entries", expanded=False):
|
| 1218 |
+
for idx, entry in enumerate(st.session_state.rag_retrieval_history, 1):
|
| 1219 |
+
st.markdown(f"**Retrieval #{idx}**")
|
| 1220 |
+
|
| 1221 |
+
# Display the actual RAG query
|
| 1222 |
+
rag_query_expansion = entry.get("rag_query_expansion", "No query available")
|
| 1223 |
+
st.code(rag_query_expansion, language="text")
|
| 1224 |
+
|
| 1225 |
+
# Display summary stats
|
| 1226 |
+
st.json({
|
| 1227 |
+
"conversation_length": len(entry.get("conversation_up_to", [])),
|
| 1228 |
+
"documents_retrieved": len(entry.get("docs_retrieved", []))
|
| 1229 |
+
})
|
| 1230 |
+
st.markdown("---")
|
| 1231 |
+
|
| 1232 |
# Example Questions Section
|
| 1233 |
st.markdown("---")
|
| 1234 |
+
st.markdown("### π‘ Example Questions")
|
| 1235 |
+
st.markdown("Click on any question below to use it, or modify the editable examples:")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1236 |
|
| 1237 |
# Initialize example question state
|
| 1238 |
if 'custom_question_1' not in st.session_state:
|
|
|
|
| 1255 |
|
| 1256 |
st.markdown("---")
|
| 1257 |
|
| 1258 |
+
# Questions 2 & 3: Editable examples
|
| 1259 |
+
st.markdown("#### βοΈ Customizable Questions (Edit and use)")
|
| 1260 |
+
|
| 1261 |
+
# Question 2
|
| 1262 |
+
# st.markdown("**Question 2:**")
|
| 1263 |
+
custom_q1 = st.text_area(
|
| 1264 |
+
"Edit question 2:",
|
| 1265 |
+
value=st.session_state.custom_question_1,
|
| 1266 |
+
height=80,
|
| 1267 |
+
key="edit_question_2",
|
| 1268 |
+
help="Modify this question to fit your needs, then click 'Use This Question'"
|
| 1269 |
+
)
|
| 1270 |
+
col1, col2 = st.columns([1, 4])
|
| 1271 |
+
with col1:
|
| 1272 |
+
if st.button("π Use Question 2", key="use_custom_1", use_container_width=True):
|
| 1273 |
+
if custom_q1.strip():
|
| 1274 |
+
st.session_state.pending_question = custom_q1.strip()
|
| 1275 |
+
st.session_state.custom_question_1 = custom_q1.strip()
|
| 1276 |
+
st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
|
| 1277 |
+
st.rerun()
|
| 1278 |
+
else:
|
| 1279 |
+
st.warning("Please enter a question first!")
|
| 1280 |
+
with col2:
|
| 1281 |
+
st.caption("π‘ Tip: Add specific details like dates, names, or amounts to get more precise answers")
|
| 1282 |
+
|
| 1283 |
+
st.info("π‘ **Filter to apply:** Select District(s) and Year(s) sidebar panel before asking this question.")
|
| 1284 |
+
|
| 1285 |
+
st.markdown("---")
|
| 1286 |
+
|
| 1287 |
+
# Question 3
|
| 1288 |
+
# st.markdown("**Question 3:**")
|
| 1289 |
+
custom_q2 = st.text_area(
|
| 1290 |
+
"Edit question 3:",
|
| 1291 |
+
value=st.session_state.custom_question_2,
|
| 1292 |
+
height=80,
|
| 1293 |
+
key="edit_question_3",
|
| 1294 |
+
help="Modify this question to fit your needs, then click 'Use This Question'"
|
| 1295 |
+
)
|
| 1296 |
+
col1, col2 = st.columns([1, 4])
|
| 1297 |
+
with col1:
|
| 1298 |
+
if st.button("π Use Question 3", key="use_custom_2", use_container_width=True):
|
| 1299 |
+
if custom_q2.strip():
|
| 1300 |
+
st.session_state.pending_question = custom_q2.strip()
|
| 1301 |
+
st.session_state.custom_question_2 = custom_q2.strip()
|
| 1302 |
+
st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
|
| 1303 |
+
st.rerun()
|
| 1304 |
+
else:
|
| 1305 |
+
st.warning("Please enter a question first!")
|
| 1306 |
+
with col2:
|
| 1307 |
+
st.caption("π‘ Tip: Use specific terms from the documents (e.g., 'PDM', 'SACCOs', 'FY 2022/23')")
|
| 1308 |
|
| 1309 |
|
| 1310 |
# Store selected question for next render (handled in input section above)
|
|
|
|
| 1404 |
</script>
|
| 1405 |
""", unsafe_allow_html=True)
|
| 1406 |
|
| 1407 |
+
|
| 1408 |
if __name__ == "__main__":
|
| 1409 |
+
# Check if running in Streamlit context
|
| 1410 |
+
try:
|
| 1411 |
+
from streamlit.runtime.scriptrunner import get_script_run_ctx
|
| 1412 |
+
if get_script_run_ctx() is None:
|
| 1413 |
+
# Not in Streamlit runtime - show helpful message
|
| 1414 |
+
print("=" * 80)
|
| 1415 |
+
print("β οΈ WARNING: This is a Streamlit app!")
|
| 1416 |
+
print("=" * 80)
|
| 1417 |
+
print("\nPlease run this app using:")
|
| 1418 |
+
print(" streamlit run app.py")
|
| 1419 |
+
print("\nNot: python app.py")
|
| 1420 |
+
print("\nThe app will not function correctly when run with 'python app.py'")
|
| 1421 |
+
print("=" * 80)
|
| 1422 |
+
import sys
|
| 1423 |
+
sys.exit(1)
|
| 1424 |
+
except ImportError:
|
| 1425 |
+
# Streamlit not installed or not in Streamlit context
|
| 1426 |
+
print("=" * 80)
|
| 1427 |
+
print("β οΈ WARNING: This is a Streamlit app!")
|
| 1428 |
+
print("=" * 80)
|
| 1429 |
+
print("\nPlease run this app using:")
|
| 1430 |
+
print(" streamlit run app.py")
|
| 1431 |
+
print("\nNot: python app.py")
|
| 1432 |
+
print("=" * 80)
|
| 1433 |
+
import sys
|
| 1434 |
+
sys.exit(1)
|
| 1435 |
main()
|
src/reporting/feedback_schema.py
CHANGED
|
@@ -4,10 +4,12 @@ Feedback Schema for RAG Chatbot
|
|
| 4 |
This module defines dataclasses for feedback data structures
|
| 5 |
and provides Snowflake schema generation.
|
| 6 |
"""
|
| 7 |
-
|
|
|
|
| 8 |
from dataclasses import dataclass, asdict, field
|
| 9 |
from typing import List, Optional, Dict, Any, Union
|
| 10 |
-
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
@dataclass
|
|
@@ -39,34 +41,20 @@ class UserFeedback:
|
|
| 39 |
open_ended_feedback: Optional[str]
|
| 40 |
score: int
|
| 41 |
is_feedback_about_last_retrieval: bool
|
| 42 |
-
retrieved_data: List[RetrievalEntry]
|
| 43 |
conversation_id: str
|
| 44 |
timestamp: float
|
| 45 |
message_count: int
|
| 46 |
has_retrievals: bool
|
| 47 |
retrieval_count: int
|
| 48 |
-
|
| 49 |
-
|
|
|
|
|
|
|
| 50 |
created_at: str = field(default_factory=lambda: datetime.now().isoformat())
|
| 51 |
|
| 52 |
def to_dict(self) -> Dict[str, Any]:
|
| 53 |
"""Convert to dictionary with nested data structures"""
|
| 54 |
result = asdict(self)
|
| 55 |
-
# Handle nested objects
|
| 56 |
-
if self.retrieved_data:
|
| 57 |
-
result['retrieved_data'] = [self._serialize_retrieval_entry(entry) for entry in self.retrieved_data]
|
| 58 |
-
return result
|
| 59 |
-
|
| 60 |
-
def _serialize_retrieval_entry(self, entry: RetrievalEntry) -> Dict[str, Any]:
|
| 61 |
-
"""Serialize retrieval entry to dict"""
|
| 62 |
-
# If raw data exists, use it (it's already properly formatted)
|
| 63 |
-
if hasattr(entry, '_raw_data') and entry._raw_data:
|
| 64 |
-
return entry._raw_data
|
| 65 |
-
|
| 66 |
-
# Otherwise, serialize the dataclass
|
| 67 |
-
result = asdict(entry)
|
| 68 |
-
if entry.documents_retrieved:
|
| 69 |
-
result['documents_retrieved'] = [asdict(doc) for doc in entry.documents_retrieved]
|
| 70 |
return result
|
| 71 |
|
| 72 |
def to_snowflake_schema(self) -> Dict[str, Any]:
|
|
@@ -81,28 +69,28 @@ class UserFeedback:
|
|
| 81 |
"message_count": "INTEGER",
|
| 82 |
"has_retrievals": "BOOLEAN",
|
| 83 |
"retrieval_count": "INTEGER",
|
| 84 |
-
"
|
| 85 |
-
"
|
|
|
|
|
|
|
| 86 |
"created_at": "TIMESTAMP_NTZ",
|
| 87 |
-
"
|
| 88 |
-
#
|
| 89 |
-
# [
|
| 90 |
# {
|
| 91 |
-
# "
|
| 92 |
-
# "
|
| 93 |
-
# "timestamp": 1234567890,
|
| 94 |
-
# "docs_retrieved": [
|
| 95 |
-
# {"filename": "...", "page": 14, "score": 0.95, ...},
|
| 96 |
-
# ...
|
| 97 |
-
# ]
|
| 98 |
# },
|
| 99 |
# ...
|
| 100 |
# ]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
}
|
| 102 |
return schema
|
| 103 |
|
| 104 |
@classmethod
|
| 105 |
-
def get_snowflake_create_table_sql(cls, table_name: str = "
|
| 106 |
"""Generate CREATE TABLE SQL for Snowflake"""
|
| 107 |
schema = cls.to_snowflake_schema(None)
|
| 108 |
|
|
@@ -117,16 +105,13 @@ class UserFeedback:
|
|
| 117 |
sql = f"""CREATE TABLE IF NOT EXISTS {table_name} (
|
| 118 |
{columns_str},
|
| 119 |
PRIMARY KEY (feedback_id)
|
| 120 |
-
)
|
| 121 |
-
|
| 122 |
-
--
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
--
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
-- Create index on score for feedback analysis
|
| 129 |
-
CREATE INDEX IF NOT EXISTS idx_feedback_score ON {table_name} (score);
|
| 130 |
"""
|
| 131 |
return sql
|
| 132 |
|
|
@@ -150,47 +135,27 @@ DOCUMENT_SCHEMA = {
|
|
| 150 |
}
|
| 151 |
|
| 152 |
|
| 153 |
-
def generate_snowflake_schema_sql() -> str:
|
| 154 |
"""Generate complete Snowflake schema SQL for feedback system"""
|
| 155 |
-
|
|
|
|
|
|
|
| 156 |
|
| 157 |
|
| 158 |
def create_feedback_from_dict(data: Dict[str, Any]) -> UserFeedback:
|
| 159 |
"""Create UserFeedback instance from dictionary"""
|
| 160 |
-
# Parse retrieved_data if present
|
| 161 |
-
retrieved_data = []
|
| 162 |
-
if "retrieved_data" in data and data["retrieved_data"]:
|
| 163 |
-
for entry_dict in data.get("retrieved_data", []):
|
| 164 |
-
# Map the actual structure from rag_retrieval_history
|
| 165 |
-
# Entry has: conversation_up_to, rag_query_expansion, docs_retrieved
|
| 166 |
-
try:
|
| 167 |
-
# Try to map to expected structure
|
| 168 |
-
entry = RetrievalEntry(
|
| 169 |
-
rag_query=entry_dict.get("rag_query_expansion", ""),
|
| 170 |
-
documents_retrieved=[], # Empty for now, will store as raw data
|
| 171 |
-
conversation_length=len(entry_dict.get("conversation_up_to", [])),
|
| 172 |
-
filters_applied=None,
|
| 173 |
-
timestamp=entry_dict.get("timestamp", None)
|
| 174 |
-
)
|
| 175 |
-
# Store raw data in the entry
|
| 176 |
-
entry._raw_data = entry_dict # Store original for preservation
|
| 177 |
-
retrieved_data.append(entry)
|
| 178 |
-
except Exception as e:
|
| 179 |
-
# If mapping fails, store as-is without strict typing
|
| 180 |
-
pass
|
| 181 |
-
|
| 182 |
return UserFeedback(
|
| 183 |
feedback_id=data.get("feedback_id", f"feedback_{data.get('timestamp', 'unknown')}"),
|
| 184 |
open_ended_feedback=data.get("open_ended_feedback"),
|
| 185 |
score=data["score"],
|
| 186 |
is_feedback_about_last_retrieval=data["is_feedback_about_last_retrieval"],
|
| 187 |
-
retrieved_data=retrieved_data,
|
| 188 |
conversation_id=data["conversation_id"],
|
| 189 |
timestamp=data["timestamp"],
|
| 190 |
message_count=data["message_count"],
|
| 191 |
has_retrievals=data["has_retrievals"],
|
| 192 |
retrieval_count=data["retrieval_count"],
|
| 193 |
-
|
| 194 |
-
|
|
|
|
|
|
|
| 195 |
)
|
| 196 |
-
|
|
|
|
| 4 |
This module defines dataclasses for feedback data structures
|
| 5 |
and provides Snowflake schema generation.
|
| 6 |
"""
|
| 7 |
+
import os
|
| 8 |
+
from datetime import datetime
|
| 9 |
from dataclasses import dataclass, asdict, field
|
| 10 |
from typing import List, Optional, Dict, Any, Union
|
| 11 |
+
|
| 12 |
+
|
| 13 |
|
| 14 |
|
| 15 |
@dataclass
|
|
|
|
| 41 |
open_ended_feedback: Optional[str]
|
| 42 |
score: int
|
| 43 |
is_feedback_about_last_retrieval: bool
|
|
|
|
| 44 |
conversation_id: str
|
| 45 |
timestamp: float
|
| 46 |
message_count: int
|
| 47 |
has_retrievals: bool
|
| 48 |
retrieval_count: int
|
| 49 |
+
transcript: List[Dict[str, str]] # List of {"role": "user"/"assistant", "content": "..."}
|
| 50 |
+
retrievals: List[Dict[str, Any]] # List of retrieval objects with retrieved_docs and user_message_trigger
|
| 51 |
+
feedback_score_related_retrieval_docs: Optional[Dict[str, Any]] = None # Conversation subset + retrieved docs
|
| 52 |
+
retrieved_data: Optional[List[Dict[str, Any]]] = None # Preserved old column for backward compatibility
|
| 53 |
created_at: str = field(default_factory=lambda: datetime.now().isoformat())
|
| 54 |
|
| 55 |
def to_dict(self) -> Dict[str, Any]:
|
| 56 |
"""Convert to dictionary with nested data structures"""
|
| 57 |
result = asdict(self)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
return result
|
| 59 |
|
| 60 |
def to_snowflake_schema(self) -> Dict[str, Any]:
|
|
|
|
| 69 |
"message_count": "INTEGER",
|
| 70 |
"has_retrievals": "BOOLEAN",
|
| 71 |
"retrieval_count": "INTEGER",
|
| 72 |
+
"transcript": "VARCHAR(16777216)", # JSON string of ARRAY of {"role": "user"/"assistant", "content": "..."}
|
| 73 |
+
"retrievals": "VARCHAR(16777216)", # JSON string of ARRAY of retrieval objects
|
| 74 |
+
"feedback_score_related_retrieval_docs": "VARCHAR(16777216)", # JSON string of OBJECT with conversation subset + retrieved docs
|
| 75 |
+
"retrieved_data": "VARCHAR(16777216)", # JSON string - preserved old column for backward compatibility
|
| 76 |
"created_at": "TIMESTAMP_NTZ",
|
| 77 |
+
# transcript structure: [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}, ...]
|
| 78 |
+
# retrievals structure: [
|
|
|
|
| 79 |
# {
|
| 80 |
+
# "retrieved_docs": [{"content": "...", "metadata": {...}, ...}], # content truncated to 100 chars
|
| 81 |
+
# "user_message_trigger": "final user message that triggered this retrieval"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
# },
|
| 83 |
# ...
|
| 84 |
# ]
|
| 85 |
+
# feedback_score_related_retrieval_docs structure: {
|
| 86 |
+
# "conversation_up_to_point": [{"role": "user", "content": "..."}, ...], # subset of transcript
|
| 87 |
+
# "retrieved_docs": [{"content": "...", "metadata": {...}, ...}] # full chunks with all info
|
| 88 |
+
# }
|
| 89 |
}
|
| 90 |
return schema
|
| 91 |
|
| 92 |
@classmethod
|
| 93 |
+
def get_snowflake_create_table_sql(cls, table_name: str = "USER_FEEDBACK_V3") -> str:
|
| 94 |
"""Generate CREATE TABLE SQL for Snowflake"""
|
| 95 |
schema = cls.to_snowflake_schema(None)
|
| 96 |
|
|
|
|
| 105 |
sql = f"""CREATE TABLE IF NOT EXISTS {table_name} (
|
| 106 |
{columns_str},
|
| 107 |
PRIMARY KEY (feedback_id)
|
| 108 |
+
)
|
| 109 |
+
CLUSTER BY (timestamp, conversation_id, score);
|
| 110 |
+
-- Note: Snowflake doesn't support traditional indexes on regular tables.
|
| 111 |
+
-- Instead, we use CLUSTER BY to optimize queries on these columns.
|
| 112 |
+
-- Snowflake automatically maintains clustering for efficient querying.
|
| 113 |
+
-- Note: transcript, retrievals, and feedback_score_related_retrieval_docs are stored as VARCHAR (JSON strings),
|
| 114 |
+
-- same approach as the old retrieved_data column. This allows easy storage and retrieval without VARIANT type complexity.
|
|
|
|
|
|
|
|
|
|
| 115 |
"""
|
| 116 |
return sql
|
| 117 |
|
|
|
|
| 135 |
}
|
| 136 |
|
| 137 |
|
| 138 |
+
def generate_snowflake_schema_sql(table_name: Optional[str] = None) -> str:
|
| 139 |
"""Generate complete Snowflake schema SQL for feedback system"""
|
| 140 |
+
if table_name is None:
|
| 141 |
+
table_name = os.getenv("SNOWFLAKE_FEEDBACK_TABLE", "USER_FEEDBACK_V3")
|
| 142 |
+
return UserFeedback.get_snowflake_create_table_sql(table_name)
|
| 143 |
|
| 144 |
|
| 145 |
def create_feedback_from_dict(data: Dict[str, Any]) -> UserFeedback:
|
| 146 |
"""Create UserFeedback instance from dictionary"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
return UserFeedback(
|
| 148 |
feedback_id=data.get("feedback_id", f"feedback_{data.get('timestamp', 'unknown')}"),
|
| 149 |
open_ended_feedback=data.get("open_ended_feedback"),
|
| 150 |
score=data["score"],
|
| 151 |
is_feedback_about_last_retrieval=data["is_feedback_about_last_retrieval"],
|
|
|
|
| 152 |
conversation_id=data["conversation_id"],
|
| 153 |
timestamp=data["timestamp"],
|
| 154 |
message_count=data["message_count"],
|
| 155 |
has_retrievals=data["has_retrievals"],
|
| 156 |
retrieval_count=data["retrieval_count"],
|
| 157 |
+
transcript=data.get("transcript", []),
|
| 158 |
+
retrievals=data.get("retrievals", []),
|
| 159 |
+
feedback_score_related_retrieval_docs=data.get("feedback_score_related_retrieval_docs"),
|
| 160 |
+
retrieved_data=data.get("retrieved_data")
|
| 161 |
)
|
|
|
src/reporting/snowflake_connector.py
CHANGED
|
@@ -8,8 +8,11 @@ import os
|
|
| 8 |
import json
|
| 9 |
import logging
|
| 10 |
from typing import Dict, Any, Optional
|
|
|
|
|
|
|
| 11 |
from src.reporting.feedback_schema import UserFeedback
|
| 12 |
|
|
|
|
| 13 |
# Try to import snowflake connector
|
| 14 |
try:
|
| 15 |
import snowflake.connector
|
|
@@ -79,12 +82,16 @@ class SnowflakeFeedbackConnector:
|
|
| 79 |
self._connection.close()
|
| 80 |
print("β
Disconnected from Snowflake")
|
| 81 |
|
| 82 |
-
def insert_feedback(self, feedback: UserFeedback) -> bool:
|
| 83 |
"""Insert a single feedback record into Snowflake"""
|
| 84 |
logger.info("=" * 80)
|
| 85 |
logger.info("π SNOWFLAKE INSERT: Starting feedback insertion process")
|
| 86 |
logger.info(f"π Feedback ID: {feedback.feedback_id}")
|
| 87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
if not self._connection:
|
| 89 |
logger.error("β Not connected to Snowflake. Call connect() first.")
|
| 90 |
raise RuntimeError("Not connected to Snowflake. Call connect() first.")
|
|
@@ -131,38 +138,53 @@ class SnowflakeFeedbackConnector:
|
|
| 131 |
logger.error(f"β Could not set context: {e}")
|
| 132 |
raise
|
| 133 |
|
| 134 |
-
# Prepare data
|
| 135 |
-
logger.info("π§ DATA PREPARATION: Preparing
|
| 136 |
-
|
| 137 |
|
| 138 |
-
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
|
| 141 |
-
#
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
else:
|
| 148 |
-
|
| 149 |
-
logger.info(" -
|
| 150 |
-
retrieved_data = retrieved_data_raw
|
| 151 |
|
| 152 |
-
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
|
| 155 |
-
#
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
|
|
|
| 160 |
else:
|
| 161 |
-
logger.info(f" - Retrieved data is None, using NULL")
|
| 162 |
retrieved_data_for_db = None
|
|
|
|
| 163 |
|
| 164 |
-
# Build SQL with
|
| 165 |
-
|
|
|
|
| 166 |
feedback_id,
|
| 167 |
open_ended_feedback,
|
| 168 |
score,
|
|
@@ -172,23 +194,25 @@ class SnowflakeFeedbackConnector:
|
|
| 172 |
message_count,
|
| 173 |
has_retrievals,
|
| 174 |
retrieval_count,
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
retrieved_data
|
|
|
|
| 179 |
) VALUES (
|
| 180 |
%(feedback_id)s, %(open_ended_feedback)s, %(score)s, %(is_feedback_about_last_retrieval)s,
|
| 181 |
%(conversation_id)s, %(timestamp)s, %(message_count)s, %(has_retrievals)s,
|
| 182 |
-
%(retrieval_count)s, %(
|
| 183 |
-
%(retrieved_data)s
|
| 184 |
)"""
|
| 185 |
|
| 186 |
logger.info("π SQL PREPARATION: Building INSERT statement...")
|
| 187 |
-
logger.info(f" - Target table:
|
| 188 |
logger.info(f" - Database: {self.database}")
|
| 189 |
logger.info(f" - Schema: {self.schema}")
|
| 190 |
|
| 191 |
# Prepare parameters
|
|
|
|
| 192 |
params = {
|
| 193 |
'feedback_id': feedback.feedback_id,
|
| 194 |
'open_ended_feedback': feedback.open_ended_feedback,
|
|
@@ -199,10 +223,11 @@ class SnowflakeFeedbackConnector:
|
|
| 199 |
'message_count': feedback.message_count,
|
| 200 |
'has_retrievals': feedback.has_retrievals,
|
| 201 |
'retrieval_count': feedback.retrieval_count,
|
| 202 |
-
'
|
| 203 |
-
'
|
| 204 |
-
'
|
| 205 |
-
'retrieved_data': retrieved_data_for_db
|
|
|
|
| 206 |
}
|
| 207 |
|
| 208 |
# Execute insert
|
|
@@ -265,12 +290,16 @@ def get_snowflake_connector_from_env() -> Optional[SnowflakeFeedbackConnector]:
|
|
| 265 |
)
|
| 266 |
|
| 267 |
|
| 268 |
-
def save_to_snowflake(feedback: UserFeedback) -> bool:
|
| 269 |
"""Helper function to save feedback to Snowflake"""
|
| 270 |
logger.info("=" * 80)
|
| 271 |
logger.info("π΅ SNOWFLAKE SAVE: Starting save process")
|
| 272 |
logger.info(f"π Feedback ID: {feedback.feedback_id}")
|
| 273 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
connector = get_snowflake_connector_from_env()
|
| 275 |
|
| 276 |
if not connector:
|
|
@@ -285,7 +314,7 @@ def save_to_snowflake(feedback: UserFeedback) -> bool:
|
|
| 285 |
logger.info("β
SNOWFLAKE SAVE: Connection established")
|
| 286 |
|
| 287 |
logger.info("π₯ SNOWFLAKE SAVE: Attempting to insert feedback...")
|
| 288 |
-
success = connector.insert_feedback(feedback)
|
| 289 |
|
| 290 |
logger.info("π SNOWFLAKE SAVE: Disconnecting...")
|
| 291 |
connector.disconnect()
|
|
@@ -302,4 +331,3 @@ def save_to_snowflake(feedback: UserFeedback) -> bool:
|
|
| 302 |
logger.error(f" - Error: {e}")
|
| 303 |
logger.info("=" * 80)
|
| 304 |
return False
|
| 305 |
-
|
|
|
|
| 8 |
import json
|
| 9 |
import logging
|
| 10 |
from typing import Dict, Any, Optional
|
| 11 |
+
|
| 12 |
+
|
| 13 |
from src.reporting.feedback_schema import UserFeedback
|
| 14 |
|
| 15 |
+
|
| 16 |
# Try to import snowflake connector
|
| 17 |
try:
|
| 18 |
import snowflake.connector
|
|
|
|
| 82 |
self._connection.close()
|
| 83 |
print("β
Disconnected from Snowflake")
|
| 84 |
|
| 85 |
+
def insert_feedback(self, feedback: UserFeedback, table_name: Optional[str] = None) -> bool:
|
| 86 |
"""Insert a single feedback record into Snowflake"""
|
| 87 |
logger.info("=" * 80)
|
| 88 |
logger.info("π SNOWFLAKE INSERT: Starting feedback insertion process")
|
| 89 |
logger.info(f"π Feedback ID: {feedback.feedback_id}")
|
| 90 |
|
| 91 |
+
# Get table name from parameter, env var, or default
|
| 92 |
+
if table_name is None:
|
| 93 |
+
table_name = os.getenv("SNOWFLAKE_FEEDBACK_TABLE", "USER_FEEDBACK_V3")
|
| 94 |
+
|
| 95 |
if not self._connection:
|
| 96 |
logger.error("β Not connected to Snowflake. Call connect() first.")
|
| 97 |
raise RuntimeError("Not connected to Snowflake. Call connect() first.")
|
|
|
|
| 138 |
logger.error(f"β Could not set context: {e}")
|
| 139 |
raise
|
| 140 |
|
| 141 |
+
# Prepare data - convert to JSON strings for VARIANT columns (same approach as old retrieved_data)
|
| 142 |
+
logger.info("π§ DATA PREPARATION: Preparing VARIANT columns...")
|
| 143 |
+
feedback_dict = feedback.to_dict()
|
| 144 |
|
| 145 |
+
# Prepare transcript (ARRAY) - convert to JSON string
|
| 146 |
+
transcript_raw = feedback_dict.get('transcript', [])
|
| 147 |
+
if transcript_raw:
|
| 148 |
+
# Convert to JSON string (same approach as old retrieved_data)
|
| 149 |
+
transcript_for_db = json.dumps(transcript_raw)
|
| 150 |
+
logger.info(f" - Transcript: {len(transcript_raw)} messages, JSON length: {len(transcript_for_db)}")
|
| 151 |
+
else:
|
| 152 |
+
transcript_for_db = None
|
| 153 |
+
logger.info(" - Transcript: None")
|
| 154 |
|
| 155 |
+
# Prepare retrievals (ARRAY) - convert to JSON string
|
| 156 |
+
retrievals_raw = feedback_dict.get('retrievals', [])
|
| 157 |
+
if retrievals_raw:
|
| 158 |
+
# Convert to JSON string (same approach as old retrieved_data)
|
| 159 |
+
retrievals_for_db = json.dumps(retrievals_raw)
|
| 160 |
+
logger.info(f" - Retrievals: {len(retrievals_raw)} entries, JSON length: {len(retrievals_for_db)}")
|
| 161 |
else:
|
| 162 |
+
retrievals_for_db = None
|
| 163 |
+
logger.info(" - Retrievals: None")
|
|
|
|
| 164 |
|
| 165 |
+
# Prepare feedback_score_related_retrieval_docs (OBJECT) - convert to JSON string
|
| 166 |
+
feedback_score_related_raw = feedback_dict.get('feedback_score_related_retrieval_docs')
|
| 167 |
+
if feedback_score_related_raw:
|
| 168 |
+
# Convert to JSON string (same approach as old retrieved_data)
|
| 169 |
+
feedback_score_related_for_db = json.dumps(feedback_score_related_raw)
|
| 170 |
+
logger.info(f" - Feedback score related docs: present, JSON length: {len(feedback_score_related_for_db)}")
|
| 171 |
+
else:
|
| 172 |
+
feedback_score_related_for_db = None
|
| 173 |
+
logger.info(" - Feedback score related docs: None")
|
| 174 |
|
| 175 |
+
# Prepare retrieved_data (preserved old column) - convert to JSON string
|
| 176 |
+
retrieved_data_raw = feedback_dict.get('retrieved_data')
|
| 177 |
+
if retrieved_data_raw:
|
| 178 |
+
# Convert to JSON string (same approach as old retrieved_data)
|
| 179 |
+
retrieved_data_for_db = json.dumps(retrieved_data_raw)
|
| 180 |
+
logger.info(f" - Retrieved data (preserved): present, JSON length: {len(retrieved_data_for_db)}")
|
| 181 |
else:
|
|
|
|
| 182 |
retrieved_data_for_db = None
|
| 183 |
+
logger.info(" - Retrieved data (preserved): None")
|
| 184 |
|
| 185 |
+
# Build SQL with new column structure
|
| 186 |
+
# Columns are VARCHAR (storing JSON strings), same approach as old retrieved_data
|
| 187 |
+
sql = f"""INSERT INTO {table_name} (
|
| 188 |
feedback_id,
|
| 189 |
open_ended_feedback,
|
| 190 |
score,
|
|
|
|
| 194 |
message_count,
|
| 195 |
has_retrievals,
|
| 196 |
retrieval_count,
|
| 197 |
+
transcript,
|
| 198 |
+
retrievals,
|
| 199 |
+
feedback_score_related_retrieval_docs,
|
| 200 |
+
retrieved_data,
|
| 201 |
+
created_at
|
| 202 |
) VALUES (
|
| 203 |
%(feedback_id)s, %(open_ended_feedback)s, %(score)s, %(is_feedback_about_last_retrieval)s,
|
| 204 |
%(conversation_id)s, %(timestamp)s, %(message_count)s, %(has_retrievals)s,
|
| 205 |
+
%(retrieval_count)s, %(transcript)s, %(retrievals)s, %(feedback_score_related_retrieval_docs)s,
|
| 206 |
+
%(retrieved_data)s, %(created_at)s
|
| 207 |
)"""
|
| 208 |
|
| 209 |
logger.info("π SQL PREPARATION: Building INSERT statement...")
|
| 210 |
+
logger.info(f" - Target table: {table_name}")
|
| 211 |
logger.info(f" - Database: {self.database}")
|
| 212 |
logger.info(f" - Schema: {self.schema}")
|
| 213 |
|
| 214 |
# Prepare parameters
|
| 215 |
+
# Pass JSON strings for VARIANT columns (same approach as old retrieved_data)
|
| 216 |
params = {
|
| 217 |
'feedback_id': feedback.feedback_id,
|
| 218 |
'open_ended_feedback': feedback.open_ended_feedback,
|
|
|
|
| 223 |
'message_count': feedback.message_count,
|
| 224 |
'has_retrievals': feedback.has_retrievals,
|
| 225 |
'retrieval_count': feedback.retrieval_count,
|
| 226 |
+
'transcript': transcript_for_db, # JSON string
|
| 227 |
+
'retrievals': retrievals_for_db, # JSON string
|
| 228 |
+
'feedback_score_related_retrieval_docs': feedback_score_related_for_db, # JSON string
|
| 229 |
+
'retrieved_data': retrieved_data_for_db, # JSON string - preserved old column
|
| 230 |
+
'created_at': feedback.created_at
|
| 231 |
}
|
| 232 |
|
| 233 |
# Execute insert
|
|
|
|
| 290 |
)
|
| 291 |
|
| 292 |
|
| 293 |
+
def save_to_snowflake(feedback: UserFeedback, table_name: Optional[str] = None) -> bool:
|
| 294 |
"""Helper function to save feedback to Snowflake"""
|
| 295 |
logger.info("=" * 80)
|
| 296 |
logger.info("π΅ SNOWFLAKE SAVE: Starting save process")
|
| 297 |
logger.info(f"π Feedback ID: {feedback.feedback_id}")
|
| 298 |
|
| 299 |
+
# Get table name from parameter or env var
|
| 300 |
+
if table_name is None:
|
| 301 |
+
table_name = os.getenv("SNOWFLAKE_FEEDBACK_TABLE", "USER_FEEDBACK_V3")
|
| 302 |
+
|
| 303 |
connector = get_snowflake_connector_from_env()
|
| 304 |
|
| 305 |
if not connector:
|
|
|
|
| 314 |
logger.info("β
SNOWFLAKE SAVE: Connection established")
|
| 315 |
|
| 316 |
logger.info("π₯ SNOWFLAKE SAVE: Attempting to insert feedback...")
|
| 317 |
+
success = connector.insert_feedback(feedback, table_name=table_name)
|
| 318 |
|
| 319 |
logger.info("π SNOWFLAKE SAVE: Disconnecting...")
|
| 320 |
connector.disconnect()
|
|
|
|
| 331 |
logger.error(f" - Error: {e}")
|
| 332 |
logger.info("=" * 80)
|
| 333 |
return False
|
|
|