Spaces:

akryldigital
/

audit_assistant

Sleeping

App Files Files Community

Ara Yeroyan committed 29 days ago

Commit

b4984e2

2 Parent(s): 264ca84 69de8d2

Merge branch 'main' of https://huggingface.co/spaces/akryldigital/audit_assistant

Browse files

Files changed (3) hide show

app.py +265 -241
src/reporting/feedback_schema.py +36 -71
src/reporting/snowflake_connector.py +67 -39

app.py CHANGED Viewed

@@ -10,18 +10,19 @@ import uuid
 import logging
 import traceback
 from pathlib import Path
-from typing import List, Dict, Any
 from collections import Counter
-import streamlit as st
-from langchain_core.messages import HumanMessage, AIMessage
 import pandas as pd
 import plotly.express as px
 from multi_agent_chatbot import get_multi_agent_chatbot
 from smart_chatbot import get_chatbot as get_smart_chatbot
-from src.reporting.feedback_schema import create_feedback_from_dict
 from src.reporting.snowflake_connector import save_to_snowflake
 from src.config.paths import (
     IS_DEPLOYED,
     PROJECT_DIR,
@@ -98,6 +99,8 @@ st.markdown("""
         color: #1f77b4;
         text-align: center;
         margin-bottom: 1rem;
     }
     .subtitle {
@@ -105,54 +108,8 @@ st.markdown("""
         color: #666;
         text-align: center;
         margin-bottom: 2rem;
-    }
-    .example-questions-header {
-        text-align: center;
-        margin-bottom: 1rem;
-    }
-    .example-questions-description {
-        text-align: center;
-        color: #666;
-        margin-bottom: 2rem;
-    }
-    /* Hide ALL default Streamlit text input help messages about Enter key */
-    /* This is the key one - hides "Press Enter to apply" message inside input field */
-    div[data-testid="InputInstructions"],
-    span[data-testid="InputInstructions"],
-    *[data-testid="InputInstructions"] {
-        display: none !important;
-        visibility: hidden !important;
-        opacity: 0 !important;
-        height: 0 !important;
-        width: 0 !important;
-        overflow: hidden !important;
-        position: absolute !important;
-        left: -9999px !important;
-    }
-    /* Also hide other potential locations */
-    div[data-testid="stTextInput"] + div > small,
-    div[data-testid="stTextInput"] ~ div > small,
-    div[data-testid="stTextInputContainer"] + div > small,
-    div[data-testid="stTextInputContainer"] ~ div > small,
-    div[data-baseweb="input"] + div > small,
-    div[data-baseweb="input"] ~ div > small {
-        display: none !important;
-        visibility: hidden !important;
-        opacity: 0 !important;
-        height: 0 !important;
-        overflow: hidden !important;
-    }
-    /* Custom help text for input */
-    .input-help-text {
-        font-size: 0.85rem;
-        color: #666;
-        margin-top: 0.25rem;
-        text-align: left;
     }
     .session-info {
@@ -304,6 +261,114 @@ def serialize_documents(sources):
     return serialized
 def extract_chunk_statistics(sources: List[Any]) -> Dict[str, Any]:
     """Extract statistics from retrieved chunks."""
     if not sources:
@@ -483,11 +548,10 @@ def display_chunk_statistics_table(stats: Dict[str, Any], title: str = "Retrieva
         return
     # Wrap in styled container
-<<<<<<< HEAD
     st.markdown('<div class="retrieval-distribution-container">', unsafe_allow_html=True)
-=======
     # st.markdown('<div class="retrieval-distribution-container">', unsafe_allow_html=True)
->>>>>>> 21eb407535b7e67a7dc3ea192c84831c0ae680d3
     st.subheader(f"📊 {title}")
@@ -604,7 +668,7 @@ def main():
         st.session_state.reset_conversation = False
         st.rerun()
-    # Header - centered
     st.markdown('<h1 class="main-header">🤖 Intelligent Audit Report Chatbot</h1>', unsafe_allow_html=True)
     st.markdown('<p class="subtitle">Ask questions about audit reports. Use the sidebar filters to narrow down your search!</p>', unsafe_allow_html=True)
@@ -631,21 +695,15 @@ def main():
             2. **Leave filters empty** to search across all data
-            3. **Type your question** in the chat and click "Send"
-            4. **Choose sample questions from the bottom of the page**
             #### 💡 Tips
             - Use specific questions for better results
             - Combine multiple filters for precise searches
-            - Check the "Retrieved Documents" tab to get various insights
-            #### 💬 Feedback Section
-            - Rate your experience (1-5 stars)
-            - Provide optional text feedback
-            - Located at the bottom of the page
             #### ⚠️ Important
@@ -670,13 +728,11 @@ def main():
                 help="Choose specific reports to search. When enabled, all other filters are ignored."
             )
         st.markdown('</div>', unsafe_allow_html=True)
-        st.markdown('---')
         # Determine if filename filter is active
         filename_mode = len(selected_filenames) > 0
         # Sources filter
-        # st.markdown('<div class="filter-section">', unsafe_allow_html=True)
         st.markdown('<div class="filter-title">📊 Sources</div>', unsafe_allow_html=True)
         selected_sources = st.multiselect(
             "Select sources:",
@@ -771,67 +827,6 @@ def main():
                 label_visibility="collapsed",
                 value=default_value if default_value else None
             )
-            # Use JavaScript to specifically target and hide "Press Enter to apply" message
-            st.markdown("""
-            <script>
-                (function() {
-                    // Hide InputInstructions element (contains "Press Enter to apply")
-                    function hideInputInstructions() {
-                        // Target the specific Streamlit element
-                        const instructions = document.querySelector('[data-testid="InputInstructions"]');
-                        if (instructions) {
-                            instructions.style.display = 'none';
-                            instructions.style.visibility = 'hidden';
-                            instructions.style.opacity = '0';
-                            instructions.style.height = '0';
-                            instructions.style.width = '0';
-                            instructions.style.overflow = 'hidden';
-                            instructions.style.position = 'absolute';
-                            instructions.style.left = '-9999px';
-                        }
-                        // Also search for any text containing "Press Enter" or "apply" inside input containers
-                        const allElements = document.querySelectorAll('*');
-                        allElements.forEach(el => {
-                            const text = el.textContent || el.innerText || '';
-                            if ((text.toLowerCase().includes('press enter') ||
-                                 text.toLowerCase().includes('enter to') ||
-                                 text.toLowerCase().includes('to apply')) &&
-                                (el.tagName === 'SPAN' || el.tagName === 'DIV' || el.tagName === 'SMALL')) {
-                                const style = window.getComputedStyle(el);
-                                const fontSize = parseFloat(style.fontSize);
-                                // Hide if it's small text (likely help text)
-                                if (fontSize < 14 || el.hasAttribute('data-testid')) {
-                                    el.style.display = 'none';
-                                    el.style.visibility = 'hidden';
-                                    el.style.height = '0';
-                                    el.style.overflow = 'hidden';
-                                }
-                            }
-                        });
-                    }
-                    // Run immediately and after delays to catch dynamic elements
-                    hideInputInstructions();
-                    setTimeout(hideInputInstructions, 50);
-                    setTimeout(hideInputInstructions, 100);
-                    setTimeout(hideInputInstructions, 500);
-                    // Observe for new elements added by Streamlit
-                    const observer = new MutationObserver(function(mutations) {
-                        hideInputInstructions();
-                    });
-                    observer.observe(document.body, { childList: true, subtree: true, attributes: true });
-                })();
-            </script>
-            """, unsafe_allow_html=True)
-            # # Show custom help text below input - this replaces the default "Press Enter" message
-            # st.markdown(
-            #     "<div class='input-help-text'>💡 Press the <strong>Send</strong> button to submit your question</div>",
-            #     unsafe_allow_html=True
-            # )
         with col2:
             send_button = st.button("Send", key="send_button", use_container_width=True)
@@ -934,8 +929,7 @@ def main():
                 # Count unique filenames
                 unique_filenames = set()
                 for doc in sources:
-                    metadata = getattr(doc, 'metadata', {})
-                    filename = metadata.get('filename', 'Unknown')
                     unique_filenames.add(filename)
                 st.markdown(f"**Found {len(sources)} document chunks from {len(unique_filenames)} unique documents (showing top 20):**")
@@ -990,44 +984,6 @@ def main():
                 st.info("No documents were retrieved for the last query.")
         else:
             st.info("No documents have been retrieved yet. Start a conversation to see retrieved documents here.")
-        # Display retrieval history stats
-        st.markdown("---")
-        if st.session_state.rag_retrieval_history:
-            st.markdown("#### 📊 Retrieval History")
-            st.markdown(f"This conversation has **{len(st.session_state.rag_retrieval_history)}** retrieval entries.")
-            with st.expander(f"View {len(st.session_state.rag_retrieval_history)} retrieval entries", expanded=False):
-                for idx, entry in enumerate(st.session_state.rag_retrieval_history, 1):
-                    with st.expander(f"Entry {idx}: {entry.get('rag_query_expansion', 'N/A')[:50]}...", expanded=False):
-                        st.markdown(f"**Query:** {entry.get('rag_query_expansion', 'N/A')}")
-                        st.markdown(f"**Documents Retrieved:** {len(entry.get('docs_retrieved', []))}")
-                        # Show conversation up to this point
-                        conversation = entry.get('conversation_up_to', [])
-                        if conversation:
-                            st.markdown("**Conversation Context:**")
-                            for msg in conversation[-3:]:  # Show last 3 messages
-                                role = msg.get('type', 'unknown')
-                                content = msg.get('content', '')[:200] + "..." if len(msg.get('content', '')) > 200 else msg.get('content', '')
-                                if role == 'human':
-                                    st.markdown(f"- **You:** {content}")
-                                elif role == 'ai':
-                                    st.markdown(f"- **Bot:** {content}")
-                        # Show retrieved documents summary
-                        docs = entry.get('docs_retrieved', [])
-                        if docs:
-                            st.markdown("**Retrieved Documents:**")
-                            for doc_idx, doc in enumerate(docs[:5], 1):  # Show first 5
-                                doc_meta = doc.get('metadata', {})
-                                filename = doc_meta.get('filename', 'Unknown')[:50]
-                                st.markdown(f"{doc_idx}. {filename}")
-                            if len(docs) > 5:
-                                st.markdown(f"... and {len(docs) - 5} more documents")
-        else:
-            st.markdown("---")
-            st.info("📊 Retrieval history will appear here after you start asking questions.")
     # Feedback Dashboard Section
     st.markdown("---")
@@ -1089,17 +1045,39 @@ def main():
                     print("=" * 80)
                     st.write("🔍 **Debug: Feedback Data Being Submitted:**")
                     # Create feedback data dictionary
                     feedback_dict = {
                         "open_ended_feedback": open_ended_feedback,
                         "score": feedback_score,
                         "is_feedback_about_last_retrieval": is_feedback_about_last_retrieval,
-                        "retrieved_data": st.session_state.rag_retrieval_history.copy() if st.session_state.rag_retrieval_history else [],
                         "conversation_id": st.session_state.conversation_id,
                         "timestamp": time.time(),
                         "message_count": len(st.session_state.messages),
                         "has_retrievals": has_retrievals,
-                        "retrieval_count": len(st.session_state.rag_retrieval_history)
                     }
                     print(f"📝 FEEDBACK SUBMISSION: Score={feedback_score}, Retrievals={len(st.session_state.rag_retrieval_history) if st.session_state.rag_retrieval_history else 0}")
@@ -1141,19 +1119,18 @@ def main():
                         # Ensure parent directory exists before writing
                         feedback_file.parent.mkdir(parents=True, mode=0o777, exist_ok=True)
-                        # Save to local file
                         print(f"💾 FEEDBACK SAVE: Saving to local file: {feedback_file}")
                         with open(feedback_file, 'w') as f:
                             json.dump(feedback_data, f, indent=2, default=str)
                         print(f"✅ FEEDBACK SAVE: Local file saved successfully")
-                        st.success("✅ Thank you for your feedback! It has been saved locally.")
-                        st.balloons()
                         # Save to Snowflake if enabled and credentials available
                         logger.info("🔄 FEEDBACK SAVE: Starting Snowflake save process...")
                         logger.info(f"📊 FEEDBACK SAVE: feedback_obj={'exists' if feedback_obj else 'None'}")
                         try:
                             snowflake_enabled = os.getenv("SNOWFLAKE_ENABLED", "false").lower() == "true"
                             logger.info(f"🔍 SNOWFLAKE CHECK: enabled={snowflake_enabled}")
@@ -1164,36 +1141,39 @@ def main():
                                         logger.info("📤 SNOWFLAKE UI: Attempting to save feedback to Snowflake...")
                                         print("📤 SNOWFLAKE UI: Attempting to save feedback to Snowflake...")
-                                        if save_to_snowflake(feedback_obj):
                                             logger.info("✅ SNOWFLAKE UI: Successfully saved to Snowflake")
                                             print("✅ SNOWFLAKE UI: Successfully saved to Snowflake")
-                                            st.success("✅ Feedback also saved to Snowflake!")
                                         else:
                                             logger.warning("⚠️ SNOWFLAKE UI: Save failed")
                                             print("⚠️ SNOWFLAKE UI: Save failed")
-                                            st.warning("⚠️ Snowflake save failed, but local save succeeded")
                                     except Exception as e:
                                         logger.error(f"❌ SNOWFLAKE UI ERROR: {e}")
                                         print(f"❌ SNOWFLAKE UI ERROR: {e}")
                                         traceback.print_exc()
-                                        st.warning(f"⚠️ Could not save to Snowflake: {e}")
                                 else:
                                     logger.warning("⚠️ SNOWFLAKE UI: Skipping (feedback object not created)")
                                     print("⚠️ SNOWFLAKE UI: Skipping (feedback object not created)")
-                                    st.warning("⚠️ Skipping Snowflake save (feedback object not created)")
                             else:
                                 logger.info("💡 SNOWFLAKE UI: Integration disabled")
                                 print("💡 SNOWFLAKE UI: Integration disabled")
-                                st.info("💡 Snowflake integration disabled (set SNOWFLAKE_ENABLED=true to enable)")
-                        except NameError as e:
-                            traceback.print_exc()
-                            logger.error(f"❌ NameError in Snowflake save: {e}")
-                            print(f"❌ NameError in Snowflake save: {e}")
-                            st.warning(f"⚠️ Snowflake save error: {e}")
                         except Exception as e:
                             logger.error(f"❌ Exception in Snowflake save: {type(e).__name__}: {e}")
                             print(f"❌ Exception in Snowflake save: {type(e).__name__}: {e}")
-                            st.warning(f"⚠️ Snowflake save error: {e}")
                         # Mark feedback as submitted to prevent resubmission
                         st.session_state.feedback_submitted = True
@@ -1229,16 +1209,30 @@ def main():
                     # Scroll to conversation - this is handled by the auto-scroll at bottom
                     pass
     # Example Questions Section
     st.markdown("---")
-    st.markdown(
-        "<h3 class='example-questions-header'>💡 Example Questions</h3>",
-        unsafe_allow_html=True
-    )
-    st.markdown(
-        "<p class='example-questions-description'>Click on any question below to use it, or modify the editable examples:</p>",
-        unsafe_allow_html=True
-    )
     # Initialize example question state
     if 'custom_question_1' not in st.session_state:
@@ -1261,53 +1255,56 @@ def main():
     st.markdown("---")
-    # Questions 2 & 3: Editable examples (collapsible, side by side)
-    with st.expander("#### ✏️ Customizable Questions (Edit and use)", expanded=False):
-        # Place questions side by side
-        col1, col2 = st.columns(2)
-        # Question 2
-        with col1:
-            st.markdown("**Question 2:**")
-            custom_q1 = st.text_area(
-                "Edit question 2:",
-                value=st.session_state.custom_question_1,
-                height=100,
-                key="edit_question_2",
-                help="Modify this question to fit your needs, then click 'Use This Question'",
-                label_visibility="collapsed"
-            )
-            if st.button("📋 Use Question 2", key="use_custom_1", use_container_width=True):
-                if custom_q1.strip():
-                    st.session_state.pending_question = custom_q1.strip()
-                    st.session_state.custom_question_1 = custom_q1.strip()
-                    st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
-                    st.rerun()
-                else:
-                    st.warning("Please enter a question first!")
-            st.caption("💡 Tip: Add specific details like dates, names, or amounts to get more precise answers")
-            st.info("💡 **Filter to apply:** Select District(s) and Year(s) from sidebar panel")
-        # Question 3
-        with col2:
-            st.markdown("**Question 3:**")
-            custom_q2 = st.text_area(
-                "Edit question 3:",
-                value=st.session_state.custom_question_2,
-                height=100,
-                key="edit_question_3",
-                help="Modify this question to fit your needs, then click 'Use This Question'",
-                label_visibility="collapsed"
-            )
-            if st.button("📋 Use Question 3", key="use_custom_2", use_container_width=True):
-                if custom_q2.strip():
-                    st.session_state.pending_question = custom_q2.strip()
-                    st.session_state.custom_question_2 = custom_q2.strip()
-                    st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
-                    st.rerun()
-                else:
-                    st.warning("Please enter a question first!")
-            st.caption("💡 Tip: Use specific terms from the documents (e.g., 'PDM', 'SACCOs', 'FY 2022/23')")
     # Store selected question for next render (handled in input section above)
@@ -1407,5 +1404,32 @@ def main():
     </script>
     """, unsafe_allow_html=True)
 if __name__ == "__main__":
     main()

 import logging
 import traceback
 from pathlib import Path
 from collections import Counter
+from typing import List, Dict, Any, Optional
 import pandas as pd
+import streamlit as st
 import plotly.express as px
+from langchain_core.messages import HumanMessage, AIMessage
 from multi_agent_chatbot import get_multi_agent_chatbot
 from smart_chatbot import get_chatbot as get_smart_chatbot
 from src.reporting.snowflake_connector import save_to_snowflake
+from src.reporting.feedback_schema import create_feedback_from_dict
 from src.config.paths import (
     IS_DEPLOYED,
     PROJECT_DIR,
         color: #1f77b4;
         text-align: center;
         margin-bottom: 1rem;
+        width: 100%;
+        display: block;
     }
     .subtitle {
         color: #666;
         text-align: center;
         margin-bottom: 2rem;
+        width: 100%;
+        display: block;
     }
     .session-info {
     return serialized
def extract_transcript(messages: List[Any]) -> List[Dict[str, str]]:
    """Reduce a message list to plain ``{"role", "content"}`` dictionaries.

    ``HumanMessage`` instances become role ``"user"`` and ``AIMessage``
    instances become role ``"assistant"``; every other item is dropped.
    The content is always converted with ``str``.

    Args:
        messages: Conversation messages in chronological order.

    Returns:
        One dictionary per kept message, in the same order as the input.
    """

    def _role_for(message):
        # Resolved per message so the message classes are only looked up
        # when there is something to classify.
        if isinstance(message, HumanMessage):
            return "user"
        if isinstance(message, AIMessage):
            return "assistant"
        return None

    def _text_of(message):
        # Fall back to the message's own string form if it has no `content`.
        return str(message.content) if hasattr(message, 'content') else str(message)

    labelled = ((_role_for(item), item) for item in messages)
    return [
        {"role": role, "content": _text_of(item)}
        for role, item in labelled
        if role is not None
    ]
def build_retrievals_structure(rag_retrieval_history: List[Dict[str, Any]], messages: List[Any]) -> List[Dict[str, Any]]:
    """Summarise each retrieval as its triggering question plus truncated docs.

    For every entry of ``rag_retrieval_history`` the result contains:

    * ``"user_message_trigger"`` - content of the last human message found in
      the entry's ``"conversation_up_to"`` list. When none is found (or its
      content is empty) the human message at the same position in
      ``messages`` is used, falling back to the last human message, and to
      ``""`` when ``messages`` holds no human message at all.
    * ``"retrieved_docs"`` - shallow copies of the entry's ``"docs_retrieved"``
      dictionaries whose string ``"content"`` is cut to 100 characters. All
      other fields are kept and the input documents are not modified.

    Args:
        rag_retrieval_history: Retrieval entries, oldest first.
        messages: Live conversation messages, used only for the fallback.

    Returns:
        One dictionary per retrieval entry, in input order.
    """
    # NOTE(review): the serializer for ``conversation_up_to`` is not visible
    # here. Older UI code compared the same ``type`` field against "human",
    # so both the class name and the short tag are accepted - confirm which
    # one the serializer actually writes.
    human_types = ("HumanMessage", "human")

    # Loop-invariant: the human messages of the live conversation.
    user_msgs = [msg for msg in messages if isinstance(msg, HumanMessage)]

    retrievals = []
    # enumerate() gives the true position even when two entries compare
    # equal; list.index() would report the first equal entry instead.
    for retrieval_idx, entry in enumerate(rag_retrieval_history):
        # ``or []`` also covers a key that is present but set to None.
        conversation_up_to = entry.get("conversation_up_to") or []

        # Last human message in the stored context is the trigger.
        user_message_trigger = next(
            (
                msg_dict.get("content", "")
                for msg_dict in reversed(conversation_up_to)
                if msg_dict.get("type") in human_types
            ),
            "",
        )

        # Fallback for incomplete context: positional match, clamped to the
        # last available human message.
        if not user_message_trigger and user_msgs:
            position = min(retrieval_idx, len(user_msgs) - 1)
            user_message_trigger = str(user_msgs[position].content)

        retrieved_docs = []
        for doc in entry.get("docs_retrieved") or []:
            doc_copy = doc.copy()
            content = doc_copy.get("content")
            # Only strings are truncated; None or other values are kept
            # as-is instead of raising on the slice.
            if isinstance(content, str):
                doc_copy["content"] = content[:100]
            retrieved_docs.append(doc_copy)

        retrievals.append({
            "retrieved_docs": retrieved_docs,
            "user_message_trigger": user_message_trigger
        })
    return retrievals
def build_feedback_score_related_retrieval_docs(
    is_feedback_about_last_retrieval: bool,
    messages: List[Any],
    rag_retrieval_history: List[Dict[str, Any]]
) -> Optional[Dict[str, Any]]:
    """Build the retrieval context that a feedback score refers to.

    The most recent retrieval entry is always used: feedback about the last
    retrieval targets it directly, and feedback about the whole conversation
    uses it as the default.

    Args:
        is_feedback_about_last_retrieval: Kept for interface compatibility;
            both values currently select the last retrieval entry.
        messages: Unused; kept for interface compatibility.
        rag_retrieval_history: Retrieval entries, oldest first.

    Returns:
        ``None`` when there is no retrieval history. Otherwise a dictionary
        with ``"conversation_up_to_point"`` (the entry's conversation as
        ``{"role", "content"}`` dictionaries, other message types dropped)
        and ``"retrieved_docs"`` (the entry's ``"docs_retrieved"`` value,
        not truncated and not copied).
    """
    if not rag_retrieval_history:
        return None

    relevant_entry = rag_retrieval_history[-1]

    # NOTE(review): the serializer for ``conversation_up_to`` is not visible
    # here. Older UI code compared the same ``type`` field against
    # "human"/"ai", so both spellings are mapped - confirm which one the
    # serializer actually writes.
    role_by_type = {
        "HumanMessage": "user",
        "human": "user",
        "AIMessage": "assistant",
        "ai": "assistant",
    }

    # ``or []`` also covers a key that is present but set to None.
    conversation_up_to = relevant_entry.get("conversation_up_to") or []
    conversation_up_to_point = [
        {
            "role": role_by_type[msg_dict.get("type")],
            "content": msg_dict.get("content", "")
        }
        for msg_dict in conversation_up_to
        if msg_dict.get("type") in role_by_type
    ]

    return {
        "conversation_up_to_point": conversation_up_to_point,
        "retrieved_docs": relevant_entry.get("docs_retrieved", [])
    }
 def extract_chunk_statistics(sources: List[Any]) -> Dict[str, Any]:
     """Extract statistics from retrieved chunks."""
     if not sources:
         return
     # Wrap in styled container
     st.markdown('<div class="retrieval-distribution-container">', unsafe_allow_html=True)
     # st.markdown('<div class="retrieval-distribution-container">', unsafe_allow_html=True)
     st.subheader(f"📊 {title}")
         st.session_state.reset_conversation = False
         st.rerun()
+    # Header - fully center aligned
     st.markdown('<h1 class="main-header">🤖 Intelligent Audit Report Chatbot</h1>', unsafe_allow_html=True)
     st.markdown('<p class="subtitle">Ask questions about audit reports. Use the sidebar filters to narrow down your search!</p>', unsafe_allow_html=True)
             2. **Leave filters empty** to search across all data
+            3. **Type your question** in the chat input at the bottom
+            4. **Click "Send"** to submit your question
             #### 💡 Tips
             - Use specific questions for better results
             - Combine multiple filters for precise searches
+            - Check the "Retrieved Documents" tab to see source material
             #### ⚠️ Important
                 help="Choose specific reports to search. When enabled, all other filters are ignored."
             )
         st.markdown('</div>', unsafe_allow_html=True)
         # Determine if filename filter is active
         filename_mode = len(selected_filenames) > 0
         # Sources filter
+        st.markdown('<div class="filter-section">', unsafe_allow_html=True)
         st.markdown('<div class="filter-title">📊 Sources</div>', unsafe_allow_html=True)
         selected_sources = st.multiselect(
             "Select sources:",
                 label_visibility="collapsed",
                 value=default_value if default_value else None
             )
         with col2:
             send_button = st.button("Send", key="send_button", use_container_width=True)
                 # Count unique filenames
                 unique_filenames = set()
                 for doc in sources:
+                    filename = getattr(doc, 'metadata', {}).get('filename', 'Unknown')
                     unique_filenames.add(filename)
                 st.markdown(f"**Found {len(sources)} document chunks from {len(unique_filenames)} unique documents (showing top 20):**")
                 st.info("No documents were retrieved for the last query.")
         else:
             st.info("No documents have been retrieved yet. Start a conversation to see retrieved documents here.")
     # Feedback Dashboard Section
     st.markdown("---")
                     print("=" * 80)
                     st.write("🔍 **Debug: Feedback Data Being Submitted:**")
+                    # Extract transcript from messages
+                    transcript = extract_transcript(st.session_state.messages)
+                    # Build retrievals structure
+                    retrievals = build_retrievals_structure(
+                        st.session_state.rag_retrieval_history.copy() if st.session_state.rag_retrieval_history else [],
+                        st.session_state.messages
+                    )
+                    # Build feedback_score_related_retrieval_docs
+                    feedback_score_related_retrieval_docs = build_feedback_score_related_retrieval_docs(
+                        is_feedback_about_last_retrieval,
+                        st.session_state.messages,
+                        st.session_state.rag_retrieval_history.copy() if st.session_state.rag_retrieval_history else []
+                    )
+                    # Preserve old retrieved_data format for backward compatibility
+                    retrieved_data_old_format = st.session_state.rag_retrieval_history.copy() if st.session_state.rag_retrieval_history else []
                     # Create feedback data dictionary
                     feedback_dict = {
                         "open_ended_feedback": open_ended_feedback,
                         "score": feedback_score,
                         "is_feedback_about_last_retrieval": is_feedback_about_last_retrieval,
                         "conversation_id": st.session_state.conversation_id,
                         "timestamp": time.time(),
                         "message_count": len(st.session_state.messages),
                         "has_retrievals": has_retrievals,
+                        "retrieval_count": len(st.session_state.rag_retrieval_history) if st.session_state.rag_retrieval_history else 0,
+                        "transcript": transcript,
+                        "retrievals": retrievals,
+                        "feedback_score_related_retrieval_docs": feedback_score_related_retrieval_docs,
+                        "retrieved_data": retrieved_data_old_format  # Preserved old column
                     }
                     print(f"📝 FEEDBACK SUBMISSION: Score={feedback_score}, Retrievals={len(st.session_state.rag_retrieval_history) if st.session_state.rag_retrieval_history else 0}")
                         # Ensure parent directory exists before writing
                         feedback_file.parent.mkdir(parents=True, mode=0o777, exist_ok=True)
+                        # Save to local file first
                         print(f"💾 FEEDBACK SAVE: Saving to local file: {feedback_file}")
                         with open(feedback_file, 'w') as f:
                             json.dump(feedback_data, f, indent=2, default=str)
                         print(f"✅ FEEDBACK SAVE: Local file saved successfully")
                         # Save to Snowflake if enabled and credentials available
                         logger.info("🔄 FEEDBACK SAVE: Starting Snowflake save process...")
                         logger.info(f"📊 FEEDBACK SAVE: feedback_obj={'exists' if feedback_obj else 'None'}")
+                        snowflake_success = False
                         try:
                             snowflake_enabled = os.getenv("SNOWFLAKE_ENABLED", "false").lower() == "true"
                             logger.info(f"🔍 SNOWFLAKE CHECK: enabled={snowflake_enabled}")
                                         logger.info("📤 SNOWFLAKE UI: Attempting to save feedback to Snowflake...")
                                         print("📤 SNOWFLAKE UI: Attempting to save feedback to Snowflake...")
+                                        snowflake_success = save_to_snowflake(feedback_obj)
+                                        if snowflake_success:
                                             logger.info("✅ SNOWFLAKE UI: Successfully saved to Snowflake")
                                             print("✅ SNOWFLAKE UI: Successfully saved to Snowflake")
                                         else:
                                             logger.warning("⚠️ SNOWFLAKE UI: Save failed")
                                             print("⚠️ SNOWFLAKE UI: Save failed")
                                     except Exception as e:
                                         logger.error(f"❌ SNOWFLAKE UI ERROR: {e}")
                                         print(f"❌ SNOWFLAKE UI ERROR: {e}")
                                         traceback.print_exc()
+                                        snowflake_success = False
                                 else:
                                     logger.warning("⚠️ SNOWFLAKE UI: Skipping (feedback object not created)")
                                     print("⚠️ SNOWFLAKE UI: Skipping (feedback object not created)")
+                                    snowflake_success = False
                             else:
                                 logger.info("💡 SNOWFLAKE UI: Integration disabled")
                                 print("💡 SNOWFLAKE UI: Integration disabled")
+                                # If Snowflake is disabled, consider it successful (local save only)
+                                snowflake_success = True
                         except Exception as e:
                             logger.error(f"❌ Exception in Snowflake save: {type(e).__name__}: {e}")
                             print(f"❌ Exception in Snowflake save: {type(e).__name__}: {e}")
+                            snowflake_success = False
+                        # Only show success if Snowflake save succeeded (or if Snowflake is disabled)
+                        if snowflake_success:
+                            st.success("✅ Thank you for your feedback! It has been saved successfully.")
+                            st.balloons()
+                        else:
+                            st.warning("⚠️ Feedback saved locally, but Snowflake save failed. Please check logs.")
                         # Mark feedback as submitted to prevent resubmission
                         st.session_state.feedback_submitted = True
                     # Scroll to conversation - this is handled by the auto-scroll at bottom
                     pass
+    # Display retrieval history stats
+    if st.session_state.rag_retrieval_history:
+        st.markdown("---")
+        st.markdown("#### 📊 Retrieval History")
+        with st.expander(f"View {len(st.session_state.rag_retrieval_history)} retrieval entries", expanded=False):
+            for idx, entry in enumerate(st.session_state.rag_retrieval_history, 1):
+                st.markdown(f"**Retrieval #{idx}**")
+                # Display the actual RAG query
+                rag_query_expansion = entry.get("rag_query_expansion", "No query available")
+                st.code(rag_query_expansion, language="text")
+                # Display summary stats
+                st.json({
+                    "conversation_length": len(entry.get("conversation_up_to", [])),
+                    "documents_retrieved": len(entry.get("docs_retrieved", []))
+                })
+                st.markdown("---")
     # Example Questions Section
     st.markdown("---")
+    st.markdown("### 💡 Example Questions")
+    st.markdown("Click on any question below to use it, or modify the editable examples:")
     # Initialize example question state
     if 'custom_question_1' not in st.session_state:
     st.markdown("---")
+    # Questions 2 & 3: Editable examples
+    st.markdown("#### ✏️ Customizable Questions (Edit and use)")
+    # Question 2
+    # st.markdown("**Question 2:**")
+    custom_q1 = st.text_area(
+        "Edit question 2:",
+        value=st.session_state.custom_question_1,
+        height=80,
+        key="edit_question_2",
+        help="Modify this question to fit your needs, then click 'Use This Question'"
+    )
+    col1, col2 = st.columns([1, 4])
+    with col1:
+        if st.button("📋 Use Question 2", key="use_custom_1", use_container_width=True):
+            if custom_q1.strip():
+                st.session_state.pending_question = custom_q1.strip()
+                st.session_state.custom_question_1 = custom_q1.strip()
+                st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
+                st.rerun()
+            else:
+                st.warning("Please enter a question first!")
+    with col2:
+        st.caption("💡 Tip: Add specific details like dates, names, or amounts to get more precise answers")
+    st.info("💡 **Filter to apply:** Select District(s) and Year(s) sidebar panel before asking this question.")
+    st.markdown("---")
+    # Question 3
+    # st.markdown("**Question 3:**")
+    custom_q2 = st.text_area(
+        "Edit question 3:",
+        value=st.session_state.custom_question_2,
+        height=80,
+        key="edit_question_3",
+        help="Modify this question to fit your needs, then click 'Use This Question'"
+    )
+    col1, col2 = st.columns([1, 4])
+    with col1:
+        if st.button("📋 Use Question 3", key="use_custom_2", use_container_width=True):
+            if custom_q2.strip():
+                st.session_state.pending_question = custom_q2.strip()
+                st.session_state.custom_question_2 = custom_q2.strip()
+                st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
+                st.rerun()
+            else:
+                st.warning("Please enter a question first!")
+    with col2:
+        st.caption("💡 Tip: Use specific terms from the documents (e.g., 'PDM', 'SACCOs', 'FY 2022/23')")
     # Store selected question for next render (handled in input section above)
     </script>
     """, unsafe_allow_html=True)
 if __name__ == "__main__":
+    # Check if running in Streamlit context
+    try:
+        from streamlit.runtime.scriptrunner import get_script_run_ctx
+        if get_script_run_ctx() is None:
+            # Not in Streamlit runtime - show helpful message
+            print("=" * 80)
+            print("⚠️  WARNING: This is a Streamlit app!")
+            print("=" * 80)
+            print("\nPlease run this app using:")
+            print("  streamlit run app.py")
+            print("\nNot: python app.py")
+            print("\nThe app will not function correctly when run with 'python app.py'")
+            print("=" * 80)
+            import sys
+            sys.exit(1)
+    except ImportError:
+        # Streamlit (or its runtime.scriptrunner module) could not be imported
+        print("=" * 80)
+        print("⚠️  WARNING: This is a Streamlit app!")
+        print("=" * 80)
+        print("\nPlease run this app using:")
+        print("  streamlit run app.py")
+        print("\nNot: python app.py")
+        print("=" * 80)
+        import sys
+        sys.exit(1)
     main()

src/reporting/feedback_schema.py CHANGED Viewed

@@ -4,10 +4,12 @@ Feedback Schema for RAG Chatbot
 This module defines dataclasses for feedback data structures
 and provides Snowflake schema generation.
 """
 from dataclasses import dataclass, asdict, field
 from typing import List, Optional, Dict, Any, Union
-from datetime import datetime
 @dataclass
@@ -39,34 +41,20 @@ class UserFeedback:
     open_ended_feedback: Optional[str]
     score: int
     is_feedback_about_last_retrieval: bool
-    retrieved_data: List[RetrievalEntry]
     conversation_id: str
     timestamp: float
     message_count: int
     has_retrievals: bool
     retrieval_count: int
-    user_query: Optional[str] = None
-    bot_response: Optional[str] = None
     created_at: str = field(default_factory=lambda: datetime.now().isoformat())
     def to_dict(self) -> Dict[str, Any]:
         """Convert to dictionary with nested data structures"""
         result = asdict(self)
-        # Handle nested objects
-        if self.retrieved_data:
-            result['retrieved_data'] = [self._serialize_retrieval_entry(entry) for entry in self.retrieved_data]
-        return result
-    def _serialize_retrieval_entry(self, entry: RetrievalEntry) -> Dict[str, Any]:
-        """Serialize retrieval entry to dict"""
-        # If raw data exists, use it (it's already properly formatted)
-        if hasattr(entry, '_raw_data') and entry._raw_data:
-            return entry._raw_data
-        # Otherwise, serialize the dataclass
-        result = asdict(entry)
-        if entry.documents_retrieved:
-            result['documents_retrieved'] = [asdict(doc) for doc in entry.documents_retrieved]
         return result
     def to_snowflake_schema(self) -> Dict[str, Any]:
@@ -81,28 +69,28 @@ class UserFeedback:
             "message_count": "INTEGER",
             "has_retrievals": "BOOLEAN",
             "retrieval_count": "INTEGER",
-            "user_query": "VARCHAR(16777216)",
-            "bot_response": "VARCHAR(16777216)",
             "created_at": "TIMESTAMP_NTZ",
-            "retrieved_data": "VARIANT",  # Array of retrieval entries
-            # retrieved_data structure:
-            # [
             #   {
-            #     "rag_query": "...",
-            #     "conversation_length": 5,
-            #     "timestamp": 1234567890,
-            #     "docs_retrieved": [
-            #       {"filename": "...", "page": 14, "score": 0.95, ...},
-            #       ...
-            #     ]
             #   },
             #   ...
             # ]
         }
         return schema
     @classmethod
-    def get_snowflake_create_table_sql(cls, table_name: str = "user_feedback") -> str:
         """Generate CREATE TABLE SQL for Snowflake"""
         schema = cls.to_snowflake_schema(None)
@@ -117,16 +105,13 @@ class UserFeedback:
         sql = f"""CREATE TABLE IF NOT EXISTS {table_name} (
 {columns_str},
   PRIMARY KEY (feedback_id)
-);
--- Create index on timestamp for querying by time
-CREATE INDEX IF NOT EXISTS idx_feedback_timestamp ON {table_name} (timestamp);
--- Create index on conversation_id for querying by conversation
-CREATE INDEX IF NOT EXISTS idx_feedback_conversation ON {table_name} (conversation_id);
--- Create index on score for feedback analysis
-CREATE INDEX IF NOT EXISTS idx_feedback_score ON {table_name} (score);
 """
         return sql
@@ -150,47 +135,27 @@ DOCUMENT_SCHEMA = {
 }
-def generate_snowflake_schema_sql() -> str:
     """Generate complete Snowflake schema SQL for feedback system"""
-    return UserFeedback.get_snowflake_create_table_sql("user_feedback")
 def create_feedback_from_dict(data: Dict[str, Any]) -> UserFeedback:
     """Create UserFeedback instance from dictionary"""
-    # Parse retrieved_data if present
-    retrieved_data = []
-    if "retrieved_data" in data and data["retrieved_data"]:
-        for entry_dict in data.get("retrieved_data", []):
-            # Map the actual structure from rag_retrieval_history
-            # Entry has: conversation_up_to, rag_query_expansion, docs_retrieved
-            try:
-                # Try to map to expected structure
-                entry = RetrievalEntry(
-                    rag_query=entry_dict.get("rag_query_expansion", ""),
-                    documents_retrieved=[],  # Empty for now, will store as raw data
-                    conversation_length=len(entry_dict.get("conversation_up_to", [])),
-                    filters_applied=None,
-                    timestamp=entry_dict.get("timestamp", None)
-                )
-                # Store raw data in the entry
-                entry._raw_data = entry_dict  # Store original for preservation
-                retrieved_data.append(entry)
-            except Exception as e:
-                # If mapping fails, store as-is without strict typing
-                pass
     return UserFeedback(
         feedback_id=data.get("feedback_id", f"feedback_{data.get('timestamp', 'unknown')}"),
         open_ended_feedback=data.get("open_ended_feedback"),
         score=data["score"],
         is_feedback_about_last_retrieval=data["is_feedback_about_last_retrieval"],
-        retrieved_data=retrieved_data,
         conversation_id=data["conversation_id"],
         timestamp=data["timestamp"],
         message_count=data["message_count"],
         has_retrievals=data["has_retrievals"],
         retrieval_count=data["retrieval_count"],
-        user_query=data.get("user_query"),
-        bot_response=data.get("bot_response")
     )

 This module defines dataclasses for feedback data structures
 and provides Snowflake schema generation.
 """
+import os
+from datetime import datetime
 from dataclasses import dataclass, asdict, field
 from typing import List, Optional, Dict, Any, Union
 @dataclass
     open_ended_feedback: Optional[str]
     score: int
     is_feedback_about_last_retrieval: bool
     conversation_id: str
     timestamp: float
     message_count: int
     has_retrievals: bool
     retrieval_count: int
+    transcript: List[Dict[str, str]]  # List of {"role": "user"/"assistant", "content": "..."}
+    retrievals: List[Dict[str, Any]]  # List of retrieval objects with retrieved_docs and user_message_trigger
+    feedback_score_related_retrieval_docs: Optional[Dict[str, Any]] = None  # Conversation subset + retrieved docs
+    retrieved_data: Optional[List[Dict[str, Any]]] = None  # Preserved old column for backward compatibility
     created_at: str = field(default_factory=lambda: datetime.now().isoformat())
     def to_dict(self) -> Dict[str, Any]:
         """Convert to dictionary with nested data structures"""
         result = asdict(self)
         return result
     def to_snowflake_schema(self) -> Dict[str, Any]:
             "message_count": "INTEGER",
             "has_retrievals": "BOOLEAN",
             "retrieval_count": "INTEGER",
+            "transcript": "VARCHAR(16777216)",  # JSON string of ARRAY of {"role": "user"/"assistant", "content": "..."}
+            "retrievals": "VARCHAR(16777216)",  # JSON string of ARRAY of retrieval objects
+            "feedback_score_related_retrieval_docs": "VARCHAR(16777216)",  # JSON string of OBJECT with conversation subset + retrieved docs
+            "retrieved_data": "VARCHAR(16777216)",  # JSON string - preserved old column for backward compatibility
             "created_at": "TIMESTAMP_NTZ",
+            # transcript structure: [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}, ...]
+            # retrievals structure: [
             #   {
+            #     "retrieved_docs": [{"content": "...", "metadata": {...}, ...}],  # content truncated to 100 chars
+            #     "user_message_trigger": "final user message that triggered this retrieval"
             #   },
             #   ...
             # ]
+            # feedback_score_related_retrieval_docs structure: {
+            #   "conversation_up_to_point": [{"role": "user", "content": "..."}, ...],  # subset of transcript
+            #   "retrieved_docs": [{"content": "...", "metadata": {...}, ...}]  # full chunks with all info
+            # }
         }
         return schema
     @classmethod
+    def get_snowflake_create_table_sql(cls, table_name: str = "USER_FEEDBACK_V3") -> str:
         """Generate CREATE TABLE SQL for Snowflake"""
         schema = cls.to_snowflake_schema(None)
         sql = f"""CREATE TABLE IF NOT EXISTS {table_name} (
 {columns_str},
   PRIMARY KEY (feedback_id)
+)
+CLUSTER BY (timestamp, conversation_id, score);
+-- Note: Snowflake doesn't support traditional indexes on regular tables.
+-- Instead, we use CLUSTER BY to optimize queries on these columns.
+-- Snowflake automatically maintains clustering for efficient querying.
+-- Note: transcript, retrievals, and feedback_score_related_retrieval_docs are stored as VARCHAR (JSON strings),
+-- same approach as the old retrieved_data column. This allows easy storage and retrieval without VARIANT type complexity.
 """
         return sql
 }
+def generate_snowflake_schema_sql(table_name: Optional[str] = None) -> str:
     """Generate complete Snowflake schema SQL for feedback system"""
+    if table_name is None:
+        table_name = os.getenv("SNOWFLAKE_FEEDBACK_TABLE", "USER_FEEDBACK_V3")
+    return UserFeedback.get_snowflake_create_table_sql(table_name)
 def create_feedback_from_dict(data: Dict[str, Any]) -> UserFeedback:
     """Create UserFeedback instance from dictionary"""
     return UserFeedback(
         feedback_id=data.get("feedback_id", f"feedback_{data.get('timestamp', 'unknown')}"),
         open_ended_feedback=data.get("open_ended_feedback"),
         score=data["score"],
         is_feedback_about_last_retrieval=data["is_feedback_about_last_retrieval"],
         conversation_id=data["conversation_id"],
         timestamp=data["timestamp"],
         message_count=data["message_count"],
         has_retrievals=data["has_retrievals"],
         retrieval_count=data["retrieval_count"],
+        transcript=data.get("transcript", []),
+        retrievals=data.get("retrievals", []),
+        feedback_score_related_retrieval_docs=data.get("feedback_score_related_retrieval_docs"),
+        retrieved_data=data.get("retrieved_data")
     )

src/reporting/snowflake_connector.py CHANGED Viewed

@@ -8,8 +8,11 @@ import os
 import json
 import logging
 from typing import Dict, Any, Optional
 from src.reporting.feedback_schema import UserFeedback
 # Try to import snowflake connector
 try:
     import snowflake.connector
@@ -79,12 +82,16 @@ class SnowflakeFeedbackConnector:
             self._connection.close()
             print("✅ Disconnected from Snowflake")
-    def insert_feedback(self, feedback: UserFeedback) -> bool:
         """Insert a single feedback record into Snowflake"""
         logger.info("=" * 80)
         logger.info("🔄 SNOWFLAKE INSERT: Starting feedback insertion process")
         logger.info(f"📝 Feedback ID: {feedback.feedback_id}")
         if not self._connection:
             logger.error("❌ Not connected to Snowflake. Call connect() first.")
             raise RuntimeError("Not connected to Snowflake. Call connect() first.")
@@ -131,38 +138,53 @@ class SnowflakeFeedbackConnector:
                 logger.error(f"❌ Could not set context: {e}")
                 raise
-            # Prepare data
-            logger.info("🔧 DATA PREPARATION: Preparing retrieved_data...")
-            retrieved_data_raw = feedback.to_dict()['retrieved_data']
-            logger.info(f"   - Retrieved data type (raw): {type(retrieved_data_raw).__name__}")
-            logger.info(f"   - Retrieved data: {repr(retrieved_data_raw)[:200]}")
-            # If retrieved_data is already a string (from UI), parse it
-            if isinstance(retrieved_data_raw, str):
-                logger.info("   - Parsing string to Python object")
-                retrieved_data = json.loads(retrieved_data_raw)
-            elif retrieved_data_raw is None:
-                retrieved_data = None
             else:
-                # It's already a Python object (list/dict)
-                logger.info("   - Data is already a Python object")
-                retrieved_data = retrieved_data_raw
-            logger.info(f"   - Retrieved data size: {len(str(retrieved_data)) if retrieved_data else 0} characters")
-            logger.info(f"   - Retrieved data type: {type(retrieved_data).__name__}")
-            # Convert to JSON string for TEXT column
-            if retrieved_data:
-                retrieved_data_for_db = json.dumps(retrieved_data)
-                logger.info(f"   - Converting to JSON string for TEXT column")
-                logger.info(f"   - JSON string length: {len(retrieved_data_for_db)}")
             else:
-                logger.info(f"   - Retrieved data is None, using NULL")
                 retrieved_data_for_db = None
-            # Build SQL with retrieved_data as a TEXT column parameter
-            sql = f"""INSERT INTO user_feedback (
                 feedback_id,
                 open_ended_feedback,
                 score,
@@ -172,23 +194,25 @@ class SnowflakeFeedbackConnector:
                 message_count,
                 has_retrievals,
                 retrieval_count,
-                user_query,
-                bot_response,
-                created_at,
-                retrieved_data
             ) VALUES (
                 %(feedback_id)s, %(open_ended_feedback)s, %(score)s, %(is_feedback_about_last_retrieval)s,
                 %(conversation_id)s, %(timestamp)s, %(message_count)s, %(has_retrievals)s,
-                %(retrieval_count)s, %(user_query)s, %(bot_response)s, %(created_at)s,
-                %(retrieved_data)s
             )"""
             logger.info("📝 SQL PREPARATION: Building INSERT statement...")
-            logger.info(f"   - Target table: user_feedback")
             logger.info(f"   - Database: {self.database}")
             logger.info(f"   - Schema: {self.schema}")
             # Prepare parameters
             params = {
                 'feedback_id': feedback.feedback_id,
                 'open_ended_feedback': feedback.open_ended_feedback,
@@ -199,10 +223,11 @@ class SnowflakeFeedbackConnector:
                 'message_count': feedback.message_count,
                 'has_retrievals': feedback.has_retrievals,
                 'retrieval_count': feedback.retrieval_count,
-                'user_query': feedback.user_query,
-                'bot_response': feedback.bot_response,
-                'created_at': feedback.created_at,
-                'retrieved_data': retrieved_data_for_db
             }
             # Execute insert
@@ -265,12 +290,16 @@ def get_snowflake_connector_from_env() -> Optional[SnowflakeFeedbackConnector]:
     )
-def save_to_snowflake(feedback: UserFeedback) -> bool:
     """Helper function to save feedback to Snowflake"""
     logger.info("=" * 80)
     logger.info("🔵 SNOWFLAKE SAVE: Starting save process")
     logger.info(f"📝 Feedback ID: {feedback.feedback_id}")
     connector = get_snowflake_connector_from_env()
     if not connector:
@@ -285,7 +314,7 @@ def save_to_snowflake(feedback: UserFeedback) -> bool:
         logger.info("✅ SNOWFLAKE SAVE: Connection established")
         logger.info("📥 SNOWFLAKE SAVE: Attempting to insert feedback...")
-        success = connector.insert_feedback(feedback)
         logger.info("🔌 SNOWFLAKE SAVE: Disconnecting...")
         connector.disconnect()
@@ -302,4 +331,3 @@ def save_to_snowflake(feedback: UserFeedback) -> bool:
         logger.error(f"   - Error: {e}")
         logger.info("=" * 80)
         return False

 import json
 import logging
 from typing import Dict, Any, Optional
 from src.reporting.feedback_schema import UserFeedback
 # Try to import snowflake connector
 try:
     import snowflake.connector
             self._connection.close()
             print("✅ Disconnected from Snowflake")
+    def insert_feedback(self, feedback: UserFeedback, table_name: Optional[str] = None) -> bool:
         """Insert a single feedback record into Snowflake"""
         logger.info("=" * 80)
         logger.info("🔄 SNOWFLAKE INSERT: Starting feedback insertion process")
         logger.info(f"📝 Feedback ID: {feedback.feedback_id}")
+        # Get table name from parameter, env var, or default
+        if table_name is None:
+            table_name = os.getenv("SNOWFLAKE_FEEDBACK_TABLE", "USER_FEEDBACK_V3")
         if not self._connection:
             logger.error("❌ Not connected to Snowflake. Call connect() first.")
             raise RuntimeError("Not connected to Snowflake. Call connect() first.")
                 logger.error(f"❌ Could not set context: {e}")
                 raise
+            # Prepare data - convert to JSON strings for the VARCHAR columns (same approach as old retrieved_data)
+            logger.info("🔧 DATA PREPARATION: Preparing VARIANT columns...")
+            feedback_dict = feedback.to_dict()
+            # Prepare transcript (ARRAY) - convert to JSON string
+            transcript_raw = feedback_dict.get('transcript', [])
+            if transcript_raw:
+                # Convert to JSON string (same approach as old retrieved_data)
+                transcript_for_db = json.dumps(transcript_raw)
+                logger.info(f"   - Transcript: {len(transcript_raw)} messages, JSON length: {len(transcript_for_db)}")
+            else:
+                transcript_for_db = None
+                logger.info("   - Transcript: None")
+            # Prepare retrievals (ARRAY) - convert to JSON string
+            retrievals_raw = feedback_dict.get('retrievals', [])
+            if retrievals_raw:
+                # Convert to JSON string (same approach as old retrieved_data)
+                retrievals_for_db = json.dumps(retrievals_raw)
+                logger.info(f"   - Retrievals: {len(retrievals_raw)} entries, JSON length: {len(retrievals_for_db)}")
             else:
+                retrievals_for_db = None
+                logger.info("   - Retrievals: None")
+            # Prepare feedback_score_related_retrieval_docs (OBJECT) - convert to JSON string
+            feedback_score_related_raw = feedback_dict.get('feedback_score_related_retrieval_docs')
+            if feedback_score_related_raw:
+                # Convert to JSON string (same approach as old retrieved_data)
+                feedback_score_related_for_db = json.dumps(feedback_score_related_raw)
+                logger.info(f"   - Feedback score related docs: present, JSON length: {len(feedback_score_related_for_db)}")
+            else:
+                feedback_score_related_for_db = None
+                logger.info("   - Feedback score related docs: None")
+            # Prepare retrieved_data (preserved old column) - convert to JSON string
+            retrieved_data_raw = feedback_dict.get('retrieved_data')
+            if retrieved_data_raw:
+                # Convert to JSON string (same approach as old retrieved_data)
+                retrieved_data_for_db = json.dumps(retrieved_data_raw)
+                logger.info(f"   - Retrieved data (preserved): present, JSON length: {len(retrieved_data_for_db)}")
             else:
                 retrieved_data_for_db = None
+                logger.info("   - Retrieved data (preserved): None")
+            # Build SQL with new column structure
+            # Columns are VARCHAR (storing JSON strings), same approach as old retrieved_data
+            sql = f"""INSERT INTO {table_name} (
                 feedback_id,
                 open_ended_feedback,
                 score,
                 message_count,
                 has_retrievals,
                 retrieval_count,
+                transcript,
+                retrievals,
+                feedback_score_related_retrieval_docs,
+                retrieved_data,
+                created_at
             ) VALUES (
                 %(feedback_id)s, %(open_ended_feedback)s, %(score)s, %(is_feedback_about_last_retrieval)s,
                 %(conversation_id)s, %(timestamp)s, %(message_count)s, %(has_retrievals)s,
+                %(retrieval_count)s, %(transcript)s, %(retrievals)s, %(feedback_score_related_retrieval_docs)s,
+                %(retrieved_data)s, %(created_at)s
             )"""
             logger.info("📝 SQL PREPARATION: Building INSERT statement...")
+            logger.info(f"   - Target table: {table_name}")
             logger.info(f"   - Database: {self.database}")
             logger.info(f"   - Schema: {self.schema}")
             # Prepare parameters
+            # Pass JSON strings for the VARCHAR columns (same approach as old retrieved_data)
             params = {
                 'feedback_id': feedback.feedback_id,
                 'open_ended_feedback': feedback.open_ended_feedback,
                 'message_count': feedback.message_count,
                 'has_retrievals': feedback.has_retrievals,
                 'retrieval_count': feedback.retrieval_count,
+                'transcript': transcript_for_db,  # JSON string
+                'retrievals': retrievals_for_db,  # JSON string
+                'feedback_score_related_retrieval_docs': feedback_score_related_for_db,  # JSON string
+                'retrieved_data': retrieved_data_for_db,  # JSON string - preserved old column
+                'created_at': feedback.created_at
             }
             # Execute insert
     )
+def save_to_snowflake(feedback: UserFeedback, table_name: Optional[str] = None) -> bool:
     """Helper function to save feedback to Snowflake"""
     logger.info("=" * 80)
     logger.info("🔵 SNOWFLAKE SAVE: Starting save process")
     logger.info(f"📝 Feedback ID: {feedback.feedback_id}")
+    # Get table name from parameter or env var
+    if table_name is None:
+        table_name = os.getenv("SNOWFLAKE_FEEDBACK_TABLE", "USER_FEEDBACK_V3")
     connector = get_snowflake_connector_from_env()
     if not connector:
         logger.info("✅ SNOWFLAKE SAVE: Connection established")
         logger.info("📥 SNOWFLAKE SAVE: Attempting to insert feedback...")
+        success = connector.insert_feedback(feedback, table_name=table_name)
         logger.info("🔌 SNOWFLAKE SAVE: Disconnecting...")
         connector.disconnect()
         logger.error(f"   - Error: {e}")
         logger.info("=" * 80)
         return False