Spaces:

Vanshcc
/

DB_Chatbot

Running

App Files Files Community

Vanshcc committed 23 days ago

Commit

8823302

verified ·

1 Parent(s): 6ca635c

add more context awareness and add graphing

Browse files

Files changed (5) hide show

app.py +27 -34
chatbot.py +17 -3
router.py +9 -2
sql/generator.py +6 -5
viz_utils.py +64 -0

app.py CHANGED Viewed

@@ -34,6 +34,7 @@ from database.connection import DatabaseConnection
 from llm import create_llm_client
 from chatbot import create_chatbot, DatabaseChatbot
 from memory import ChatMemory, EnhancedChatMemory
 # Groq models (all FREE!)
@@ -557,7 +558,7 @@ def render_chat_interface():
     with chat_container:
         # Display messages
-        for msg in st.session_state.messages:
             with st.chat_message(msg["role"]):
                 st.markdown(msg["content"])
@@ -569,6 +570,9 @@ def render_chat_interface():
                     if meta.get("sql_query"):
                         with st.expander("SQL Query"):
                             st.code(meta["sql_query"], language="sql")
     # Chat input
     if prompt := st.chat_input("Ask about your data..."):
@@ -581,43 +585,32 @@ def render_chat_interface():
         if st.session_state.memory:
             st.session_state.memory.add_message("user", prompt)
         with st.chat_message("user"):
             st.markdown(prompt)
         # Get response
-        with st.chat_message("assistant"):
-            with st.spinner("Thinking..."):
-                response = st.session_state.chatbot.chat(
-                    prompt,
-                    st.session_state.memory,
-                    ignored_tables=list(st.session_state.ignored_tables)
-                )
-                st.markdown(response.answer)
-                # Show metadata
-                if response.query_type != "general":
-                    st.caption(f"Query type: {response.query_type}")
-                if response.sql_query:
-                    with st.expander("SQL Query"):
-                        st.code(response.sql_query, language="sql")
-                if response.sql_results:
-                    with st.expander("Results"):
-                        st.dataframe(response.sql_results)
-        # Save to memory
-        st.session_state.messages.append({
-            "role": "assistant",
-            "content": response.answer,
-            "metadata": {
-                "query_type": response.query_type,
-                "sql_query": response.sql_query
-            }
-        })
-        if st.session_state.memory:
-            st.session_state.memory.add_message("assistant", response.answer)
 def main():

 from llm import create_llm_client
 from chatbot import create_chatbot, DatabaseChatbot
 from memory import ChatMemory, EnhancedChatMemory
+from viz_utils import render_visualization
 # Groq models (all FREE!)
     with chat_container:
         # Display messages
+        for i, msg in enumerate(st.session_state.messages):
             with st.chat_message(msg["role"]):
                 st.markdown(msg["content"])
                     if meta.get("sql_query"):
                         with st.expander("SQL Query"):
                             st.code(meta["sql_query"], language="sql")
+                    if meta.get("sql_results"):
+                        render_visualization(meta["sql_results"], f"hist_{i}")
     # Chat input
     if prompt := st.chat_input("Ask about your data..."):
         if st.session_state.memory:
             st.session_state.memory.add_message("user", prompt)
+        # Display user message immediately
         with st.chat_message("user"):
             st.markdown(prompt)
         # Get response
+        with st.spinner("Thinking..."):
+            response = st.session_state.chatbot.chat(
+                prompt,
+                st.session_state.memory,
+                ignored_tables=list(st.session_state.ignored_tables)
+            )
+            # Save to memory
+            st.session_state.messages.append({
+                "role": "assistant",
+                "content": response.answer,
+                "metadata": {
+                    "query_type": response.query_type,
+                    "sql_query": response.sql_query,
+                    "sql_results": response.sql_results
+                }
+            })
+            if st.session_state.memory:
+                st.session_state.memory.add_message("assistant", response.answer)
+            st.rerun()
 def main():

chatbot.py CHANGED Viewed

@@ -57,6 +57,12 @@ INSTRUCTIONS:
 - Be concise but complete
 - Format data nicely
 YOUR RESPONSE:"""
     def __init__(self, llm_client: Optional[LLMClient] = None):
@@ -238,11 +244,11 @@ YOUR RESPONSE:"""
                 else:
                     return ChatResponse(answer="⚠️ Nothing previous to save. Tell me something to remember first!", query_type="memory")
-            # Route the query
-            routing = self.router.route(query, schema_context)
             # Get chat history for context
             history = memory.get_context_messages(5) if memory else []
             # Process based on route
             if routing.query_type == QueryType.RAG:
@@ -260,6 +266,14 @@ YOUR RESPONSE:"""
     def _handle_rag(self, query: str, history: List[Dict], allowed_tables: Optional[List[str]] = None) -> ChatResponse:
         """Handle RAG-based query."""
         context = self.rag_engine.get_context(query, top_k=5, table_filter=allowed_tables)
         prompt = self.RESPONSE_PROMPT.format(context=f"RELEVANT DATA:\n{context}", question=query)

 - Be concise but complete
 - Format data nicely
+INTERACTION GUIDELINES:
+- If the SQL results show a list (e.g., top products) and hit the limit (5, 10, or 50), MENTION this and ASK the user if they want to see more or a specific number.
+  Example: "Here are the top 5 products... Would you like to see the top 10?"
+- If the user's question was broad (e.g., "Show me products") and you're showing a limited set, ASK if they want to filter by a specific attribute (e.g., "Would you like to filter by category or price?").
+- If the answer is "0 results" for a "top/best" query, suggest looking at the data generally.
 YOUR RESPONSE:"""
     def __init__(self, llm_client: Optional[LLMClient] = None):
                 else:
                     return ChatResponse(answer="⚠️ Nothing previous to save. Tell me something to remember first!", query_type="memory")
             # Get chat history for context
             history = memory.get_context_messages(5) if memory else []
+            # Route the query
+            routing = self.router.route(query, schema_context, history)
             # Process based on route
             if routing.query_type == QueryType.RAG:
     def _handle_rag(self, query: str, history: List[Dict], allowed_tables: Optional[List[str]] = None) -> ChatResponse:
         """Handle RAG-based query."""
+        # Check if we have any indexed data
+        if self.rag_engine.document_count == 0:
+            return ChatResponse(
+                answer="⚠️ **I can't answer this yet.**\n\nThis looks like a semantic question (searching for meaning/concepts), but you haven't **indexed the text data** yet.\n\nPlease click the **'📚 Index Text Data'** button in the sidebar to enable this functionality.",
+                query_type="error",
+                error="RAG index is empty"
+            )
         context = self.rag_engine.get_context(query, top_k=5, table_filter=allowed_tables)
         prompt = self.RESPONSE_PROMPT.format(context=f"RELEVANT DATA:\n{context}", question=query)

router.py CHANGED Viewed

@@ -48,6 +48,7 @@ Determine if this query needs:
 4. GENERAL - General conversation not requiring database access
 IMPORTANT: If the user asks to "show more", "show other", "see remaining", "next results", or similar - this is a PAGINATION request and should be routed to SQL, NOT GENERAL.
 Respond in this exact format:
 TYPE: [RAG|SQL|HYBRID|GENERAL]
@@ -61,13 +62,19 @@ REASONING: [brief explanation]"""
     def set_llm_client(self, llm_client):
         self.llm_client = llm_client
-    def route(self, query: str, schema_context: str) -> RoutingDecision:
         """Analyze query and determine routing."""
         if not self.llm_client:
             # Fallback to simple heuristics
             return self._heuristic_route(query)
-        prompt = self.ROUTING_PROMPT.format(schema=schema_context, query=query)
         try:
             response = self.llm_client.chat([

 4. GENERAL - General conversation not requiring database access
 IMPORTANT: If the user asks to "show more", "show other", "see remaining", "next results", or similar - this is a PAGINATION request and should be routed to SQL, NOT GENERAL.
+5. REFERENTIAL/AFFIRMATIVE: If the query is simply "yes", "sure", "ok", "please", or "do it", check if it's likely a confirmation to a previous offer (like "would you like to see 10 more?"). If so, this is likely SQL (pagination or new query). If ambiguous, default to GENERAL.
 Respond in this exact format:
 TYPE: [RAG|SQL|HYBRID|GENERAL]
     def set_llm_client(self, llm_client):
         self.llm_client = llm_client
+    def route(self, query: str, schema_context: str, chat_history: Optional[List[Dict]] = None) -> RoutingDecision:
         """Analyze query and determine routing."""
         if not self.llm_client:
             # Fallback to simple heuristics
             return self._heuristic_route(query)
+        prev_context = ""
+        if chat_history and len(chat_history) > 0:
+            last_msg = chat_history[-1]
+            if last_msg.get("role") == "assistant":
+                prev_context = f"\nPREVIOUS ASSISTANT MSG: {last_msg.get('content', '')[:200]}..."
+        prompt = self.ROUTING_PROMPT.format(schema=schema_context, query=query + prev_context)
         try:
             response = self.llm_client.chat([

sql/generator.py CHANGED Viewed

@@ -61,15 +61,16 @@ class SQLGenerator:
 RULES:
 1. ONLY generate SELECT statements.
 2. NEVER use INSERT, UPDATE, DELETE, DROP, CREATE, ALTER, or TRUNCATE.
-3. Always include a LIMIT clause (max 50 rows unless specified).
 4. Use table and column names EXACTLY as shown in the schema.
-5. AMBIGUITY: If the user asks for a category, type, or specific value, and you are unsure which column it belongs to:
    - Check multiple likely columns (e.g., `category`, `sub_category`, `type`, `description`).
    - Use pattern matching for flexibility.
    - Use `OR` to combine multiple column checks.
-6. DATA AWARENESS: In footwear databases, specific types like 'Formal', 'Casual', or 'Sports' often appear in `sub_category` OR `category`. Check both if available.
-7. Return ONLY the SQL query, no explanations.
-8. PAGINATION: If the user asks to "show more", "show other", "see remaining", or similar follow-up:
    - Look at the previous conversation for the original query conditions.
    - Use LIMIT with OFFSET to get the next set of results (e.g., LIMIT 10 OFFSET 10 for the second page).
    - Keep the same WHERE conditions from the previous query.

 RULES:
 1. ONLY generate SELECT statements.
 2. NEVER use INSERT, UPDATE, DELETE, DROP, CREATE, ALTER, or TRUNCATE.
+3. Always include a LIMIT clause (max 50 rows unless specified). Do NOT use LIMIT 1 for "top" or "best" queries unless explicitly asked for "single" or "one".
 4. Use table and column names EXACTLY as shown in the schema.
+5. TOP/BEST ITEMS: When asked for 'top', 'highest', or 'best' items (e.g. 'top rated products'), use LIMIT 5 or LIMIT 10 to show potential ties or multiple top candidates. Never use LIMIT 1 for these unless the user explicitly asks for "the number one" or "single best".
+6. AMBIGUITY: If the user asks for a category, type, or specific value, and you are unsure which column it belongs to:
    - Check multiple likely columns (e.g., `category`, `sub_category`, `type`, `description`).
    - Use pattern matching for flexibility.
    - Use `OR` to combine multiple column checks.
+7. DATA AWARENESS: In footwear databases, specific types like 'Formal', 'Casual', or 'Sports' often appear in `sub_category` OR `category`. Check both if available.
+8. Return ONLY the SQL query, no explanations.
+9. PAGINATION: If the user asks to "show more", "show other", "see remaining", or similar follow-up:
    - Look at the previous conversation for the original query conditions.
    - Use LIMIT with OFFSET to get the next set of results (e.g., LIMIT 10 OFFSET 10 for the second page).
    - Keep the same WHERE conditions from the previous query.

viz_utils.py ADDED Viewed

	@@ -0,0 +1,64 @@

+import streamlit as st
+import pandas as pd
+def render_visualization(results, key_prefix):
+    """Render data tables and visualizations from SQL results.
+
+    Draws a collapsed expander holding two tabs: a "Data" tab showing the
+    results as a table, and a "Visualize" tab where the user picks a chart
+    type, an X column and a numeric Y column.
+
+    Args:
+        results: Query results; handed unchanged to ``pd.DataFrame``.
+            A falsy value renders nothing.
+        key_prefix: String prepended to the ``key`` of each select box
+            created here (e.g. ``f"{key_prefix}_chart_type"``).
+
+    Returns:
+        None. All output is produced through Streamlit calls.
+    """
+    # Nothing to show: skip creating the expander entirely.
+    if not results:
+        return
+    # Convert to DataFrame
+    df = pd.DataFrame(results)
+    with st.expander("📊 Results & Visualization", expanded=False):
+        tab_data, tab_viz = st.tabs(["📄 Data", "📈 Visualize"])
+        with tab_data:
+            st.dataframe(df, use_container_width=True)
+        with tab_viz:
+            # Split columns by dtype: numeric ones are the only Y-axis
+            # candidates; non-numeric ones are preferred as the default X axis.
+            numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
+            categorical_cols = df.select_dtypes(exclude=['number']).columns.tolist()
+            if not numeric_cols:
+                # Without a numeric column there is nothing to plot on Y.
+                st.info("No numeric data found to visualize.")
+            else:
+                # Three side-by-side controls: chart type, X axis, Y axis.
+                col1, col2, col3 = st.columns(3)
+                with col1:
+                    chart_type = st.selectbox(
+                        "Chart Type",
+                        ["Bar", "Line", "Area", "Scatter"],
+                        key=f"{key_prefix}_chart_type"
+                    )
+                with col2:
+                    # Default X axis logic
+                    # Any column may be chosen for X; the first non-numeric
+                    # column is preselected, else the first column overall.
+                    x_options = df.columns.tolist()
+                    default_x = categorical_cols[0] if categorical_cols else x_options[0]
+                    # Find index safely
+                    # NOTE(review): default_x is taken from the frame's own
+                    # columns, so the ValueError fallback looks purely
+                    # defensive - confirm before relying on it.
+                    try:
+                        def_index = x_options.index(default_x)
+                    except ValueError:
+                        def_index = 0
+                    x_axis = st.selectbox(
+                        "X Axis",
+                        x_options,
+                        index=def_index,
+                        key=f"{key_prefix}_x_axis"
+                    )
+                with col3:
+                    # Y is restricted to numeric columns; the first is preselected.
+                    y_axis = st.selectbox(
+                        "Y Axis",
+                        numeric_cols,
+                        index=0,
+                        key=f"{key_prefix}_y_axis"
+                    )
+                # Draw the selected chart type with the chosen X/Y columns.
+                if chart_type == "Bar":
+                    st.bar_chart(df, x=x_axis, y=y_axis)
+                elif chart_type == "Line":
+                    st.line_chart(df, x=x_axis, y=y_axis)
+                elif chart_type == "Area":
+                    st.area_chart(df, x=x_axis, y=y_axis)
+                elif chart_type == "Scatter":
+                    st.scatter_chart(df, x=x_axis, y=y_axis)