Fixed one more bug found during stress testing.

Browse files

Files changed (3) hide show

mcp/core/intelligence.py +17 -8
mcp/requirements.txt +1 -2
streamlit/app.py +71 -1

mcp/core/intelligence.py CHANGED Viewed

@@ -27,34 +27,43 @@ def _get_database_for_table(table_name: str) -> str | None:
 async def execute_federated_query(sql: str) -> List[Dict[str, Any]]:
     """
     Executes a SQL query against the correct SQLite database.
-    This is a simplified version of a federated query engine. It identifies the
-    target database from the first table name in the SQL query.
     """
     parsed = sqlparse.parse(sql)[0]
     target_table = None
-    # Find the first table name in the parsed SQL
     for token in parsed.tokens:
-        if isinstance(token, sqlparse.sql.Identifier):
             target_table = token.get_real_name()
             break
-        elif token.is_group:
             for sub_token in token.tokens:
                 if isinstance(sub_token, sqlparse.sql.Identifier):
                     target_table = sub_token.get_real_name()
                     break
-        if target_table:
-            break
     if not target_table:
         raise ValueError("Could not identify a target table in the SQL query.")
     logger.info(f"Identified target table: {target_table}")
-    # Determine which database engine to use
     db_name = _get_database_for_table(target_table)
     if not db_name:
         raise ValueError(f"Table '{target_table}' not found in any known database.")
     db_engines = get_db_connections()
     engine = db_engines.get(db_name)

 async def execute_federated_query(sql: str) -> List[Dict[str, Any]]:
     """
     Executes a SQL query against the correct SQLite database.
+    Strips database prefixes from table names (e.g., clinical_trials.patients → patients).
     """
     parsed = sqlparse.parse(sql)[0]
     target_table = None
+    # Find table name from FROM clause
+    from_found = False
     for token in parsed.tokens:
+        if token.ttype is sqlparse.tokens.Keyword and token.value.upper() == 'FROM':
+            from_found = True
+            continue
+        elif from_found and isinstance(token, sqlparse.sql.Identifier):
             target_table = token.get_real_name()
             break
+        elif from_found and token.is_group:
             for sub_token in token.tokens:
                 if isinstance(sub_token, sqlparse.sql.Identifier):
                     target_table = sub_token.get_real_name()
                     break
+            if target_table:
+                break
     if not target_table:
         raise ValueError("Could not identify a target table in the SQL query.")
     logger.info(f"Identified target table: {target_table}")
+    # Determine which database this table belongs to
     db_name = _get_database_for_table(target_table)
     if not db_name:
         raise ValueError(f"Table '{target_table}' not found in any known database.")
+    # Strip all database prefixes from SQL (e.g., "clinical_trials.patients" → "patients")
+    for known_db in ["clinical_trials", "laboratory", "drug_discovery"]:
+        sql = sql.replace(f"{known_db}.", "")
+    logger.info(f"Cleaned SQL for database '{db_name}': {sql}")
     db_engines = get_db_connections()
     engine = db_engines.get(db_name)

mcp/requirements.txt CHANGED Viewed

@@ -4,5 +4,4 @@ neo4j==5.14.0
 pydantic==2.4.0
 requests==2.31.0
 SQLAlchemy==2.0.29
-sqlparse==0.5.0
-mcp==1.1.1

 pydantic==2.4.0
 requests==2.31.0
 SQLAlchemy==2.0.29
+sqlparse==0.5.0

streamlit/app.py CHANGED Viewed

@@ -29,6 +29,8 @@ if 'messages' not in st.session_state:
     st.session_state.messages = []
 if 'schema_info' not in st.session_state:
     st.session_state.schema_info = ""
 # --- Helper Functions ---
 def stream_agent_response(question: str):
@@ -125,6 +127,42 @@ def display_sidebar():
             st.session_state.messages = []
             st.rerun()
 def main():
     display_sidebar()
     st.title("💬 GraphRAG Conversational Agent")
@@ -134,6 +172,15 @@ def main():
     for message in st.session_state.messages:
         with st.chat_message(message["role"]):
             st.markdown(message["content"])
     if prompt := st.chat_input("Ask your question here..."):
         st.session_state.messages.append({"role": "user", "content": prompt})
@@ -143,6 +190,7 @@ def main():
         with st.chat_message("assistant"):
             full_response = ""
             response_box = st.empty()
             for chunk in stream_agent_response(prompt):
                 if "error" in chunk:
@@ -156,12 +204,34 @@ def main():
                     full_response += f"🤔 *{content}*\n\n"
                 elif chunk.get("type") == "observation":
                     full_response += f"{content}\n\n"
                 elif chunk.get("type") == "final_answer":
                     full_response += f"**Final Answer:**\n{content}"
                 response_box.markdown(full_response)
-        st.session_state.messages.append({"role": "assistant", "content": full_response})
 if __name__ == "__main__":
     main()

     st.session_state.messages = []
 if 'schema_info' not in st.session_state:
     st.session_state.schema_info = ""
+if 'current_results' not in st.session_state:
+    st.session_state.current_results = None
 # --- Helper Functions ---
 def stream_agent_response(question: str):
             st.session_state.messages = []
             st.rerun()
+def extract_sql_results(observation_content: str) -> pd.DataFrame | None:
+    """Extract SQL results from execute_query tool observation.
+
+    Takes the text that follows the first triple backtick in the observation
+    (up to the next triple backtick, if any) and parses it as a
+    pipe-delimited table: a header line, a separator line, then data rows.
+
+    Args:
+        observation_content: Observation text to scan for a result table.
+
+    Returns:
+        A DataFrame whose columns are the parsed headers and whose cells are
+        the stripped string values of each row, or None when the text does
+        not contain both "execute_query" and "returned:", contains no triple
+        backtick, has fewer than three non-empty lines, yields no row whose
+        cell count matches the headers, or any exception is raised while
+        parsing.
+    """
+    try:
+        # Cheap guard: only observations carrying both marker substrings are parsed.
+        if "execute_query" not in observation_content or "returned:" not in observation_content:
+            return None
+        # Extract the content between triple backticks
+        if "```" in observation_content:
+            parts = observation_content.split("```")
+            if len(parts) >= 2:
+                # parts[1] is the text after the first fence; later fences are ignored.
+                result_text = parts[1].strip()
+                # Parse table format: "column1 | column2 | column3"
+                lines = [line.strip() for line in result_text.split('\n') if line.strip()]
+                if len(lines) < 3:  # Need headers, separator, and at least one row
+                    return None
+                # Parse headers
+                headers = [h.strip() for h in lines[0].split('|')]
+                # Parse data rows (skip separator line at index 1)
+                data_rows = []
+                for line in lines[2:]:
+                    # Stop at a line containing both "and" and "more rows"
+                    # (presumably a truncation footer — TODO confirm format).
+                    # NOTE(review): a data row containing both substrings
+                    # would also end parsing here.
+                    if "and" in line and "more rows" in line:
+                        break
+                    row_values = [v.strip() for v in line.split('|')]
+                    # Rows whose cell count differs from the header count are
+                    # dropped without any report.
+                    if len(row_values) == len(headers):
+                        data_rows.append(row_values)
+                if data_rows:
+                    return pd.DataFrame(data_rows, columns=headers)
+    except Exception:
+        # NOTE(review): every exception is swallowed, so a parsing failure is
+        # indistinguishable from "no results" for the caller.
+        pass
+    return None
 def main():
     display_sidebar()
     st.title("💬 GraphRAG Conversational Agent")
     for message in st.session_state.messages:
         with st.chat_message(message["role"]):
             st.markdown(message["content"])
+            if message.get("dataframe") is not None:
+                st.dataframe(message["dataframe"], use_container_width=True)
+                csv = message["dataframe"].to_csv(index=False)
+                st.download_button(
+                    label="📥 Download CSV",
+                    data=csv,
+                    file_name="query_results.csv",
+                    mime="text/csv"
+                )
     if prompt := st.chat_input("Ask your question here..."):
         st.session_state.messages.append({"role": "user", "content": prompt})
         with st.chat_message("assistant"):
             full_response = ""
             response_box = st.empty()
+            sql_results_df = None
             for chunk in stream_agent_response(prompt):
                 if "error" in chunk:
                     full_response += f"🤔 *{content}*\n\n"
                 elif chunk.get("type") == "observation":
                     full_response += f"{content}\n\n"
+                    # Try to extract SQL results
+                    df = extract_sql_results(content)
+                    if df is not None:
+                        sql_results_df = df
                 elif chunk.get("type") == "final_answer":
                     full_response += f"**Final Answer:**\n{content}"
                 response_box.markdown(full_response)
+            # Display DataFrame if SQL results were found
+            if sql_results_df is not None:
+                st.markdown("---")
+                st.markdown("### 📊 Query Results")
+                st.dataframe(sql_results_df, use_container_width=True)
+                csv = sql_results_df.to_csv(index=False)
+                st.download_button(
+                    label="📥 Download CSV",
+                    data=csv,
+                    file_name="query_results.csv",
+                    mime="text/csv",
+                    key=f"download_{len(st.session_state.messages)}"
+                )
+        st.session_state.messages.append({
+            "role": "assistant",
+            "content": full_response,
+            "dataframe": sql_results_df
+        })
 if __name__ == "__main__":
     main()