Spaces:

ohmygaugh
/

agent-mcp-sql

No application file

File size: 9,573 Bytes

f831e98
 
 
 
 
 
 
 
 
86cbe3c
f831e98
 
86cbe3c
f831e98
86cbe3c
 
 
f831e98
 
 
 
86cbe3c
 
 
f831e98
 
86cbe3c
 
 
 
 
8595be6
 
86cbe3c
 
 
 
 
 
 
a0eb181
 
86cbe3c
a0eb181
86cbe3c
a0eb181
86cbe3c
 
 
f831e98
86cbe3c
 
f831e98
 
86cbe3c
a0eb181
86cbe3c
f831e98
86cbe3c
 
 
 
 
 
f831e98
86cbe3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f831e98
86cbe3c
 
f831e98
86cbe3c
 
 
 
f831e98
a0eb181
86cbe3c
a0eb181
f831e98
a0eb181
86cbe3c
a0eb181
86cbe3c
 
 
 
 
 
f831e98
86cbe3c
 
f831e98
86cbe3c
 
f831e98
86cbe3c
 
 
a0eb181
 
 
 
 
 
 
f831e98
86cbe3c
 
f831e98
86cbe3c
 
 
 
f831e98
8595be6
 
 
 
 
 
6422ca4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8595be6
 
6422ca4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8595be6
 
 
 
86cbe3c
 
 
 
f831e98
86cbe3c
 
 
 
8595be6
 
 
 
 
 
 
 
 
f831e98
86cbe3c
 
 
 
f831e98
86cbe3c
 
 
8595be6
f831e98
86cbe3c
 
 
 
 
f831e98
86cbe3c
f831e98
86cbe3c
 
 
 
8595be6
 
 
 
86cbe3c
 
 
 
8595be6
 
 
 
 
 
 
 
 
 
 
 
 
 
f831e98
8595be6
 
 
 
 
f831e98

#!/usr/bin/env python3
"""
Streamlit MCP Monitor & Query Tester
A lightweight monitoring and testing interface for the agentic system.
All database access MUST go through MCP server - no direct connections allowed.
"""

import streamlit as st
import requests
import os
import json
import pandas as pd
from typing import Dict, Any

# --- Configuration ---
AGENT_URL = os.getenv("AGENT_URL", "http://agent:8001/query")
NEO4J_URL = os.getenv("NEO4J_URL", "http://neo4j:7474")
MCP_URL = os.getenv("MCP_URL", "http://mcp:8000/mcp")
MCP_API_KEY = os.getenv("MCP_API_KEY", "dev-key-123")

st.set_page_config(
    page_title="GraphRAG Chat",
    page_icon="💬",
    layout="wide"
)

# --- Session State ---
if 'messages' not in st.session_state:
    st.session_state.messages = []
if 'schema_info' not in st.session_state:
    st.session_state.schema_info = ""
if 'current_results' not in st.session_state:
    st.session_state.current_results = None

# --- Helper Functions ---
def stream_agent_response(question: str):
    """Streams the agent's response, yielding JSON objects."""
    try:
        with requests.post(AGENT_URL, json={"question": question}, stream=True, timeout=300) as r:
            r.raise_for_status()
            for line in r.iter_lines():
                if line:
                    try:
                        yield json.loads(line.decode('utf-8'))
                    except json.JSONDecodeError:
                        # Skip malformed JSON lines
                        continue
    except requests.exceptions.RequestException as e:
        yield {"error": f"Failed to connect to agent: {e}"}

def fetch_schema_info() -> str:
    """Fetches the database schema from the MCP server for display."""
    try:
        response = requests.post(
            f"{MCP_URL}/discovery/get_relevant_schemas",
            headers={"x-api-key": MCP_API_KEY, "Content-Type": "application/json"},
            json={"query": ""}
        )
        response.raise_for_status()
        data = response.json()

        if data.get("status") == "success":
            schemas = data.get("schemas", [])
            if not schemas: return "No schema information found."
            
            # Group columns by table
            tables = {}
            for s in schemas:
                table_key = f"{s.get('database', '')}.{s.get('table', '')}"
                if table_key not in tables:
                    tables[table_key] = []
                tables[table_key].append(f"{s.get('name', '')} ({s.get('type', [''])[0]})")
            
            schema_text = ""
            for table, columns in tables.items():
                schema_text += f"**{table}**:\n"
                for col in columns:
                    schema_text += f"- {col}\n"
            return schema_text
        else:
            return f"Error from MCP: {data.get('message', 'Unknown error')}"

    except requests.exceptions.RequestException as e:
        return f"Could not fetch schema: {e}"

@st.cache_data(ttl=600)
def get_cached_schema():
    """Cache the schema info to avoid repeated calls."""
    return fetch_schema_info()

@st.cache_data(ttl=10)
def check_service_health(service_name: str, url: str) -> bool:
    """Checks if a service is reachable. Cached for 10 seconds."""
    try:
        response = requests.get(url, timeout=2)
        return response.status_code in [200, 401]
    except Exception:
        return False

# --- UI Components ---
def display_sidebar():
    with st.sidebar:
        st.title("🗄️ Database Schema")
        
        if st.button("🔄 Refresh Schema"):
            st.cache_data.clear()

        st.session_state.schema_info = get_cached_schema()
        st.markdown(st.session_state.schema_info)

        st.markdown("---")
        st.title("🔌 Service Status")
        
        try:
            neo4j_status = "✅ Online" if check_service_health("Neo4j", NEO4J_URL) else "❌ Offline"
            mcp_health_url = "http://mcp:8000/health"
            mcp_status = "✅ Online" if check_service_health("MCP", mcp_health_url) else "❌ Offline"
        except Exception as e:
            neo4j_status = "❓ Unknown"
            mcp_status = "❓ Unknown"
        
        st.markdown(f"**Neo4j:** {neo4j_status}")
        st.markdown(f"**MCP Server:** {mcp_status}")
        
        st.markdown("---")
        if st.button("🗑️ Clear Chat History"):
            st.session_state.messages = []
            st.rerun()

def extract_sql_results(observation_content: str) -> pd.DataFrame | None:
    """Extract SQL results from execute_query tool observation."""
    try:
        if "execute_query" not in observation_content or "returned:" not in observation_content:
            return None
        
        # Look for JSON results in the observation
        if "Query returned" in observation_content and "rows:" in observation_content:
            # Extract the table format from the text
            lines = observation_content.split('\n')
            table_start = -1
            for i, line in enumerate(lines):
                if "Query returned" in line and "rows:" in line:
                    table_start = i + 1
                    break
            
            if table_start >= 0 and table_start < len(lines):
                # Find the table data
                table_lines = []
                for i in range(table_start, len(lines)):
                    line = lines[i].strip()
                    if line and not line.startswith("Final Answer"):
                        if "|" in line:  # Table format
                            table_lines.append(line)
                        elif line.startswith("PT") or line.startswith("DIAB") or line.startswith("NEURO"):  # Data row
                            table_lines.append(line)
                    elif line.startswith("Final Answer"):
                        break
                
                if len(table_lines) >= 2:  # Headers + at least one data row
                    # Parse headers
                    headers = [h.strip() for h in table_lines[0].split('|')]
                    
                    # Parse data rows
                    data_rows = []
                    for line in table_lines[1:]:
                        if "and" in line and "more rows" in line:
                            break
                        row_values = [v.strip() for v in line.split('|')]
                        if len(row_values) == len(headers):
                            data_rows.append(row_values)
                    
                    if data_rows:
                        return pd.DataFrame(data_rows, columns=headers)
    except Exception:
        pass
    return None

def main():
    display_sidebar()
    st.title("💬 GraphRAG Conversational Agent")
    st.markdown("Ask questions about the life sciences dataset. The agent's thought process will be shown below.")

    # Display chat history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
            if message.get("dataframe") is not None:
                st.dataframe(message["dataframe"], use_container_width=True)
                csv = message["dataframe"].to_csv(index=False)
                st.download_button(
                    label="📥 Download CSV",
                    data=csv,
                    file_name="query_results.csv",
                    mime="text/csv"
                )

    if prompt := st.chat_input("Ask your question here..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            full_response = ""
            response_box = st.empty()
            sql_results_df = None
            
            for chunk in stream_agent_response(prompt):
                if "error" in chunk:
                    full_response = chunk["error"]
                    response_box.error(full_response)
                    break
                
                content = chunk.get("content", "")
                
                if chunk.get("type") == "thought":
                    full_response += f"🤔 *{content}*\n\n"
                elif chunk.get("type") == "observation":
                    full_response += f"{content}\n\n"
                    # Try to extract SQL results
                    df = extract_sql_results(content)
                    if df is not None:
                        sql_results_df = df
                elif chunk.get("type") == "final_answer":
                    full_response += f"**Final Answer:**\n{content}"

                response_box.markdown(full_response)
            
            # Display DataFrame if SQL results were found
            if sql_results_df is not None:
                st.markdown("---")
                st.markdown("### 📊 Query Results")
                st.dataframe(sql_results_df, use_container_width=True)
                csv = sql_results_df.to_csv(index=False)
                st.download_button(
                    label="📥 Download CSV",
                    data=csv,
                    file_name="query_results.csv",
                    mime="text/csv",
                    key=f"download_{len(st.session_state.messages)}"
                )
        
        st.session_state.messages.append({
            "role": "assistant", 
            "content": full_response,
            "dataframe": sql_results_df
        })

if __name__ == "__main__":
    main()