Spaces:

cryogenic22
/

data_pipeline_agent

Runtime error

App Files Files Community

cryogenic22 committed on Mar 21, 2025

Commit

b0e5312

verified ·

1 Parent(s): 7ab1a2c

Update app.py

Browse files

Files changed (1) hide show

app.py +330 -0

app.py CHANGED Viewed

@@ -3,16 +3,346 @@ Main application for Pharmaceutical Data Management Agent.
 """
 import os
 import streamlit as st
 from anthropic import Anthropic
 from dotenv import load_dotenv
 # Import data module
 from data.synthetic_db import SyntheticDatabase
 # Import graph module
 from graph.workflow import create_agent_graph
 # Import UI modules
 from ui.conversation import render_conversation_tab
 from ui.pipeline import render_pipeline_tab

 """
 import os
+import sys
 import streamlit as st
 from anthropic import Anthropic
 from dotenv import load_dotenv
+# Add this file's directory to sys.path so the sibling packages (data, graph, ui) resolve as absolute imports
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
 # Import data module
 from data.synthetic_db import SyntheticDatabase
 # Import graph module
 from graph.workflow import create_agent_graph
+# Create ui directory if it doesn't exist
+os.makedirs("ui", exist_ok=True)
+# Create ui files if they don't exist
+def ensure_ui_files_exist():
+    """Create UI module files if they don't exist."""
+    # Conversation UI
+    conversation_py = "ui/conversation.py"
+    if not os.path.exists(conversation_py):
+        with open(conversation_py, "w") as f:
+            f.write("""
+import streamlit as st
+def render_conversation_tab(session_state, agent_graph, update_state_dict):
+    \"\"\"Render the conversation tab in the UI.\"\"\"
+    st.subheader("Conversation with Data Management Agent")
+    # Display conversation history
+    for message in session_state.conversation["messages"]:
+        if message["role"] == "user":
+            st.markdown(f"**You:** {message['content']}")
+        else:
+            st.markdown(f"**Agent:** {message['content']}")
+    # Input for new message
+    with st.form(key="user_input_form"):
+        user_input = st.text_area("What data pipeline do you need to create?",
+                        placeholder="e.g., I need a sales performance dashboard showing regional performance by product for the last 2 years")
+        submit_button = st.form_submit_button("Submit")
+    if submit_button and user_input:
+        # Add user message to conversation
+        new_message = {"role": "user", "content": user_input}
+        session_state.conversation["messages"].append(new_message)
+        # Update agent state
+        agent_state = session_state.agent_state.copy()
+        agent_state["messages"] = agent_state["messages"] + [new_message]
+        # Run the agent graph
+        with st.spinner("Agent is processing..."):
+            try:
+                # Update the state dictionary for tools
+                update_state_dict(agent_state)
+                # Execute the agent workflow
+                result = agent_graph.invoke(agent_state)
+                # Update session state with result
+                session_state.agent_state = result
+                # Update the state dictionary for tools again with the result
+                update_state_dict(result)
+                # Update conversation with agent responses
+                for message in result["messages"]:
+                    if message not in session_state.conversation["messages"]:
+                        session_state.conversation["messages"].append(message)
+                # Update other state properties
+                session_state.conversation["user_intent"] = result.get("user_intent", {})
+                session_state.conversation["pipeline_plan"] = result.get("pipeline_plan", {})
+                session_state.conversation["sql_queries"] = result.get("sql_queries", [])
+                session_state.conversation["execution_results"] = result.get("execution_results", {})
+                session_state.conversation["confidence_scores"] = result.get("confidence_scores", {})
+                session_state.conversation["status"] = result.get("status", "planning")
+                session_state.conversation["current_agent"] = result.get("current_agent", "understanding_agent")
+                # Force refresh
+                st.rerun()
+            except Exception as e:
+                st.error(f"Error executing agent workflow: {str(e)}")
+""")
+    # Pipeline UI
+    pipeline_py = "ui/pipeline.py"
+    if not os.path.exists(pipeline_py):
+        with open(pipeline_py, "w") as f:
+            f.write("""
+import streamlit as st
+def render_pipeline_tab(session_state):
+    \"\"\"Render the pipeline details tab in the UI.\"\"\"
+    st.subheader("Pipeline Details")
+    # Intent Understanding
+    st.markdown("### User Intent")
+    if session_state.conversation["user_intent"]:
+        st.markdown(session_state.conversation["user_intent"].get("description", "No intent captured yet"))
+        if "confidence_scores" in session_state.conversation and "intent_understanding" in session_state.conversation["confidence_scores"]:
+            score = session_state.conversation["confidence_scores"]["intent_understanding"] * 100
+            st.progress(score / 100, text=f"Intent Understanding Confidence: {score:.1f}%")
+    else:
+        st.info("No user intent has been captured yet. Start a conversation to extract intent.")
+    # Pipeline Plan
+    st.markdown("### Pipeline Plan")
+    if session_state.conversation["pipeline_plan"]:
+        st.markdown(session_state.conversation["pipeline_plan"].get("description", "No plan created yet"))
+        if "confidence_scores" in session_state.conversation and "plan_quality" in session_state.conversation["confidence_scores"]:
+            score = session_state.conversation["confidence_scores"]["plan_quality"] * 100
+            st.progress(score / 100, text=f"Plan Quality Confidence: {score:.1f}%")
+    else:
+        st.info("No pipeline plan has been created yet. Continue the conversation to develop a plan.")
+    # SQL Queries
+    st.markdown("### SQL Queries")
+    if session_state.conversation["sql_queries"]:
+        for i, query in enumerate(session_state.conversation["sql_queries"]):
+            with st.expander(f"Query {i+1}: {query.get('name', 'Unnamed Query')}"):
+                st.code(query.get("sql", ""), language="sql")
+    else:
+        st.info("No SQL queries have been generated yet. Continue the conversation to generate queries.")
+    # Execution Results
+    st.markdown("### Execution Results")
+    if session_state.conversation["execution_results"] and "details" in session_state.conversation["execution_results"]:
+        st.markdown(session_state.conversation["execution_results"].get("summary", ""))
+        if "success_rate" in session_state.conversation["execution_results"]:
+            score = session_state.conversation["execution_results"]["success_rate"] * 100
+            st.progress(score / 100, text=f"Execution Success Rate: {score:.1f}%")
+        results = session_state.conversation["execution_results"]["details"]
+        for i, result in enumerate(results):
+            status = "✅" if result["success"] else "❌"
+            with st.expander(f"{status} {result.get('query_name', f'Query {i+1}')}"):
+                st.markdown(f"**Result:** {result.get('result_summary', 'No summary available')}")
+                st.markdown(f"**Rows Processed:** {result.get('row_count', 0)}")
+    else:
+        st.info("No execution results available yet. Complete the pipeline creation to see results.")
+""")
+    # Agent Workflow UI
+    agent_workflow_py = "ui/agent_workflow.py"
+    if not os.path.exists(agent_workflow_py):
+        with open(agent_workflow_py, "w") as f:
+            f.write("""
+import streamlit as st
+def render_workflow_tab(session_state):
+    \"\"\"Render the agent workflow visualization tab in the UI.\"\"\"
+    st.subheader("Agent Workflow Visualization")
+    # Display current agent and status
+    current_agent = session_state.conversation.get("current_agent", "understanding_agent")
+    status = session_state.conversation.get("status", "planning")
+    st.markdown(f"**Current State:** {status.title()}")
+    st.markdown(f"**Current Agent:** {current_agent.replace('_', ' ').title()}")
+    # Visualize the workflow
+    col1, col2, col3, col4 = st.columns(4)
+    # Determine which agent is active
+    understanding_active = current_agent == "understanding_agent"
+    planning_active = current_agent == "planning_agent"
+    sql_active = current_agent == "sql_generator_agent"
+    executor_active = current_agent == "executor_agent"
+    # Show the workflow visualization
+    with col1:
+        if understanding_active:
+            st.markdown("### 🔍 **Understanding**")
+        else:
+            st.markdown("### 🔍 Understanding")
+        st.markdown("Extracts user intent and asks clarification questions")
+        if "user_intent" in session_state.conversation and session_state.conversation["user_intent"]:
+            st.success("Completed")
+        elif understanding_active:
+            st.info("In Progress")
+        else:
+            st.warning("Not Started")
+    with col2:
+        if planning_active:
+            st.markdown("### 📋 **Planning**")
+        else:
+            st.markdown("### 📋 Planning")
+        st.markdown("Creates data pipeline plan with sources and transformations")
+        if "pipeline_plan" in session_state.conversation and session_state.conversation["pipeline_plan"]:
+            st.success("Completed")
+        elif planning_active:
+            st.info("In Progress")
+        elif understanding_active:
+            st.warning("Not Started")
+        else:
+            st.success("Completed")
+    with col3:
+        if sql_active:
+            st.markdown("### 💻 **SQL Generation**")
+        else:
+            st.markdown("### 💻 SQL Generation")
+        st.markdown("Converts plan into executable SQL queries")
+        if "sql_queries" in session_state.conversation and session_state.conversation["sql_queries"]:
+            st.success("Completed")
+        elif sql_active:
+            st.info("In Progress")
+        elif understanding_active or planning_active:
+            st.warning("Not Started")
+        else:
+            st.success("Completed")
+    with col4:
+        if executor_active:
+            st.markdown("### ⚙️ **Execution**")
+        else:
+            st.markdown("### ⚙️ Execution")
+        st.markdown("Executes queries and reports results")
+        if "execution_results" in session_state.conversation and session_state.conversation["execution_results"]:
+            st.success("Completed")
+        elif executor_active:
+            st.info("In Progress")
+        elif understanding_active or planning_active or sql_active:
+            st.warning("Not Started")
+        else:
+            st.success("Completed")
+    # Overall confidence score
+    if "confidence_scores" in session_state.conversation and "overall" in session_state.conversation["confidence_scores"]:
+        st.markdown("### Overall Pipeline Confidence")
+        score = session_state.conversation["confidence_scores"]["overall"] * 100
+        st.progress(score / 100, text=f"{score:.1f}%")
+        # Workflow decision points
+        if status == "complete":
+            if score > 80:
+                st.success("✅ High confidence - Pipeline can be deployed automatically")
+            else:
+                st.warning("⚠️ Medium confidence - Human review recommended before deployment")
+    # Add human review section for pending approval status
+    if status == "pending_approval":
+        st.markdown("### 👤 Human Review Required")
+        st.info("This pipeline requires human review before deployment")
+        col1, col2 = st.columns(2)
+        with col1:
+            if st.button("✅ Approve Pipeline"):
+                # Update state to approved
+                session_state.conversation["status"] = "approved"
+                # Trigger execution to continue
+                st.rerun()
+        with col2:
+            if st.button("❌ Reject Pipeline"):
+                # Update state to rejected
+                session_state.conversation["status"] = "rejected"
+                st.error("Pipeline rejected. Please provide feedback to refine the pipeline.")
+""")
+    # DB Explorer UI
+    db_explorer_py = "ui/db_explorer.py"
+    if not os.path.exists(db_explorer_py):
+        with open(db_explorer_py, "w") as f:
+            f.write("""
+import streamlit as st
+import pandas as pd
+def render_db_explorer_tab(session_state):
+    \"\"\"Render the database explorer tab in the UI.\"\"\"
+    st.subheader("Database Explorer")
+    # Get tables by category
+    tables = session_state.db.get_tables()
+    # Display tables by category
+    col1, col2 = st.columns(2)
+    with col1:
+        st.markdown("### Raw Data Tables")
+        for table in tables["raw_tables"]:
+            with st.expander(table):
+                sample = session_state.db.get_table_sample(table, 3)
+                st.dataframe(pd.DataFrame(sample))
+        st.markdown("### Staging Tables")
+        for table in tables["staging_tables"]:
+            with st.expander(table):
+                sample = session_state.db.get_table_sample(table, 3)
+                st.dataframe(pd.DataFrame(sample))
+    with col2:
+        st.markdown("### Analytics Ready Data")
+        for table in tables["ard_tables"]:
+            with st.expander(table):
+                sample = session_state.db.get_table_sample(table, 3)
+                st.dataframe(pd.DataFrame(sample))
+        st.markdown("### Data Products")
+        for table in tables["data_products"]:
+            with st.expander(table):
+                sample = session_state.db.get_table_sample(table, 3)
+                st.dataframe(pd.DataFrame(sample))
+    # SQL Query Executor
+    st.markdown("### Query Explorer")
+    with st.form(key="sql_form"):
+        sql_query = st.text_area("Enter SQL Query", height=100,
+                                placeholder="SELECT * FROM ARD_SALES_PERFORMANCE WHERE region = 'North' LIMIT 5")
+        run_sql = st.form_submit_button("Run Query")
+    if run_sql and sql_query:
+        with st.spinner("Executing query..."):
+            result = session_state.db.execute_query(sql_query)
+            if "error" in result:
+                st.error(f"Error executing query: {result['error']}")
+            elif "data" in result:
+                st.dataframe(pd.DataFrame(result["data"]))
+                st.success(f"Query returned {len(result['data'])} rows")
+            elif "tables" in result:
+                st.write(result["tables"])
+            elif "schema" in result:
+                st.write(f"Schema for {result['table']}:")
+                st.dataframe(pd.DataFrame(result["schema"]))
+""")
+# Generate any missing UI modules before the imports below
+ensure_ui_files_exist()
 # Import UI modules
 from ui.conversation import render_conversation_tab
 from ui.pipeline import render_pipeline_tab