"""
Main application for Pharmaceutical Data Management Agent.
"""
import os
import sys
import streamlit as st
from anthropic import Anthropic
from dotenv import load_dotenv
# Make this file's directory importable so the sibling packages
# (data/, graph/, agents/, ui/) resolve regardless of the launch cwd
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
# Synthetic database used by the agents and the Database Explorer tab
from data.synthetic_db import SyntheticDatabase
# Factory for the agent graph; returns (graph, update_state_dict) — see main()
from graph.workflow import create_agent_graph
# Ensure ui/ exists before ensure_ui_files_exist() writes module files into it
os.makedirs("ui", exist_ok=True)
def ensure_ui_files_exist():
    """Create the UI module files under ``ui/`` if they don't exist.

    The app ships as a single file; on first run this materialises the four
    Streamlit tab renderers (conversation, pipeline, agent_workflow,
    db_explorer) as importable modules. Files that already exist are never
    overwritten, so local edits survive restarts. Files are written as
    UTF-8 explicitly because the generated sources contain emoji and the
    platform default encoding may not be UTF-8.

    NOTE(review): the emoji below were reconstructed from a garbled source
    (mojibake such as "β"/"π"); confirm the exact glyphs against the
    original repository if fidelity matters.
    """
    # Conversation UI: chat-style interaction that drives the agent graph.
    conversation_py = "ui/conversation.py"
    if not os.path.exists(conversation_py):
        with open(conversation_py, "w", encoding="utf-8") as f:
            f.write("""
import streamlit as st


def render_conversation_tab(session_state, agent_graph, update_state_dict):
    \"\"\"Render the conversation tab in the UI.\"\"\"
    st.subheader("Conversation with Data Management Agent")

    # Display conversation history
    for message in session_state.conversation["messages"]:
        if message["role"] == "user":
            st.markdown(f"**You:** {message['content']}")
        else:
            st.markdown(f"**Agent:** {message['content']}")

    # Input for new message
    with st.form(key="user_input_form"):
        user_input = st.text_area("What data pipeline do you need to create?",
            placeholder="e.g., I need a sales performance dashboard showing regional performance by product for the last 2 years")
        submit_button = st.form_submit_button("Submit")

    if submit_button and user_input:
        # Add user message to conversation
        new_message = {"role": "user", "content": user_input}
        session_state.conversation["messages"].append(new_message)

        # Update agent state
        agent_state = session_state.agent_state.copy()
        agent_state["messages"] = agent_state["messages"] + [new_message]

        # Run the agent graph
        with st.spinner("Agent is processing..."):
            try:
                # Update the state dictionary for tools
                update_state_dict(agent_state)
                # Execute the agent workflow
                result = agent_graph.invoke(agent_state)
                # Update session state with result
                session_state.agent_state = result
                # Update the state dictionary for tools again with the result
                update_state_dict(result)
                # Update conversation with agent responses
                for message in result["messages"]:
                    if message not in session_state.conversation["messages"]:
                        session_state.conversation["messages"].append(message)
                # Update other state properties
                session_state.conversation["user_intent"] = result.get("user_intent", {})
                session_state.conversation["pipeline_plan"] = result.get("pipeline_plan", {})
                session_state.conversation["sql_queries"] = result.get("sql_queries", [])
                session_state.conversation["execution_results"] = result.get("execution_results", {})
                session_state.conversation["confidence_scores"] = result.get("confidence_scores", {})
                session_state.conversation["status"] = result.get("status", "planning")
                session_state.conversation["current_agent"] = result.get("current_agent", "understanding_agent")
                # Force refresh
                st.rerun()
            except Exception as e:
                st.error(f"Error executing agent workflow: {str(e)}")
""")
    # Pipeline UI: read-only view of intent, plan, queries and results.
    pipeline_py = "ui/pipeline.py"
    if not os.path.exists(pipeline_py):
        with open(pipeline_py, "w", encoding="utf-8") as f:
            f.write("""
import streamlit as st


def render_pipeline_tab(session_state):
    \"\"\"Render the pipeline details tab in the UI.\"\"\"
    st.subheader("Pipeline Details")

    # Intent Understanding
    st.markdown("### User Intent")
    if session_state.conversation["user_intent"]:
        st.markdown(session_state.conversation["user_intent"].get("description", "No intent captured yet"))
        if "confidence_scores" in session_state.conversation and "intent_understanding" in session_state.conversation["confidence_scores"]:
            score = session_state.conversation["confidence_scores"]["intent_understanding"] * 100
            st.progress(score / 100, text=f"Intent Understanding Confidence: {score:.1f}%")
    else:
        st.info("No user intent has been captured yet. Start a conversation to extract intent.")

    # Pipeline Plan
    st.markdown("### Pipeline Plan")
    if session_state.conversation["pipeline_plan"]:
        st.markdown(session_state.conversation["pipeline_plan"].get("description", "No plan created yet"))
        if "confidence_scores" in session_state.conversation and "plan_quality" in session_state.conversation["confidence_scores"]:
            score = session_state.conversation["confidence_scores"]["plan_quality"] * 100
            st.progress(score / 100, text=f"Plan Quality Confidence: {score:.1f}%")
    else:
        st.info("No pipeline plan has been created yet. Continue the conversation to develop a plan.")

    # SQL Queries
    st.markdown("### SQL Queries")
    if session_state.conversation["sql_queries"]:
        for i, query in enumerate(session_state.conversation["sql_queries"]):
            with st.expander(f"Query {i+1}: {query.get('name', 'Unnamed Query')}"):
                st.code(query.get("sql", ""), language="sql")
    else:
        st.info("No SQL queries have been generated yet. Continue the conversation to generate queries.")

    # Execution Results
    st.markdown("### Execution Results")
    if session_state.conversation["execution_results"] and "details" in session_state.conversation["execution_results"]:
        st.markdown(session_state.conversation["execution_results"].get("summary", ""))
        if "success_rate" in session_state.conversation["execution_results"]:
            score = session_state.conversation["execution_results"]["success_rate"] * 100
            st.progress(score / 100, text=f"Execution Success Rate: {score:.1f}%")
        results = session_state.conversation["execution_results"]["details"]
        for i, result in enumerate(results):
            status = "✅" if result["success"] else "❌"
            with st.expander(f"{status} {result.get('query_name', f'Query {i+1}')}"):
                st.markdown(f"**Result:** {result.get('result_summary', 'No summary available')}")
                st.markdown(f"**Rows Processed:** {result.get('row_count', 0)}")
    else:
        st.info("No execution results available yet. Complete the pipeline creation to see results.")
""")
    # Agent Workflow UI: four-column progress visualisation + human review.
    agent_workflow_py = "ui/agent_workflow.py"
    if not os.path.exists(agent_workflow_py):
        with open(agent_workflow_py, "w", encoding="utf-8") as f:
            f.write("""
import streamlit as st


def render_workflow_tab(session_state):
    \"\"\"Render the agent workflow visualization tab in the UI.\"\"\"
    st.subheader("Agent Workflow Visualization")

    # Display current agent and status
    current_agent = session_state.conversation.get("current_agent", "understanding_agent")
    status = session_state.conversation.get("status", "planning")
    st.markdown(f"**Current State:** {status.title()}")
    st.markdown(f"**Current Agent:** {current_agent.replace('_', ' ').title()}")

    # Visualize the workflow
    col1, col2, col3, col4 = st.columns(4)

    # Determine which agent is active
    understanding_active = current_agent == "understanding_agent"
    planning_active = current_agent == "planning_agent"
    sql_active = current_agent == "sql_generator_agent"
    executor_active = current_agent == "executor_agent"

    # Show the workflow visualization
    with col1:
        if understanding_active:
            st.markdown("### 🔍 **Understanding**")
        else:
            st.markdown("### 🔍 Understanding")
        st.markdown("Extracts user intent and asks clarification questions")
        if "user_intent" in session_state.conversation and session_state.conversation["user_intent"]:
            st.success("Completed")
        elif understanding_active:
            st.info("In Progress")
        else:
            st.warning("Not Started")

    with col2:
        if planning_active:
            st.markdown("### 📋 **Planning**")
        else:
            st.markdown("### 📋 Planning")
        st.markdown("Creates data pipeline plan with sources and transformations")
        if "pipeline_plan" in session_state.conversation and session_state.conversation["pipeline_plan"]:
            st.success("Completed")
        elif planning_active:
            st.info("In Progress")
        elif understanding_active:
            st.warning("Not Started")
        else:
            st.success("Completed")

    with col3:
        if sql_active:
            st.markdown("### 💻 **SQL Generation**")
        else:
            st.markdown("### 💻 SQL Generation")
        st.markdown("Converts plan into executable SQL queries")
        if "sql_queries" in session_state.conversation and session_state.conversation["sql_queries"]:
            st.success("Completed")
        elif sql_active:
            st.info("In Progress")
        elif understanding_active or planning_active:
            st.warning("Not Started")
        else:
            st.success("Completed")

    with col4:
        if executor_active:
            st.markdown("### ⚙️ **Execution**")
        else:
            st.markdown("### ⚙️ Execution")
        st.markdown("Executes queries and reports results")
        if "execution_results" in session_state.conversation and session_state.conversation["execution_results"]:
            st.success("Completed")
        elif executor_active:
            st.info("In Progress")
        elif understanding_active or planning_active or sql_active:
            st.warning("Not Started")
        else:
            st.success("Completed")

    # Overall confidence score
    if "confidence_scores" in session_state.conversation and "overall" in session_state.conversation["confidence_scores"]:
        st.markdown("### Overall Pipeline Confidence")
        score = session_state.conversation["confidence_scores"]["overall"] * 100
        st.progress(score / 100, text=f"{score:.1f}%")

        # Workflow decision points (nested here so `score` is always defined)
        if status == "complete":
            if score > 80:
                st.success("✅ High confidence - Pipeline can be deployed automatically")
            else:
                st.warning("⚠️ Medium confidence - Human review recommended before deployment")

    # Add human review section for pending approval status
    if status == "pending_approval":
        st.markdown("### 🤖 Human Review Required")
        st.info("This pipeline requires human review before deployment")
        col1, col2 = st.columns(2)
        with col1:
            if st.button("✅ Approve Pipeline"):
                # Update state to approved
                session_state.conversation["status"] = "approved"
                # Trigger execution to continue
                st.rerun()
        with col2:
            if st.button("❌ Reject Pipeline"):
                # Update state to rejected
                session_state.conversation["status"] = "rejected"
                st.error("Pipeline rejected. Please provide feedback to refine the pipeline.")
""")
    # DB Explorer UI: browse table samples and run ad-hoc SQL.
    db_explorer_py = "ui/db_explorer.py"
    if not os.path.exists(db_explorer_py):
        with open(db_explorer_py, "w", encoding="utf-8") as f:
            f.write("""
import streamlit as st
import pandas as pd


def render_db_explorer_tab(session_state):
    \"\"\"Render the database explorer tab in the UI.\"\"\"
    st.subheader("Database Explorer")

    # Get tables by category
    tables = session_state.db.get_tables()

    # Display tables by category
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("### Raw Data Tables")
        for table in tables["raw_tables"]:
            with st.expander(table):
                sample = session_state.db.get_table_sample(table, 3)
                st.dataframe(pd.DataFrame(sample))

        st.markdown("### Staging Tables")
        for table in tables["staging_tables"]:
            with st.expander(table):
                sample = session_state.db.get_table_sample(table, 3)
                st.dataframe(pd.DataFrame(sample))

    with col2:
        st.markdown("### Analytics Ready Data")
        for table in tables["ard_tables"]:
            with st.expander(table):
                sample = session_state.db.get_table_sample(table, 3)
                st.dataframe(pd.DataFrame(sample))

        st.markdown("### Data Products")
        for table in tables["data_products"]:
            with st.expander(table):
                sample = session_state.db.get_table_sample(table, 3)
                st.dataframe(pd.DataFrame(sample))

    # SQL Query Executor
    st.markdown("### Query Explorer")
    with st.form(key="sql_form"):
        sql_query = st.text_area("Enter SQL Query", height=100,
            placeholder="SELECT * FROM ARD_SALES_PERFORMANCE WHERE region = 'North' LIMIT 5")
        run_sql = st.form_submit_button("Run Query")

    if run_sql and sql_query:
        with st.spinner("Executing query..."):
            result = session_state.db.execute_query(sql_query)
            if "error" in result:
                st.error(f"Error executing query: {result['error']}")
            elif "data" in result:
                st.dataframe(pd.DataFrame(result["data"]))
                st.success(f"Query returned {len(result['data'])} rows")
            elif "tables" in result:
                st.write(result["tables"])
            elif "schema" in result:
                st.write(f"Schema for {result['table']}:")
                st.dataframe(pd.DataFrame(result["schema"]))
""")
# Generate the ui/*.py modules (if missing) BEFORE importing them below;
# on a fresh checkout these imports would otherwise fail.
ensure_ui_files_exist()
# Import UI tab renderers (modules created above at runtime)
from ui.conversation import render_conversation_tab
from ui.pipeline import render_pipeline_tab
from ui.agent_workflow import render_workflow_tab
from ui.db_explorer import render_db_explorer_tab
# Agent state container used to seed each session — see main()
from agents.state import AgentState
# Load environment variables (e.g. ANTHROPIC_API_KEY) from .env if present
load_dotenv()
def main():
    """Main application entry point.

    Configures the Streamlit page, validates the Anthropic API key,
    initialises the synthetic database, the agent graph and the
    per-session conversation/agent state, then renders the four UI tabs.
    """
    # Set page configuration.
    # NOTE(review): icon reconstructed from a garbled source — presumably 💊
    # for a pharma app; confirm against the original repo.
    st.set_page_config(page_title="Pharmaceutical Data Agent", page_icon="💊", layout="wide")

    # Fail fast with a readable message when the key is missing. The previous
    # code wrote the value straight back into os.environ, which raises
    # TypeError when the key is unset (os.environ values must be strings).
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        st.error("ANTHROPIC_API_KEY is not set. Add it to your environment or a .env file.")
        st.stop()

    # Initialize Anthropic client using default settings
    anthropic_client = Anthropic()  # Uses API key from environment variable

    # Initialize database with synthetic data (once per session)
    if "db" not in st.session_state:
        st.session_state.db = SyntheticDatabase()

    # Initialize agent graph and the tools' state-dict updater (once per session)
    if "agent_graph" not in st.session_state or "update_state_dict" not in st.session_state:
        st.session_state.agent_graph, st.session_state.update_state_dict = create_agent_graph(
            anthropic_client,
            st.session_state.db
        )

    # Initialize conversation state
    if "conversation" not in st.session_state:
        st.session_state.conversation = {
            "messages": [],
            "user_intent": {},
            "data_context": {},
            "pipeline_plan": {},
            "sql_queries": [],
            "execution_results": {},
            "confidence_scores": {},
            "status": "planning",
            "current_agent": "understanding_agent"  # Always a string, not a list
        }

    # Initialize agent state for new runs
    if "agent_state" not in st.session_state:
        st.session_state.agent_state = AgentState(
            messages=[],
            user_intent={},
            data_context={},
            pipeline_plan={},
            sql_queries=[],
            execution_results={},
            confidence_scores={},
            status="planning",
            current_agent="understanding_agent"  # Always a string, not a list
        )

    # Main content area
    st.title("Pharmaceutical Data Management Agent")

    # Tabs for different views
    tab1, tab2, tab3, tab4 = st.tabs(["Conversation", "Pipeline Details", "Agent Workflow", "Database Explorer"])
    with tab1:
        render_conversation_tab(
            st.session_state,
            st.session_state.agent_graph,
            st.session_state.update_state_dict
        )
    with tab2:
        render_pipeline_tab(st.session_state)
    with tab3:
        render_workflow_tab(st.session_state)
    with tab4:
        render_db_explorer_tab(st.session_state)
# Script entry point; the trailing scrape artifact ("|") that made this a
# syntax error has been removed and the guard body re-indented.
if __name__ == "__main__":
    main()