File size: 17,446 Bytes
a621345
 
 
f8a3c4c
 
b0e5312
f8a3c4c
 
a621345
f8a3c4c
b0e5312
 
 
a621345
 
f8a3c4c
a621345
 
f8a3c4c
b0e5312
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec7ddd6
b0e5312
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8dca38a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3607b03
8dca38a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0eeb228
0497d8b
8dca38a
 
 
 
 
 
 
 
 
 
 
0eeb228
8dca38a
0eeb228
 
8dca38a
 
 
 
 
 
 
 
 
 
0eeb228
8dca38a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
"""
Main application for Pharmaceutical Data Management Agent.
"""

import os
import sys
import streamlit as st
from anthropic import Anthropic
from dotenv import load_dotenv

# Add the current directory to the path to enable relative imports
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# Import data module
from data.synthetic_db import SyntheticDatabase

# Import graph module
from graph.workflow import create_agent_graph

# Create ui directory if it doesn't exist
os.makedirs("ui", exist_ok=True)

# Create ui files if they don't exist
def ensure_ui_files_exist():
    """Create UI module files if they don't exist."""
    # Conversation UI
    conversation_py = "ui/conversation.py"
    if not os.path.exists(conversation_py):
        with open(conversation_py, "w") as f:
            f.write("""
import streamlit as st

def render_conversation_tab(session_state, agent_graph, update_state_dict):
    \"\"\"Render the conversation tab in the UI.\"\"\"
    st.subheader("Conversation with Data Management Agent")
    
    # Display conversation history
    for message in session_state.conversation["messages"]:
        if message["role"] == "user":
            st.markdown(f"**You:** {message['content']}")
        else:
            st.markdown(f"**Agent:** {message['content']}")
    
    # Input for new message
    with st.form(key="user_input_form"):
        user_input = st.text_area("What data pipeline do you need to create?", 
                                placeholder="e.g., I need a sales performance dashboard showing regional performance by product for the last 2 years")
        submit_button = st.form_submit_button("Submit")
    
    if submit_button and user_input:
        # Add user message to conversation
        new_message = {"role": "user", "content": user_input}
        session_state.conversation["messages"].append(new_message)
        
        # Update agent state
        agent_state = session_state.agent_state.copy()
        agent_state["messages"] = agent_state["messages"] + [new_message]
        
        # Run the agent graph
        with st.spinner("Agent is processing..."):
            try:
                # Update the state dictionary for tools
                update_state_dict(agent_state)
                
                # Execute the agent workflow
                result = agent_graph.invoke(agent_state)
                
                # Update session state with result
                session_state.agent_state = result
                
                # Update the state dictionary for tools again with the result
                update_state_dict(result)
                
                # Update conversation with agent responses
                for message in result["messages"]:
                    if message not in session_state.conversation["messages"]:
                        session_state.conversation["messages"].append(message)
                
                # Update other state properties
                session_state.conversation["user_intent"] = result.get("user_intent", {})
                session_state.conversation["pipeline_plan"] = result.get("pipeline_plan", {})
                session_state.conversation["sql_queries"] = result.get("sql_queries", [])
                session_state.conversation["execution_results"] = result.get("execution_results", {})
                session_state.conversation["confidence_scores"] = result.get("confidence_scores", {})
                session_state.conversation["status"] = result.get("status", "planning")
                session_state.conversation["current_agent"] = result.get("current_agent", "understanding_agent")
                
                # Force refresh
                st.rerun()
                
            except Exception as e:
                st.error(f"Error executing agent workflow: {str(e)}")
""")

    # Pipeline UI
    pipeline_py = "ui/pipeline.py"
    if not os.path.exists(pipeline_py):
        with open(pipeline_py, "w") as f:
            f.write("""
import streamlit as st

def render_pipeline_tab(session_state):
    \"\"\"Render the pipeline details tab in the UI.\"\"\"
    st.subheader("Pipeline Details")
    
    # Intent Understanding
    st.markdown("### User Intent")
    if session_state.conversation["user_intent"]:
        st.markdown(session_state.conversation["user_intent"].get("description", "No intent captured yet"))
        if "confidence_scores" in session_state.conversation and "intent_understanding" in session_state.conversation["confidence_scores"]:
            score = session_state.conversation["confidence_scores"]["intent_understanding"] * 100
            st.progress(score / 100, text=f"Intent Understanding Confidence: {score:.1f}%")
    else:
        st.info("No user intent has been captured yet. Start a conversation to extract intent.")
    
    # Pipeline Plan
    st.markdown("### Pipeline Plan")
    if session_state.conversation["pipeline_plan"]:
        st.markdown(session_state.conversation["pipeline_plan"].get("description", "No plan created yet"))
        if "confidence_scores" in session_state.conversation and "plan_quality" in session_state.conversation["confidence_scores"]:
            score = session_state.conversation["confidence_scores"]["plan_quality"] * 100
            st.progress(score / 100, text=f"Plan Quality Confidence: {score:.1f}%")
    else:
        st.info("No pipeline plan has been created yet. Continue the conversation to develop a plan.")
    
    # SQL Queries
    st.markdown("### SQL Queries")
    if session_state.conversation["sql_queries"]:
        for i, query in enumerate(session_state.conversation["sql_queries"]):
            with st.expander(f"Query {i+1}: {query.get('name', 'Unnamed Query')}"):
                st.code(query.get("sql", ""), language="sql")
    else:
        st.info("No SQL queries have been generated yet. Continue the conversation to generate queries.")
    
    # Execution Results
    st.markdown("### Execution Results")
    if session_state.conversation["execution_results"] and "details" in session_state.conversation["execution_results"]:
        st.markdown(session_state.conversation["execution_results"].get("summary", ""))
        
        if "success_rate" in session_state.conversation["execution_results"]:
            score = session_state.conversation["execution_results"]["success_rate"] * 100
            st.progress(score / 100, text=f"Execution Success Rate: {score:.1f}%")
        
        results = session_state.conversation["execution_results"]["details"]
        for i, result in enumerate(results):
            status = "βœ…" if result["success"] else "❌"
            with st.expander(f"{status} {result.get('query_name', f'Query {i+1}')}"):
                st.markdown(f"**Result:** {result.get('result_summary', 'No summary available')}")
                st.markdown(f"**Rows Processed:** {result.get('row_count', 0)}")
    else:
        st.info("No execution results available yet. Complete the pipeline creation to see results.")
""")

    # Agent Workflow UI
    agent_workflow_py = "ui/agent_workflow.py"
    if not os.path.exists(agent_workflow_py):
        with open(agent_workflow_py, "w") as f:
            f.write("""
import streamlit as st

def render_workflow_tab(session_state):
    \"\"\"Render the agent workflow visualization tab in the UI.\"\"\"
    st.subheader("Agent Workflow Visualization")
    
    # Display current agent and status
    current_agent = session_state.conversation.get("current_agent", "understanding_agent")
    status = session_state.conversation.get("status", "planning")
    
    st.markdown(f"**Current State:** {status.title()}")
    st.markdown(f"**Current Agent:** {current_agent.replace('_', ' ').title()}")
    
    # Visualize the workflow
    col1, col2, col3, col4 = st.columns(4)
    
    # Determine which agent is active
    understanding_active = current_agent == "understanding_agent"
    planning_active = current_agent == "planning_agent"
    sql_active = current_agent == "sql_generator_agent"
    executor_active = current_agent == "executor_agent"
    
    # Show the workflow visualization
    with col1:
        if understanding_active:
            st.markdown("### πŸ” **Understanding**")
        else:
            st.markdown("### πŸ” Understanding")
        st.markdown("Extracts user intent and asks clarification questions")
        
        if "user_intent" in session_state.conversation and session_state.conversation["user_intent"]:
            st.success("Completed")
        elif understanding_active:
            st.info("In Progress")
        else:
            st.warning("Not Started")
    
    with col2:
        if planning_active:
            st.markdown("### πŸ“‹ **Planning**")
        else:
            st.markdown("### πŸ“‹ Planning")
        st.markdown("Creates data pipeline plan with sources and transformations")
        
        if "pipeline_plan" in session_state.conversation and session_state.conversation["pipeline_plan"]:
            st.success("Completed")
        elif planning_active:
            st.info("In Progress")
        elif understanding_active:
            st.warning("Not Started")
        else:
            st.success("Completed")
    
    with col3:
        if sql_active:
            st.markdown("### πŸ’» **SQL Generation**")
        else:
            st.markdown("### πŸ’» SQL Generation")
        st.markdown("Converts plan into executable SQL queries")
        
        if "sql_queries" in session_state.conversation and session_state.conversation["sql_queries"]:
            st.success("Completed")
        elif sql_active:
            st.info("In Progress")
        elif understanding_active or planning_active:
            st.warning("Not Started")
        else:
            st.success("Completed")
    
    with col4:
        if executor_active:
            st.markdown("### βš™οΈ **Execution**")
        else:
            st.markdown("### βš™οΈ Execution")
        st.markdown("Executes queries and reports results")
        
        if "execution_results" in session_state.conversation and session_state.conversation["execution_results"]:
            st.success("Completed")
        elif executor_active:
            st.info("In Progress")
        elif understanding_active or planning_active or sql_active:
            st.warning("Not Started")
        else:
            st.success("Completed")
    
    # Overall confidence score
    if "confidence_scores" in session_state.conversation and "overall" in session_state.conversation["confidence_scores"]:
        st.markdown("### Overall Pipeline Confidence")
        score = session_state.conversation["confidence_scores"]["overall"] * 100
        st.progress(score / 100, text=f"{score:.1f}%")
    
        # Workflow decision points
        if status == "complete":
            if score > 80:
                st.success("βœ… High confidence - Pipeline can be deployed automatically")
            else:
                st.warning("⚠️ Medium confidence - Human review recommended before deployment")
                
    # Add human review section for pending approval status
    if status == "pending_approval":
        st.markdown("### πŸ‘€ Human Review Required")
        st.info("This pipeline requires human review before deployment")
        
        col1, col2 = st.columns(2)
        with col1:
            if st.button("βœ… Approve Pipeline"):
                # Update state to approved
                session_state.conversation["status"] = "approved"
                # Trigger execution to continue
                st.rerun()
        with col2:
            if st.button("❌ Reject Pipeline"):
                # Update state to rejected
                session_state.conversation["status"] = "rejected"
                st.error("Pipeline rejected. Please provide feedback to refine the pipeline.")
""")

    # DB Explorer UI
    db_explorer_py = "ui/db_explorer.py"
    if not os.path.exists(db_explorer_py):
        with open(db_explorer_py, "w") as f:
            f.write("""
import streamlit as st
import pandas as pd

def render_db_explorer_tab(session_state):
    \"\"\"Render the database explorer tab in the UI.\"\"\"
    st.subheader("Database Explorer")
    
    # Get tables by category
    tables = session_state.db.get_tables()
    
    # Display tables by category
    col1, col2 = st.columns(2)
    
    with col1:
        st.markdown("### Raw Data Tables")
        for table in tables["raw_tables"]:
            with st.expander(table):
                sample = session_state.db.get_table_sample(table, 3)
                st.dataframe(pd.DataFrame(sample))
        
        st.markdown("### Staging Tables")
        for table in tables["staging_tables"]:
            with st.expander(table):
                sample = session_state.db.get_table_sample(table, 3)
                st.dataframe(pd.DataFrame(sample))
    
    with col2:
        st.markdown("### Analytics Ready Data")
        for table in tables["ard_tables"]:
            with st.expander(table):
                sample = session_state.db.get_table_sample(table, 3)
                st.dataframe(pd.DataFrame(sample))
        
        st.markdown("### Data Products")
        for table in tables["data_products"]:
            with st.expander(table):
                sample = session_state.db.get_table_sample(table, 3)
                st.dataframe(pd.DataFrame(sample))
    
    # SQL Query Executor
    st.markdown("### Query Explorer")
    with st.form(key="sql_form"):
        sql_query = st.text_area("Enter SQL Query", height=100, 
                                placeholder="SELECT * FROM ARD_SALES_PERFORMANCE WHERE region = 'North' LIMIT 5")
        run_sql = st.form_submit_button("Run Query")
    
    if run_sql and sql_query:
        with st.spinner("Executing query..."):
            result = session_state.db.execute_query(sql_query)
            
            if "error" in result:
                st.error(f"Error executing query: {result['error']}")
            elif "data" in result:
                st.dataframe(pd.DataFrame(result["data"]))
                st.success(f"Query returned {len(result['data'])} rows")
            elif "tables" in result:
                st.write(result["tables"])
            elif "schema" in result:
                st.write(f"Schema for {result['table']}:")
                st.dataframe(pd.DataFrame(result["schema"]))
""")

# Now import UI modules after ensuring they exist
ensure_ui_files_exist()

# Import UI modules
from ui.conversation import render_conversation_tab
from ui.pipeline import render_pipeline_tab
from ui.agent_workflow import render_workflow_tab
from ui.db_explorer import render_db_explorer_tab

# Import agent state
from agents.state import AgentState

# Load environment variables from .env file if it exists
load_dotenv()

def main():
    """Main application entry point."""
    # Set page configuration
    st.set_page_config(page_title="Pharmaceutical Data Agent", page_icon="πŸ’Š", layout="wide")
    
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    # Set the API key as an environment variable
    os.environ["ANTHROPIC_API_KEY"] = api_key
    
    # Initialize Anthropic client using default settings
    anthropic_client = Anthropic()  # Uses API key from environment variable
    
    # Initialize database with synthetic data
    if "db" not in st.session_state:
        st.session_state.db = SyntheticDatabase()
    
    # Initialize agent graph
    if "agent_graph" not in st.session_state or "update_state_dict" not in st.session_state:
        st.session_state.agent_graph, st.session_state.update_state_dict = create_agent_graph(
            anthropic_client, 
            st.session_state.db
        )
    
    # Initialize conversation state
    # Initialize conversation state
    # Initialize conversation state
    if "conversation" not in st.session_state:
        st.session_state.conversation = {
            "messages": [],
            "user_intent": {},
            "data_context": {},
            "pipeline_plan": {},
            "sql_queries": [],
            "execution_results": {},
            "confidence_scores": {},
            "status": "planning",
            "current_agent": "understanding_agent"  # Always a string, not a list
        }

# Initialize agent state for new runs
    if "agent_state" not in st.session_state:
        st.session_state.agent_state = AgentState(
            messages=[],
            user_intent={},
            data_context={},
            pipeline_plan={},
            sql_queries=[],
            execution_results={},
            confidence_scores={},
            status="planning",
            current_agent="understanding_agent"  # Always a string, not a list
        )
    
    # Main content area
    st.title("Pharmaceutical Data Management Agent")
    
    # Tabs for different views
    tab1, tab2, tab3, tab4 = st.tabs(["Conversation", "Pipeline Details", "Agent Workflow", "Database Explorer"])
    
    with tab1:
        render_conversation_tab(
            st.session_state, 
            st.session_state.agent_graph,
            st.session_state.update_state_dict
        )
    
    with tab2:
        render_pipeline_tab(st.session_state)
    
    with tab3:
        render_workflow_tab(st.session_state)
    
    with tab4:
        render_db_explorer_tab(st.session_state)

if __name__ == "__main__":
    main()