Rajan Sharma committed on
Commit
c90a683
·
verified ·
1 Parent(s): 2b74cfe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -59
app.py CHANGED
@@ -34,55 +34,43 @@ from narrative_safetynet import build_narrative
34
  def _sanitize_text(s: str) -> str:
35
  if not isinstance(s, str):
36
  return s
37
- # remove non-printing/control chars except newlines & tabs
38
  return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
39
 
40
-
41
- def _dataset_catalog(results: Dict[str, Any]) -> Dict[str, List[str]]:
42
- """Simple catalog of dataset columns for the planner prompt; dynamic & scenario-agnostic."""
43
- cat: Dict[str, List[str]] = {}
44
- for k, v in results.items():
45
- if isinstance(v, pd.DataFrame):
46
- cat[k] = v.columns.tolist()
47
- return cat
48
-
49
-
50
- def is_healthcare_scenario(text: str, has_files: bool) -> bool:
51
  """
52
- Dynamic detection: require uploaded files AND either structured scenario sections
53
- or healthcare keywords (configured in settings).
54
  """
55
- t = (text or "").lower()
56
- kws = HEALTHCARE_SETTINGS["healthcare_keywords"]
57
- structured = any(s in t for s in ["background", "situation", "tasks", "deliverables"])
58
- return has_files and (structured or any(k in t for k in kws))
59
 
 
 
 
 
 
60
 
61
- def _append_msg(history_messages: List[Dict[str, str]], role: str, content: str) -> List[Dict[str, str]]:
62
- return (history_messages or []) + [{"role": role, "content": content}]
63
-
64
-
65
- def ping_cohere() -> str:
66
- """Lightweight health check against Cohere (embeddings call)."""
67
- try:
68
- cli = _co_client()
69
- if not cli:
70
- return "Cohere client not initialized. Is COHERE_API_KEY set?"
71
- vecs = cohere_embed(["hello", "world"])
72
- if vecs and len(vecs) == 2:
73
- return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY}, timeout={COHERE_TIMEOUT_S}s)"
74
- return "Cohere reachable, but embeddings returned no vectors."
75
- except Exception as e:
76
- return f"Cohere ping failed: {e}"
77
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  # ---------------- Core handler ----------------
80
  def handle(user_msg: str, history_messages: List[Dict[str, str]], files: list) -> Tuple[List[Dict[str, str]], str]:
81
  """
82
- One entrypoint for both healthcare scenarios and general conversation.
83
- - NEW: If files are uploaded, a data-aware agent is used to perform analysis.
84
- - Scenario mode (no files): planner -> deterministic executor -> LLM narrative (Cohere).
85
- - General mode: direct to Cohere with a light system prompt.
86
  """
87
  try:
88
  # Safety filter for user input
@@ -95,10 +83,9 @@ def handle(user_msg: str, history_messages: List[Dict[str, str]], files: list) -
95
 
96
  file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
97
 
98
- # --- NEW LOGIC: Activate data agent if files are uploaded ---
99
  if file_paths:
100
  try:
101
- # --- FINAL UPGRADE: Load ALL uploaded CSVs into a list of DataFrames ---
102
  dataframes = [pd.read_csv(p) for p in file_paths if p.endswith('.csv')]
103
  if not dataframes:
104
  return _append_msg(history_messages, "assistant", "Please upload at least one CSV file."), ""
@@ -106,6 +93,10 @@ def handle(user_msg: str, history_messages: List[Dict[str, str]], files: list) -
106
  # Initialize the Cohere Chat LLM for the agent
107
  llm = ChatCohere(model=COHERE_MODEL_PRIMARY, temperature=0)
108
 
 
 
 
 
109
  AGENT_PREFIX = """
110
  You are a data analysis agent. You have access to one or more pandas dataframes.
111
  You MUST respond in one of two formats.
@@ -116,17 +107,17 @@ Action: python_repl_ast
116
  Action Input: The Python code to run.
117
 
118
  FORMAT 2: To give the final answer. Your response must be a single block of text with ONLY these two sections:
119
- Thought: I can now answer the user's query.
120
- Final Answer: The complete answer.
121
 
122
  CRITICAL RULE: NEVER combine `Action` and `Final Answer` in the same response. Choose one format.
123
- Begin by analyzing the user's query and provide your first thought and action using FORMAT 1.
124
  """
125
 
126
- # Create the pandas DataFrame agent, now giving it the LIST of dataframes
127
  agent = create_pandas_dataframe_agent(
128
  llm,
129
- dataframes, # <-- PASSING THE LIST OF DATAFRAMES
130
  agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
131
  verbose=True,
132
  allow_dangerous_code=True,
@@ -134,24 +125,15 @@ Begin by analyzing the user's query and provide your first thought and action us
134
  prefix=AGENT_PREFIX
135
  )
136
 
137
- # Run the agent with the user's scenario text.
138
- reply = agent.run(safe_in)
139
  reply = _sanitize_text(reply)
140
 
141
  except Exception as e:
142
  tb = traceback.format_exc()
143
  log_event("agent_error", None, {"err": str(e), "tb": tb})
144
  reply = f"An error occurred while analyzing the data: {e}"
145
-
146
- # --- ORIGINAL LOGIC: Fallback for scenarios without files or general chat ---
147
- elif is_healthcare_scenario(safe_in, bool(file_paths)) and USE_SCENARIO_ENGINE:
148
- # This block remains for scenarios without data files
149
- registry = DataRegistry()
150
- rag = RAGIndex()
151
- # ... (rest of the original logic)
152
-
153
  else:
154
- # General conversation mode
155
  prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {safe_in}\nAssistant:"
156
  reply = cohere_chat(prompt) or open_fallback_chat(prompt) or "How can I help further?"
157
  reply = _sanitize_text(reply)
@@ -171,7 +153,7 @@ Begin by analyzing the user's query and provide your first thought and action us
171
 
172
  # ---------------- UI ----------------
173
  with gr.Blocks(analytics_enabled=False) as demo:
174
- gr.Markdown("## Canadian Healthcare AI Cohere API • Scenario-Agnostic • Deterministic Analytics")
175
 
176
  with gr.Row():
177
  chat = gr.Chatbot(label="Chat History", type="messages", height=520)
@@ -179,10 +161,10 @@ with gr.Blocks(analytics_enabled=False) as demo:
179
  label="Upload Data Files (CSV recommended)",
180
  file_count="multiple",
181
  type="filepath",
182
- file_types=HEALTHCARE_SETTINGS["supported_file_types"]
183
  )
184
 
185
- msg = gr.Textbox(label="Prompt", placeholder="Paste any scenario (Background / Situation / Tasks / Deliverables) or just chat.")
186
  with gr.Row():
187
  send = gr.Button("Send")
188
  clear = gr.Button("Clear")
 
34
  def _sanitize_text(s: str) -> str:
35
  if not isinstance(s, str):
36
  return s
 
37
  return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
38
 
39
+ # --- NEW: The "Intake Analyst" AI ---
40
+ def _create_enhanced_prompt(user_scenario: str) -> str:
 
 
 
 
 
 
 
 
 
41
  """
42
+ Uses an LLM to pre-process the user's messy prompt into a structured brief
43
+ for the data analysis agent.
44
  """
45
+ # This prompt instructs the first LLM to act as a project manager.
46
+ prompt_for_planner = f"""
47
+ You are an expert data analysis project manager. Your task is to read the user's unstructured scenario below and create a clear, structured brief for a data analysis AI.
 
48
 
49
+ From the user's text, extract the following:
50
+ 1. **Primary Objective:** A one-sentence summary of the user's main goal.
51
+ 2. **Key Tasks:** A numbered list of the specific questions the user wants answered.
52
+ 3. **Expert Guidelines & Assumptions:** A bulleted list of EVERY specific number, metric, calculation method, or assumption mentioned in the text. This is critical for high-quality analysis.
53
+ 4. **Required Output Format:** A description of how the user wants the final answer to be structured.
54
 
55
+ Present this as a clean brief. Then, include the user's original text at the end.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
+ --- USER'S SCENARIO ---
58
+ {user_scenario}
59
+ """
60
+
61
+ # Use the existing cohere_chat function to get the structured brief
62
+ structured_brief = cohere_chat(prompt_for_planner)
63
+
64
+ # If the LLM call fails, just use the original message
65
+ if not structured_brief:
66
+ return user_scenario
67
+
68
+ return structured_brief
69
 
70
  # ---------------- Core handler ----------------
71
  def handle(user_msg: str, history_messages: List[Dict[str, str]], files: list) -> Tuple[List[Dict[str, str]], str]:
72
  """
73
+ Core logic handler with the new two-step AI process.
 
 
 
74
  """
75
  try:
76
  # Safety filter for user input
 
83
 
84
  file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
85
 
 
86
  if file_paths:
87
  try:
88
+ # Load ALL uploaded CSVs into a list of DataFrames
89
  dataframes = [pd.read_csv(p) for p in file_paths if p.endswith('.csv')]
90
  if not dataframes:
91
  return _append_msg(history_messages, "assistant", "Please upload at least one CSV file."), ""
 
93
  # Initialize the Cohere Chat LLM for the agent
94
  llm = ChatCohere(model=COHERE_MODEL_PRIMARY, temperature=0)
95
 
96
+ # STEP 1: The "Intake Analyst" AI creates a structured brief.
97
+ enhanced_prompt = _create_enhanced_prompt(safe_in)
98
+
99
+ # This UNIVERSAL prefix contains only behavioral rules.
100
  AGENT_PREFIX = """
101
  You are a data analysis agent. You have access to one or more pandas dataframes.
102
  You MUST respond in one of two formats.
 
107
  Action Input: The Python code to run.
108
 
109
  FORMAT 2: To give the final answer. Your response must be a single block of text with ONLY these two sections:
110
+ Thought: I can now answer the user's query based on the analysis.
111
+ Final Answer: The complete answer, structured as the user requested.
112
 
113
  CRITICAL RULE: NEVER combine `Action` and `Final Answer` in the same response. Choose one format.
114
+ Begin by analyzing the structured brief provided.
115
  """
116
 
117
+ # STEP 2: The "Data Scientist" AI (Agent) executes the clean brief.
118
  agent = create_pandas_dataframe_agent(
119
  llm,
120
+ dataframes,
121
  agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
122
  verbose=True,
123
  allow_dangerous_code=True,
 
125
  prefix=AGENT_PREFIX
126
  )
127
 
128
+ reply = agent.run(enhanced_prompt)
 
129
  reply = _sanitize_text(reply)
130
 
131
  except Exception as e:
132
  tb = traceback.format_exc()
133
  log_event("agent_error", None, {"err": str(e), "tb": tb})
134
  reply = f"An error occurred while analyzing the data: {e}"
 
 
 
 
 
 
 
 
135
  else:
136
+ # Fallback to general conversation if no files are uploaded
137
  prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {safe_in}\nAssistant:"
138
  reply = cohere_chat(prompt) or open_fallback_chat(prompt) or "How can I help further?"
139
  reply = _sanitize_text(reply)
 
153
 
154
  # ---------------- UI ----------------
155
  with gr.Blocks(analytics_enabled=False) as demo:
156
+ gr.Markdown("## Universal AI Data Analyst")
157
 
158
  with gr.Row():
159
  chat = gr.Chatbot(label="Chat History", type="messages", height=520)
 
161
  label="Upload Data Files (CSV recommended)",
162
  file_count="multiple",
163
  type="filepath",
164
+ file_types=[".csv"]
165
  )
166
 
167
+ msg = gr.Textbox(label="Prompt", placeholder="Paste your scenario, tasks, and any specific instructions here.")
168
  with gr.Row():
169
  send = gr.Button("Send")
170
  clear = gr.Button("Clear")