Rajan Sharma committed on
Commit
b500d63
·
verified ·
1 Parent(s): 30b978e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -112
app.py CHANGED
@@ -15,7 +15,6 @@ from langchain_cohere import ChatCohere
15
  from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
16
 
17
  # --- LOCAL MODULE IMPORTS ---
18
- # (Assuming these files exist in your project)
19
  from settings import (
20
  HEALTHCARE_SETTINGS, GENERAL_CONVERSATION_PROMPT, USE_SCENARIO_ENGINE, DEBUG_PLAN,
21
  COHERE_MODEL_PRIMARY, COHERE_TIMEOUT_S, USE_OPEN_FALLBACKS
@@ -24,26 +23,30 @@ from audit_log import log_event
24
  from privacy import safety_filter, refusal_reply
25
  from llm_router import cohere_chat, _co_client, cohere_embed
26
 
27
- # --- BACKEND UTILITY FUNCTIONS ---
 
 
 
 
 
 
 
 
28
 
29
  def _sanitize_text(s: str) -> str:
30
- if not isinstance(s, str):
31
- return s
32
  return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
33
 
34
  def _create_enhanced_prompt(user_scenario: str) -> str:
35
- """Uses an LLM to pre-process the user's messy prompt into a structured brief."""
36
  prompt_for_planner = f"""
37
  You are an expert data analysis project manager. Your task is to read the user's unstructured scenario below and create a clear, structured brief for a data analysis AI.
38
-
39
- From the user's text, extract the following:
40
- 1. **Primary Objective:** A one-sentence summary of the user's main goal.
41
- 2. **Key Tasks:** A numbered list of ALL the specific questions the user wants answered.
42
- 3. **Expert Guidelines & Assumptions:** A bulleted list of any specific numbers, metrics, or calculation methods mentioned.
43
- 4. **Required Output Format:** A description of how the user wants the final answer structured.
44
-
45
  CRITICAL INSTRUCTION: Tell the data analyst that it MUST answer ALL of the key tasks before providing its final answer.
46
-
47
  --- USER'S SCENARIO ---
48
  {user_scenario}
49
  """
@@ -57,126 +60,96 @@ def ping_cohere() -> str:
57
  """Lightweight health check against Cohere."""
58
  try:
59
  cli = _co_client()
60
- if not cli:
61
- return "Cohere client not initialized. Is COHERE_API_KEY set?"
62
  vecs = cohere_embed(["hello", "world"])
63
- if vecs and len(vecs) == 2:
64
- return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY}, timeout={COHERE_TIMEOUT_S}s)"
65
- return "Cohere reachable, but embeddings returned no vectors."
66
  except Exception as e:
67
  return f"Cohere ping failed: {e}"
68
 
69
  # --- THE CORE ANALYSIS ENGINE ---
70
 
71
  def handle(user_msg: str, files: list) -> str:
72
- """
73
- This is the powerful backend engine. It takes the user's query and files
74
- and returns only the final AI-generated text response.
75
- """
76
  try:
77
  safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
78
- if blocked_in:
79
- return refusal_reply(reason_in)
80
 
81
  file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
82
 
83
  if file_paths:
84
  dataframes = [pd.read_csv(p) for p in file_paths if p.endswith('.csv')]
85
- if not dataframes:
86
- return "Please upload at least one CSV file."
87
 
88
  llm = ChatCohere(model=COHERE_MODEL_PRIMARY, temperature=0)
89
  enhanced_prompt = _create_enhanced_prompt(safe_in)
90
 
91
- AGENT_PREFIX = """
92
- You are a data analysis agent. You have access to one or more pandas dataframes.
93
- You MUST respond in one of two formats.
94
-
95
- FORMAT 1: To perform a task. Your response must be a single block of text with ONLY these three sections:
96
- Thought: Your step-by-step reasoning.
97
- Action: python_repl_ast
98
- Action Input: The Python code to run.
99
-
100
- FORMAT 2: To give the final answer. Your response must be a single block of text with ONLY these two sections:
101
- Thought: I have now answered all the user's questions and can provide the final report.
102
- Final Answer: The complete answer, structured as the user requested.
103
-
104
- CRITICAL RULE: NEVER combine `Action` and `Final Answer` in the same response. Choose one format.
105
- """
106
 
107
  agent = create_pandas_dataframe_agent(
108
- llm,
109
- dataframes,
110
- agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
111
- verbose=True,
112
- allow_dangerous_code=True,
113
- prefix=AGENT_PREFIX,
114
- max_iterations=50
115
  )
116
-
117
  result = agent.invoke({"input": enhanced_prompt})
118
- reply = _sanitize_text(result.get("output", "No output generated."))
119
- return reply
120
  else:
121
- # General conversation mode if no files are uploaded
122
  prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {safe_in}\nAssistant:"
123
- reply = cohere_chat(prompt) or "How can I help further?"
124
- return _sanitize_text(reply)
125
 
126
  except Exception as e:
127
  tb = traceback.format_exc()
128
  log_event("app_error", None, {"err": str(e), "tb": tb})
129
  return f"A critical error occurred: {e}"
130
 
131
- # ---------------- THE NEW PROFESSIONAL UI ----------------
 
 
 
 
132
  with gr.Blocks(theme="soft", css="style.css") as demo:
133
- # State to store the history of all assessments in this session
134
  assessment_history = gr.State([])
135
 
136
- gr.Markdown("# ClarityOps Augmented Decision Tool")
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
  with gr.Row(variant="panel"):
139
  # --- LEFT COLUMN: CONTROLS ---
140
  with gr.Column(scale=1):
141
  gr.Markdown("## New Assessment")
142
- files_input = gr.Files(
143
- label="Upload Data Files (CSV recommended)",
144
- file_count="multiple",
145
- type="filepath",
146
- file_types=[".csv"]
147
- )
148
- prompt_input = gr.Textbox(
149
- label="Prompt",
150
- placeholder="Paste your scenario, tasks, and any specific instructions here.",
151
- lines=15
152
- )
153
  with gr.Row():
154
  send_btn = gr.Button("▶️ Run Analysis", variant="primary", scale=2)
155
  clear_btn = gr.Button("🗑️ Clear")
156
-
157
  ping_btn = gr.Button("Ping Cohere")
158
  ping_out = gr.Markdown()
159
 
160
  # --- RIGHT COLUMN: RESULTS & HISTORY ---
161
  with gr.Column(scale=2):
162
  with gr.Tabs():
163
- # --- TAB 1: CURRENT ASSESSMENT ---
164
  with gr.TabItem("Current Assessment", id=0):
165
- chat_history_output = gr.Chatbot(
166
- label="Analysis Output",
167
- bubble_full_width=True,
168
- height=600
169
- )
170
- # --- TAB 2: ASSESSMENT HISTORY ---
171
  with gr.TabItem("Assessment History", id=1):
172
  gr.Markdown("## Review Past Assessments")
173
- history_dropdown = gr.Dropdown(
174
- label="Select an assessment to review",
175
- choices=[]
176
- )
177
- history_display = gr.Markdown(
178
- label="Selected Assessment Details"
179
- )
 
 
180
 
181
  # --- UI LOGIC ---
182
  def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
@@ -184,26 +157,15 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
184
  gr.Warning("Please provide both a prompt and at least one data file.")
185
  return chat_history_list, history_state_list, gr.update()
186
 
187
- # 1. Append the user's message to the chat
188
  chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
189
-
190
- # 2. Call the powerful backend engine to get the AI response
191
  ai_response_text = handle(prompt, files)
192
-
193
- # 3. Append the AI's response to the chat
194
  final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
195
 
196
- # 4. Save the completed assessment to our history state
197
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
198
- file_names = [os.path.basename(f) for f in files]
199
 
200
- new_assessment = {
201
- "id": timestamp, "prompt": prompt, "files": file_names,
202
- "response": ai_response_text
203
- }
204
  updated_history = history_state_list + [new_assessment]
205
-
206
- # 5. Create user-friendly labels for the history dropdown
207
  history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]
208
 
209
  return final_chat, updated_history, gr.update(choices=history_labels)
@@ -212,20 +174,9 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
212
  if not selection or not history_state_list: return ""
213
  selected_id = selection.split(" - ")[0]
214
  selected_assessment = next((item for item in history_state_list if item["id"] == selected_id), None)
215
-
216
  if selected_assessment:
217
  file_list_md = "\n- ".join(selected_assessment['files'])
218
- return f"""
219
- ### Assessment from: {selected_assessment['id']}
220
- **Files Used:**
221
- - {file_list_md}
222
- ---
223
- **Original Prompt:**
224
- > {selected_assessment['prompt']}
225
- ---
226
- **AI Generated Response:**
227
- {selected_assessment['response']}
228
- """
229
  return "Could not find the selected assessment."
230
 
231
  # Wire up the components
@@ -234,18 +185,21 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
234
  inputs=[prompt_input, files_input, chat_history_output, assessment_history],
235
  outputs=[chat_history_output, assessment_history, history_dropdown]
236
  )
237
-
238
  history_dropdown.change(
239
  view_history,
240
  inputs=[history_dropdown, assessment_history],
241
  outputs=[history_display]
242
  )
243
-
244
  clear_btn.click(lambda: (None, None, [], []), outputs=[prompt_input, files_input, chat_history_output, assessment_history])
245
- ping_btn.click(lambda: ping_cohere(), outputs=[ping_out])
 
 
 
 
 
 
246
 
247
  if __name__ == "__main__":
248
  if not os.getenv("COHERE_API_KEY"):
249
  print("🔴 COHERE_API_KEY environment variable not set. Application may not function correctly.")
250
-
251
  demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))
 
15
  from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
16
 
17
  # --- LOCAL MODULE IMPORTS ---
 
18
  from settings import (
19
  HEALTHCARE_SETTINGS, GENERAL_CONVERSATION_PROMPT, USE_SCENARIO_ENGINE, DEBUG_PLAN,
20
  COHERE_MODEL_PRIMARY, COHERE_TIMEOUT_S, USE_OPEN_FALLBACKS
 
23
  from privacy import safety_filter, refusal_reply
24
  from llm_router import cohere_chat, _co_client, cohere_embed
25
 
26
+ # --- UTILITY FUNCTIONS ---
27
+
28
def load_markdown_text(filepath: str) -> str:
    """Return the text of a markdown file, or a markdown error notice.

    The function never raises for I/O problems: it is called at import time
    to pre-load the legal documents, so an unreadable file must degrade to a
    visible message instead of preventing the application from starting.

    Args:
        filepath: Path of the UTF-8 encoded markdown file to read.

    Returns:
        The file contents, or a ``**Error:** ...`` markdown string naming the
        file when it is missing or cannot be read.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            return f.read()
    except FileNotFoundError:
        return f"**Error:** The document `{os.path.basename(filepath)}` was not found. Please ensure it is in the same directory as the application."
    except (OSError, UnicodeDecodeError) as exc:
        # Permission problems, a directory passed as the path, or bytes that
        # are not valid UTF-8 all end up here.
        return f"**Error:** The document `{os.path.basename(filepath)}` could not be read ({type(exc).__name__})."
35
 
36
def _sanitize_text(s: str) -> str:
    """Remove matches of the control-character pattern from a string.

    Values that are not ``str`` are handed back unchanged.
    """
    # NOTE(review): the `--` set-difference syntax is only honoured by the
    # third-party `regex` module in VERSION1 mode -- confirm how `re2` is
    # imported and flagged, since that import is not visible here.
    if isinstance(s, str):
        return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
    return s
39
 
40
  def _create_enhanced_prompt(user_scenario: str) -> str:
41
+ """Uses an LLM to pre-process the user's prompt into a structured brief."""
42
  prompt_for_planner = f"""
43
  You are an expert data analysis project manager. Your task is to read the user's unstructured scenario below and create a clear, structured brief for a data analysis AI.
44
+ From the user's text, extract:
45
+ 1. Primary Objective: A one-sentence summary of the user's main goal.
46
+ 2. Key Tasks: A numbered list of ALL the specific questions the user wants answered.
47
+ 3. Expert Guidelines & Assumptions: A bulleted list of any specific numbers, metrics, or calculation methods mentioned.
48
+ 4. Required Output Format: A description of how the user wants the final answer structured.
 
 
49
  CRITICAL INSTRUCTION: Tell the data analyst that it MUST answer ALL of the key tasks before providing its final answer.
 
50
  --- USER'S SCENARIO ---
51
  {user_scenario}
52
  """
 
60
  """Lightweight health check against Cohere."""
61
  try:
62
  cli = _co_client()
63
+ if not cli: return "Cohere client not initialized. Is COHERE_API_KEY set?"
 
64
  vecs = cohere_embed(["hello", "world"])
65
+ return f"Cohere OK (model={COHERE_MODEL_PRIMARY}, timeout={COHERE_TIMEOUT_S}s)" if vecs else "Cohere reachable, but embeddings returned no vectors."
 
 
66
  except Exception as e:
67
  return f"Cohere ping failed: {e}"
68
 
69
  # --- THE CORE ANALYSIS ENGINE ---
70
 
71
# Output-format contract passed to the pandas agent as its prompt prefix.
# Restored in full: a literal "..." placeholder gives the agent no format rules.
_AGENT_PREFIX = """
You are a data analysis agent. You have access to one or more pandas dataframes.
You MUST respond in one of two formats.

FORMAT 1: To perform a task. Your response must be a single block of text with ONLY these three sections:
Thought: Your step-by-step reasoning.
Action: python_repl_ast
Action Input: The Python code to run.

FORMAT 2: To give the final answer. Your response must be a single block of text with ONLY these two sections:
Thought: I have now answered all the user's questions and can provide the final report.
Final Answer: The complete answer, structured as the user requested.

CRITICAL RULE: NEVER combine `Action` and `Final Answer` in the same response. Choose one format.
"""


def handle(user_msg: str, files: list) -> str:
    """Answer one user request and return only the reply text.

    The message is first passed through ``safety_filter``; a blocked message
    yields ``refusal_reply``. When files are supplied, every CSV among them is
    loaded into a dataframe and a pandas dataframe agent is run on a brief
    produced by ``_create_enhanced_prompt``. Without files the message is sent
    to ``cohere_chat`` as general conversation.

    Args:
        user_msg: Raw text entered by the user.
        files: Uploaded files, either path strings or objects exposing a
            ``name`` attribute; may be empty or ``None``.

    Returns:
        The sanitized reply, a refusal, a request to upload a CSV, or an
        error message. Exceptions are logged through ``log_event`` and
        reported in the returned string rather than raised.
    """
    try:
        safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
        if blocked_in:
            return refusal_reply(reason_in)

        file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]

        if not file_paths:
            prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {safe_in}\nAssistant:"
            return _sanitize_text(cohere_chat(prompt) or "How can I help further?")

        # Compare the extension case-insensitively so "DATA.CSV" is not
        # silently dropped from the analysis.
        dataframes = [pd.read_csv(p) for p in file_paths if str(p).lower().endswith('.csv')]
        if not dataframes:
            return "Please upload at least one CSV file."

        llm = ChatCohere(model=COHERE_MODEL_PRIMARY, temperature=0)
        enhanced_prompt = _create_enhanced_prompt(safe_in)

        # SECURITY: allow_dangerous_code=True opts in to the agent executing
        # model-generated Python (the `python_repl_ast` action named in the
        # prefix). Run this only in a sandboxed deployment.
        agent = create_pandas_dataframe_agent(
            llm,
            dataframes,
            agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
            allow_dangerous_code=True,
            prefix=_AGENT_PREFIX,
            max_iterations=50,
        )
        result = agent.invoke({"input": enhanced_prompt})
        return _sanitize_text(result.get("output", "No output generated."))

    except Exception as e:
        # Top-level boundary for the UI: record the traceback, then surface a
        # readable message instead of crashing the request.
        tb = traceback.format_exc()
        log_event("app_error", None, {"err": str(e), "tb": tb})
        return f"A critical error occurred: {e}"
102
 
103
+ # --- PRE-LOAD LEGAL DOCUMENTS ---
104
+ PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
105
+ TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
106
+
107
+ # ---------------- THE PROFESSIONAL UI WITH INTEGRATED LEGAL DOCS ----------------
108
  with gr.Blocks(theme="soft", css="style.css") as demo:
 
109
  assessment_history = gr.State([])
110
 
111
+ # --- MODALS (POPUPS) DEFINED FIRST, INITIALLY HIDDEN ---
112
+ with gr.Group(visible=False) as privacy_modal:
113
+ with gr.Blocks():
114
+ gr.Markdown(PRIVACY_POLICY_TEXT)
115
+ close_privacy_btn = gr.Button("Close")
116
+
117
+ with gr.Group(visible=False) as terms_modal:
118
+ with gr.Blocks():
119
+ gr.Markdown(TERMS_OF_SERVICE_TEXT)
120
+ close_terms_btn = gr.Button("Close")
121
+
122
+ # --- MAIN UI LAYOUT ---
123
+ gr.Markdown("# Universal AI Data Analyst")
124
 
125
  with gr.Row(variant="panel"):
126
  # --- LEFT COLUMN: CONTROLS ---
127
  with gr.Column(scale=1):
128
  gr.Markdown("## New Assessment")
129
+ files_input = gr.Files(label="Upload Data Files (.csv)", file_count="multiple", type="filepath", file_types=[".csv"])
130
+ prompt_input = gr.Textbox(label="Prompt", placeholder="Paste your scenario here.", lines=15)
 
 
 
 
 
 
 
 
 
131
  with gr.Row():
132
  send_btn = gr.Button("▶️ Run Analysis", variant="primary", scale=2)
133
  clear_btn = gr.Button("🗑️ Clear")
 
134
  ping_btn = gr.Button("Ping Cohere")
135
  ping_out = gr.Markdown()
136
 
137
  # --- RIGHT COLUMN: RESULTS & HISTORY ---
138
  with gr.Column(scale=2):
139
  with gr.Tabs():
 
140
  with gr.TabItem("Current Assessment", id=0):
141
+ chat_history_output = gr.Chatbot(label="Analysis Output", bubble_full_width=True, height=600)
 
 
 
 
 
142
  with gr.TabItem("Assessment History", id=1):
143
  gr.Markdown("## Review Past Assessments")
144
+ history_dropdown = gr.Dropdown(label="Select an assessment to review", choices=[])
145
+ history_display = gr.Markdown(label="Selected Assessment Details")
146
+
147
+ # --- FOOTER FOR LEGAL LINKS ---
148
+ with gr.Row():
149
+ gr.Markdown("---")
150
+ with gr.Row():
151
+ privacy_link = gr.Button("Privacy Policy", variant="link")
152
+ terms_link = gr.Button("Terms of Service", variant="link")
153
 
154
  # --- UI LOGIC ---
155
  def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
 
157
  gr.Warning("Please provide both a prompt and at least one data file.")
158
  return chat_history_list, history_state_list, gr.update()
159
 
 
160
  chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
 
 
161
  ai_response_text = handle(prompt, files)
 
 
162
  final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
163
 
 
164
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
165
+ file_names = [os.path.basename(f.name if hasattr(f, 'name') else f) for f in files]
166
 
167
+ new_assessment = {"id": timestamp, "prompt": prompt, "files": file_names, "response": ai_response_text}
 
 
 
168
  updated_history = history_state_list + [new_assessment]
 
 
169
  history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]
170
 
171
  return final_chat, updated_history, gr.update(choices=history_labels)
 
174
  if not selection or not history_state_list: return ""
175
  selected_id = selection.split(" - ")[0]
176
  selected_assessment = next((item for item in history_state_list if item["id"] == selected_id), None)
 
177
  if selected_assessment:
178
  file_list_md = "\n- ".join(selected_assessment['files'])
179
+ return f"""### Assessment from: {selected_assessment['id']}\n**Files Used:**\n- {file_list_md}\n---\n**Original Prompt:**\n> {selected_assessment['prompt']}\n---\n**AI Generated Response:**\n{selected_assessment['response']}"""
 
 
 
 
 
 
 
 
 
 
180
  return "Could not find the selected assessment."
181
 
182
  # Wire up the components
 
185
  inputs=[prompt_input, files_input, chat_history_output, assessment_history],
186
  outputs=[chat_history_output, assessment_history, history_dropdown]
187
  )
 
188
  history_dropdown.change(
189
  view_history,
190
  inputs=[history_dropdown, assessment_history],
191
  outputs=[history_display]
192
  )
 
193
  clear_btn.click(lambda: (None, None, [], []), outputs=[prompt_input, files_input, chat_history_output, assessment_history])
194
+ ping_btn.click(ping_cohere, outputs=[ping_out])
195
+
196
+ # Wire up the modal popups
197
+ privacy_link.click(lambda: gr.update(visible=True), outputs=[privacy_modal])
198
+ close_privacy_btn.click(lambda: gr.update(visible=False), outputs=[privacy_modal])
199
+ terms_link.click(lambda: gr.update(visible=True), outputs=[terms_modal])
200
+ close_terms_btn.click(lambda: gr.update(visible=False), outputs=[terms_modal])
201
 
202
if __name__ == "__main__":
    # Warn, but still start, when the Cohere key is missing.
    api_key = os.getenv("COHERE_API_KEY")
    if not api_key:
        print("🔴 COHERE_API_KEY environment variable not set. Application may not function correctly.")

    # Listen on all interfaces; the port comes from PORT, defaulting to 7860.
    listen_port = int(os.getenv("PORT", "7860"))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)