Rajan Sharma committed on
Commit
5be3717
·
verified ·
1 Parent(s): 7f73547

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -30
app.py CHANGED
@@ -37,18 +37,25 @@ def _sanitize_text(s: str) -> str:
37
  if not isinstance(s, str): return s
38
  return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
39
 
40
- def _create_enhanced_prompt(user_scenario: str) -> str:
41
- """Uses an LLM to pre-process the user's prompt into a structured brief."""
 
42
  prompt_for_planner = f"""
43
- You are an expert data analysis project manager. Your task is to read the user's unstructured scenario below and create a clear, structured brief for a data analysis AI.
44
- From the user's text, extract:
45
- 1. Primary Objective: A one-sentence summary of the user's main goal.
46
- 2. Key Tasks: A numbered list of ALL the specific questions the user wants answered.
47
- 3. Expert Guidelines & Assumptions: A bulleted list of any specific numbers, metrics, or calculation methods mentioned.
48
- 4. Required Output Format: A description of how the user wants the final answer structured.
49
- CRITICAL INSTRUCTION: Tell the data analyst that it MUST answer ALL of the key tasks before providing its final answer.
50
  --- USER'S SCENARIO ---
51
  {user_scenario}
 
 
 
 
 
 
 
52
  """
53
  structured_brief = cohere_chat(prompt_for_planner)
54
  return structured_brief if structured_brief else user_scenario
@@ -78,35 +85,41 @@ def handle(user_msg: str, files: list) -> str:
78
 
79
  if file_paths:
80
  dataframes = []
 
81
  for p in file_paths:
82
  if p.endswith('.csv'):
83
  try:
84
  df = pd.read_csv(p)
85
  dataframes.append(df)
 
86
  except UnicodeDecodeError:
87
  print(f"Warning: Failed to read {os.path.basename(p)} with UTF-8. Falling back to latin1 encoding.")
88
  df = pd.read_csv(p, encoding='latin1')
89
  dataframes.append(df)
 
90
 
91
  if not dataframes: return "Please upload at least one CSV file."
92
 
 
 
 
93
  llm = ChatCohere(model=COHERE_MODEL_PRIMARY, temperature=0)
94
- enhanced_prompt = _create_enhanced_prompt(safe_in)
95
 
96
  AGENT_PREFIX = """
97
- You are a data analysis agent. You have access to one or more pandas dataframes.
98
  You MUST respond in one of two formats.
99
 
100
- FORMAT 1: To perform a task. Your response must be a single block of text with ONLY these three sections:
101
- Thought: Your step-by-step reasoning.
102
  Action: python_repl_ast
103
- Action Input: The Python code to run.
104
 
105
- FORMAT 2: To give the final answer. Your response must be a single block of text with ONLY these two sections:
106
- Thought: I have now answered all the user's questions and can provide the final report.
107
  Final Answer: The complete answer, structured as the user requested.
108
 
109
- CRITICAL RULE: NEVER combine `Action` and `Final Answer` in the same response. Choose one format.
110
  """
111
 
112
  agent = create_pandas_dataframe_agent(
@@ -131,7 +144,8 @@ TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
131
  # ---------------- THE PROFESSIONAL UI WITH INTEGRATED LEGAL DOCS ----------------
132
  with gr.Blocks(theme="soft", css="style.css") as demo:
133
  assessment_history = gr.State([])
134
-
 
135
  # --- MODALS (POPUPS) DEFINED FIRST, INITIALLY HIDDEN ---
136
  with gr.Group(visible=False) as privacy_modal:
137
  with gr.Blocks():
@@ -162,7 +176,11 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
162
  with gr.Column(scale=2):
163
  with gr.Tabs():
164
  with gr.TabItem("Current Assessment", id=0):
165
- chat_history_output = gr.Chatbot(label="Analysis Output", type="messages", height=600)
 
 
 
 
166
  with gr.TabItem("Assessment History", id=1):
167
  gr.Markdown("## Review Past Assessments")
168
  history_dropdown = gr.Dropdown(label="Select an assessment to review", choices=[])
@@ -175,37 +193,28 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
175
  terms_link = gr.Button("Terms of Service", variant="link")
176
 
177
  # --- UI LOGIC ---
178
-
179
- # THIS IS THE NEW, RESPONSIVE "RUN" FUNCTION
180
  def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
181
  if not prompt or not files:
182
  gr.Warning("Please provide both a prompt and at least one data file.")
183
- # We must yield the original state to prevent an error on empty run
184
  yield chat_history_list, history_state_list, gr.update()
185
- return # This stops the generator
186
 
187
- # 1. Immediately show the user's message and a "Thinking..." status
188
  chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
189
  thinking_message = _append_msg(chat_with_user_msg, "assistant", "```\n🧠 Analyzing... Please wait. This may take a minute.\n```")
190
- yield thinking_message, history_state_list, gr.update() # This provides immediate feedback
191
 
192
- # 2. Call the powerful (and slow) backend engine
193
  ai_response_text = handle(prompt, files)
194
 
195
- # 3. Replace "Thinking..." with the final AI response
196
  final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
197
 
198
- # 4. Save the completed assessment to our history state
199
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
200
  file_names = [os.path.basename(f.name if hasattr(f, 'name') else f) for f in files]
201
 
202
  new_assessment = {"id": timestamp, "prompt": prompt, "files": file_names, "response": ai_response_text}
203
  updated_history = history_state_list + [new_assessment]
204
 
205
- # 5. Create user-friendly labels for the history dropdown
206
  history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]
207
 
208
- # 6. Yield the final, complete state to the UI
209
  yield final_chat, updated_history, gr.update(choices=history_labels)
210
 
211
  def view_history(selection, history_state_list):
@@ -237,6 +246,7 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
237
  terms_link.click(lambda: gr.update(visible=True), outputs=[terms_modal])
238
  close_terms_btn.click(lambda: gr.update(visible=False), outputs=[terms_modal])
239
 
 
240
  if __name__ == "__main__":
241
  if not os.getenv("COHERE_API_KEY"):
242
  print("🔴 COHERE_API_KEY environment variable not set. Application may not function correctly.")
 
37
  if not isinstance(s, str): return s
38
  return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
39
 
40
+ # THIS FUNCTION IS NOW UPGRADED
41
+ def _create_enhanced_prompt(user_scenario: str, file_context: str) -> str:
42
+ """Uses an LLM to pre-process the user's prompt and adds critical data context."""
43
  prompt_for_planner = f"""
44
+ You are an expert data analysis project manager. Your task is to create a clear, structured brief for a data analysis AI based on the user's scenario and the provided data context.
45
+
46
+ --- DATA CONTEXT ---
47
+ {file_context}
48
+ The dataframes are available in a list, indexed as df1, df2, and so on, in the order they are listed above. Your primary task is to use these dataframes to answer the user's questions. Do not use hypothetical data.
49
+
 
50
  --- USER'S SCENARIO ---
51
  {user_scenario}
52
+
53
+ --- YOUR TASK ---
54
+ Based on BOTH the user's scenario and the data context, extract the following:
55
+ 1. Primary Objective: A one-sentence summary of the user's main goal.
56
+ 2. Key Tasks: A numbered list of ALL specific questions the user wants answered using the provided data.
57
+ 3. Required Output Format: A description of how the user wants the final answer structured.
58
+ CRITICAL INSTRUCTION: Tell the data analyst that it MUST answer ALL of the key tasks before providing its final answer.
59
  """
60
  structured_brief = cohere_chat(prompt_for_planner)
61
  return structured_brief if structured_brief else user_scenario
 
85
 
86
  if file_paths:
87
  dataframes = []
88
+ file_names = []
89
  for p in file_paths:
90
  if p.endswith('.csv'):
91
  try:
92
  df = pd.read_csv(p)
93
  dataframes.append(df)
94
+ file_names.append(os.path.basename(p))
95
  except UnicodeDecodeError:
96
  print(f"Warning: Failed to read {os.path.basename(p)} with UTF-8. Falling back to latin1 encoding.")
97
  df = pd.read_csv(p, encoding='latin1')
98
  dataframes.append(df)
99
+ file_names.append(os.path.basename(p))
100
 
101
  if not dataframes: return "Please upload at least one CSV file."
102
 
103
+ # Create the crucial file context string
104
+ file_context_string = "The user has provided the following data files for your analysis: " + ", ".join(file_names)
105
+
106
  llm = ChatCohere(model=COHERE_MODEL_PRIMARY, temperature=0)
107
+ enhanced_prompt = _create_enhanced_prompt(safe_in, file_context_string)
108
 
109
  AGENT_PREFIX = """
110
+ You are a data analysis agent. You have access to one or more pandas dataframes. Your task is to use the provided dataframes to answer the user's questions.
111
  You MUST respond in one of two formats.
112
 
113
+ FORMAT 1: To perform a task.
114
+ Thought: Your step-by-step reasoning for using the data.
115
  Action: python_repl_ast
116
+ Action Input: The Python code to run on the dataframes (df1, df2, etc.).
117
 
118
+ FORMAT 2: To give the final answer.
119
+ Thought: I have now completed all the tasks and can provide the final report based on the real data.
120
  Final Answer: The complete answer, structured as the user requested.
121
 
122
+ CRITICAL RULE: NEVER use hypothetical data. ALWAYS use the provided dataframes to generate your results.
123
  """
124
 
125
  agent = create_pandas_dataframe_agent(
 
144
  # ---------------- THE PROFESSIONAL UI WITH INTEGRATED LEGAL DOCS ----------------
145
  with gr.Blocks(theme="soft", css="style.css") as demo:
146
  assessment_history = gr.State([])
147
+ # ... (The rest of the UI code is identical to the last version) ...
148
+ # ... (For brevity, I will omit it, but you should use the full UI code from the previous step)
149
  # --- MODALS (POPUPS) DEFINED FIRST, INITIALLY HIDDEN ---
150
  with gr.Group(visible=False) as privacy_modal:
151
  with gr.Blocks():
 
176
  with gr.Column(scale=2):
177
  with gr.Tabs():
178
  with gr.TabItem("Current Assessment", id=0):
179
+ chat_history_output = gr.Chatbot(
180
+ label="Analysis Output",
181
+ type="messages",
182
+ height=600
183
+ )
184
  with gr.TabItem("Assessment History", id=1):
185
  gr.Markdown("## Review Past Assessments")
186
  history_dropdown = gr.Dropdown(label="Select an assessment to review", choices=[])
 
193
  terms_link = gr.Button("Terms of Service", variant="link")
194
 
195
  # --- UI LOGIC ---
 
 
196
  def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
197
  if not prompt or not files:
198
  gr.Warning("Please provide both a prompt and at least one data file.")
 
199
  yield chat_history_list, history_state_list, gr.update()
200
+ return
201
 
 
202
  chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
203
  thinking_message = _append_msg(chat_with_user_msg, "assistant", "```\n🧠 Analyzing... Please wait. This may take a minute.\n```")
204
+ yield thinking_message, history_state_list, gr.update()
205
 
 
206
  ai_response_text = handle(prompt, files)
207
 
 
208
  final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
209
 
 
210
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
211
  file_names = [os.path.basename(f.name if hasattr(f, 'name') else f) for f in files]
212
 
213
  new_assessment = {"id": timestamp, "prompt": prompt, "files": file_names, "response": ai_response_text}
214
  updated_history = history_state_list + [new_assessment]
215
 
 
216
  history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]
217
 
 
218
  yield final_chat, updated_history, gr.update(choices=history_labels)
219
 
220
  def view_history(selection, history_state_list):
 
246
  terms_link.click(lambda: gr.update(visible=True), outputs=[terms_modal])
247
  close_terms_btn.click(lambda: gr.update(visible=False), outputs=[terms_modal])
248
 
249
+
250
  if __name__ == "__main__":
251
  if not os.getenv("COHERE_API_KEY"):
252
  print("🔴 COHERE_API_KEY environment variable not set. Application may not function correctly.")