Rajan Sharma committed on
Commit
64e8c0c
·
verified ·
1 Parent(s): a365d28

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -31
app.py CHANGED
@@ -37,24 +37,30 @@ def _sanitize_text(s: str) -> str:
37
  if not isinstance(s, str): return s
38
  return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
39
 
 
40
  def _create_enhanced_prompt(user_scenario: str, file_context: str) -> str:
41
- """Uses an LLM to pre-process the user's prompt and adds critical data context."""
 
 
 
42
  prompt_for_planner = f"""
43
- You are an expert data analysis project manager. Your task is to create a clear, structured brief for a data analysis AI based on the user's scenario and the provided data context.
 
 
 
44
 
45
  --- DATA CONTEXT ---
46
  {file_context}
47
- The dataframes are available in a list, indexed as df1, df2, and so on, in the order they are listed above. Your primary task is to use these dataframes to answer the user's questions. Do not use hypothetical data.
48
 
49
  --- USER'S SCENARIO ---
50
  {user_scenario}
51
 
52
  --- YOUR TASK ---
53
- Based on BOTH the user's scenario and the data context, extract the following:
54
- 1. Primary Objective: A one-sentence summary of the user's main goal.
55
- 2. Key Tasks: A numbered list of ALL specific questions the user wants answered using the provided data.
56
- 3. Required Output Format: A description of how the user wants the final answer structured.
57
- CRITICAL INSTRUCTION: Tell the data analyst that it MUST answer ALL of the key tasks before providing its final answer.
58
  """
59
  structured_brief = cohere_chat(prompt_for_planner)
60
  return structured_brief if structured_brief else user_scenario
@@ -68,7 +74,7 @@ def ping_cohere() -> str:
68
  cli = _co_client()
69
  if not cli: return "Cohere client not initialized. Is COHERE_API_KEY set?"
70
  vecs = cohere_embed(["hello", "world"])
71
- return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY}, timeout={COHERE_TIMEOUT_S}s)" if vecs else "Cohere reachable, but embeddings returned no vectors."
72
  except Exception as e:
73
  return f"Cohere ping failed: {e}"
74
 
@@ -92,41 +98,41 @@ def handle(user_msg: str, files: list) -> str:
92
  dataframes.append(df)
93
  file_names.append(os.path.basename(p))
94
  except UnicodeDecodeError:
95
- print(f"Warning: Failed to read {os.path.basename(p)} with UTF-8. Falling back to latin1 encoding.")
96
  df = pd.read_csv(p, encoding='latin1')
97
  dataframes.append(df)
98
  file_names.append(os.path.basename(p))
99
 
100
  if not dataframes: return "Please upload at least one CSV file."
101
 
102
- file_context_string = "The user has provided the following data files for your analysis: " + ", ".join(file_names)
103
  llm = ChatCohere(model=COHERE_MODEL_PRIMARY, temperature=0)
104
  enhanced_prompt = _create_enhanced_prompt(safe_in, file_context_string)
105
 
106
- # --- THE FINAL, STRICTEST AGENT PREFIX ---
107
  AGENT_PREFIX = """
108
- Your job is to act as a data analyst. You have access to pandas dataframes (df1, df2, etc.).
109
- You MUST follow these rules. This is not a suggestion.
110
-
111
- 1. Your response MUST be in one of two formats. NEVER mix them.
112
- 2. To run code, use this exact format:
113
- Thought: Your reasoning for the code you are about to run.
114
- Action: python_repl_ast
115
- Action Input: The single line of python code to run.
116
-
117
- 3. To give the final answer, use this exact format:
118
- Thought: I have finished all the work and have the final answer.
119
- Final Answer: The complete, final answer to the user's question.
120
-
121
- NEVER, EVER, provide a "Final Answer" and an "Action" in the same response. This is a fatal error.
122
- Begin now. Analyze the user's request and provide your first "Thought" and "Action".
 
123
  """
124
 
125
  agent = create_pandas_dataframe_agent(
126
  llm, dataframes, agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
127
  verbose=True, allow_dangerous_code=True, prefix=AGENT_PREFIX, max_iterations=50,
128
- # handle_parsing_errors is now less critical but a good safety net
129
- handle_parsing_errors=True
130
  )
131
  result = agent.invoke({"input": enhanced_prompt})
132
  return _sanitize_text(result.get("output", "No output generated."))
@@ -146,7 +152,7 @@ TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
146
  # ---------------- THE PROFESSIONAL UI WITH INTEGRATED LEGAL DOCS ----------------
147
  with gr.Blocks(theme="soft", css="style.css") as demo:
148
  assessment_history = gr.State([])
149
- # ... (The rest of the UI code is identical to the last version) ...
150
  with gr.Group(visible=False) as privacy_modal:
151
  with gr.Blocks():
152
  gr.Markdown(PRIVACY_POLICY_TEXT)
@@ -188,7 +194,7 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
188
  return
189
 
190
  chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
191
- thinking_message = _append_msg(chat_with_user_msg, "assistant", "```\n🧠 Analyzing... Please wait. This may take a minute.\n```")
192
  yield thinking_message, history_state_list, gr.update()
193
  ai_response_text = handle(prompt, files)
194
  final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
 
37
  if not isinstance(s, str): return s
38
  return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
39
 
40
+ # --- THE FINAL FIX (PART 1): The "Senior Analyst" AI ---
41
  def _create_enhanced_prompt(user_scenario: str, file_context: str) -> str:
42
+ """
43
+ Uses an LLM to act as a "Senior Analyst", breaking the complex user
44
+ scenario into a clear, step-by-step plan for the agent.
45
+ """
46
  prompt_for_planner = f"""
47
+ You are a Senior Data Analyst. Your job is to create a clear, step-by-step execution plan for a Junior AI Data Analyst.
48
+ The user has provided a complex scenario and a list of data files. The Junior Analyst gets confused by long prompts and can get stuck in loops.
49
+
50
+ Your plan must be simple, clear, and sequential.
51
 
52
  --- DATA CONTEXT ---
53
  {file_context}
54
+ The Junior Analyst has access to these files in a list of pandas dataframes (df1, df2, etc.), in the order listed above.
55
 
56
  --- USER'S SCENARIO ---
57
  {user_scenario}
58
 
59
  --- YOUR TASK ---
60
+ Create a "Step-by-Step Execution Plan" for the Junior Analyst. Tell it exactly what to do, one task at a time, referencing the correct dataframe (df1, df2, etc.).
61
+ Instruct it to perform all data preparation first, then the analysis, then the recommendations.
62
+ Tell it that it MUST complete ALL steps in the plan before providing the final report.
63
+ This plan will be given to the Junior Analyst. Make it easy to follow.
 
64
  """
65
  structured_brief = cohere_chat(prompt_for_planner)
66
  return structured_brief if structured_brief else user_scenario
 
74
  cli = _co_client()
75
  if not cli: return "Cohere client not initialized. Is COHERE_API_KEY set?"
76
  vecs = cohere_embed(["hello", "world"])
77
+ return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY}, timeout={COHERE_TIMEOUT_S}s)" if vecs else "Cohere reachable."
78
  except Exception as e:
79
  return f"Cohere ping failed: {e}"
80
 
 
98
  dataframes.append(df)
99
  file_names.append(os.path.basename(p))
100
  except UnicodeDecodeError:
101
+ print(f"Warning: Reading {os.path.basename(p)} with fallback latin1 encoding.")
102
  df = pd.read_csv(p, encoding='latin1')
103
  dataframes.append(df)
104
  file_names.append(os.path.basename(p))
105
 
106
  if not dataframes: return "Please upload at least one CSV file."
107
 
108
+ file_context_string = "The user has provided the following data files: " + ", ".join(file_names)
109
  llm = ChatCohere(model=COHERE_MODEL_PRIMARY, temperature=0)
110
  enhanced_prompt = _create_enhanced_prompt(safe_in, file_context_string)
111
 
112
+ # --- THE FINAL FIX (PART 2): Stricter Agent with Error Handling Rule ---
113
  AGENT_PREFIX = """
114
+ You are a Junior AI Data Analyst. Your job is to execute the step-by-step plan provided by your Senior Analyst using Python and pandas.
115
+ You have access to dataframes named df1, df2, etc.
116
+
117
+ You MUST follow these rules:
118
+
119
+ 1. **EXECUTE THE PLAN:** Follow the execution plan exactly, one step at a time.
120
+ 2. **FORMATTING:** Your response MUST be in one of two formats. NEVER mix them.
121
+ * **To run code:**
122
+ Thought: Your reasoning for the code you are about to run to complete the current step.
123
+ Action: python_repl_ast
124
+ Action Input: The single line of python code to run.
125
+ * **To give the final answer:**
126
+ Thought: I have finished all steps in the plan and can now provide the final report.
127
+ Final Answer: The complete, final answer, formatted as a concise report.
128
+
129
+ 3. **ERROR HANDLING:** If your code produces an error, DO NOT try the same code again. Analyze the error message and try a DIFFERENT approach to solve the step. If you are stuck, say so.
130
  """
131
 
132
  agent = create_pandas_dataframe_agent(
133
  llm, dataframes, agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
134
  verbose=True, allow_dangerous_code=True, prefix=AGENT_PREFIX, max_iterations=50,
135
+ handle_parsing_errors=True
 
136
  )
137
  result = agent.invoke({"input": enhanced_prompt})
138
  return _sanitize_text(result.get("output", "No output generated."))
 
152
  # ---------------- THE PROFESSIONAL UI WITH INTEGRATED LEGAL DOCS ----------------
153
  with gr.Blocks(theme="soft", css="style.css") as demo:
154
  assessment_history = gr.State([])
155
+ # ... (The rest of the UI code is identical to the last working version) ...
156
  with gr.Group(visible=False) as privacy_modal:
157
  with gr.Blocks():
158
  gr.Markdown(PRIVACY_POLICY_TEXT)
 
194
  return
195
 
196
  chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
197
+ thinking_message = _append_msg(chat_with_user_msg, "assistant", "```\n🧠 Formulating execution plan... Please wait.\n```")
198
  yield thinking_message, history_state_list, gr.update()
199
  ai_response_text = handle(prompt, files)
200
  final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)