Rajan Sharma committed on
Commit
56f8933
·
verified ·
1 Parent(s): 64e8c0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +188 -186
app.py CHANGED
@@ -10,9 +10,8 @@ import pandas as pd
10
  from datetime import datetime
11
 
12
  # --- BACKEND IMPORTS ---
13
- from langchain.agents.agent_types import AgentType
14
  from langchain_cohere import ChatCohere
15
- from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
16
 
17
  # --- LOCAL MODULE IMPORTS ---
18
  from settings import (
@@ -37,201 +36,204 @@ def _sanitize_text(s: str) -> str:
37
  if not isinstance(s, str): return s
38
  return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
39
 
40
- # --- THE FINAL FIX (PART 1): The "Senior Analyst" AI ---
41
- def _create_enhanced_prompt(user_scenario: str, file_context: str) -> str:
42
- """
43
- Uses an LLM to act as a "Senior Analyst", breaking the complex user
44
- scenario into a clear, step-by-step plan for the agent.
45
- """
46
- prompt_for_planner = f"""
47
- You are a Senior Data Analyst. Your job is to create a clear, step-by-step execution plan for a Junior AI Data Analyst.
48
- The user has provided a complex scenario and a list of data files. The Junior Analyst gets confused by long prompts and can get stuck in loops.
49
 
50
- Your plan must be simple, clear, and sequential.
 
 
 
51
 
52
- --- DATA CONTEXT ---
53
- {file_context}
54
- The Junior Analyst has access to these files in a list of pandas dataframes (df1, df2, etc.), in the order listed above.
 
 
 
 
55
 
56
  --- USER'S SCENARIO ---
57
  {user_scenario}
58
 
59
- --- YOUR TASK ---
60
- Create a "Step-by-Step Execution Plan" for the Junior Analyst. Tell it exactly what to do, one task at a time, referencing the correct dataframe (df1, df2, etc.).
61
- Instruct it to perform all data preparation first, then the analysis, then the recommendations.
62
- Tell it that it MUST complete ALL steps in the plan before providing the final report.
63
- This plan will be given to the Junior Analyst. Make it easy to follow.
64
- """
65
- structured_brief = cohere_chat(prompt_for_planner)
66
- return structured_brief if structured_brief else user_scenario
67
-
68
- def _append_msg(history_messages: List[Dict[str, str]], role: str, content: str) -> List[Dict[str, str]]:
69
- return (history_messages or []) + [{"role": role, "content": content}]
70
 
71
- def ping_cohere() -> str:
72
- """Lightweight health check against Cohere."""
73
  try:
74
- cli = _co_client()
75
- if not cli: return "Cohere client not initialized. Is COHERE_API_KEY set?"
76
- vecs = cohere_embed(["hello", "world"])
77
- return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY}, timeout={COHERE_TIMEOUT_S}s)" if vecs else "Cohere reachable."
78
  except Exception as e:
79
- return f"Cohere ping failed: {e}"
80
-
81
- # --- THE CORE ANALYSIS ENGINE ---
82
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  def handle(user_msg: str, files: list) -> str:
84
- """This is the powerful backend engine."""
85
- try:
86
- safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
87
- if blocked_in: return refusal_reply(reason_in)
88
-
89
- file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
90
-
91
- if file_paths:
92
- dataframes = []
93
- file_names = []
94
- for p in file_paths:
95
- if p.endswith('.csv'):
96
- try:
97
- df = pd.read_csv(p)
98
- dataframes.append(df)
99
- file_names.append(os.path.basename(p))
100
- except UnicodeDecodeError:
101
- print(f"Warning: Reading {os.path.basename(p)} with fallback latin1 encoding.")
102
- df = pd.read_csv(p, encoding='latin1')
103
- dataframes.append(df)
104
- file_names.append(os.path.basename(p))
105
-
106
- if not dataframes: return "Please upload at least one CSV file."
107
-
108
- file_context_string = "The user has provided the following data files: " + ", ".join(file_names)
109
- llm = ChatCohere(model=COHERE_MODEL_PRIMARY, temperature=0)
110
- enhanced_prompt = _create_enhanced_prompt(safe_in, file_context_string)
111
-
112
- # --- THE FINAL FIX (PART 2): Stricter Agent with Error Handling Rule ---
113
- AGENT_PREFIX = """
114
- You are a Junior AI Data Analyst. Your job is to execute the step-by-step plan provided by your Senior Analyst using Python and pandas.
115
- You have access to dataframes named df1, df2, etc.
116
-
117
- You MUST follow these rules:
118
-
119
- 1. **EXECUTE THE PLAN:** Follow the execution plan exactly, one step at a time.
120
- 2. **FORMATTING:** Your response MUST be in one of two formats. NEVER mix them.
121
- * **To run code:**
122
- Thought: Your reasoning for the code you are about to run to complete the current step.
123
- Action: python_repl_ast
124
- Action Input: The single line of python code to run.
125
- * **To give the final answer:**
126
- Thought: I have finished all steps in the plan and can now provide the final report.
127
- Final Answer: The complete, final answer, formatted as a concise report.
128
-
129
- 3. **ERROR HANDLING:** If your code produces an error, DO NOT try the same code again. Analyze the error message and try a DIFFERENT approach to solve the step. If you are stuck, say so.
130
  """
131
-
132
- agent = create_pandas_dataframe_agent(
133
- llm, dataframes, agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
134
- verbose=True, allow_dangerous_code=True, prefix=AGENT_PREFIX, max_iterations=50,
135
- handle_parsing_errors=True
136
- )
137
- result = agent.invoke({"input": enhanced_prompt})
138
- return _sanitize_text(result.get("output", "No output generated."))
139
- else:
140
- prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {safe_in}\nAssistant:"
141
- return _sanitize_text(cohere_chat(prompt) or "How can I help further?")
142
-
143
- except Exception as e:
144
- tb = traceback.format_exc()
145
- log_event("app_error", None, {"err": str(e), "tb": tb})
146
- return f"A critical error occurred: {e}"
147
-
148
- # --- PRE-LOAD LEGAL DOCUMENTS ---
149
  PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
150
  TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
151
-
152
- # ---------------- THE PROFESSIONAL UI WITH INTEGRATED LEGAL DOCS ----------------
153
  with gr.Blocks(theme="soft", css="style.css") as demo:
154
- assessment_history = gr.State([])
155
- # ... (The rest of the UI code is identical to the last working version) ...
156
- with gr.Group(visible=False) as privacy_modal:
157
- with gr.Blocks():
158
- gr.Markdown(PRIVACY_POLICY_TEXT)
159
- close_privacy_btn = gr.Button("Close")
160
-
161
- with gr.Group(visible=False) as terms_modal:
162
- with gr.Blocks():
163
- gr.Markdown(TERMS_OF_SERVICE_TEXT)
164
- close_terms_btn = gr.Button("Close")
165
-
166
- gr.Markdown("# Universal AI Data Analyst")
167
- with gr.Row(variant="panel"):
168
- with gr.Column(scale=1):
169
- gr.Markdown("## New Assessment")
170
- files_input = gr.Files(label="Upload Data Files (.csv)", file_count="multiple", type="filepath", file_types=[".csv"])
171
- prompt_input = gr.Textbox(label="Prompt", placeholder="Paste your scenario here.", lines=15)
172
- with gr.Row():
173
- send_btn = gr.Button("▶️ Run Analysis", variant="primary", scale=2)
174
- clear_btn = gr.Button("🗑️ Clear")
175
- ping_btn = gr.Button("Ping Cohere")
176
- ping_out = gr.Markdown()
177
- with gr.Column(scale=2):
178
- with gr.Tabs():
179
- with gr.TabItem("Current Assessment", id=0):
180
- chat_history_output = gr.Chatbot(label="Analysis Output", type="messages", height=600)
181
- with gr.TabItem("Assessment History", id=1):
182
- gr.Markdown("## Review Past Assessments")
183
- history_dropdown = gr.Dropdown(label="Select an assessment to review", choices=[])
184
- history_display = gr.Markdown(label="Selected Assessment Details")
185
- with gr.Row(): gr.Markdown("---")
186
- with gr.Row():
187
- privacy_link = gr.Button("Privacy Policy", variant="link")
188
- terms_link = gr.Button("Terms of Service", variant="link")
189
-
190
- def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
191
- if not prompt or not files:
192
- gr.Warning("Please provide both a prompt and at least one data file.")
193
- yield chat_history_list, history_state_list, gr.update()
194
- return
195
-
196
- chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
197
- thinking_message = _append_msg(chat_with_user_msg, "assistant", "```\n🧠 Formulating execution plan... Please wait.\n```")
198
- yield thinking_message, history_state_list, gr.update()
199
- ai_response_text = handle(prompt, files)
200
- final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
201
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
202
- file_names = [os.path.basename(f.name if hasattr(f, 'name') else f) for f in files]
203
- new_assessment = {"id": timestamp, "prompt": prompt, "files": file_names, "response": ai_response_text}
204
- updated_history = history_state_list + [new_assessment]
205
- history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]
206
- yield final_chat, updated_history, gr.update(choices=history_labels)
207
-
208
- def view_history(selection, history_state_list):
209
- if not selection or not history_state_list: return ""
210
- selected_id = selection.split(" - ")[0]
211
- selected_assessment = next((item for item in history_state_list if item["id"] == selected_id), None)
212
- if selected_assessment:
213
- file_list_md = "\n- ".join(selected_assessment['files'])
214
- return f"""### Assessment from: {selected_assessment['id']}\n**Files Used:**\n- {file_list_md}\n---\n**Original Prompt:**\n> {selected_assessment['prompt']}\n---\n**AI Generated Response:**\n{selected_assessment['response']}"""
215
- return "Could not find the selected assessment."
216
-
217
- send_btn.click(
218
- run_analysis_wrapper,
219
- inputs=[prompt_input, files_input, chat_history_output, assessment_history],
220
- outputs=[chat_history_output, assessment_history, history_dropdown]
221
- )
222
- history_dropdown.change(
223
- view_history,
224
- inputs=[history_dropdown, assessment_history],
225
- outputs=[history_display]
226
- )
227
- clear_btn.click(lambda: (None, None, [], []), outputs=[prompt_input, files_input, chat_history_output, assessment_history])
228
- ping_btn.click(ping_cohere, outputs=[ping_out])
229
- privacy_link.click(lambda: gr.update(visible=True), outputs=[privacy_modal])
230
- close_privacy_btn.click(lambda: gr.update(visible=False), outputs=[privacy_modal])
231
- terms_link.click(lambda: gr.update(visible=True), outputs=[terms_modal])
232
- close_terms_btn.click(lambda: gr.update(visible=False), outputs=[terms_modal])
233
-
234
- if __name__ == "__main__":
235
- if not os.getenv("COHERE_API_KEY"):
236
- print("🔴 COHERE_API_KEY environment variable not set. Application may not function correctly.")
237
- demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))
 
 
 
10
  from datetime import datetime
11
 
12
  # --- BACKEND IMPORTS ---
 
13
  from langchain_cohere import ChatCohere
14
+ from langchain_community.utilities.python import PythonREPL
15
 
16
  # --- LOCAL MODULE IMPORTS ---
17
  from settings import (
 
36
  if not isinstance(s, str): return s
37
  return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
38
 
39
+ def _create_python_script(user_scenario: str, schema_context: str) -> str:
40
+ """Uses an LLM to act as an "AI Coder", writing a complete Python script."""
41
+ prompt_for_coder = f"""
42
+ You are an expert Python data scientist. Your sole job is to write a single, complete, and executable Python script to answer the user's request.
43
+ You have access to a list of pandas dataframes loaded into a variable named `dfs`. The first dataframe is `dfs[0]`, the second is `dfs[1]`, and so on.
 
 
 
 
44
 
45
+ CRITICAL CONTEXT: Before writing any code, you MUST first understand the data you have been given. Here is the schema for each dataframe:
46
+ --- DATA SCHEMA ---
47
+ {schema_context}
48
+ --- END SCHEMA ---
49
 
50
+ Based on the user's scenario below, write a single Python script that performs the entire analysis.
51
+
52
+ RULES FOR YOUR SCRIPT:
53
+ 1. **Use the DataFrames:** Your script MUST use the `dfs` list to access the data.
54
+ 2. **Print Your Findings:** Use the `print()` function at each step of your analysis to output the results. The final output of your script should be the complete, formatted report.
55
+ 3. **No Placeholders:** Do not use placeholder data. Your code must perform the real calculations.
56
+ 4. **Self-Contained:** The script must be entirely self-contained.
57
 
58
  --- USER'S SCENARIO ---
59
  {user_scenario}
60
 
61
+ --- PYTHON SCRIPT ---
62
+ ```python
63
+ import pandas as pd
 
 
 
 
 
 
 
 
64
 
65
+ def analyze_data(dfs):
 
66
  try:
67
+ # Your generated Python code will go here.
68
+ pass
 
 
69
  except Exception as e:
70
+ print(f"An error occurred during analysis: {{e}}")
71
+ Now, write the complete Python script inside the try block.
72
+ """
73
+ generated_text = cohere_chat(prompt_for_coder)
74
+ match = re2.search(r"```python\n(.*?)```", generated_text, re2.DOTALL)
75
+ if match:
76
+ script_content = match.group(1).strip()
77
+ script_content = script_content.replace("def analyze_data(dfs):", "", 1)
78
+ script_content = "\n".join([line for line in script_content.split('\n') if "pass" not in line])
79
+ return script_content.strip()
80
+ else:
81
+ return "print('Error: The AI failed to generate a valid Python script.')"
82
+ def _append_msg(history_messages: List[Dict[str, str]], role: str, content: str) -> List[Dict[str, str]]:
83
+ return (history_messages or []) + [{"role": role, "content": content}]
84
+ def ping_cohere() -> str:
85
+ """Lightweight health check against Cohere."""
86
+ try:
87
+ cli = _co_client()
88
+ if not cli: return "Cohere client not initialized. Is COHERE_API_KEY set?"
89
+ vecs = cohere_embed(["hello", "world"])
90
+ return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY}, timeout={COHERE_TIMEOUT_S}s)" if vecs else "Cohere reachable."
91
+ except Exception as e:
92
+ return f"Cohere ping failed: {e}"
93
+ # --- THE CORE ANALYSIS ENGINE ---
94
  def handle(user_msg: str, files: list) -> str:
95
+ """This is the powerful backend engine using the "Coder" pattern."""
96
+ try:
97
+ safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
98
+ if blocked_in: return refusal_reply(reason_in)
99
+
100
+
101
+ file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
102
+
103
+ if file_paths:
104
+ dataframes = []
105
+ schema_parts = []
106
+ for i, p in enumerate(file_paths):
107
+ if p.endswith('.csv'):
108
+ try:
109
+ df = pd.read_csv(p)
110
+ dataframes.append(df)
111
+ schema_parts.append(f"DataFrame `dfs[{i}]` (from file `{os.path.basename(p)}`):\n{df.head().to_markdown()}\n")
112
+ except UnicodeDecodeError:
113
+ print(f"Warning: Reading {os.path.basename(p)} with fallback latin1 encoding.")
114
+ df = pd.read_csv(p, encoding='latin1')
115
+ dataframes.append(df)
116
+ schema_parts.append(f"DataFrame `dfs[{i}]` (from file `{os.path.basename(p)}`):\n{df.head().to_markdown()}\n")
117
+
118
+ if not dataframes: return "Please upload at least one CSV file."
119
+
120
+ schema_context = "\n".join(schema_parts)
121
+ analysis_script_logic = _create_python_script(safe_in, schema_context)
122
+
123
+ python_repl = PythonREPL()
124
+ full_script_to_run = f"""
125
+ import pandas as pd
126
+ def analyze_data(dfs):
127
+ try:
128
+ {analysis_script_logic}
129
+ except Exception as e:
130
+ print(f"An error occurred during analysis: {{e}}")
131
+ analyze_data(dfs)
 
 
 
 
 
 
 
 
 
132
  """
133
+ local_vars = {"dfs": dataframes}
134
+ try:
135
+ # --- THE FINAL FIX IS HERE ---
136
+ res = python_repl.run(command=full_script_to_run, locals=local_vars)
137
+ return _sanitize_text(res)
138
+ except Exception as e:
139
+ return f"An error occurred while executing the AI-generated script: {e}"
140
+ else:
141
+ prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {safe_in}\nAssistant:"
142
+ return _sanitize_text(cohere_chat(prompt) or "How can I help further?")
143
+
144
+
145
+ except Exception as e:
146
+ tb = traceback.format_exc()
147
+ log_event("app_error", None, {"err": str(e), "tb": tb})
148
+ return f"A critical error occurred: {e}"
149
+ # --- PRE-LOAD LEGAL DOCUMENTS ---
 
150
  PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
151
  TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
152
+ # ---------------- THE PROFESSIONAL UI WITH INTEGRATED LEGAL DOCS ----------------
 
153
  with gr.Blocks(theme="soft", css="style.css") as demo:
154
+ assessment_history = gr.State([])
155
+
156
+
157
+ with gr.Group(visible=False) as privacy_modal:
158
+ with gr.Blocks():
159
+ gr.Markdown(PRIVACY_POLICY_TEXT)
160
+ close_privacy_btn = gr.Button("Close")
161
+
162
+ with gr.Group(visible=False) as terms_modal:
163
+ with gr.Blocks():
164
+ gr.Markdown(TERMS_OF_SERVICE_TEXT)
165
+ close_terms_btn = gr.Button("Close")
166
+
167
+ gr.Markdown("# Universal AI Data Analyst")
168
+ with gr.Row(variant="panel"):
169
+ with gr.Column(scale=1):
170
+ gr.Markdown("## New Assessment")
171
+ files_input = gr.Files(label="Upload Data Files (.csv)", file_count="multiple", type="filepath", file_types=[".csv"])
172
+ prompt_input = gr.Textbox(label="Prompt", placeholder="Paste your scenario here.", lines=15)
173
+ with gr.Row():
174
+ send_btn = gr.Button("▶️ Run Analysis", variant="primary", scale=2)
175
+ clear_btn = gr.Button("🗑️ Clear")
176
+ ping_btn = gr.Button("Ping Cohere")
177
+ ping_out = gr.Markdown()
178
+ with gr.Column(scale=2):
179
+ with gr.Tabs():
180
+ with gr.TabItem("Current Assessment", id=0):
181
+ chat_history_output = gr.Chatbot(label="Analysis Output", type="messages", height=600)
182
+ with gr.TabItem("Assessment History", id=1):
183
+ gr.Markdown("## Review Past Assessments")
184
+ history_dropdown = gr.Dropdown(label="Select an assessment to review", choices=[])
185
+ history_display = gr.Markdown(label="Selected Assessment Details")
186
+ with gr.Row(): gr.Markdown("---")
187
+ with gr.Row():
188
+ privacy_link = gr.Button("Privacy Policy", variant="link")
189
+ terms_link = gr.Button("Terms of Service", variant="link")
190
+
191
+ def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
192
+ if not prompt or not files:
193
+ gr.Warning("Please provide both a prompt and at least one data file.")
194
+ yield chat_history_list, history_state_list, gr.update()
195
+ return
196
+
197
+ chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
198
+ thinking_message = _append_msg(chat_with_user_msg, "assistant", "```\n🧠 Generating analysis script... This may take a moment.\n```")
199
+ yield thinking_message, history_state_list, gr.update()
200
+
201
+ ai_response_text = handle(prompt, files)
202
+
203
+ final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
204
+ timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
205
+ file_names = [os.path.basename(f.name if hasattr(f, 'name') else f) for f in files]
206
+ new_assessment = {"id": timestamp, "prompt": prompt, "files": file_names, "response": ai_response_text}
207
+ updated_history = history_state_list + [new_assessment]
208
+ history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]
209
+ yield final_chat, updated_history, gr.update(choices=history_labels)
210
+
211
+ def view_history(selection, history_state_list):
212
+ if not selection or not history_state_list: return ""
213
+ selected_id = selection.split(" - ")[0]
214
+ selected_assessment = next((item for item in history_state_list if item["id"] == selected_id), None)
215
+ if selected_assessment:
216
+ file_list_md = "\n- ".join(selected_assessment['files'])
217
+ return f"""### Assessment from: {selected_assessment['id']}\n**Files Used:**\n- {file_list_md}\n---\n**Original Prompt:**\n> {selected_assessment['prompt']}\n---\n**AI Generated Response:**\n{selected_assessment['response']}"""
218
+ return "Could not find the selected assessment."
219
+
220
+ send_btn.click(
221
+ run_analysis_wrapper,
222
+ inputs=[prompt_input, files_input, chat_history_output, assessment_history],
223
+ outputs=[chat_history_output, assessment_history, history_dropdown]
224
+ )
225
+ history_dropdown.change(
226
+ view_history,
227
+ inputs=[history_dropdown, assessment_history],
228
+ outputs=[history_display]
229
+ )
230
+ clear_btn.click(lambda: (None, None, [], []), outputs=[prompt_input, files_input, chat_history_output, assessment_history])
231
+ ping_btn.click(ping_cohere, outputs=[ping_out])
232
+ privacy_link.click(lambda: gr.update(visible=True), outputs=[privacy_modal])
233
+ close_privacy_btn.click(lambda: gr.update(visible=False), outputs=[privacy_modal])
234
+ terms_link.click(lambda: gr.update(visible=True), outputs=[terms_modal])
235
+ close_terms_btn.click(lambda: gr.update(visible=False), outputs=[terms_modal])
236
+ if __name__ == "__main__":
237
+ if not os.getenv("COHERE_API_KEY"):
238
+ print("🔴 COHERE_API_KEY environment variable not set. Application may not function correctly.")
239
+ demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))