Pulastya B committed on
Commit
4d07a53
·
1 Parent(s): 03b24f8

fix: Clarify report intent and fix parameter issues

Browse files

- Add 'DATA PROFILING REPORT' intent category
- 'detailed report' keyword now triggers ydata_profiling_report
- Fix 'None' string being passed as target_col -> convert to None
- Add plotly_dashboard to report detection in frontend
- Dashboard and ydata reports now show view buttons
- Fixes: LLM choosing visualization instead of profiling report

FRRONTEEEND/components/ChatInterface.tsx CHANGED
@@ -141,7 +141,7 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
141
 
142
  // Extract report paths from workflow history
143
  if (result.workflow_history) {
144
- const reportTools = ['generate_ydata_profiling_report'];
145
  result.workflow_history.forEach((step: any) => {
146
  if (reportTools.includes(step.tool)) {
147
  // Check multiple possible locations for the report path
 
141
 
142
  // Extract report paths from workflow history
143
  if (result.workflow_history) {
144
+ const reportTools = ['generate_ydata_profiling_report', 'generate_plotly_dashboard', 'generate_all_plots'];
145
  result.workflow_history.forEach((step: any) => {
146
  if (reportTools.includes(step.tool)) {
147
  // Check multiple possible locations for the report path
src/orchestrator.py CHANGED
@@ -397,14 +397,24 @@ When you need to use a tool, respond with a JSON block like this:
397
  2. STOP - DO NOT clean data, encode, or train models!
398
  - **Example**: "Generate interactive plots for Magnitude and latitude" → generate_interactive_scatter → DONE ✓
399
 
400
- **C. DATA ANALYSIS WITH ML** - Full workflow with model training:
 
 
 
 
 
 
 
 
 
 
401
  - Real dataset file path provided (CSV, Excel, etc. - NOT "dummy")
402
  - Keywords: "train model", "predict", "classify", "build model", "forecast"
403
  - User wants: cleaning + feature engineering + model training
404
  - **ACTION**: Run full ML workflow (steps 1-15 below)
405
  - **Example**: "Train a model to predict earthquake magnitude" → Full pipeline
406
 
407
- **D. UNCLEAR/AMBIGUOUS REQUESTS** - Intent is not obvious:
408
  - User says: "analyze", "look at", "check", "review" (without specifics)
409
  - Could mean: visualization only OR full ML OR just exploration
410
  - **ACTION**: ASK USER to clarify BEFORE starting work
@@ -413,7 +423,7 @@ When you need to use a tool, respond with a JSON block like this:
413
  - "Do you need model training or just want to explore the data visually?"
414
  - **DO NOT ASSUME** - Always ask when unclear!
415
 
416
- **E. SIMPLE QUESTIONS** - User asks for explanation/advice:
417
  - Keywords: "what is", "how to", "explain", "recommend"
418
  - **ACTION**: Answer directly, no tools needed
419
 
@@ -838,6 +848,11 @@ You are a DOER. Complete workflows based on user intent."""
838
  # Convert directory to full file path
839
  arguments["output_path"] = f"{output_dir}/ydata_profile.html"
840
 
 
 
 
 
 
841
  result = tool_func(**arguments)
842
 
843
  # Check if tool itself returned an error (some tools return dict with 'status': 'error')
 
397
  2. STOP - DO NOT clean data, encode, or train models!
398
  - **Example**: "Generate interactive plots for Magnitude and latitude" → generate_interactive_scatter → DONE ✓
399
 
400
+ **C. DATA PROFILING REPORT** - User wants comprehensive data analysis report:
401
+ - Keywords: "detailed report", "comprehensive report", "data report", "profiling report", "full analysis"
402
+ - **NO specific visualization mentioned** (no "plot", "chart", "graph")
403
+ - Real dataset provided
404
+ - **ACTION**: Use generate_ydata_profiling_report tool
405
+ - **Workflow**:
406
+ 1. generate_ydata_profiling_report(file_path)
407
+ 2. STOP - This generates a complete HTML report with all stats, correlations, distributions
408
+ - **Example**: "Generate a detailed report for this" → generate_ydata_profiling_report → DONE ✓
409
+
410
+ **D. DATA ANALYSIS WITH ML** - Full workflow with model training:
411
  - Real dataset file path provided (CSV, Excel, etc. - NOT "dummy")
412
  - Keywords: "train model", "predict", "classify", "build model", "forecast"
413
  - User wants: cleaning + feature engineering + model training
414
  - **ACTION**: Run full ML workflow (steps 1-15 below)
415
  - **Example**: "Train a model to predict earthquake magnitude" → Full pipeline
416
 
417
+ **E. UNCLEAR/AMBIGUOUS REQUESTS** - Intent is not obvious:
418
  - User says: "analyze", "look at", "check", "review" (without specifics)
419
  - Could mean: visualization only OR full ML OR just exploration
420
  - **ACTION**: ASK USER to clarify BEFORE starting work
 
423
  - "Do you need model training or just want to explore the data visually?"
424
  - **DO NOT ASSUME** - Always ask when unclear!
425
 
426
+ **F. SIMPLE QUESTIONS** - User asks for explanation/advice:
427
  - Keywords: "what is", "how to", "explain", "recommend"
428
  - **ACTION**: Answer directly, no tools needed
429
 
 
848
  # Convert directory to full file path
849
  arguments["output_path"] = f"{output_dir}/ydata_profile.html"
850
 
851
+ # Fix "None" string being passed as actual None
852
+ for key, value in list(arguments.items()):
853
+ if isinstance(value, str) and value.lower() in ["none", "null", "undefined"]:
854
+ arguments[key] = None
855
+
856
  result = tool_func(**arguments)
857
 
858
  # Check if tool itself returned an error (some tools return dict with 'status': 'error')