Pulastya B committed on
Commit
6badf55
·
1 Parent(s): 4eacfaa

Fix visibility of changes: strengthen file path prohibition, prepend metrics to summary, add progress polling

Browse files
FRRONTEEEND/components/ChatInterface.tsx CHANGED
@@ -74,8 +74,35 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
74
  updateSession(activeSessionId, newMessages);
75
  setInput('');
76
  setIsTyping(true);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
  try {
 
 
 
79
  // Use the current origin if running on same server, otherwise use env variable
80
  const API_URL = window.location.origin;
81
  console.log('API URL:', API_URL);
@@ -137,7 +164,10 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
137
 
138
  const data = await response.json();
139
 
140
- // Clear progress indicator
 
 
 
141
  setCurrentStep('');
142
 
143
  let assistantContent = '';
@@ -234,6 +264,12 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
234
  } catch (error: any) {
235
  console.error("Chat Error:", error);
236
 
 
 
 
 
 
 
237
  let errorMessage = "I'm sorry, I encountered an error processing your request.";
238
 
239
  if (error.message) {
@@ -260,6 +296,11 @@ export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
260
  timestamp: new Date()
261
  }]);
262
  } finally {
 
 
 
 
 
263
  setIsTyping(false);
264
  }
265
  };
 
74
  updateSession(activeSessionId, newMessages);
75
  setInput('');
76
  setIsTyping(true);
77
+
78
+ // Start polling for progress updates
79
+ const sessionKey = activeSessionId || 'default';
80
+ let progressInterval: NodeJS.Timeout | null = null;
81
+
82
+ const pollProgress = async () => {
83
+ try {
84
+ const API_URL = window.location.origin;
85
+ const progressResponse = await fetch(`${API_URL}/api/progress/${sessionKey}`);
86
+ if (progressResponse.ok) {
87
+ const progressData = await progressResponse.json();
88
+ const steps = progressData.steps || [];
89
+
90
+ // Find the most recent running step
91
+ const runningSteps = steps.filter((s: any) => s.status === 'running');
92
+ if (runningSteps.length > 0) {
93
+ const lastStep = runningSteps[runningSteps.length - 1];
94
+ setCurrentStep(lastStep.tool);
95
+ }
96
+ }
97
+ } catch (err) {
98
+ console.error('Progress polling error:', err);
99
+ }
100
+ };
101
 
102
  try {
103
+ // Start polling every 1 second
104
+ progressInterval = setInterval(pollProgress, 1000);
105
+
106
  // Use the current origin if running on same server, otherwise use env variable
107
  const API_URL = window.location.origin;
108
  console.log('API URL:', API_URL);
 
164
 
165
  const data = await response.json();
166
 
167
+ // Stop progress polling and clear indicator
168
+ if (progressInterval) {
169
+ clearInterval(progressInterval);
170
+ }
171
  setCurrentStep('');
172
 
173
  let assistantContent = '';
 
264
  } catch (error: any) {
265
  console.error("Chat Error:", error);
266
 
267
+ // Stop progress polling
268
+ if (progressInterval) {
269
+ clearInterval(progressInterval);
270
+ }
271
+ setCurrentStep('');
272
+
273
  let errorMessage = "I'm sorry, I encountered an error processing your request.";
274
 
275
  if (error.message) {
 
296
  timestamp: new Date()
297
  }]);
298
  } finally {
299
+ // Stop progress polling
300
+ if (progressInterval) {
301
+ clearInterval(progressInterval);
302
+ }
303
+ setCurrentStep('');
304
  setIsTyping(false);
305
  }
306
  };
src/orchestrator.py CHANGED
@@ -412,15 +412,17 @@ class DataScienceCopilot:
412
 
413
  **CRITICAL: User Interface Integration & Response Formatting**
414
  - The user interface automatically displays clickable buttons for all generated plots, reports, and outputs
415
- - **NEVER mention file paths** (e.g., "./outputs/plots/...", "./outputs/data/...", etc.) in your responses
416
- - **NEVER use markdown code blocks** for file paths or structured data in final summaries
417
- - DO NOT say "Output File: ..." or "Saved to: ..." - users can click buttons to view outputs
418
- - Simply describe what was created and what insights it shows
419
- - Use clean, aesthetic formatting with proper sections, bullet points, and spacing
420
- - Example: ❌ "📊 Output File: `./outputs/plots/heatmap.html`"
421
- ✅ "Generated an interactive correlation heatmap showing relationships between variables"
422
- - Example: ❌ "Saved cleaned data to: `./outputs/data/cleaned.csv`"
423
- ✅ "Cleaned the dataset by handling missing values and outliers"
 
 
424
 
425
  **CRITICAL: Tool Calling Format**
426
  When you need to use a tool, respond with a JSON block like this:
@@ -834,8 +836,12 @@ When you've finished all tool executions and are ready to return the final respo
834
  - What patterns were discovered in the data?
835
  - What were the most important features?
836
  - Were there any interesting correlations or anomalies?
837
- 3. **Model performance** (if trained):
838
- - Best model name and metrics (R², RMSE, MAE)
 
 
 
 
839
  - How accurate is the model? What does the score mean in practical terms?
840
  - Were there any challenges (imbalanced data, multicollinearity, etc.)?
841
  4. **Recommendations**:
@@ -1093,15 +1099,13 @@ You are a DOER. Complete workflows based on user intent."""
1093
  "url": f"/outputs/{nested_result['output_path'].replace('./outputs/', '')}"
1094
  })
1095
 
1096
- # Build enhanced text summary
1097
  summary_lines = [
1098
  f"## 📊 Analysis Complete",
1099
- "",
1100
- llm_summary,
1101
  ""
1102
  ]
1103
 
1104
- # Show all baseline models comparison first
1105
  if "all_models" in metrics and metrics["all_models"]:
1106
  summary_lines.extend([
1107
  "### 🔬 Baseline Models Comparison",
@@ -1152,6 +1156,17 @@ You are a DOER. Complete workflows based on user intent."""
1152
  ""
1153
  ])
1154
 
 
 
 
 
 
 
 
 
 
 
 
1155
  # Add artifact links
1156
  if artifacts["models"]:
1157
  summary_lines.append("### 💾 Trained Models")
 
412
 
413
  **CRITICAL: User Interface Integration & Response Formatting**
414
  - The user interface automatically displays clickable buttons for all generated plots, reports, and outputs
415
+ - **ABSOLUTELY FORBIDDEN**: NEVER EVER mention file paths in your responses
416
+ - ❌ NEVER write: "./outputs/...", "/outputs/...", "saved to", "output file:", "file path:"
417
+ - ❌ NEVER use markdown code blocks for file paths (no backticks around paths)
418
+ - ❌ NEVER say: "Output File:", "Saved to:", "File:", "Path:", "Location:"
419
+ - **WHAT TO SAY INSTEAD**:
420
+ - ✅ "Generated an interactive correlation heatmap"
421
+ - ✅ "Cleaned the dataset by handling missing values"
422
+ - ✅ "Created visualizations showing the relationships"
423
+ - ✅ "Trained multiple models and optimized the best performer"
424
+ - Users can click buttons to view outputs - you don't need to tell them where files are
425
+ - Use clean, aesthetic formatting with sections, bullets, and proper spacing
426
 
427
  **CRITICAL: Tool Calling Format**
428
  When you need to use a tool, respond with a JSON block like this:
 
836
  - What patterns were discovered in the data?
837
  - What were the most important features?
838
  - Were there any interesting correlations or anomalies?
839
+ 3. **Model performance** (if trained) - **CRITICAL: YOU MUST INCLUDE THESE METRICS**:
840
+ - **ALWAYS extract and display** the exact metrics from tool results:
841
+ - R² Score, RMSE, MAE from the train_baseline_models results
842
+ - List ALL models trained (not just the best one)
843
+ - Example: "Trained 6 models: XGBoost (R²=0.713, RMSE=0.207), Random Forest (R²=0.685, RMSE=0.218), etc."
844
+ - If hyperparameter tuning was done, show before/after comparison
845
  - How accurate is the model? What does the score mean in practical terms?
846
  - Were there any challenges (imbalanced data, multicollinearity, etc.)?
847
  4. **Recommendations**:
 
1099
  "url": f"/outputs/{nested_result['output_path'].replace('./outputs/', '')}"
1100
  })
1101
 
1102
+ # Build enhanced text summary - start with metrics then LLM explanation
1103
  summary_lines = [
1104
  f"## 📊 Analysis Complete",
 
 
1105
  ""
1106
  ]
1107
 
1108
+ # Show all baseline models comparison FIRST (before LLM summary)
1109
  if "all_models" in metrics and metrics["all_models"]:
1110
  summary_lines.extend([
1111
  "### 🔬 Baseline Models Comparison",
 
1156
  ""
1157
  ])
1158
 
1159
+ # Add LLM's explanation after metrics
1160
+ if llm_summary and llm_summary.strip():
1161
+ summary_lines.extend([
1162
+ "---",
1163
+ "",
1164
+ "### 📝 Analysis Summary",
1165
+ "",
1166
+ llm_summary,
1167
+ ""
1168
+ ])
1169
+
1170
  # Add artifact links
1171
  if artifacts["models"]:
1172
  summary_lines.append("### πŸ’Ύ Trained Models")