Update data_analysis_agent.py
Browse files
- data_analysis_agent.py +50 -13
data_analysis_agent.py
CHANGED
|
@@ -612,7 +612,7 @@ Use types: histogram, bar, scatter, heatmap, line"""
|
|
| 612 |
insights = state["insights"]
|
| 613 |
dataset_info = state["dataset_info"]
|
| 614 |
|
| 615 |
-
#
|
| 616 |
prompt = f"""Generate 5 recommendations for this dataset:
|
| 617 |
|
| 618 |
Dataset: {dataset_info.get('shape', [0])[0]} rows, {dataset_info.get('shape', [0])[1]} columns
|
|
@@ -635,21 +635,58 @@ Write exactly 5 numbered recommendations:
|
|
| 635 |
logger.info(response_content)
|
| 636 |
logger.info("=" * 50)
|
| 637 |
|
| 638 |
-
#
|
| 639 |
recommendations = []
|
| 640 |
|
| 641 |
-
#
|
| 642 |
lines = response_content.split('\n')
|
|
|
|
|
|
|
|
|
|
| 643 |
for line in lines:
|
| 644 |
line = line.strip()
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
if
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 653 |
while len(recommendations) < 5:
|
| 654 |
rec_num = len(recommendations) + 1
|
| 655 |
fallback_recs = [
|
|
@@ -665,13 +702,13 @@ Write exactly 5 numbered recommendations:
|
|
| 665 |
else:
|
| 666 |
recommendations.append(f"**Recommendation {rec_num}:** Conduct additional analysis to identify optimization opportunities")
|
| 667 |
|
| 668 |
-
#
|
| 669 |
recommendations = recommendations[:5]
|
| 670 |
|
| 671 |
# LOG what we're returning
|
| 672 |
logger.info(f"FINAL RECOMMENDATIONS COUNT: {len(recommendations)}")
|
| 673 |
for i, rec in enumerate(recommendations, 1):
|
| 674 |
-
logger.info(f"REC {i}: {rec}")
|
| 675 |
|
| 676 |
state["recommendations"] = recommendations
|
| 677 |
state["current_step"] = "recommendation_engine"
|
|
|
|
| 612 |
insights = state["insights"]
|
| 613 |
dataset_info = state["dataset_info"]
|
| 614 |
|
| 615 |
+
# Simple prompt that works well
|
| 616 |
prompt = f"""Generate 5 recommendations for this dataset:
|
| 617 |
|
| 618 |
Dataset: {dataset_info.get('shape', [0])[0]} rows, {dataset_info.get('shape', [0])[1]} columns
|
|
|
|
| 635 |
logger.info(response_content)
|
| 636 |
logger.info("=" * 50)
|
| 637 |
|
| 638 |
+
# PROPER PARSING: Extract the 5 numbered recommendations
|
| 639 |
recommendations = []
|
| 640 |
|
| 641 |
+
# Split by lines and look for numbered items
|
| 642 |
lines = response_content.split('\n')
|
| 643 |
+
current_rec = ""
|
| 644 |
+
current_num = 0
|
| 645 |
+
|
| 646 |
for line in lines:
|
| 647 |
line = line.strip()
|
| 648 |
+
|
| 649 |
+
# Check if this line starts a new numbered recommendation
|
| 650 |
+
if line.startswith(('1.', '2.', '3.', '4.', '5.')):
|
| 651 |
+
# Save the previous recommendation if we have one
|
| 652 |
+
if current_rec and current_num > 0:
|
| 653 |
+
clean_rec = current_rec.strip()
|
| 654 |
+
if len(clean_rec) > 20:
|
| 655 |
+
recommendations.append(f"**Recommendation {current_num}:** {clean_rec}")
|
| 656 |
+
|
| 657 |
+
# Start new recommendation
|
| 658 |
+
current_num = int(line[0]) # Get the number
|
| 659 |
+
current_rec = line[2:].strip() # Get text after "1. "
|
| 660 |
+
|
| 661 |
+
elif current_rec and line: # Continue previous recommendation
|
| 662 |
+
current_rec += " " + line
|
| 663 |
+
|
| 664 |
+
# Don't forget the last recommendation
|
| 665 |
+
if current_rec and current_num > 0:
|
| 666 |
+
clean_rec = current_rec.strip()
|
| 667 |
+
if len(clean_rec) > 20:
|
| 668 |
+
recommendations.append(f"**Recommendation {current_num}:** {clean_rec}")
|
| 669 |
+
|
| 670 |
+
logger.info(f"PARSED RECOMMENDATIONS COUNT: {len(recommendations)}")
|
| 671 |
+
|
| 672 |
+
# If parsing didn't work well, try a simpler approach
|
| 673 |
+
if len(recommendations) < 3:
|
| 674 |
+
logger.warning("Primary parsing failed, trying alternative...")
|
| 675 |
+
|
| 676 |
+
# Alternative: Just look for lines that start with numbers
|
| 677 |
+
recommendations = []
|
| 678 |
+
for line in lines:
|
| 679 |
+
line = line.strip()
|
| 680 |
+
if line and line[0].isdigit() and '. ' in line[:5]:
|
| 681 |
+
# Extract the text after the number
|
| 682 |
+
rec_text = line.split('. ', 1)[1] if '. ' in line else line[2:]
|
| 683 |
+
if len(rec_text) > 20:
|
| 684 |
+
rec_num = len(recommendations) + 1
|
| 685 |
+
recommendations.append(f"**Recommendation {rec_num}:** {rec_text}")
|
| 686 |
+
if len(recommendations) >= 5:
|
| 687 |
+
break
|
| 688 |
+
|
| 689 |
+
# Final fallback - ensure we have 5 recommendations
|
| 690 |
while len(recommendations) < 5:
|
| 691 |
rec_num = len(recommendations) + 1
|
| 692 |
fallback_recs = [
|
|
|
|
| 702 |
else:
|
| 703 |
recommendations.append(f"**Recommendation {rec_num}:** Conduct additional analysis to identify optimization opportunities")
|
| 704 |
|
| 705 |
+
# Ensure exactly 5 recommendations
|
| 706 |
recommendations = recommendations[:5]
|
| 707 |
|
| 708 |
# LOG what we're returning
|
| 709 |
logger.info(f"FINAL RECOMMENDATIONS COUNT: {len(recommendations)}")
|
| 710 |
for i, rec in enumerate(recommendations, 1):
|
| 711 |
+
logger.info(f"FINAL REC {i}: {rec}")
|
| 712 |
|
| 713 |
state["recommendations"] = recommendations
|
| 714 |
state["current_step"] = "recommendation_engine"
|