Update app.py
Browse files
app.py
CHANGED
|
@@ -11,7 +11,7 @@ import os
|
|
| 11 |
import sys
|
| 12 |
from pathlib import Path
|
| 13 |
import time
|
| 14 |
-
|
| 15 |
|
| 16 |
# Add the current directory to path to import our agent
|
| 17 |
sys.path.append(str(Path(__file__).parent))
|
|
@@ -473,23 +473,23 @@ def sidebar_config():
|
|
| 473 |
st.metric("Missing", f"{df.isnull().sum().sum():,}")
|
| 474 |
st.metric("Size", f"{df.memory_usage(deep=True).sum() / 1024**2:.1f} MB")
|
| 475 |
|
| 476 |
-
# Show insights count if analysis is complete (now shows
|
| 477 |
if st.session_state.analysis_results:
|
| 478 |
insights = st.session_state.analysis_results.get('insights', [])
|
| 479 |
recommendations = st.session_state.analysis_results.get('recommendations', [])
|
| 480 |
|
| 481 |
-
# Process to get clean counts (
|
| 482 |
-
processed_insights_count =
|
| 483 |
-
processed_recommendations_count =
|
| 484 |
|
| 485 |
st.markdown("---")
|
| 486 |
st.subheader("🧠 Analysis Results")
|
| 487 |
|
| 488 |
col1, col2 = st.columns(2)
|
| 489 |
with col1:
|
| 490 |
-
st.metric("💡
|
| 491 |
with col2:
|
| 492 |
-
st.metric("🎯
|
| 493 |
|
| 494 |
st.markdown("---")
|
| 495 |
|
|
@@ -833,6 +833,84 @@ def run_analysis():
|
|
| 833 |
if os.path.exists(temp_file):
|
| 834 |
os.remove(temp_file)
|
| 835 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 836 |
def display_results():
|
| 837 |
"""Display beautiful analysis results"""
|
| 838 |
results = st.session_state.analysis_results
|
|
@@ -881,36 +959,19 @@ def display_results():
|
|
| 881 |
|
| 882 |
st.markdown("<br>", unsafe_allow_html=True)
|
| 883 |
|
| 884 |
-
# Key Insights Section -
|
| 885 |
st.markdown("### 💡 Key Insights")
|
| 886 |
-
|
| 887 |
|
| 888 |
-
if
|
| 889 |
-
#
|
| 890 |
-
|
| 891 |
|
| 892 |
-
|
| 893 |
-
|
| 894 |
-
|
| 895 |
-
# Pattern to match **Insight X:** followed by content until next insight or end
|
| 896 |
-
insight_pattern = r'\*\*Insight (\d+):(.*?)(?=\*\*Insight \d+:|$)'
|
| 897 |
-
matches = re.findall(insight_pattern, full_text, re.DOTALL)
|
| 898 |
-
|
| 899 |
-
processed_insights = []
|
| 900 |
-
for match in matches:
|
| 901 |
-
insight_num, content = match
|
| 902 |
-
clean_content = content.strip().rstrip('*')
|
| 903 |
-
if len(clean_content) > 20:
|
| 904 |
-
processed_insights.append(clean_content)
|
| 905 |
-
|
| 906 |
-
# Take top 5 insights
|
| 907 |
-
top_insights = processed_insights[:5]
|
| 908 |
-
|
| 909 |
-
if top_insights:
|
| 910 |
-
st.markdown(f"**Top {len(top_insights)} key insights from your data:**")
|
| 911 |
st.markdown("<br>", unsafe_allow_html=True)
|
| 912 |
|
| 913 |
-
for i, insight in enumerate(
|
| 914 |
st.markdown(f"""
|
| 915 |
<div class="insight-box animate-fade-in">
|
| 916 |
<div style="display: flex; align-items: flex-start; gap: 1rem;">
|
|
@@ -1232,43 +1293,19 @@ def display_results():
|
|
| 1232 |
except Exception as e:
|
| 1233 |
st.error(f"Error creating visualization: {str(e)}")
|
| 1234 |
|
| 1235 |
-
# Recommendations Section -
|
| 1236 |
st.markdown("### 🎯 AI-Generated Recommendations")
|
| 1237 |
-
|
| 1238 |
|
| 1239 |
-
if
|
| 1240 |
-
#
|
| 1241 |
-
|
| 1242 |
-
|
| 1243 |
-
# Extract complete recommendations using regex
|
| 1244 |
-
import re
|
| 1245 |
-
|
| 1246 |
-
# Pattern to match recommendations (various formats)
|
| 1247 |
-
rec_patterns = [
|
| 1248 |
-
r'\*\*.*?(\d+):(.*?)(?=\*\*.*?\d+:|$)', # **Something 1:** format
|
| 1249 |
-
r'(\d+)\.\s+(.*?)(?=\d+\.|$)', # 1. format
|
| 1250 |
-
]
|
| 1251 |
-
|
| 1252 |
-
processed_recommendations = []
|
| 1253 |
-
for pattern in rec_patterns:
|
| 1254 |
-
matches = re.findall(pattern, full_text, re.DOTALL)
|
| 1255 |
-
if matches:
|
| 1256 |
-
for match in matches:
|
| 1257 |
-
if len(match) == 2:
|
| 1258 |
-
rec_num, content = match
|
| 1259 |
-
clean_content = content.strip().rstrip('*')
|
| 1260 |
-
if len(clean_content) > 20:
|
| 1261 |
-
processed_recommendations.append(clean_content)
|
| 1262 |
-
break
|
| 1263 |
|
| 1264 |
-
|
| 1265 |
-
|
| 1266 |
-
|
| 1267 |
-
if top_recommendations:
|
| 1268 |
-
st.markdown(f"**Top {len(top_recommendations)} actionable recommendations:**")
|
| 1269 |
st.markdown("<br>", unsafe_allow_html=True)
|
| 1270 |
|
| 1271 |
-
for i, rec in enumerate(
|
| 1272 |
st.markdown(f"""
|
| 1273 |
<div class="recommendation-box animate-fade-in">
|
| 1274 |
<div style="display: flex; align-items: flex-start; gap: 1rem;">
|
|
@@ -1378,11 +1415,10 @@ Our AI analysis has uncovered the following key insights:
|
|
| 1378 |
"""
|
| 1379 |
|
| 1380 |
insights = results.get('insights', [])
|
| 1381 |
-
|
| 1382 |
-
|
| 1383 |
-
|
| 1384 |
-
|
| 1385 |
-
report += "*No specific insights were generated for this dataset.*\n\n"
|
| 1386 |
|
| 1387 |
report += """---
|
| 1388 |
|
|
@@ -1393,11 +1429,10 @@ Based on the data analysis, we recommend the following strategic actions:
|
|
| 1393 |
"""
|
| 1394 |
|
| 1395 |
recommendations = results.get('recommendations', [])
|
| 1396 |
-
|
| 1397 |
-
|
| 1398 |
-
|
| 1399 |
-
|
| 1400 |
-
report += "*No specific recommendations were generated for this dataset.*\n\n"
|
| 1401 |
|
| 1402 |
report += f"""---
|
| 1403 |
|
|
|
|
| 11 |
import sys
|
| 12 |
from pathlib import Path
|
| 13 |
import time
|
| 14 |
+
import re
|
| 15 |
|
| 16 |
# Add the current directory to path to import our agent
|
| 17 |
sys.path.append(str(Path(__file__).parent))
|
|
|
|
| 473 |
st.metric("Missing", f"{df.isnull().sum().sum():,}")
|
| 474 |
st.metric("Size", f"{df.memory_usage(deep=True).sum() / 1024**2:.1f} MB")
|
| 475 |
|
| 476 |
+
# Show insight and recommendation counts once analysis is complete
|
| 477 |
if st.session_state.analysis_results:
|
| 478 |
insights = st.session_state.analysis_results.get('insights', [])
|
| 479 |
recommendations = st.session_state.analysis_results.get('recommendations', [])
|
| 480 |
|
| 481 |
+
# Count only string entries longer than 10 characters after stripping whitespace
|
| 482 |
+
processed_insights_count = len([i for i in insights if isinstance(i, str) and len(i.strip()) > 10])
|
| 483 |
+
processed_recommendations_count = len([r for r in recommendations if isinstance(r, str) and len(r.strip()) > 10])
|
| 484 |
|
| 485 |
st.markdown("---")
|
| 486 |
st.subheader("🧠 Analysis Results")
|
| 487 |
|
| 488 |
col1, col2 = st.columns(2)
|
| 489 |
with col1:
|
| 490 |
+
st.metric("💡 Insights", processed_insights_count)
|
| 491 |
with col2:
|
| 492 |
+
st.metric("🎯 Recommendations", processed_recommendations_count)
|
| 493 |
|
| 494 |
st.markdown("---")
|
| 495 |
|
|
|
|
| 833 |
if os.path.exists(temp_file):
|
| 834 |
os.remove(temp_file)
|
| 835 |
|
| 836 |
+
# A list marker such as "1.", "2)" or "10:" at the start of an item.  The
# marker must be followed by whitespace or by a non-digit so that decimals
# ("3.5% of ...") and times ("12:30 ...") are left intact.
_ITEM_NUMBER_PREFIX = re.compile(r'^\d{1,2}[.):](?:\s+|(?=[^\s\d]))')

# A leading bold label with its colon inside or outside the bold span:
# "**Insight 1:** text" or "**Insight 1**: text".  "[^*]" keeps the match
# from running across the first bold span into real content.
_BOLD_LABEL_PREFIX = re.compile(r'^\*\*[^*]*?(?::\*\*|\*\*:)\s*')

# A (stripped) line that opens a new numbered item; group 1 is the text after
# the marker, or None when the line holds the marker alone.
_NUMBERED_LINE = re.compile(r'^\d{1,2}[.):](?:\s+(\S.*))?$')

# Items at or below this length are treated as noise and dropped.
_MIN_ITEM_LENGTH = 15

# Callers rely on receiving exactly this many items for non-empty input.
_TARGET_ITEM_COUNT = 5

# Canned text used to pad the result when fewer real items were parsed.
_FALLBACK_ITEMS = {
    "insight": [
        "Dataset contains valuable information that can drive business decisions and strategic planning initiatives",
        "Data quality assessment reveals opportunities for improvement in collection and validation processes",
        "Statistical patterns indicate significant relationships between key variables requiring further investigation",
        "Distribution analysis shows interesting trends that could inform operational and strategic decisions",
        "Business intelligence opportunities exist through advanced analytics and machine learning applications",
    ],
    "recommendation": [
        "Implement comprehensive data quality monitoring and validation procedures to ensure accuracy and completeness",
        "Develop automated reporting dashboards that provide real-time visibility into key business metrics and KPIs",
        "Establish regular data governance workflows and collection protocols to maintain consistent, high-quality data",
        "Consider implementing advanced analytics and machine learning models to uncover predictive insights and opportunities",
        "Create standardized documentation and metadata management practices to improve data discoverability and collaboration",
    ],
}


def _strip_bold_label(text):
    """Remove one leading ``**Label:**`` / ``**Label**:`` prefix and trim."""
    return _BOLD_LABEL_PREFIX.sub('', text.strip(), count=1).strip()


def _split_numbered_text(text):
    """Split a block of text into the bodies of its numbered items.

    Lines before the first numbered line are ignored.  Every following
    non-empty line that is not itself a numbered line is joined onto the
    current item, including lines that happen to start with a digit.
    """
    chunks = []
    parts = None  # None until the first numbered line has been seen
    for raw_line in text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        numbered = _NUMBERED_LINE.match(line)
        if numbered:
            if parts:
                chunks.append(" ".join(parts))
            # A bare marker ("2.") opens an item whose text follows on
            # later lines.
            parts = [numbered.group(1)] if numbered.group(1) else []
        elif parts is not None:
            parts.append(line)
    if parts:
        chunks.append(" ".join(parts))
    return chunks


def parse_insights_and_recommendations(items, item_type="insight"):
    """Parse insights or recommendations into exactly five display strings.

    Args:
        items: Either a list (non-string entries are ignored) or a single
            string containing numbered lines ("1. ...", "2) ...", "3: ...").
            Any other truthy value yields only fallback text.
        item_type: "insight" or "recommendation"; selects the fallback text
            used for padding.  Unknown values use the insight fallbacks.

    Returns:
        An empty list when ``items`` is falsy; otherwise a list of exactly
        five strings.  Parsed items come first (leading list numbers and
        bold labels removed, entries of 15 characters or fewer dropped),
        followed by canned fallback text when fewer than five were parsed.
    """
    if not items:
        return []

    if isinstance(items, list):
        candidates = [
            _strip_bold_label(_ITEM_NUMBER_PREFIX.sub('', item.strip(), count=1))
            for item in items
            if isinstance(item, str)
        ]
    elif isinstance(items, str):
        # NOTE(review): text without any numbered line produces no parsed
        # items, so the result is entirely fallback text - confirm that this
        # is the intended behavior for free-form model output.
        candidates = [_strip_bold_label(chunk) for chunk in _split_numbered_text(items)]
    else:
        candidates = []

    parsed_items = [text for text in candidates if len(text) > _MIN_ITEM_LENGTH]

    if len(parsed_items) < _TARGET_ITEM_COUNT:
        fallbacks = _FALLBACK_ITEMS.get(item_type, _FALLBACK_ITEMS["insight"])
        # Fallbacks are taken by position, so slot N is always filled with
        # fallback N regardless of how many real items precede it.
        parsed_items.extend(fallbacks[len(parsed_items):_TARGET_ITEM_COUNT])
        # Safety net in case a fallback list ever holds fewer than five.
        generic = f"Additional {item_type} opportunities exist for strategic business improvement and data optimization"
        parsed_items.extend([generic] * (_TARGET_ITEM_COUNT - len(parsed_items)))

    return parsed_items[:_TARGET_ITEM_COUNT]  # Return exactly 5 items
|
| 913 |
+
|
| 914 |
def display_results():
|
| 915 |
"""Display beautiful analysis results"""
|
| 916 |
results = st.session_state.analysis_results
|
|
|
|
| 959 |
|
| 960 |
st.markdown("<br>", unsafe_allow_html=True)
|
| 961 |
|
| 962 |
+
# Key Insights Section - Parse and display individually
|
| 963 |
st.markdown("### 💡 Key Insights")
|
| 964 |
+
raw_insights = results.get('insights', [])
|
| 965 |
|
| 966 |
+
if raw_insights:
|
| 967 |
+
# Parse insights into individual items
|
| 968 |
+
parsed_insights = parse_insights_and_recommendations(raw_insights, "insight")
|
| 969 |
|
| 970 |
+
if parsed_insights:
|
| 971 |
+
st.markdown(f"**{len(parsed_insights)} key insights discovered from your data:**")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 972 |
st.markdown("<br>", unsafe_allow_html=True)
|
| 973 |
|
| 974 |
+
for i, insight in enumerate(parsed_insights):
|
| 975 |
st.markdown(f"""
|
| 976 |
<div class="insight-box animate-fade-in">
|
| 977 |
<div style="display: flex; align-items: flex-start; gap: 1rem;">
|
|
|
|
| 1293 |
except Exception as e:
|
| 1294 |
st.error(f"Error creating visualization: {str(e)}")
|
| 1295 |
|
| 1296 |
+
# Recommendations Section - Parse and display individually
|
| 1297 |
st.markdown("### 🎯 AI-Generated Recommendations")
|
| 1298 |
+
raw_recommendations = results.get('recommendations', [])
|
| 1299 |
|
| 1300 |
+
if raw_recommendations:
|
| 1301 |
+
# Parse recommendations into individual items
|
| 1302 |
+
parsed_recommendations = parse_insights_and_recommendations(raw_recommendations, "recommendation")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1303 |
|
| 1304 |
+
if parsed_recommendations:
|
| 1305 |
+
st.markdown(f"**{len(parsed_recommendations)} actionable recommendations:**")
|
|
|
|
|
|
|
|
|
|
| 1306 |
st.markdown("<br>", unsafe_allow_html=True)
|
| 1307 |
|
| 1308 |
+
for i, rec in enumerate(parsed_recommendations):
|
| 1309 |
st.markdown(f"""
|
| 1310 |
<div class="recommendation-box animate-fade-in">
|
| 1311 |
<div style="display: flex; align-items: flex-start; gap: 1rem;">
|
|
|
|
| 1415 |
"""
|
| 1416 |
|
| 1417 |
insights = results.get('insights', [])
|
| 1418 |
+
parsed_insights = parse_insights_and_recommendations(insights, "insight")
|
| 1419 |
+
|
| 1420 |
+
for i, insight in enumerate(parsed_insights, 1):
|
| 1421 |
+
report += f"**{i}.** {insight}\n\n"
|
|
|
|
| 1422 |
|
| 1423 |
report += """---
|
| 1424 |
|
|
|
|
| 1429 |
"""
|
| 1430 |
|
| 1431 |
recommendations = results.get('recommendations', [])
|
| 1432 |
+
parsed_recommendations = parse_insights_and_recommendations(recommendations, "recommendation")
|
| 1433 |
+
|
| 1434 |
+
for i, rec in enumerate(parsed_recommendations, 1):
|
| 1435 |
+
report += f"**{i}.** {rec}\n\n"
|
|
|
|
| 1436 |
|
| 1437 |
report += f"""---
|
| 1438 |
|