File size: 2,193 Bytes
def build_prompt(summary, profile):
    """Assemble the text prompt asking an LLM for a structured analysis report.

    The prompt is built from fixed instruction text plus values pulled from
    the two input mappings, in this order: report format, dataset overview,
    correlation analysis, target/feature section, cluster analysis, and
    writing instructions.

    Args:
        summary: Mapping with the analysis results. Keys read here:
            ``rows``, ``columns``, ``numeric_columns``, ``mode`` and
            ``clusters`` (required); ``top_pearson`` and ``top_spearman``
            (optional iterables of ``(a, b, value)`` triples); and, only
            when ``mode == "ml"``, ``target`` (required) and
            ``top_features`` (optional mapping of feature -> score).
        profile: Mapping with ``column_name`` (iterable of strings) and
            ``column_details`` (iterable of mappings; the first key/value
            pair of each is rendered as ``name | type: value``).

    Returns:
        The complete prompt as a single string.

    Raises:
        KeyError: If one of the required keys listed above is missing.
    """
    column_names = ", ".join(profile["column_name"])

    column_lines = []
    for col in profile["column_details"]:
        if not col:
            # Nothing to render for an empty entry; skip it instead of
            # failing on the first-item lookup.
            continue
        # Only the first pair of each entry is used — presumably every
        # entry is a one-item {column: dtype} mapping; verify with caller.
        name, dtype = next(iter(col.items()))
        column_lines.append(f"- {name} | type: {dtype}")
    column_details = "\n".join(column_lines)

    pearson_text = "\n".join(
        f"- {a} & {b}: {round(v, 2)}"
        for a, b, v in summary.get("top_pearson", [])
    )
    spearman_text = "\n".join(
        f"- {a} & {b}: {round(v, 2)}"
        for a, b, v in summary.get("top_spearman", [])
    )

    clusters = summary["clusters"]
    if clusters.get("status") == "success":
        cluster_summary_text = "\n".join(
            f"Cluster {k}: {v}"
            for k, v in clusters.get("cluster_summary", {}).items()
        )
        cluster_text = (
            "\nCluster Analysis:\n"
            f"Best k: {clusters.get('best_k')}\n"
            "Cluster Sizes:\n"
            f"{clusters.get('cluster_sizes')}\n"
            "Cluster Summary:\n"
            f"{cluster_summary_text}\n"
        )
    else:
        cluster_text = (
            "\nCluster Analysis:\n"
            "Not applied\n"
            "Reason:\n"
            f"{clusters.get('reason')}\n"
        )

    sections = [
        "\nYou are a professional data analyst.\n"
        "Generate a structured data analysis report.\n"
        "Format:\n"
        "1. Dataset Overview\n"
        "2. Key Features\n"
        "3. Cluster Analysis\n"
        "4. Key Insights\n"
        "5. Conclusion\n"
        "Dataset:\n"
        f"- Rows: {summary['rows']}\n"
        f"- Columns: {summary['columns']}\n"
        f"- Numeric columns: {summary['numeric_columns']}\n"
        "Column Names:\n"
        f"{column_names}\n"
        "Column Details:\n"
        f"{column_details}\n",
        "\nCorrelation Analysis:\n"
        "Top Pearson (linear relationships):\n"
        f"{pearson_text}\n"
        "Top Spearman (rank relationships):\n"
        f"{spearman_text}\n",
    ]

    if summary["mode"] == "ml":
        # Feature scores are only meaningful (and only looked up) when a
        # target exists, so a summary without "top_features" no longer
        # fails in the no-target mode.
        features_text = "\n".join(
            f"- {k}: {v}" for k, v in summary.get("top_features", {}).items()
        )
        sections.append(
            "\nTarget Column:\n"
            f"{summary['target']}\n"
            "Top Features:\n"
            f"{features_text}\n"
        )
    else:
        sections.append(
            "\nNo target column.\n"
            "Focus on patterns and relationships.\n"
        )

    sections.append(cluster_text)
    sections.append(
        "\nInstructions:\n"
        "- Use simple English\n"
        "- No conversational text\n"
        "- Use bullet points\n"
        "- Use real column names\n"
    )
    return "".join(sections)