File size: 2,193 Bytes
1067825
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
def _correlation_lines(pairs):
    """Render ``(a, b, value)`` triples as bullet lines, value rounded to 2 places.

    ``None`` or an empty sequence yields an empty string.
    """
    return "\n".join(
        f"- {a} & {b}: {round(v, 2)}" for a, b, v in (pairs or [])
    )


def build_prompt(summary: dict, profile: dict) -> str:
    """Assemble the text prompt asking for a structured data analysis report.

    Args:
        summary: Mapping read for the following keys:
            ``rows``, ``columns``, ``numeric_columns`` (required, interpolated
            as-is); ``mode`` (required; the value ``"ml"`` selects the
            target/feature section, anything else the "no target" section);
            ``target`` (required only when ``mode == "ml"``);
            ``top_features`` (mapping, read only when ``mode == "ml"``);
            ``top_pearson`` / ``top_spearman`` (optional iterables of
            ``(a, b, value)`` triples);
            ``clusters`` (optional mapping with ``status`` and, depending on
            it, ``best_k``, ``cluster_sizes``, ``cluster_summary`` or
            ``reason``).
        profile: Mapping with ``column_name`` (iterable of column labels) and
            ``column_details`` (iterable of mappings; the first key/value pair
            of each is rendered as ``name | type``).

    Returns:
        The complete prompt string.

    Raises:
        KeyError: If one of the required keys listed above is missing.
    """
    # Column labels are not guaranteed to be strings (e.g. integer labels),
    # so convert explicitly before joining.
    column_names = ", ".join(str(name) for name in profile["column_name"])

    # Only the first key/value pair of each entry is rendered; an empty entry
    # contributes no line instead of raising.
    column_details = "\n".join(
        f"- {name} | type: {dtype}"
        for col in profile["column_details"]
        for name, dtype in list(col.items())[:1]
    )

    pearson_text = _correlation_lines(summary.get("top_pearson"))
    spearman_text = _correlation_lines(summary.get("top_spearman"))

    # A missing/None cluster result is reported through the "Not applied"
    # branch below rather than aborting the whole prompt.
    clusters = summary.get("clusters") or {}

    if clusters.get("status") == "success":
        cluster_summary = clusters.get("cluster_summary") or {}

        cluster_summary_text = "\n".join(
            f"Cluster {k}: {v}" for k, v in cluster_summary.items()
        )

        cluster_text = f"""

Cluster Analysis:

Best k: {clusters.get('best_k')}



Cluster Sizes:

{clusters.get('cluster_sizes')}



Cluster Summary:

{cluster_summary_text}

"""
    else:
        cluster_text = f"""

Cluster Analysis:

Not applied



Reason:

{clusters.get("reason")}

"""

    header_text = f"""

You are a professional data analyst.



Generate a structured data analysis report.



Format:

1. Dataset Overview

2. Key Features

3. Cluster Analysis

4. Key Insights

5. Conclusion



Dataset:

- Rows: {summary['rows']}

- Columns: {summary['columns']}

- Numeric columns: {summary['numeric_columns']}



Column Names:

{column_names}



Column Details:

{column_details}

"""

    correlation_text = f"""

Correlation Analysis:



Top Pearson (linear relationships):

{pearson_text}



Top Spearman (rank relationships):

{spearman_text}

"""

    if summary["mode"] == "ml":
        # Feature importances are only meaningful (and only required) in ML
        # mode, so they are looked up here rather than unconditionally.
        top_features = summary.get("top_features") or {}
        features_text = "\n".join(
            f"- {k}: {v}" for k, v in top_features.items()
        )
        target_text = f"""

Target Column:

{summary['target']}



Top Features:

{features_text}

"""
    else:
        target_text = """

No target column.



Focus on patterns and relationships.

"""

    instructions_text = """

Instructions:

- Use simple English

- No conversational text

- Use bullet points

- Use real column names

"""

    # Section order is part of the prompt contract: header, correlations,
    # target/features, clusters, instructions.
    return "".join([
        header_text,
        correlation_text,
        target_text,
        cluster_text,
        instructions_text,
    ])