Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -137,51 +137,42 @@ def make_distribution_plots(df: pd.DataFrame, max_numeric=4, max_categorical=4):
|
|
| 137 |
|
| 138 |
|
| 139 |
# --------- OpenAI analysis ---------
|
| 140 |
-
def generate_ai_report(df_summary: str, api_key: str = None, model: str = "gpt-
|
| 141 |
-
"""
|
| 142 |
-
Sends the structured summary to OpenAI and gets a very detailed report.
|
| 143 |
-
"""
|
| 144 |
client = get_client(api_key)
|
| 145 |
|
| 146 |
system_msg = (
|
| 147 |
"You are a senior data analyst. You receive a structured summary of a dataset. "
|
| 148 |
"Your job is to produce a VERY detailed, structured analysis report.\n\n"
|
| 149 |
-
"Your report MUST include at least these sections
|
| 150 |
-
"1. Dataset Overview
|
| 151 |
-
"2. Data Quality & Missing Values
|
| 152 |
-
"3. Univariate Analysis
|
| 153 |
-
"4. Bivariate & Correlation Insights
|
| 154 |
-
"5.
|
| 155 |
-
|
| 156 |
-
"7.
|
| 157 |
-
"8. Risks, Biases & Limitations
|
| 158 |
-
"9.
|
| 159 |
-
"Be concrete and descriptive. Use bullet points and short paragraphs. "
|
| 160 |
-
"Assume the user understands basic data science but wants expert-level insight."
|
| 161 |
)
|
| 162 |
|
| 163 |
user_msg = (
|
| 164 |
-
"Here is a detailed summary of the dataset. Use ONLY this information
|
| 165 |
-
"do not invent columns that are not mentioned.\n\n"
|
| 166 |
f"{df_summary}"
|
| 167 |
)
|
| 168 |
|
| 169 |
-
response = client.
|
| 170 |
model=model,
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
{
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
{
|
| 178 |
-
"role": "user",
|
| 179 |
-
"content": user_msg,
|
| 180 |
-
},
|
| 181 |
-
],
|
| 182 |
-
max_output_tokens=1800,
|
| 183 |
)
|
| 184 |
|
|
|
|
|
|
|
|
|
|
| 185 |
# Extract text from the first output
|
| 186 |
chunks = []
|
| 187 |
for item in response.output[0].content:
|
|
|
|
| 137 |
|
| 138 |
|
| 139 |
# --------- OpenAI analysis ---------
|
| 140 |
+
def generate_ai_report(df_summary: str, api_key: str = None, model: str = "gpt-4o-mini") -> str:
|
|
|
|
|
|
|
|
|
|
| 141 |
client = get_client(api_key)
|
| 142 |
|
| 143 |
system_msg = (
|
| 144 |
"You are a senior data analyst. You receive a structured summary of a dataset. "
|
| 145 |
"Your job is to produce a VERY detailed, structured analysis report.\n\n"
|
| 146 |
+
"Your report MUST include at least these sections:\n"
|
| 147 |
+
"1. Dataset Overview\n"
|
| 148 |
+
"2. Data Quality & Missing Values\n"
|
| 149 |
+
"3. Univariate Analysis\n"
|
| 150 |
+
"4. Bivariate & Correlation Insights\n"
|
| 151 |
+
"5. Target Variables & Use Cases\n"
|
| 152 |
+
"6. Feature Engineering Ideas\n"
|
| 153 |
+
"7. Recommended Visualizations\n"
|
| 154 |
+
"8. Risks, Biases & Limitations\n"
|
| 155 |
+
"9. Next Steps for Modelling\n"
|
|
|
|
|
|
|
| 156 |
)
|
| 157 |
|
| 158 |
user_msg = (
|
| 159 |
+
"Here is a detailed summary of the dataset. Use ONLY this information while reasoning:\n\n"
|
|
|
|
| 160 |
f"{df_summary}"
|
| 161 |
)
|
| 162 |
|
| 163 |
+
response = client.chat.completions.create(
|
| 164 |
model=model,
|
| 165 |
+
messages=[
|
| 166 |
+
{"role": "system", "content": system_msg},
|
| 167 |
+
{"role": "user", "content": user_msg},
|
| 168 |
+
],
|
| 169 |
+
max_tokens=2000,
|
| 170 |
+
temperature=0.7
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
)
|
| 172 |
|
| 173 |
+
return response.choices[0].message.content
|
| 174 |
+
|
| 175 |
+
|
| 176 |
# Extract text from the first output
|
| 177 |
chunks = []
|
| 178 |
for item in response.output[0].content:
|