Update app.py
Browse files
app.py
CHANGED
|
@@ -160,7 +160,7 @@ def stream_save_and_hash(upload_file: UploadFile, tmp_path: str, size_limit: Opt
|
|
| 160 |
|
| 161 |
# ---------- AI interaction (blocking) ----------
|
| 162 |
def generate_summary_blocking(meta, fiverow) -> str:
|
| 163 |
-
api_key =
|
| 164 |
if not api_key:
|
| 165 |
raise RuntimeError("GEMINI_API_KEY not set")
|
| 166 |
client = genai.Client(api_key=api_key)
|
|
@@ -171,9 +171,39 @@ Input contains:
|
|
| 171 |
- meta: dataframe metadata
|
| 172 |
- fiverow: first 5 records of dataframe
|
| 173 |
You must output JSON with the following structure:
|
| 174 |
-
{
|
| 175 |
-
|
| 176 |
-
""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
user_prompt = {"meta": meta, "fiverow": fiverow}
|
| 178 |
contents = [
|
| 179 |
types.Content(
|
|
|
|
| 160 |
|
| 161 |
# ---------- AI interaction (blocking) ----------
|
| 162 |
def generate_summary_blocking(meta, fiverow) -> str:
|
| 163 |
+
api_key = os.getenv("GEMINI_API_KEY")  # read the key from the environment; never commit secrets to source
|
| 164 |
if not api_key:
|
| 165 |
raise RuntimeError("GEMINI_API_KEY not set")
|
| 166 |
client = genai.Client(api_key=api_key)
|
|
|
|
| 171 |
- meta: dataframe metadata
|
| 172 |
- fiverow: first 5 records of dataframe
|
| 173 |
You must output JSON with the following structure:
|
| 174 |
+
{
|
| 175 |
+
"summary": "<short natural language overview of dataset>",
|
| 176 |
+
"recommended_charts": [
|
| 177 |
+
{
|
| 178 |
+
"type": "<one of: bar, pie, timeseries, histogram, scatter, multiple_columns, stacked_bar, heatmap>",
|
| 179 |
+
"title": "<short title for chart>",
|
| 180 |
+
"columns": ["<col1>", "<col2>", "..."],
|
| 181 |
+
"python_code": "<full runnable Python code using seaborn/matplotlib that produces the chart>"
|
| 182 |
+
},
|
| 183 |
+
...
|
| 184 |
+
]
|
| 185 |
+
}
|
| 186 |
+
Mandatory rules:
|
| 187 |
+
- Always produce syntactically valid JSON ONLY. No text outside the JSON object.
|
| 188 |
+
- Provide at least these chart types somewhere in recommended_charts: bar, pie, timeseries, histogram, scatter, multiple_columns, stacked_bar, heatmap.
|
| 189 |
+
- Use only column names that appear in meta['column_names'].
|
| 190 |
+
- The python_code string must be self-contained and runnable assuming a variable `df` exists containing the full cleaned DataFrame. Start the code with imports:
|
| 191 |
+
import pandas as pd
|
| 192 |
+
import seaborn as sns
|
| 193 |
+
import matplotlib.pyplot as plt
|
| 194 |
+
and include any necessary preprocessing steps (e.g., parsing dates).
|
| 195 |
+
- For timeseries charts ensure the datetime column is parsed (`pd.to_datetime`) before plotting.
|
| 196 |
+
- For multiple_columns provide a pairplot or facetgrid example that uses up to 4 numeric columns or sensible categorical splits.
|
| 197 |
+
- For stacked_bar, show aggregation code (groupby + unstack) and plotting with df.plot(kind='bar', stacked=True).
|
| 198 |
+
- For heatmap, compute correlation matrix and plot sns.heatmap with annotations.
|
| 199 |
+
- For pie charts, ensure grouping/aggregation when there are >20 unique categories (group small categories into 'Other').
|
| 200 |
+
- For histogram and scatter include axis labels and tight_layout; include plt.show() at the end.
|
| 201 |
+
- Keep code minimal but complete so a user can copy-paste and run (assume seaborn, matplotlib, pandas installed).
|
| 202 |
+
- For each chart add a sensible "columns" list showing which columns the code uses.
|
| 203 |
+
- Do not include examples using columns not present in meta.
|
| 204 |
+
- Do not include more than 10 recommended_charts.
|
| 205 |
+
- Ensure strings inside the JSON are escaped properly so the JSON parses.
|
| 206 |
+
Produce concise natural-language one-line summary in "summary". Ensure JSON is parseable by json.loads in Python."""
|
| 207 |
user_prompt = {"meta": meta, "fiverow": fiverow}
|
| 208 |
contents = [
|
| 209 |
types.Content(
|