triflix committed on
Commit
ba5a2ff
·
verified ·
1 Parent(s): dafc2a0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -4
app.py CHANGED
@@ -160,7 +160,7 @@ def stream_save_and_hash(upload_file: UploadFile, tmp_path: str, size_limit: Opt
160
 
161
  # ---------- AI interaction (blocking) ----------
162
  def generate_summary_blocking(meta, fiverow) -> str:
163
- api_key = os.getenv("GEMINI_API_KEY")
164
  if not api_key:
165
  raise RuntimeError("GEMINI_API_KEY not set")
166
  client = genai.Client(api_key=api_key)
@@ -171,9 +171,39 @@ Input contains:
171
  - meta: dataframe metadata
172
  - fiverow: first 5 records of dataframe
173
  You must output JSON with the following structure:
174
- { "summary": "<short natural language overview>", "recommended_charts": [ ... ] }
175
- Always produce syntactically valid JSON ONLY.
176
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  user_prompt = {"meta": meta, "fiverow": fiverow}
178
  contents = [
179
  types.Content(
 
160
 
161
  # ---------- AI interaction (blocking) ----------
162
  def generate_summary_blocking(meta, fiverow) -> str:
163
+ api_key = "AIzaSyB1jgGCuzg7ELPwNEEwaluQZoZhxhgLmAs"
164
  if not api_key:
165
  raise RuntimeError("GEMINI_API_KEY not set")
166
  client = genai.Client(api_key=api_key)
 
171
  - meta: dataframe metadata
172
  - fiverow: first 5 records of dataframe
173
  You must output JSON with the following structure:
174
+ {
175
+ "summary": "<short natural language overview of dataset>",
176
+ "recommended_charts": [
177
+ {
178
+ "type": "<one of: bar, pie, timeseries, histogram, scatter, multiple_columns, stacked_bar, heatmap>",
179
+ "title": "<short title for chart>",
180
+ "columns": ["<col1>", "<col2>", "..."],
181
+ "python_code": "<full runnable Python code using seaborn/matplotlib that produces the chart>"
182
+ },
183
+ ...
184
+ ]
185
+ }
186
+ Mandatory rules:
187
+ - Always produce syntactically valid JSON ONLY. No text outside the JSON object.
188
+ - Provide at least these chart types somewhere in recommended_charts: bar, pie, timeseries, histogram, scatter, multiple_columns, stacked_bar, heatmap.
189
+ - Use only column names that appear in meta['column_names'].
190
+ - The python_code string must be self-contained and runnable assuming a variable `df` exists containing the full cleaned DataFrame. Start the code with imports:
191
+ import pandas as pd
192
+ import seaborn as sns
193
+ import matplotlib.pyplot as plt
194
+ and include any necessary preprocessing steps (e.g., parsing dates).
195
+ - For timeseries charts ensure the datetime column is parsed (`pd.to_datetime`) before plotting.
196
+ - For multiple_columns provide a pairplot or facetgrid example that uses up to 4 numeric columns or sensible categorical splits.
197
+ - For stacked_bar, show aggregation code (groupby + unstack) and plotting with df.plot(kind='bar', stacked=True).
198
+ - For heatmap, compute correlation matrix and plot sns.heatmap with annotations.
199
+ - For pie charts, ensure grouping/aggregation when there are >20 unique categories (group small categories into 'Other').
200
+ - For histogram and scatter include axis labels and tight_layout; include plt.show() at the end.
201
+ - Keep code minimal but complete so a user can copy-paste and run (assume seaborn, matplotlib, pandas installed).
202
+ - For each chart add a sensible "columns" list showing which columns the code uses.
203
+ - Do not include examples using columns not present in meta.
204
+ - Do not include more than 10 recommended_charts.
205
+ - Ensure strings inside the JSON are escaped properly so the JSON parses.
206
+ Produce concise natural-language one-line summary in "summary". Ensure JSON is parseable by json.loads in Python."""
207
  user_prompt = {"meta": meta, "fiverow": fiverow}
208
  contents = [
209
  types.Content(