DataWizard9742 committed on
Commit
43600c2
·
verified ·
1 Parent(s): 1097a29

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -31
app.py CHANGED
@@ -137,51 +137,42 @@ def make_distribution_plots(df: pd.DataFrame, max_numeric=4, max_categorical=4):
137
 
138
 
139
  # --------- OpenAI analysis ---------
140
- def generate_ai_report(df_summary: str, api_key: str = None, model: str = "gpt-4.1-mini") -> str:
141
- """
142
- Sends the structured summary to OpenAI and gets a very detailed report.
143
- """
144
  client = get_client(api_key)
145
 
146
  system_msg = (
147
  "You are a senior data analyst. You receive a structured summary of a dataset. "
148
  "Your job is to produce a VERY detailed, structured analysis report.\n\n"
149
- "Your report MUST include at least these sections with clear headings:\n"
150
- "1. Dataset Overview (rows, columns, column types, what this might be about)\n"
151
- "2. Data Quality & Missing Values (what is good/bad, issues, suggestions)\n"
152
- "3. Univariate Analysis (patterns in individual columns: numeric & categorical)\n"
153
- "4. Bivariate & Correlation Insights (relationships between key columns)\n"
154
- "5. Potential Target Variables & Use Cases (what could be predicted or modelled)\n"
155
- '6. Feature Engineering Ideas (new variables or transformations to create)\n'
156
- "7. Potential Visualizations (suggest specific plots and what they would reveal)\n"
157
- "8. Risks, Biases & Limitations of this dataset\n"
158
- "9. Recommended Next Steps for deeper analysis or modelling.\n\n"
159
- "Be concrete and descriptive. Use bullet points and short paragraphs. "
160
- "Assume the user understands basic data science but wants expert-level insight."
161
  )
162
 
163
  user_msg = (
164
- "Here is a detailed summary of the dataset. Use ONLY this information in your reasoning; "
165
- "do not invent columns that are not mentioned.\n\n"
166
  f"{df_summary}"
167
  )
168
 
169
- response = client.responses.create(
170
  model=model,
171
- reasoning={"effort": "medium"},
172
- input=[
173
- {
174
- "role": "system",
175
- "content": system_msg,
176
- },
177
- {
178
- "role": "user",
179
- "content": user_msg,
180
- },
181
- ],
182
- max_output_tokens=1800,
183
  )
184
 
 
 
 
185
  # Extract text from the first output
186
  chunks = []
187
  for item in response.output[0].content:
 
137
 
138
 
139
  # --------- OpenAI analysis ---------
140
+ def generate_ai_report(df_summary: str, api_key: str = None, model: str = "gpt-4o-mini") -> str:
 
 
 
141
  client = get_client(api_key)
142
 
143
  system_msg = (
144
  "You are a senior data analyst. You receive a structured summary of a dataset. "
145
  "Your job is to produce a VERY detailed, structured analysis report.\n\n"
146
+ "Your report MUST include at least these sections:\n"
147
+ "1. Dataset Overview\n"
148
+ "2. Data Quality & Missing Values\n"
149
+ "3. Univariate Analysis\n"
150
+ "4. Bivariate & Correlation Insights\n"
151
+ "5. Target Variables & Use Cases\n"
152
+ "6. Feature Engineering Ideas\n"
153
+ "7. Recommended Visualizations\n"
154
+ "8. Risks, Biases & Limitations\n"
155
+ "9. Next Steps for Modelling\n"
 
 
156
  )
157
 
158
  user_msg = (
159
+ "Here is a detailed summary of the dataset. Use ONLY this information while reasoning:\n\n"
 
160
  f"{df_summary}"
161
  )
162
 
163
+ response = client.chat.completions.create(
164
  model=model,
165
+ messages=[
166
+ {"role": "system", "content": system_msg},
167
+ {"role": "user", "content": user_msg},
168
+ ],
169
+ max_tokens=2000,
170
+ temperature=0.7
 
 
 
 
 
 
171
  )
172
 
173
+ return response.choices[0].message.content
174
+
175
+
176
  # Extract text from the first output
177
  chunks = []
178
  for item in response.output[0].content: