Spaces:

DataWizard9742
/

DataAnalyst

Sleeping

App Files Files Community

DataWizard9742 committed on Nov 19, 2025

Commit

43600c2

verified ·

1 Parent(s): 1097a29

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -31

app.py CHANGED Viewed

@@ -137,51 +137,42 @@ def make_distribution_plots(df: pd.DataFrame, max_numeric=4, max_categorical=4):
 # --------- OpenAI analysis ---------
-def generate_ai_report(df_summary: str, api_key: str = None, model: str = "gpt-4.1-mini") -> str:
-    """
-    Sends the structured summary to OpenAI and gets a very detailed report.
-    """
     client = get_client(api_key)
     system_msg = (
         "You are a senior data analyst. You receive a structured summary of a dataset. "
         "Your job is to produce a VERY detailed, structured analysis report.\n\n"
-        "Your report MUST include at least these sections with clear headings:\n"
-        "1. Dataset Overview (rows, columns, column types, what this might be about)\n"
-        "2. Data Quality & Missing Values (what is good/bad, issues, suggestions)\n"
-        "3. Univariate Analysis (patterns in individual columns: numeric & categorical)\n"
-        "4. Bivariate & Correlation Insights (relationships between key columns)\n"
-        "5. Potential Target Variables & Use Cases (what could be predicted or modelled)\n"
-        '6. Feature Engineering Ideas (new variables or transformations to create)\n'
-        "7. Potential Visualizations (suggest specific plots and what they would reveal)\n"
-        "8. Risks, Biases & Limitations of this dataset\n"
-        "9. Recommended Next Steps for deeper analysis or modelling.\n\n"
-        "Be concrete and descriptive. Use bullet points and short paragraphs. "
-        "Assume the user understands basic data science but wants expert-level insight."
     )
     user_msg = (
-        "Here is a detailed summary of the dataset. Use ONLY this information in your reasoning; "
-        "do not invent columns that are not mentioned.\n\n"
         f"{df_summary}"
     )
-    response = client.responses.create(
         model=model,
-        reasoning={"effort": "medium"},
-        input=[
-            {
-                "role": "system",
-                "content": system_msg,
-            },
-            {
-                "role": "user",
-                "content": user_msg,
-            },
-        ],
-        max_output_tokens=1800,
     )
     # Extract text from the first output
     chunks = []
     for item in response.output[0].content:

 # --------- OpenAI analysis ---------
+def generate_ai_report(df_summary: str, api_key: str = None, model: str = "gpt-4o-mini") -> str:
     client = get_client(api_key)
     system_msg = (
         "You are a senior data analyst. You receive a structured summary of a dataset. "
         "Your job is to produce a VERY detailed, structured analysis report.\n\n"
+        "Your report MUST include at least these sections:\n"
+        "1. Dataset Overview\n"
+        "2. Data Quality & Missing Values\n"
+        "3. Univariate Analysis\n"
+        "4. Bivariate & Correlation Insights\n"
+        "5. Target Variables & Use Cases\n"
+        "6. Feature Engineering Ideas\n"
+        "7. Recommended Visualizations\n"
+        "8. Risks, Biases & Limitations\n"
+        "9. Next Steps for Modelling\n"
     )
     user_msg = (
+        "Here is a detailed summary of the dataset. Use ONLY this information while reasoning:\n\n"
         f"{df_summary}"
     )
+    response = client.chat.completions.create(
         model=model,
+        messages=[
+            {"role": "system", "content": system_msg},
+            {"role": "user", "content": user_msg},
+        ],
+        max_tokens=2000,
+        temperature=0.7
     )
+    return response.choices[0].message.content
     # Extract text from the first output
     chunks = []
     for item in response.output[0].content: