def build_context(df, target, eda_report, decisions, model_results): lines = [] # dataset overview lines.append("DATASET OVERVIEW") lines.append(f"Rows: {df.shape[0]}") lines.append(f"Columns: {df.shape[1]}") ## target , problem type lines.append(f"Target Column: {target}") problem_type = decisions.get("problem_type", "Unknown") lines.append(f"Problem Type: {problem_type}") lines.append("\nDATA SAMPLE") lines.append(df.head(5).to_string()) lines.append("\nCOLUMN TYPES") lines.append(df.dtypes.to_string()) ## add lines.append("\nNUMERICAL SUMMARY") lines.append(df.describe().to_string()) ######## add cat_cols = df.select_dtypes(include="object").columns if len(cat_cols) > 0: lines.append("\nCATEGORICAL SUMMARY") for col in cat_cols[:5]: top_vals = df[col].value_counts().head(5) lines.append(f"\n{col}:") lines.append(top_vals.to_string()) #### nulls missing = df.isnull().sum().sum() lines.append(f"Total Missing Values: {missing}") ### outlier outliers = eda_report.get("outliers", {}) if isinstance(outliers, dict) and outliers: lines.append(f"Outlier Columns: {list(outliers.keys())}") # high cardinality high_card = decisions.get("high_cardinality", []) if high_card: lines.append(f"High Cardinality Columns: {high_card}") # best ml model best_model = model_results.get("best_model_name", "Unknown") lines.append(f"Best Model: {best_model}") # result results = model_results.get("results", []) # safety: ensure list if isinstance(results, dict): results = [results] if isinstance(results, str): results = [] lines.append("\nMODEL PERFORMANCE") for r in results: # safety check if not isinstance(r, dict): continue model_name = r.get("model", "Unknown Model") metrics = r.get("metrics", {}) lines.append(f"\nModel: {model_name}") if isinstance(metrics, dict) and metrics: for k, v in metrics.items(): lines.append(f" {k}: {v}") else: lines.append(" No metrics available") return "\n".join(lines)