Spaces:
Sleeping
Sleeping
File size: 2,424 Bytes
def build_context(df, target, eda_report, decisions, model_results):
    """Build a plain-text summary of a dataset and its model results.

    The summary is assembled section by section (overview, data sample,
    column types, numerical and categorical summaries, missing values,
    outliers, high-cardinality columns, best model, per-model metrics)
    and returned as one newline-joined string.

    Args:
        df: pandas DataFrame to describe.
        target: Name of the target column; only echoed into the text.
        eda_report: Mapping read for the ``"outliers"`` key. ``None`` is
            treated as an empty mapping.
        decisions: Mapping read for the ``"problem_type"`` and
            ``"high_cardinality"`` keys. ``None`` is treated as an empty
            mapping.
        model_results: Mapping read for the ``"best_model_name"`` and
            ``"results"`` keys. ``None`` is treated as an empty mapping.
            ``"results"`` may be a list of dicts or a single dict, each
            read for ``"model"`` and ``"metrics"``; ``None`` or a string
            yields no model entries.

    Returns:
        The assembled report as a single string.
    """
    # A caller with nothing to report may pass None; treat it like an
    # empty mapping instead of failing on ``.get``.
    eda_report = {} if eda_report is None else eda_report
    decisions = {} if decisions is None else decisions
    model_results = {} if model_results is None else model_results

    lines = []

    # Dataset overview.
    lines.append("DATASET OVERVIEW")
    lines.append(f"Rows: {df.shape[0]}")
    lines.append(f"Columns: {df.shape[1]}")

    # Target and problem type.
    lines.append(f"Target Column: {target}")
    problem_type = decisions.get("problem_type", "Unknown")
    lines.append(f"Problem Type: {problem_type}")

    lines.append("\nDATA SAMPLE")
    lines.append(df.head(5).to_string())

    lines.append("\nCOLUMN TYPES")
    lines.append(df.dtypes.to_string())

    lines.append("\nNUMERICAL SUMMARY")
    if df.shape[1] > 0:
        lines.append(df.describe().to_string())
    else:
        # describe() raises ValueError on a frame without columns.
        lines.append("No columns to summarize")

    # Top values for at most the first five object-dtype columns.
    cat_cols = df.select_dtypes(include="object").columns
    # len() rather than truthiness: a pandas Index has no unambiguous
    # boolean value.
    if len(cat_cols) > 0:
        lines.append("\nCATEGORICAL SUMMARY")
        for col in cat_cols[:5]:
            top_vals = df[col].value_counts().head(5)
            lines.append(f"\n{col}:")
            lines.append(top_vals.to_string())

    # Missing values across the whole frame.
    missing = df.isnull().sum().sum()
    lines.append(f"Total Missing Values: {missing}")

    # Outliers: only the column names (dict keys) are reported.
    outliers = eda_report.get("outliers", {})
    if isinstance(outliers, dict) and outliers:
        lines.append(f"Outlier Columns: {list(outliers.keys())}")

    # High-cardinality columns.
    high_card = decisions.get("high_cardinality", [])
    if high_card:
        lines.append(f"High Cardinality Columns: {high_card}")

    # Best model.
    best_model = model_results.get("best_model_name", "Unknown")
    lines.append(f"Best Model: {best_model}")

    # Normalise "results" to something safe to iterate: a lone dict is
    # wrapped in a list; None or a string produces no entries.
    results = model_results.get("results", [])
    if isinstance(results, dict):
        results = [results]
    elif results is None or isinstance(results, str):
        results = []

    lines.append("\nMODEL PERFORMANCE")
    for entry in results:
        # Skip anything that is not a per-model dict.
        if not isinstance(entry, dict):
            continue
        model_name = entry.get("model", "Unknown Model")
        metrics = entry.get("metrics", {})
        lines.append(f"\nModel: {model_name}")
        if isinstance(metrics, dict) and metrics:
            lines.extend(f" {k}: {v}" for k, v in metrics.items())
        else:
            lines.append(" No metrics available")

    return "\n".join(lines)