Update app.py
Browse files
app.py
CHANGED
|
@@ -12,16 +12,21 @@ import time
|
|
| 12 |
import psutil
|
| 13 |
import shutil
|
| 14 |
import ast
|
|
|
|
|
|
|
| 15 |
from smolagents import HfApiModel, CodeAgent
|
| 16 |
from huggingface_hub import login
|
| 17 |
from sklearn.model_selection import train_test_split, cross_val_score
|
| 18 |
-
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score,
|
|
|
|
| 19 |
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
|
| 20 |
from sklearn.linear_model import LogisticRegression
|
| 21 |
from sklearn.preprocessing import LabelEncoder
|
| 22 |
from datetime import datetime
|
| 23 |
from PIL import Image
|
| 24 |
-
from
|
|
|
|
|
|
|
| 25 |
|
| 26 |
# Authenticate with Hugging Face
|
| 27 |
hf_token = os.getenv("HF_TOKEN")
|
|
@@ -43,12 +48,12 @@ def clean_data(df):
|
|
| 43 |
def upload_file(file):
|
| 44 |
global df_global
|
| 45 |
if file is None:
|
| 46 |
-
return pd.DataFrame({"Error": ["No file uploaded."]})
|
| 47 |
ext = os.path.splitext(file.name)[-1]
|
| 48 |
df = pd.read_csv(file.name) if ext == ".csv" else pd.read_excel(file.name)
|
| 49 |
df = clean_data(df)
|
| 50 |
df_global = df
|
| 51 |
-
return df.head()
|
| 52 |
|
| 53 |
def format_analysis_report(raw_output, visuals):
|
| 54 |
try:
|
|
@@ -154,117 +159,445 @@ def analyze_data(csv_file, additional_notes=""):
|
|
| 154 |
run.finish()
|
| 155 |
return format_analysis_report(analysis_result, visuals)
|
| 156 |
|
| 157 |
-
def compare_models(
|
| 158 |
-
if
|
| 159 |
-
return
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
|
|
|
|
|
|
| 163 |
if y.dtype == 'object':
|
| 164 |
y = LabelEncoder().fit_transform(y)
|
| 165 |
-
|
|
|
|
| 166 |
"RandomForest": RandomForestClassifier(),
|
| 167 |
"LogisticRegression": LogisticRegression(max_iter=1000),
|
| 168 |
-
"SVC": SVC(
|
| 169 |
}
|
|
|
|
| 170 |
results = []
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
model.fit(X_train, y_train)
|
| 176 |
y_pred = model.predict(X_test)
|
| 177 |
-
|
|
|
|
| 178 |
metrics = {
|
| 179 |
-
"
|
| 180 |
-
"
|
| 181 |
-
"
|
| 182 |
-
"
|
| 183 |
-
"F1": f1_score(y_test, y_pred, average="weighted", zero_division=0),
|
| 184 |
-
"ROC-AUC": roc_auc_score(y_test, y_proba) if y_proba is not None else "N/A"
|
| 185 |
}
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
if y.dtype == 'object':
|
| 204 |
y = LabelEncoder().fit_transform(y)
|
|
|
|
| 205 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
model_dict = {
|
| 210 |
"RandomForest": RandomForestClassifier(),
|
| 211 |
"LogisticRegression": LogisticRegression(max_iter=1000),
|
|
|
|
|
|
|
| 212 |
"SVC": SVC(probability=True)
|
| 213 |
}
|
|
|
|
| 214 |
results = []
|
| 215 |
-
|
| 216 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
model.fit(X_train, y_train)
|
| 218 |
-
y_pred = model.predict(
|
|
|
|
|
|
|
|
|
|
| 219 |
metrics = {
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
}
|
|
|
|
| 226 |
results.append(metrics)
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
def
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
)
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
import psutil
|
| 13 |
import shutil
|
| 14 |
import ast
|
| 15 |
+
import seaborn as sns
|
| 16 |
+
from sklearn.svm import SVC
|
| 17 |
from smolagents import HfApiModel, CodeAgent
|
| 18 |
from huggingface_hub import login
|
| 19 |
from sklearn.model_selection import train_test_split, cross_val_score
|
| 20 |
+
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report
|
| 21 |
+
from sklearn.metrics import ConfusionMatrixDisplay
|
| 22 |
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
|
| 23 |
from sklearn.linear_model import LogisticRegression
|
| 24 |
from sklearn.preprocessing import LabelEncoder
|
| 25 |
from datetime import datetime
|
| 26 |
from PIL import Image
|
| 27 |
+
from xgboost import XGBClassifier
|
| 28 |
+
|
| 29 |
+
|
| 30 |
|
| 31 |
# Authenticate with Hugging Face
|
| 32 |
hf_token = os.getenv("HF_TOKEN")
|
|
|
|
| 48 |
def upload_file(file):
    """Load an uploaded CSV/Excel file, clean it and cache it in ``df_global``.

    Args:
        file: Either a filesystem path (the file component in this app is
            created with ``type="filepath"``) or an object exposing the path
            as ``.name``; ``None`` when nothing has been uploaded.

    Returns:
        The first rows (``DataFrame.head()``) of the cleaned data, or a
        one-cell DataFrame holding an error message when ``file`` is ``None``.
    """
    global df_global
    if file is None:
        return pd.DataFrame({"Error": ["No file uploaded."]})

    # A plain path string has no `.name`; accept both call shapes.
    path = file if isinstance(file, (str, os.PathLike)) else file.name

    # Compare case-insensitively so e.g. "DATA.CSV" is still read as CSV.
    ext = os.path.splitext(path)[-1].lower()
    df = pd.read_csv(path) if ext == ".csv" else pd.read_excel(path)
    df = clean_data(df)
    df_global = df
    return df.head()
|
| 57 |
|
| 58 |
def format_analysis_report(raw_output, visuals):
|
| 59 |
try:
|
|
|
|
| 159 |
run.finish()
|
| 160 |
return format_analysis_report(analysis_result, visuals)
|
| 161 |
|
| 162 |
+
def compare_models():
    """Cross-validate a fixed set of baseline classifiers on ``df_global``.

    The last column of ``df_global`` is used as the target and all other
    columns as features. Each model is scored with 5-fold cross-validation.

    Returns:
        A DataFrame with one row per model (``Model``, ``CV Mean Accuracy``,
        ``CV Std Dev``), or a message string when no dataset is loaded.
    """
    if df_global is None:
        return "Please upload and preprocess a dataset first."

    target = df_global.columns[-1]
    X = df_global.drop(target, axis=1)
    y = df_global[target]

    # String labels are mapped to integer codes before fitting.
    if y.dtype == 'object':
        y = LabelEncoder().fit_transform(y)

    models = {
        "RandomForest": RandomForestClassifier(),
        "LogisticRegression": LogisticRegression(max_iter=1000),
        "SVC": SVC(),
    }

    results = []
    for name, model in models.items():
        scores = cross_val_score(model, X, y, cv=5)
        cv_mean = np.mean(scores)
        cv_std = np.std(scores)
        results.append({
            "Model": name,
            "CV Mean Accuracy": cv_mean,
            "CV Std Dev": cv_std,
        })
        # Only log when a W&B run is active, as the other comparison
        # functions in this file do; logging without a run is an error.
        if wandb.run:
            wandb.log({f"{name}_cv_mean": cv_mean, f"{name}_cv_std": cv_std})

    return pd.DataFrame(results)
|
| 191 |
+
|
| 192 |
+
# 1. prepare_data should come first
|
| 193 |
+
def prepare_data(df, target_column=None):
    """Split ``df`` into train/test features and labels.

    Args:
        df: DataFrame holding both the features and the target column.
        target_column: Name of the label column. When omitted, the first
            object-dtype column is used; if there is none, the last column.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` from a 70/30 split made with
        ``random_state=42``.
    """
    from sklearn.model_selection import train_test_split

    if target_column is None:
        object_columns = df.select_dtypes(include=['object']).columns
        if len(object_columns) > 0:
            target_column = object_columns[0]
        else:
            target_column = df.columns[-1]

    features = df.drop(columns=[target_column])
    labels = df[target_column]
    return train_test_split(features, labels, test_size=0.3, random_state=42)
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
def train_model(_):
    """Tune a random forest with Optuna, evaluate it and log to Weights & Biases.

    Args:
        _: Ignored; present so the function can be bound to a UI event that
            passes one input.

    Returns:
        ``(metrics, trials_df)``: a dict of weighted test-set metrics for a
        forest refit with the best parameters found, and a DataFrame of the
        (up to) seven best-scoring trials. On any failure the error is
        printed and ``({}, empty DataFrame)`` is returned.
    """
    wandb_run = None
    try:
        wandb.login(key=os.environ.get("WANDB_API_KEY"))
        wandb_run = wandb.init(
            project="huggingface-data-analysis",
            name=f"Optuna_Run_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
            reinit=True
        )

        X_train, X_test, y_train, y_test = prepare_data(df_global)

        def objective(trial):
            params = {
                "n_estimators": trial.suggest_int("n_estimators", 50, 200),
                "max_depth": trial.suggest_int("max_depth", 3, 10),
            }
            # Build the forest from the sampled values; with a default
            # forest every trial would score the same configuration.
            model = RandomForestClassifier(**params)
            score = cross_val_score(model, X_train, y_train, cv=3).mean()
            wandb.log({**params, "cv_score": score})
            return score

        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=15)

        # Refit on the full training split with the winning hyperparameters.
        model = RandomForestClassifier(**study.best_params)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        metrics = {
            "accuracy": accuracy_score(y_test, y_pred),
            "precision": precision_score(y_test, y_pred, average="weighted", zero_division=0),
            "recall": recall_score(y_test, y_pred, average="weighted", zero_division=0),
            "f1_score": f1_score(y_test, y_pred, average="weighted", zero_division=0),
        }
        wandb.log(metrics)

        # Top 7 trials; trials without a value cannot be ranked, so skip them.
        scored_trials = [t for t in study.trials if t.value is not None]
        top_trials = sorted(scored_trials, key=lambda t: t.value, reverse=True)[:7]
        trials_df = pd.DataFrame(
            [{**t.params, "score": t.value} for t in top_trials]
        )

        return metrics, trials_df

    except Exception as e:
        print(f"Training Error: {e}")
        return {}, pd.DataFrame()
    finally:
        # Close the run on both the success and the error path.
        if wandb_run is not None:
            wandb_run.finish()
|
| 259 |
+
|
| 260 |
+
# Added a/b functions to existing code
|
| 261 |
+
def create_model_comparison_plots(results_df):
    """Render and save the comparison charts for a model results table.

    Args:
        results_df: DataFrame with a ``Model`` column plus the metric columns
            ``Test Accuracy``, ``Precision``, ``Recall``, ``F1 Score`` and
            ``ROC AUC``.

    Returns:
        List of the saved image paths: the accuracy bar chart followed by
        the radar chart, both under ``./comparison_plots``.
    """
    os.makedirs('./comparison_plots', exist_ok=True)
    saved = []

    def _save_and_close(path):
        # Write the current figure to disk, record it, then release it.
        plt.savefig(path, bbox_inches='tight')
        saved.append(path)
        plt.close()

    # Bar chart: test accuracy per model.
    plt.figure(figsize=(12, 6))
    sns.barplot(data=results_df, x='Model', y='Test Accuracy')
    plt.title('Model Accuracy Comparison')
    plt.xticks(rotation=45)
    _save_and_close('./comparison_plots/accuracy_comparison.png')

    # Radar chart: one closed polygon per model across the metric axes.
    radar_metrics = ['Test Accuracy', 'Precision', 'Recall', 'F1 Score']
    if results_df['ROC AUC'].notna().any():
        # ROC AUC only gets an axis when at least one model has a value.
        radar_metrics.append('ROC AUC')

    plt.figure(figsize=(10, 10))
    ax = plt.subplot(111, polar=True)

    spokes = np.linspace(0, 2*np.pi, len(radar_metrics), endpoint=False)
    # Repeat the first angle at the end so each outline closes on itself.
    angles = np.concatenate((spokes, [spokes[0]]))

    for _, record in results_df.iterrows():
        scores = record[radar_metrics].values.flatten().tolist()
        closed_scores = scores + scores[:1]
        ax.plot(angles, closed_scores, 'o-', label=record['Model'])

    ax.set_thetagrids(angles[:-1] * 180/np.pi, radar_metrics)
    ax.set_title('Model Performance Radar Chart')
    ax.legend(bbox_to_anchor=(1.1, 1.1))
    _save_and_close('./comparison_plots/radar_chart.png')

    return saved
|
| 301 |
+
|
| 302 |
+
def compare_models_enhanced():
    """Compare several classifiers on ``df_global`` with metrics and plots.

    The last column of ``df_global`` is the target. Each model is scored
    with 5-fold cross-validation on the full data and then fit on a 70/30
    train/test split (``random_state=42``) for the test-set metrics.

    Returns:
        ``(results_df, plot_paths)``: one row of metrics per model and the
        image paths produced by ``create_model_comparison_plots``. When no
        dataset is loaded, a one-cell error DataFrame and an empty list, so
        both paths return the same two-element shape.
    """
    if df_global is None:
        return pd.DataFrame(
            {"Error": ["Please upload and preprocess a dataset first."]}
        ), []

    target = df_global.columns[-1]
    X = df_global.drop(target, axis=1)
    y = df_global[target]

    if y.dtype == 'object':
        y = LabelEncoder().fit_transform(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # ROC AUC is only reported for two-class test sets.
    is_binary = len(np.unique(y_test)) == 2

    # Define models to compare
    models = {
        "RandomForest": RandomForestClassifier(),
        "LogisticRegression": LogisticRegression(max_iter=1000),
        "GradientBoosting": GradientBoostingClassifier(),
        "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
        "SVC": SVC(probability=True)
    }

    results = []

    for name, model in models.items():
        # Cross validation
        cv_scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')

        # Time only the fit so "Training Time" excludes CV and scoring.
        fit_start = time.time()
        model.fit(X_train, y_train)
        training_time = time.time() - fit_start

        y_pred = model.predict(X_test)

        roc_auc = np.nan
        if is_binary and hasattr(model, 'predict_proba'):
            proba = model.predict_proba(X_test)
            # The positive-class column only exists when the model saw
            # both classes during training.
            if proba.shape[1] == 2:
                roc_auc = roc_auc_score(y_test, proba[:, 1])

        # Calculate metrics; zero_division=0 matches train_model and avoids
        # warnings for classes that are never predicted.
        metrics = {
            'Model': name,
            'CV Mean Accuracy': np.mean(cv_scores),
            'CV Std Dev': np.std(cv_scores),
            'Test Accuracy': accuracy_score(y_test, y_pred),
            'Precision': precision_score(y_test, y_pred, average='weighted', zero_division=0),
            'Recall': recall_score(y_test, y_pred, average='weighted', zero_division=0),
            'F1 Score': f1_score(y_test, y_pred, average='weighted', zero_division=0),
            'ROC AUC': roc_auc,
            'Training Time (s)': training_time
        }

        results.append(metrics)

        # Log to wandb
        if wandb.run:
            wandb.log({f"{name}_{k}": v for k, v in metrics.items() if k != 'Model'})

    # Create visualizations
    results_df = pd.DataFrame(results)
    plot_paths = create_model_comparison_plots(results_df)

    return results_df, plot_paths
|
| 362 |
+
|
| 363 |
+
def perform_ab_test(model_a_name, model_b_name):
    """Train two named classifiers on the same split and compare their scores.

    Args:
        model_a_name: Key of the baseline model in the model library
            (``RandomForest``, ``LogisticRegression``, ``GradientBoosting``,
            ``XGBoost`` or ``SVC``).
        model_b_name: Key of the challenger model; improvements are reported
            as B minus A.

    Returns:
        ``(report, plot_paths)``. ``report`` holds ``metrics_comparison``
        (the comparison table as a dict) and ``improvements``; on a missing
        dataset or unknown model name it is ``{"error": ...}`` and
        ``plot_paths`` is empty.
    """
    if df_global is None:
        return {"error": "Please upload and preprocess a dataset first."}, []

    target = df_global.columns[-1]
    X = df_global.drop(target, axis=1)
    y = df_global[target]

    if y.dtype == 'object':
        y = LabelEncoder().fit_transform(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Define all available models
    model_library = {
        "RandomForest": RandomForestClassifier(),
        "LogisticRegression": LogisticRegression(max_iter=1000),
        "GradientBoosting": GradientBoostingClassifier(),
        "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
        "SVC": SVC(probability=True)
    }

    # Get the selected models
    model_a = model_library.get(model_a_name)
    model_b = model_library.get(model_b_name)

    # Compare against None instead of using truthiness: scikit-learn
    # ensembles define __len__ over their fitted estimators, so `not model`
    # on an unfitted forest raises AttributeError.
    if model_a is None or model_b is None:
        return {"error": "Invalid model selection"}, []

    def _weighted_scores(y_pred):
        # Weighted-average test metrics for one model's predictions.
        return {
            'accuracy': accuracy_score(y_test, y_pred),
            'precision': precision_score(y_test, y_pred, average='weighted', zero_division=0),
            'recall': recall_score(y_test, y_pred, average='weighted', zero_division=0),
            'f1': f1_score(y_test, y_pred, average='weighted', zero_division=0)
        }

    # Train both models
    model_a.fit(X_train, y_train)
    model_b.fit(X_train, y_train)

    # Score both models on the held-out split
    metrics_a = _weighted_scores(model_a.predict(X_test))
    metrics_b = _weighted_scores(model_b.predict(X_test))

    # Calculate relative improvements (B relative to A)
    accuracy_delta = metrics_b['accuracy'] - metrics_a['accuracy']
    improvements = {
        'accuracy_improvement': accuracy_delta,
        'f1_improvement': metrics_b['f1'] - metrics_a['f1'],
        'relative_improvement': accuracy_delta / metrics_a['accuracy'] if metrics_a['accuracy'] != 0 else 0
    }

    # Create comparison DataFrame
    comparison_df = pd.DataFrame({
        'Metric': list(metrics_a.keys()),
        model_a_name: list(metrics_a.values()),
        model_b_name: list(metrics_b.values())
    })

    # Log to wandb
    if wandb.run:
        wandb.log({
            f"A_B_Test/{model_a_name}_metrics": metrics_a,
            f"A_B_Test/{model_b_name}_metrics": metrics_b,
            f"A_B_Test/Improvements": improvements
        })

    # Create visualization. The directory may not exist yet if the full
    # model comparison has not been run first.
    os.makedirs('./comparison_plots', exist_ok=True)
    # Draw on an explicit axes so the requested size is used and no empty
    # extra figure is left open.
    fig, ax = plt.subplots(figsize=(10, 6))
    comparison_df.set_index('Metric').plot(kind='bar', rot=0, ax=ax)
    ax.set_title(f'A/B Test: {model_a_name} vs {model_b_name}')
    ax.set_ylabel('Score')
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plot_path = './comparison_plots/ab_test_comparison.png'
    fig.savefig(plot_path, bbox_inches='tight')
    plt.close(fig)

    return {
        'metrics_comparison': comparison_df.to_dict(),
        'improvements': improvements
    }, [plot_path]
|
| 452 |
+
|
| 453 |
+
|
| 454 |
+
|
| 455 |
+
def explainability(_=None):
    """Produce SHAP and LIME explanation images for a random forest.

    A ``RandomForestClassifier`` is fit on an 80/20 split of ``df_global``
    (last column as target). A SHAP summary plot is drawn for the test rows
    and a LIME explanation for the first test row.

    Args:
        _: Ignored. Optional so the function works whether or not the UI
            event passes an input.

    Returns:
        ``(shap_path, lime_path)`` image paths; ``shap_path`` points at an
        error image when SHAP plotting fails. ``(None, None)`` when no
        dataset is loaded.
    """
    import warnings
    warnings.filterwarnings("ignore")

    if df_global is None:
        # Nothing to explain yet; return no images instead of crashing.
        return None, None

    target = df_global.columns[-1]
    X = df_global.drop(target, axis=1)
    y = df_global[target]

    if y.dtype == "object":
        y = LabelEncoder().fit_transform(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    model = RandomForestClassifier()
    model.fit(X_train, y_train)

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_test)

    try:
        class_idx = 0
        if isinstance(shap_values, list):
            # One array per class: explain the first class.
            sv = shap_values[class_idx]
        else:
            sv = shap_values

        if len(sv.shape) > 2:
            if sv.shape[1] == X_test.shape[1]:
                # Looks like (samples, features, classes): take the same
                # class as the list case rather than mixing classes.
                sv = sv[:, :, class_idx]
            else:
                # Unknown layout: flatten any extra dimensions.
                sv = sv.reshape(sv.shape[0], -1)

        if sv.shape == X_test.shape:
            # Use the real feature values so the plot's value colouring
            # carries information.
            X_shap_safe = X_test
        else:
            # Use safe feature names if mismatch, fallback to dummy
            num_features = sv.shape[1]
            if num_features <= X_test.shape[1]:
                feature_names = X_test.columns[:num_features]
            else:
                feature_names = [f"Feature_{i}" for i in range(num_features)]
            X_shap_safe = pd.DataFrame(np.zeros_like(sv), columns=feature_names)

        shap.summary_plot(sv, X_shap_safe, show=False)
        shap_path = "./shap_plot.png"
        plt.title("SHAP Summary")
        plt.savefig(shap_path)
        if wandb.run:
            wandb.log({"shap_summary": wandb.Image(shap_path)})
        # Close rather than clear so figures do not pile up across calls.
        plt.close()

    except Exception as e:
        shap_path = "./shap_error.png"
        print("SHAP plotting failed:", e)
        plt.figure(figsize=(6, 3))
        plt.text(0.5, 0.5, f"SHAP Error:\n{str(e)}", ha='center', va='center')
        plt.axis('off')
        plt.savefig(shap_path)
        if wandb.run:
            wandb.log({"shap_error": wandb.Image(shap_path)})
        plt.close()

    # LIME
    lime_explainer = lime.lime_tabular.LimeTabularExplainer(
        X_train.values,
        feature_names=X_train.columns.tolist(),
        class_names=[str(c) for c in np.unique(y_train)],
        mode='classification'
    )
    lime_exp = lime_explainer.explain_instance(X_test.iloc[0].values, model.predict_proba)
    lime_fig = lime_exp.as_pyplot_figure()
    lime_path = "./lime_plot.png"
    lime_fig.savefig(lime_path)
    if wandb.run:
        wandb.log({"lime_explanation": wandb.Image(lime_path)})
    plt.close(lime_fig)

    return shap_path, lime_path
|
| 529 |
+
|
| 530 |
+
# Gradio UI: upload/preview, agent insights, Optuna training, explainability,
# A/B testing and full model comparison, followed by the event wiring.
with gr.Blocks() as demo:
    gr.Markdown("## 📊 AI-Powered Data Analysis with Hyperparameter Optimization")

    with gr.Row():
        with gr.Column():
            file_input = gr.File(label="Upload CSV or Excel", type="filepath")
            df_output = gr.DataFrame(label="Cleaned Data Preview")
            file_input.change(fn=upload_file, inputs=file_input, outputs=df_output)

        with gr.Column():
            insights_output = gr.HTML(label="Insights from SmolAgent")
            visual_output = gr.Gallery(label="Visualizations (Auto-generated by Agent)", columns=2)
            agent_btn = gr.Button("Run AI Agent (5 Insights + 5 Visualizations)")

    with gr.Row():
        train_btn = gr.Button("Train Model with Optuna + WandB")
        metrics_output = gr.JSON(label="Performance Metrics")
        trials_output = gr.DataFrame(label="Top 7 Hyperparameter Trials")

    with gr.Row():
        explain_btn = gr.Button("SHAP + LIME Explainability")
        shap_img = gr.Image(label="SHAP Summary Plot")
        lime_img = gr.Image(label="LIME Explanation")

    # Add new A/B testing components
    # Both dropdowns offer the same models, so the list is defined once.
    ab_model_choices = ["RandomForest", "LogisticRegression", "GradientBoosting", "XGBoost", "SVC"]
    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🆚 Model A/B Testing")
            with gr.Row():
                model_a_select = gr.Dropdown(
                    choices=ab_model_choices,
                    label="Select Model A",
                    value="RandomForest"
                )
                model_b_select = gr.Dropdown(
                    choices=ab_model_choices,
                    label="Select Model B",
                    value="LogisticRegression"
                )
            ab_test_btn = gr.Button("Run A/B Test")

        with gr.Column():
            ab_test_results = gr.JSON(label="A/B Test Results")
            ab_test_plots = gr.Gallery(label="A/B Test Visualizations")

    # Add model comparison components
    with gr.Row():
        compare_btn = gr.Button("Compare All Models")
        comparison_results = gr.DataFrame(label="Model Comparison Results")
        comparison_plots = gr.Gallery(label="Comparison Visualizations")

    agent_btn.click(fn=analyze_data, inputs=[file_input], outputs=[insights_output, visual_output])
    train_btn.click(fn=train_model, inputs=[file_input], outputs=[metrics_output, trials_output])
    # explainability takes one (ignored) positional argument, so pass the
    # file input here just as the training handler does.
    explain_btn.click(fn=explainability, inputs=[file_input], outputs=[shap_img, lime_img])

    # New handlers for A/B testing and comparison
    ab_test_btn.click(
        fn=perform_ab_test,
        inputs=[model_a_select, model_b_select],
        outputs=[ab_test_results, ab_test_plots]
    )

    compare_btn.click(
        fn=compare_models_enhanced,
        inputs=[],
        outputs=[comparison_results, comparison_plots]
    )

demo.launch(debug=True)
|