pavanmutha committed on
Commit
a27678b
·
verified ·
1 Parent(s): df3617f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -39
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  import gradio as gr
3
  import pandas as pd
@@ -7,44 +8,35 @@ import shap
7
  import lime.lime_tabular
8
  import optuna
9
  import wandb
10
- import ast
11
  from smolagents import HfApiModel, CodeAgent
12
  from huggingface_hub import login
13
  from sklearn.ensemble import RandomForestClassifier
14
  from sklearn.model_selection import train_test_split, cross_val_score
15
  from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
 
16
 
17
- # Authenticate Hugging Face Hub
18
  hf_token = os.getenv("HF_TOKEN")
19
  login(token=hf_token)
20
 
21
- # Setup SmolAgent with LLM
22
  model = HfApiModel("mistralai/Mixtral-8x7B-Instruct-v0.1", token=hf_token)
23
  agent = CodeAgent(
24
  tools=[],
25
  model=model,
26
- additional_authorized_imports=["numpy", "pandas", "matplotlib.pyplot", "seaborn", "sklearn"],
27
- )
28
 
29
- # Data cleaning function
30
- from sklearn.preprocessing import LabelEncoder
31
 
32
  def clean_data(df):
33
  df = df.dropna(how='all', axis=1).dropna(how='all', axis=0)
34
-
35
- # Encode categorical features
36
  for col in df.select_dtypes(include='object').columns:
37
  df[col] = df[col].astype(str)
38
  df[col] = LabelEncoder().fit_transform(df[col])
39
-
40
  df = df.fillna(df.mean(numeric_only=True))
41
  return df
42
 
43
-
44
- # Global dataframe
45
- df_global = None
46
-
47
- # Upload and clean
48
  def upload_file(file):
49
  global df_global
50
  ext = os.path.splitext(file.name)[-1]
@@ -53,7 +45,6 @@ def upload_file(file):
53
  df_global = df
54
  return df.head()
55
 
56
- # Run SmolAgent for analysis
57
  def run_agent(_):
58
  try:
59
  output = agent.run(
@@ -64,7 +55,6 @@ def run_agent(_):
64
  except Exception as e:
65
  return f"SmolAgent Error: {str(e)}"
66
 
67
- # Train model + Optuna + WandB
68
  def train_model(_):
69
  wandb.login(key=os.environ.get("WANDB_API_KEY"))
70
  wandb_run = wandb.init(project="huggingface-data-analysis", name="Optuna_Run", reinit=True)
@@ -72,6 +62,10 @@ def train_model(_):
72
  target = df_global.columns[-1]
73
  X = df_global.drop(target, axis=1)
74
  y = df_global[target]
 
 
 
 
75
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
76
 
77
  def objective(trial):
@@ -104,42 +98,55 @@ def train_model(_):
104
  top_trials = pd.DataFrame(study.trials_dataframe().sort_values(by="value", ascending=False).head(7))
105
  return metrics, top_trials
106
 
107
- # SHAP & LIME
108
  def explainability(_):
109
  target = df_global.columns[-1]
110
  X = df_global.drop(target, axis=1)
111
  y = df_global[target]
112
 
 
 
 
 
 
113
  model = RandomForestClassifier()
114
- model.fit(X, y)
115
 
116
- # SHAP
117
  explainer = shap.TreeExplainer(model)
118
- shap_values = explainer.shap_values(X)
119
- shap.summary_plot(shap_values, X, show=False)
120
- shap_fig_path = "./shap_plot.png"
121
- plt.savefig(shap_fig_path)
122
- plt.clf()
123
-
124
- # LIME
125
- lime_explainer = lime.lime_tabular.LimeTabularExplainer(X.values, feature_names=X.columns, class_names=['target'], mode="classification")
126
- lime_exp = lime_explainer.explain_instance(X.iloc[0].values, model.predict_proba)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  lime_fig = lime_exp.as_pyplot_figure()
128
  lime_fig_path = "./lime_plot.png"
129
  lime_fig.savefig(lime_fig_path)
 
130
  plt.clf()
131
 
132
- # Log to wandb
133
- wandb.init(project="huggingface-data-analysis", name="Explainability", reinit=True)
134
- wandb.log({
135
- "shap_summary": wandb.Image(shap_fig_path),
136
- "lime_explanation": wandb.Image(lime_fig_path)
137
- })
138
- wandb.finish()
139
-
140
- return shap_fig_path, lime_fig_path
141
 
142
- # Gradio UI
143
  with gr.Blocks() as demo:
144
  gr.Markdown("## 📊 AI-Powered Data Analysis with Hyperparameter Optimization")
145
 
 
1
+
2
  import os
3
  import gradio as gr
4
  import pandas as pd
 
8
  import lime.lime_tabular
9
  import optuna
10
  import wandb
 
11
  from smolagents import HfApiModel, CodeAgent
12
  from huggingface_hub import login
13
  from sklearn.ensemble import RandomForestClassifier
14
  from sklearn.model_selection import train_test_split, cross_val_score
15
  from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
16
+ from sklearn.preprocessing import LabelEncoder
17
 
18
# Authenticate with Hugging Face.
# NOTE(review): login() will raise if the HF_TOKEN env var is unset — confirm
# the Space always provides it as a secret.
hf_token = os.getenv("HF_TOKEN")
login(token=hf_token)

# SmolAgent initialization: a code-writing agent backed by the hosted Mixtral
# instruct model, restricted to the whitelisted data-science imports below.
model = HfApiModel("mistralai/Mixtral-8x7B-Instruct-v0.1", token=hf_token)
agent = CodeAgent(
    tools=[],  # no extra tools — the agent only generates/executes code
    model=model,
    additional_authorized_imports=["numpy", "pandas", "matplotlib.pyplot", "seaborn", "sklearn"]
)

# Most recently uploaded dataset, shared mutable state across the Gradio
# callbacks (upload_file / run_agent / train_model / explainability).
df_global = None
 
31
 
32
def clean_data(df):
    """Clean a raw dataframe for modeling.

    Drops columns and rows that are entirely empty, integer-encodes every
    object (string) column with a LabelEncoder, and mean-imputes any
    remaining numeric NaNs. Returns the cleaned dataframe.
    """
    df = df.dropna(axis=1, how='all').dropna(axis=0, how='all')

    # Encode each text column as integer labels (values stringified first so
    # the encoder sees a uniform dtype).
    for column in df.select_dtypes(include='object').columns:
        as_text = df[column].astype(str)
        df[column] = LabelEncoder().fit_transform(as_text)

    # Fill whatever numeric gaps remain with the per-column mean.
    return df.fillna(df.mean(numeric_only=True))
39
 
 
 
 
 
 
40
  def upload_file(file):
41
  global df_global
42
  ext = os.path.splitext(file.name)[-1]
 
45
  df_global = df
46
  return df.head()
47
 
 
48
  def run_agent(_):
49
  try:
50
  output = agent.run(
 
55
  except Exception as e:
56
  return f"SmolAgent Error: {str(e)}"
57
 
 
58
  def train_model(_):
59
  wandb.login(key=os.environ.get("WANDB_API_KEY"))
60
  wandb_run = wandb.init(project="huggingface-data-analysis", name="Optuna_Run", reinit=True)
 
62
  target = df_global.columns[-1]
63
  X = df_global.drop(target, axis=1)
64
  y = df_global[target]
65
+
66
+ if y.dtype == "object":
67
+ y = LabelEncoder().fit_transform(y)
68
+
69
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
70
 
71
  def objective(trial):
 
98
  top_trials = pd.DataFrame(study.trials_dataframe().sort_values(by="value", ascending=False).head(7))
99
  return metrics, top_trials
100
 
 
101
def explainability(_):
    """Produce SHAP and LIME explanations for a RandomForest fit on df_global.

    Treats the last column of the globally uploaded dataframe as the target,
    label-encodes string targets, trains a fresh RandomForestClassifier on an
    80/20 split, saves SHAP summary plot(s) and a LIME explanation of the
    first test row as PNG files, logs the images to Weights & Biases, and
    returns the two image paths for display in the Gradio UI.

    Returns:
        tuple[str, str]: (path to a SHAP summary PNG, path to the LIME PNG).
    """
    target = df_global.columns[-1]
    X = df_global.drop(target, axis=1)
    y = df_global[target]

    # Mirror train_model(): sklearn classifiers need integer-encoded targets.
    if y.dtype == "object":
        y = LabelEncoder().fit_transform(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    model = RandomForestClassifier()
    model.fit(X_train, y_train)

    # Fix: wandb.log() requires an active run. The previous revision opened a
    # dedicated "Explainability" run here; without one, logging below fails if
    # train_model() did not leave a run open. Start one only if needed.
    if wandb.run is None:
        wandb.init(project="huggingface-data-analysis", name="Explainability", reinit=True)

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_test)

    # Older shap releases return one array per class for classifiers; newer
    # ones return a single array. NOTE(review): a 3-D ndarray return
    # (n_samples, n_features, n_classes) would take the else branch — confirm
    # against the pinned shap version.
    shap_path = None
    if isinstance(shap_values, list):
        for i, class_vals in enumerate(shap_values):
            shap.summary_plot(class_vals, X_test, show=False)
            class_path = f"./shap_class_{i}.png"
            plt.title(f"SHAP Summary - Class {i}")
            plt.savefig(class_path)
            wandb.log({f"shap_class_{i}": wandb.Image(class_path)})
            plt.clf()
            if shap_path is None:
                # First class's plot is the one surfaced in the UI.
                shap_path = class_path
    else:
        shap.summary_plot(shap_values, X_test, show=False)
        shap_path = "./shap_plot.png"
        plt.savefig(shap_path)
        wandb.log({"shap_summary": wandb.Image(shap_path)})
        plt.clf()

    # LIME explanation for a single representative (first) test instance.
    lime_explainer = lime.lime_tabular.LimeTabularExplainer(
        X_train.values,
        feature_names=X_train.columns.tolist(),
        class_names=[str(label) for label in np.unique(y_train)],
        mode='classification'
    )
    lime_exp = lime_explainer.explain_instance(X_test.iloc[0].values, model.predict_proba)
    lime_fig = lime_exp.as_pyplot_figure()
    lime_fig_path = "./lime_plot.png"
    lime_fig.savefig(lime_fig_path)
    wandb.log({"lime_explanation": wandb.Image(lime_fig_path)})
    plt.clf()

    return shap_path, lime_fig_path
 
 
 
 
 
 
 
 
149
 
 
150
  with gr.Blocks() as demo:
151
  gr.Markdown("## 📊 AI-Powered Data Analysis with Hyperparameter Optimization")
152