clementBE committed on
Commit
3750790
·
verified ·
1 Parent(s): 8b22417

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -100
app.py CHANGED
@@ -5,77 +5,66 @@ from sklearn.model_selection import train_test_split
5
  from sklearn.ensemble import RandomForestClassifier
6
  from sklearn.metrics import classification_report
7
 
8
- # ✅ Load data from CSV or Excel
 
 
 
 
9
  def load_data(file):
 
10
  try:
11
- # 🔄 Use file object directly to avoid FileNotFoundError
12
- if file.name.endswith(".csv"):
13
- df = pd.read_csv(file)
14
  else:
15
- df = pd.read_excel(file)
16
-
17
- preview = df.head(5)
18
- summary = pd.DataFrame({
19
- "Column": df.columns,
20
- "Data Type": [df[col].dtype for col in df.columns],
21
- "Missing (%)": [df[col].isnull().mean() * 100 for col in df.columns]
22
- })
23
 
24
- return df, df.columns.tolist(), preview, summary.to_markdown(), ""
25
  except Exception as e:
26
- return None, [], pd.DataFrame(), "", f"❌ Error loading file: {e}"
27
-
28
- # ✅ Preprocess DataFrame
29
- def preprocess_dataframe(df, quantile_binning=False, count_words=False):
30
- df = df.copy()
31
-
32
- # ➕ Add _qbin columns for numeric columns
33
- if quantile_binning:
34
- numeric_cols = df.select_dtypes(include=np.number).columns
35
- for col in numeric_cols:
36
- try:
37
- df[col + "_qbin"] = pd.qcut(df[col], q=4, labels=False, duplicates='drop')
38
- except:
39
- pass # Some columns can't be binned (e.g., constant values)
40
-
41
- # ➕ Add _wordcount columns for text columns
42
- if count_words:
43
- text_cols = df.select_dtypes(include="object").columns
44
- for col in text_cols:
45
- df[col + "_wordcount"] = df[col].astype(str).apply(lambda x: len(x.split()))
46
-
47
- return df
48
-
49
- # ✅ Handle file input and update UI
50
- def on_file_change(file, quantile_binning, count_words):
51
- df, _, preview, summary_md, error = load_data(file)
52
- if df is None:
53
- return None, gr.update(choices=[], value=None), gr.update(choices=[], value=[]), pd.DataFrame(), "", "", "", error
54
-
55
- # 🔄 Preprocess data and get new columns
56
- df_processed = preprocess_dataframe(df, quantile_binning, count_words)
57
- columns = list(df_processed.columns)
58
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  return (
60
- df_processed, # Save in state
61
- gr.update(choices=columns, value=None), # Update target dropdown
62
- gr.update(choices=columns, value=[]), # Update feature selector
63
- preview, # Show original preview
64
- summary_md, # Show summary table
65
- df_processed.head(100), # Show processed data
66
- "", "", # Clear output and help box
67
  )
68
 
69
- # ✅ Train model
70
- def train_model(df, target, features):
71
- if df is None or target is None or not features:
72
- return "⚠️ Please upload data, select a target column and features.", ""
 
 
 
 
 
73
 
74
  try:
75
- X = df[features]
76
- y = df[target]
77
 
78
- # Handle categorical features
79
  X = pd.get_dummies(X)
80
 
81
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
@@ -83,61 +72,54 @@ def train_model(df, target, features):
83
  clf.fit(X_train, y_train)
84
  y_pred = clf.predict(X_test)
85
 
86
- report = classification_report(y_test, y_pred)
87
- help_text = "✅ Model trained successfully. You can review the metrics below."
88
- return report, help_text
89
-
90
  except Exception as e:
91
- return f"❌ Error training model: {e}", ""
92
-
93
- # ✅ Gradio App Interface
94
- with gr.Blocks(title="📊 ML Model Trainer with Quartiles and Word Counts") as app:
95
- gr.Markdown("## 🧠 Train a Machine Learning Model on Your Dataset")
96
 
97
- with gr.Row():
98
- file_input = gr.File(label="📁 Upload CSV or Excel", file_types=[".csv", ".xls", ".xlsx"])
99
- quantile_option = gr.Checkbox(label="Discretize into Quartiles", value=True)
100
- wordcount_option = gr.Checkbox(label="Count Words in Text Columns", value=True)
101
 
 
102
  with gr.Row():
103
- target_col = gr.Dropdown(label="🎯 Select Target Column")
104
- feature_cols = gr.CheckboxGroup(label="🧠 Select Feature Columns")
105
 
106
- with gr.Row():
107
- df_state = gr.State()
108
 
109
- with gr.Row():
110
- table_preview = gr.DataFrame(label="📋 Data Preview")
111
- processed_preview = gr.DataFrame(label="🔍 Processed Data (100 rows)")
 
112
 
113
- data_summary = gr.Markdown()
 
 
114
 
115
- with gr.Row():
116
- train_button = gr.Button("🚀 Train Model")
117
- output = gr.Textbox(label="📊 Classification Report", lines=10)
118
- help_box = gr.Textbox(label="ℹ️ Status", interactive=False)
119
 
120
- # 🔄 Events
121
  file_input.change(
122
- fn=on_file_change,
123
- inputs=[file_input, quantile_option, wordcount_option],
124
- outputs=[
125
- df_state,
126
- target_col,
127
- feature_cols,
128
- table_preview,
129
- data_summary,
130
- processed_preview,
131
- output,
132
- help_box,
133
- ],
134
  )
135
 
 
136
  train_button.click(
137
  fn=train_model,
138
- inputs=[df_state, target_col, feature_cols],
139
- outputs=[output, help_box],
140
  )
141
 
142
- # 🔁 Launch the app
143
  app.launch()
 
5
  from sklearn.ensemble import RandomForestClassifier
6
  from sklearn.metrics import classification_report
7
 
8
+ # Global states for original and processed data
9
+ original_df = None
10
+ processed_df = None
11
+
12
+ # ✅ STEP 1: Load file
13
  def load_data(file):
14
+ global original_df
15
  try:
16
+ if file.name.endswith('.csv'):
17
+ original_df = pd.read_csv(file)
 
18
  else:
19
+ original_df = pd.read_excel(file)
 
 
 
 
 
 
 
20
 
21
+ return original_df.head(10), "✅ File loaded successfully."
22
  except Exception as e:
23
+ return pd.DataFrame(), f"❌ Error: {e}"
24
+
25
+ # ✅ STEP 2: Process data (discretize + word count)
26
+ def process_data():
27
+ global original_df, processed_df
28
+
29
+ if original_df is None:
30
+ return pd.DataFrame(), gr.update(choices=[]), gr.update(choices=[]), "⚠️ Please load a dataset first."
31
+
32
+ df = original_df.copy()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
+ # Discretize numeric columns into quartiles
35
+ for col in df.select_dtypes(include=np.number).columns:
36
+ try:
37
+ df[col + "_qbin"] = pd.qcut(df[col], 4, labels=False, duplicates='drop')
38
+ except:
39
+ pass
40
+
41
+ # Add word count for text columns
42
+ for col in df.select_dtypes(include='object').columns:
43
+ df[col + "_wordcount"] = df[col].astype(str).apply(lambda x: len(x.split()))
44
+
45
+ processed_df = df.copy()
46
  return (
47
+ df.head(10),
48
+ gr.update(choices=df.columns.tolist()),
49
+ gr.update(choices=df.columns.tolist()),
50
+ "✅ Data processed: discretized and word counts added."
 
 
 
51
  )
52
 
53
+ # ✅ STEP 3: Train model
54
+ def train_model(target_col, feature_cols):
55
+ global processed_df
56
+
57
+ if processed_df is None:
58
+ return "⚠️ Please process your data first."
59
+
60
+ if not target_col or not feature_cols:
61
+ return "⚠️ Please select target and at least one feature."
62
 
63
  try:
64
+ X = processed_df[feature_cols]
65
+ y = processed_df[target_col]
66
 
67
+ # Handle categorical variables
68
  X = pd.get_dummies(X)
69
 
70
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 
72
  clf.fit(X_train, y_train)
73
  y_pred = clf.predict(X_test)
74
 
75
+ return classification_report(y_test, y_pred)
 
 
 
76
  except Exception as e:
77
+ return f"❌ Model training failed: {e}"
 
 
 
 
78
 
79
+ # ✅ Gradio UI
80
+ with gr.Blocks(title="Step-by-Step Model Trainer") as app:
81
+ gr.Markdown("## 🧠 Step-by-Step Model Trainer with Discretization and Word Count")
 
82
 
83
+ # Step 1: Load file
84
  with gr.Row():
85
+ file_input = gr.File(label="📁 Upload CSV or Excel")
86
+ load_output = gr.Textbox(label="ℹ️ File Load Status", interactive=False)
87
 
88
+ original_preview = gr.DataFrame(label="🔍 Original Data (First 10 Rows)")
 
89
 
90
+ # Step 2: Process Data
91
+ process_button = gr.Button("⚙️ Apply Discretization + Word Count")
92
+ processed_preview = gr.DataFrame(label="🔬 Processed Data (First 10 Rows)")
93
+ process_status = gr.Textbox(label="ℹ️ Process Status", interactive=False)
94
 
95
+ # Step 3: Select Columns
96
+ target_selector = gr.Dropdown(label="🎯 Target Column")
97
+ feature_selector = gr.CheckboxGroup(label="📊 Feature Columns")
98
 
99
+ # Step 4: Train
100
+ train_button = gr.Button("🚀 Train Model")
101
+ train_output = gr.Textbox(label="📈 Classification Report", lines=10)
 
102
 
103
+ # Step 1: File input event
104
  file_input.change(
105
+ fn=load_data,
106
+ inputs=[file_input],
107
+ outputs=[original_preview, load_output]
108
+ )
109
+
110
+ # Step 2: Process data event
111
+ process_button.click(
112
+ fn=process_data,
113
+ inputs=[],
114
+ outputs=[processed_preview, target_selector, feature_selector, process_status]
 
 
115
  )
116
 
117
+ # Step 3 + 4: Train model event
118
  train_button.click(
119
  fn=train_model,
120
+ inputs=[target_selector, feature_selector],
121
+ outputs=[train_output]
122
  )
123
 
124
+ # Launch the app
125
  app.launch()