Mehak-Mazhar committed on
Commit
90b6905
·
verified ·
1 Parent(s): b911157

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -167
app.py CHANGED
@@ -1,181 +1,36 @@
1
  import pandas as pd
2
- import numpy as np
3
- import matplotlib.pyplot as plt
4
- from sklearn.model_selection import train_test_split, GridSearchCV
5
- from sklearn.preprocessing import StandardScaler, OneHotEncoder
6
- from sklearn.impute import SimpleImputer
7
- from sklearn.compose import ColumnTransformer
8
- from sklearn.pipeline import Pipeline
9
- from sklearn.linear_model import LogisticRegression
10
- from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
11
  import gradio as gr
12
 
13
-
14
- # Function to load CSV/Excel
15
  def load_csv(file_path):
16
  try:
17
- df = pd.read_csv(file_path)
18
- except Exception as e:
19
  try:
20
- df = pd.read_excel(file_path)
21
- except Exception as e2:
22
- return None, f"Failed to read file: {e} / {e2}"
23
- return df, None
24
-
25
 
26
- # File upload handler
27
  def on_upload(file):
28
- if file is None:
29
- return gr.Dropdown.update(choices=[]), "No file uploaded", None, pd.DataFrame()
30
-
31
- df, err = load_csv(file.name) # use file.name to get path
32
  if err:
33
- return gr.Dropdown.update(choices=[]), f"Error: {err}", None, pd.DataFrame()
34
-
35
- cols = df.columns.tolist()
36
- status_msg = f"Loaded {len(df)} rows, {len(cols)} columns"
37
- preview_df = df.head(20)
38
- return gr.Dropdown.update(choices=cols, value=cols[-1] if cols else None), status_msg, df, preview_df
39
-
40
-
41
- # Build preprocessing pipeline
42
- def build_pipeline(df, target_col, impute_strategy, apply_scaling, encode_categorical):
43
- X = df.drop(columns=[target_col])
44
- numeric_cols = X.select_dtypes(include=[np.number]).columns.tolist()
45
- categorical_cols = X.select_dtypes(exclude=[np.number]).columns.tolist()
46
-
47
- transformers = []
48
- if numeric_cols:
49
- num_transformers = []
50
- if impute_strategy != 'none':
51
- num_transformers.append(('imputer', SimpleImputer(strategy=impute_strategy)))
52
- if apply_scaling:
53
- num_transformers.append(('scaler', StandardScaler()))
54
- from sklearn.pipeline import make_pipeline
55
- transformers.append(('num', make_pipeline(*[t[1] for t in num_transformers]), numeric_cols))
56
-
57
- if categorical_cols and encode_categorical:
58
- cat_transformer = Pipeline(steps=[
59
- ('imputer', SimpleImputer(strategy='most_frequent')),
60
- ('ohe', OneHotEncoder(handle_unknown='ignore', sparse=False))
61
- ])
62
- transformers.append(('cat', cat_transformer, categorical_cols))
63
-
64
- preprocessor = ColumnTransformer(transformers=transformers, remainder='passthrough') if transformers else 'passthrough'
65
-
66
- pipe = Pipeline(steps=[('preproc', preprocessor), ('clf', LogisticRegression(max_iter=200))])
67
- return pipe
68
-
69
-
70
- # Train model
71
- def train_model(df, target_col, test_size, random_state, impute_strategy, apply_scaling, encode_categorical,
72
- use_grid, c_min, c_max, c_steps, penalties, solver, cv_folds, max_iter, n_jobs):
73
-
74
- if df is None:
75
- return "No data loaded", None, None, None
76
- if target_col not in df.columns:
77
- return f"Target column '{target_col}' not found", None, None, None
78
-
79
- data = df.copy().dropna(subset=[target_col])
80
- y = data[target_col]
81
- if y.dtype == object or y.dtype.name == 'category' or y.dtype == bool:
82
- y = pd.factorize(y)[0]
83
-
84
- X = data.drop(columns=[target_col])
85
-
86
- X_train, X_test, y_train, y_test = train_test_split(
87
- X, y, test_size=test_size, random_state=random_state,
88
- stratify=y if len(np.unique(y)) > 1 else None
89
- )
90
-
91
- pipe = build_pipeline(pd.concat([X_train, y_train], axis=1), target_col, impute_strategy, apply_scaling, encode_categorical)
92
- pipe.named_steps['clf'].max_iter = max_iter
93
-
94
- if use_grid:
95
- C_values = np.linspace(c_min, c_max, int(max(1, c_steps)))
96
- param_grid = {
97
- 'clf__C': C_values,
98
- 'clf__penalty': penalties if penalties else ['l2'],
99
- 'clf__solver': [solver]
100
- }
101
- gs = GridSearchCV(pipe, param_grid, cv=cv_folds, n_jobs=n_jobs, scoring='accuracy')
102
- gs.fit(X_train, y_train)
103
- model = gs.best_estimator_
104
- extra = f"Best params: {gs.best_params_}"
105
- else:
106
- clf = pipe.named_steps['clf']
107
- clf.set_params(C=float((c_min + c_max) / 2), penalty=penalties[0] if penalties else 'l2', solver=solver)
108
- pipe.fit(X_train, y_train)
109
- model = pipe
110
- extra = "Trained with provided hyperparameters"
111
-
112
- test_pred = model.predict(X_test)
113
- acc = accuracy_score(y_test, test_pred)
114
- report = classification_report(y_test, test_pred)
115
- cm = confusion_matrix(y_test, test_pred)
116
-
117
- # Confusion matrix plot
118
- fig, ax = plt.subplots(figsize=(4, 4))
119
- ax.imshow(cm, interpolation='nearest')
120
- ax.set_title('Confusion matrix')
121
- ax.set_xlabel('Predicted')
122
- ax.set_ylabel('Actual')
123
- for i in range(cm.shape[0]):
124
- for j in range(cm.shape[1]):
125
- ax.text(j, i, str(cm[i, j]), ha='center', va='center',
126
- color='white' if cm[i, j] > cm.max() / 2 else 'black')
127
- plt.tight_layout()
128
-
129
- return f"Accuracy: {acc:.4f}\n{extra}", fig, report, str(model)
130
-
131
-
132
- # Gradio Interface
133
- with gr.Blocks(title="CSV -> Logistic Regression") as demo:
134
- gr.Markdown("## CSV → Logistic Regression with Hyperparameter Tuning")
135
-
136
- with gr.Row():
137
- with gr.Column():
138
- file_input = gr.File(label="Upload CSV/Excel file", file_types=['.csv', '.xls', '.xlsx'])
139
- load_status = gr.Textbox(label="File status", interactive=False)
140
- target_dropdown = gr.Dropdown(label="Select target column", choices=[])
141
- preview_output = gr.Dataframe(label="Data Preview", interactive=False)
142
-
143
- with gr.Row():
144
- with gr.Column():
145
- impute_radio = gr.Radio(['mean', 'median', 'most_frequent', 'none'], value='mean', label='Numeric imputation')
146
- scaler_checkbox = gr.Checkbox(label='Apply Standard Scaling', value=True)
147
- encode_checkbox = gr.Checkbox(label='One-Hot Encode categorical', value=True)
148
- test_size = gr.Slider(0.05, 0.5, value=0.2, step=0.05, label='Test size')
149
- random_state = gr.Number(value=42, precision=0, label='Random state')
150
-
151
- use_grid = gr.Checkbox(label='Use GridSearchCV', value=True)
152
- c_min = gr.Number(value=0.01, label='C min')
153
- c_max = gr.Number(value=10.0, label='C max')
154
- c_steps = gr.Slider(1, 20, value=5, step=1, label='C steps')
155
- penalties = gr.CheckboxGroup(['l1', 'l2', 'elasticnet', 'none'], value=['l2'], label='Penalties')
156
- solver = gr.Dropdown(['lbfgs', 'liblinear', 'saga'], value='lbfgs', label='Solver')
157
- max_iter = gr.Slider(50, 1000, value=200, step=10, label='Max iterations')
158
- cv_folds = gr.Slider(2, 10, value=5, step=1, label='CV folds')
159
- n_jobs = gr.Slider(1, 8, value=1, step=1, label='n_jobs')
160
-
161
- train_btn = gr.Button("Train Model")
162
-
163
- with gr.Row():
164
- accuracy_text = gr.Textbox(label='Accuracy & Notes', interactive=False)
165
- conf_plot = gr.Plot(label='Confusion Matrix')
166
- class_report = gr.Textbox(label='Classification Report', interactive=False)
167
- model_obj = gr.Textbox(label='Model', interactive=False)
168
 
169
- df_state = gr.State()
 
 
170
 
171
- file_input.change(fn=on_upload, inputs=file_input, outputs=[target_dropdown, load_status, df_state, preview_output])
 
 
172
 
173
- train_btn.click(
174
- fn=train_model,
175
- inputs=[df_state, target_dropdown, test_size, random_state, impute_radio, scaler_checkbox, encode_checkbox,
176
- use_grid, c_min, c_max, c_steps, penalties, solver, cv_folds, max_iter, n_jobs],
177
- outputs=[accuracy_text, conf_plot, class_report, model_obj]
178
- )
179
 
180
- if __name__ == '__main__':
181
  demo.launch()
 
1
  import pandas as pd
 
 
 
 
 
 
 
 
 
2
  import gradio as gr
3
 
4
# File loading function
def load_csv(file_path):
    """Read a tabular file into a DataFrame.

    Tries the CSV parser first and falls back to the Excel parser.
    Returns a ``(df, None)`` pair on success, or ``(None, message)``
    when both parsers raise.
    """
    try:
        frame = pd.read_csv(file_path)
    except Exception as e_csv:
        # CSV parse failed — the upload may be an Excel workbook instead.
        try:
            frame = pd.read_excel(file_path)
        except Exception as e_xls:
            # Neither parser succeeded; report both errors to the caller.
            return None, f"Failed to read file. CSV error: {e_csv} | Excel error: {e_xls}"
    return frame, None
 
 
13
 
14
# Upload handler
def on_upload(file):
    """Gradio callback for a new upload.

    Returns a ``(status message, preview frame)`` pair: a human-readable
    status string plus the first 20 rows of the parsed data (or an empty
    DataFrame when nothing could be loaded).
    """
    # Nothing selected yet (or the selection was cleared).
    if not file:
        return "No file uploaded", pd.DataFrame()

    # `file` is a Gradio file wrapper; `.name` holds the temp-file path.
    df, err = load_csv(file.name)
    if not err:
        return f"Loaded {len(df)} rows, {len(df.columns)} columns", df.head(20)
    return f"Error: {err}", pd.DataFrame()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
# Gradio UI
# Top-level script block: builds the upload/preview interface and wires the
# change event to on_upload. Runs at import time; launch is guarded below.
with gr.Blocks() as demo:
    gr.Markdown("## 📂 CSV/Excel File Upload & Preview")

    # NOTE(review): type="file" is the legacy Gradio 3.x File API; on_upload
    # reads `file.name`, so this argument must stay in sync with the handler
    # if the Gradio version is upgraded — confirm against the installed version.
    # NOTE(review): file_types omits ".xls" even though the loader also tries
    # pd.read_excel — presumably intentional, but verify.
    file_input = gr.File(label="Upload CSV or Excel File", file_types=[".csv", ".xlsx"], type="file")
    status_output = gr.Textbox(label="Status")
    preview_output = gr.DataFrame(label="Preview (first 20 rows)")

    # Re-run the handler every time the selected file changes; it fills both
    # the status textbox and the preview table.
    file_input.change(fn=on_upload, inputs=file_input, outputs=[status_output, preview_output])

# Only start the web server when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()