clementBE committed on
Commit
6c51406
·
verified ·
1 Parent(s): e6b6ed3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -45
app.py CHANGED
@@ -1,71 +1,72 @@
1
  import gradio as gr
2
  import pandas as pd
 
3
  from sklearn.feature_extraction.text import TfidfVectorizer
4
  from sklearn.linear_model import LogisticRegression
5
- from sklearn.pipeline import Pipeline
6
 
7
- model = None
8
- target_column = None
9
  df_train = None
 
10
 
11
- def load_training_file(file_path):
12
- xls = pd.ExcelFile(file_path)
13
- sheet_names = xls.sheet_names
14
- return gr.update(choices=sheet_names, value=sheet_names[0]) # return gr.update for dropdown
15
-
16
- def load_columns(sheet_name, file_path):
17
  global df_train
18
- df_train = pd.read_excel(file_path, sheet_name=sheet_name)
19
- column_names = df_train.columns.tolist()
20
- return gr.update(choices=column_names, value=column_names[0]) # also update dropdown
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- def train_model(column):
23
- global model, target_column, df_train
24
- if df_train is None or column not in df_train.columns:
25
- return "Please load a valid training file and column."
26
- target_column = column
27
- df_filtered = df_train.dropna(subset=["Sentence", target_column])
28
- X = df_filtered["Sentence"]
 
29
  y = df_filtered[target_column]
 
30
  model = Pipeline([
31
  ("tfidf", TfidfVectorizer()),
32
  ("clf", LogisticRegression(max_iter=1000))
33
  ])
34
  model.fit(X, y)
35
- return f"✅ Model trained on {len(X)} samples for target column: {target_column}"
36
 
37
- def test_model(file_path):
38
- global model, target_column
39
  if model is None:
40
- return "⚠️ Please train a model first."
41
- df_test = pd.read_excel(file_path)
42
- if "Sentence" not in df_test.columns:
43
- return "❌ Test file must contain a 'Sentence' column."
44
- df_test["Predicted_" + target_column] = model.predict(df_test["Sentence"].fillna(""))
45
- return df_test.head(20)
46
 
47
  with gr.Blocks() as demo:
48
- gr.Markdown("## 🧠 Text Classifier with XLSX (Gradio)")
49
-
50
  with gr.Row():
51
- training_file = gr.File(label="Upload Training XLSX")
52
- load_btn = gr.Button("Load Sheets")
53
-
54
- sheet_dropdown = gr.Dropdown(label="Select Sheet", choices=[])
55
- column_dropdown = gr.Dropdown(label="Select Target Column", choices=[])
56
- train_btn = gr.Button("Train Model")
57
- status_output = gr.Textbox(label="Training Output")
58
 
59
  with gr.Row():
60
- test_file = gr.File(label="Upload Test XLSX")
61
- test_btn = gr.Button("Test Model")
 
62
 
63
- prediction_output = gr.Dataframe(label="Predictions")
 
 
 
64
 
65
- # Events
66
- load_btn.click(fn=load_training_file, inputs=training_file, outputs=sheet_dropdown)
67
- sheet_dropdown.change(fn=load_columns, inputs=[sheet_dropdown, training_file], outputs=column_dropdown)
68
- train_btn.click(fn=train_model, inputs=column_dropdown, outputs=status_output)
69
- test_btn.click(fn=test_model, inputs=test_file, outputs=prediction_output)
70
 
71
  demo.launch()
 
1
  import gradio as gr
2
  import pandas as pd
3
+ from sklearn.pipeline import Pipeline
4
  from sklearn.feature_extraction.text import TfidfVectorizer
5
  from sklearn.linear_model import LogisticRegression
6
+ import tempfile
7
 
 
 
8
  df_train = None
9
+ model = None
10
 
11
+ def load_training_file(file):
 
 
 
 
 
12
  global df_train
13
+ if file is None:
14
+ return "❌ Please upload a file.", gr.update(choices=[], value=None), gr.update(choices=[], value=None)
15
+
16
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp:
17
+ tmp.write(file.read_bytes())
18
+ tmp_path = tmp.name
19
+
20
+ df_train = pd.read_excel(tmp_path)
21
+ col_names = list(df_train.columns)
22
+ return f"✅ Loaded file with {len(df_train)} rows", gr.update(choices=col_names, value=col_names[0]), gr.update(choices=col_names, value=col_names[-1])
23
+
24
+ def train_model(text_column, target_column):
25
+ global model, df_train
26
+ if df_train is None:
27
+ return "⚠️ Please load a training file first."
28
 
29
+ if text_column not in df_train.columns or target_column not in df_train.columns:
30
+ return "❌ Selected columns not found in the data."
31
+
32
+ df_filtered = df_train.dropna(subset=[text_column, target_column])
33
+ if df_filtered.empty:
34
+ return "❌ No valid data after dropping missing values."
35
+
36
+ X = df_filtered[text_column]
37
  y = df_filtered[target_column]
38
+
39
  model = Pipeline([
40
  ("tfidf", TfidfVectorizer()),
41
  ("clf", LogisticRegression(max_iter=1000))
42
  ])
43
  model.fit(X, y)
44
+ return f"✅ Model trained with {len(X)} samples."
45
 
46
+ def predict(text):
 
47
  if model is None:
48
+ return "⚠️ Please train the model first."
49
+ return model.predict([text])[0]
 
 
 
 
50
 
51
  with gr.Blocks() as demo:
52
+ gr.Markdown("## 🧠 Text Classification Trainer")
53
+
54
  with gr.Row():
55
+ training_file = gr.File(label="Upload Excel file (.xlsx)")
56
+ status = gr.Textbox(label="Status", interactive=False)
 
 
 
 
 
57
 
58
  with gr.Row():
59
+ text_column = gr.Dropdown(choices=[], label="Select Text Column")
60
+ target_column = gr.Dropdown(choices=[], label="Select Target Column")
61
+ train_btn = gr.Button("Train Model")
62
 
63
+ with gr.Row():
64
+ input_text = gr.Textbox(label="Enter text to predict")
65
+ output_label = gr.Textbox(label="Predicted Label", interactive=False)
66
+ predict_btn = gr.Button("Predict")
67
 
68
+ training_file.change(fn=load_training_file, inputs=[training_file], outputs=[status, text_column, target_column])
69
+ train_btn.click(fn=train_model, inputs=[text_column, target_column], outputs=[status])
70
+ predict_btn.click(fn=predict, inputs=[input_text], outputs=[output_label])
 
 
71
 
72
  demo.launch()