clementBE committed on
Commit
b412fe9
·
verified ·
1 Parent(s): 6c51406

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -36
app.py CHANGED
@@ -1,72 +1,88 @@
1
  import gradio as gr
2
  import pandas as pd
3
- from sklearn.pipeline import Pipeline
4
  from sklearn.feature_extraction.text import TfidfVectorizer
5
  from sklearn.linear_model import LogisticRegression
6
- import tempfile
7
 
8
  df_train = None
9
  model = None
 
10
 
11
  def load_training_file(file):
12
  global df_train
13
  if file is None:
14
  return "โŒ Please upload a file.", gr.update(choices=[], value=None), gr.update(choices=[], value=None)
15
-
16
- with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp:
17
- tmp.write(file.read_bytes())
18
- tmp_path = tmp.name
19
 
20
- df_train = pd.read_excel(tmp_path)
21
  col_names = list(df_train.columns)
 
22
  return f"โœ… Loaded file with {len(df_train)} rows", gr.update(choices=col_names, value=col_names[0]), gr.update(choices=col_names, value=col_names[-1])
23
 
24
  def train_model(text_column, target_column):
25
- global model, df_train
 
26
  if df_train is None:
27
- return "โš ๏ธ Please load a training file first."
28
 
29
  if text_column not in df_train.columns or target_column not in df_train.columns:
30
- return "โŒ Selected columns not found in the data."
31
 
32
  df_filtered = df_train.dropna(subset=[text_column, target_column])
33
- if df_filtered.empty:
34
- return "โŒ No valid data after dropping missing values."
35
 
36
- X = df_filtered[text_column]
 
37
  y = df_filtered[target_column]
38
 
39
- model = Pipeline([
40
- ("tfidf", TfidfVectorizer()),
41
- ("clf", LogisticRegression(max_iter=1000))
42
- ])
43
  model.fit(X, y)
44
- return f"โœ… Model trained with {len(X)} samples."
45
 
46
- def predict(text):
47
- if model is None:
48
- return "โš ๏ธ Please train the model first."
49
- return model.predict([text])[0]
 
 
 
 
 
50
 
51
  with gr.Blocks() as demo:
52
- gr.Markdown("## ๐Ÿง  Text Classification Trainer")
53
-
54
  with gr.Row():
55
- training_file = gr.File(label="Upload Excel file (.xlsx)")
56
- status = gr.Textbox(label="Status", interactive=False)
57
 
 
58
  with gr.Row():
59
- text_column = gr.Dropdown(choices=[], label="Select Text Column")
60
- target_column = gr.Dropdown(choices=[], label="Select Target Column")
61
- train_btn = gr.Button("Train Model")
 
 
62
 
63
  with gr.Row():
64
- input_text = gr.Textbox(label="Enter text to predict")
65
- output_label = gr.Textbox(label="Predicted Label", interactive=False)
66
- predict_btn = gr.Button("Predict")
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
- training_file.change(fn=load_training_file, inputs=[training_file], outputs=[status, text_column, target_column])
69
- train_btn.click(fn=train_model, inputs=[text_column, target_column], outputs=[status])
70
- predict_btn.click(fn=predict, inputs=[input_text], outputs=[output_label])
 
 
71
 
72
- demo.launch()
 
 
1
  import gradio as gr
2
  import pandas as pd
 
3
  from sklearn.feature_extraction.text import TfidfVectorizer
4
  from sklearn.linear_model import LogisticRegression
 
5
 
6
# Module-level state shared by the Gradio callbacks below.
# All three stay None until the matching step has succeeded.

# DataFrame read from the uploaded workbook by load_training_file().
df_train = None
# Fitted LogisticRegression classifier, set by train_model().
model = None
# Fitted TfidfVectorizer paired with `model`, set by train_model().
vectorizer = None
9
 
10
def load_training_file(file):
    """Read the uploaded Excel workbook into the module-level ``df_train``.

    Args:
        file: Value handed over by the upload component. ``None`` means
            nothing was uploaded. Otherwise it is either an object exposing
            the file path as ``.name`` or a plain path string.

    Returns:
        A 3-tuple ``(status, text_update, target_update)``. ``status`` is a
        human-readable message; the two updates repopulate the text-column
        and target-column dropdowns. On success the first column is
        preselected as text and the last column as target. On any failure
        both dropdowns are cleared and ``df_train`` is left unchanged.
    """
    global df_train

    def _failure(message):
        # Fresh update objects per dropdown so they never share state.
        return (
            message,
            gr.update(choices=[], value=None),
            gr.update(choices=[], value=None),
        )

    if file is None:
        return _failure("❌ Please upload a file.")

    # Accept both file-like wrappers (``.name``) and plain path strings.
    path = getattr(file, "name", file)
    try:
        loaded = pd.read_excel(path)
    except Exception as exc:  # UI boundary: report instead of crashing.
        return _failure(f"❌ Could not read file: {exc}")

    col_names = list(loaded.columns)
    if not col_names:
        # Indexing col_names[0] below would raise IndexError otherwise.
        return _failure("❌ The file contains no columns.")

    # Publish the frame only once it is known to be usable.
    df_train = loaded
    return (
        f"✅ Loaded file with {len(df_train)} rows",
        gr.update(choices=col_names, value=col_names[0]),
        gr.update(choices=col_names, value=col_names[-1]),
    )
19
 
20
def train_model(text_column, target_column):
    """Fit a TF-IDF + logistic-regression classifier on the loaded data.

    Rows with a missing value in either selected column are dropped before
    training. On success the fitted vectorizer and classifier are stored in
    the module-level ``vectorizer`` and ``model``.

    Args:
        text_column: Name of the ``df_train`` column holding the input text.
        target_column: Name of the ``df_train`` column holding the labels.

    Returns:
        A status message describing success or the reason training was
        not performed.
    """
    global model, vectorizer

    if df_train is None:
        return "❌ No training data loaded."

    if text_column not in df_train.columns or target_column not in df_train.columns:
        return "❌ Invalid column selection."

    df_filtered = df_train.dropna(subset=[text_column, target_column])
    if df_filtered.empty:
        # Fitting on zero rows would raise inside scikit-learn.
        return "❌ No valid data after dropping missing values."

    # Cast to str so numeric or date cells do not break the tokenizer.
    texts = df_filtered[text_column].astype(str)
    y = df_filtered[target_column]

    new_vectorizer = TfidfVectorizer()
    new_model = LogisticRegression(max_iter=1000)
    try:
        X = new_vectorizer.fit_transform(texts)
        new_model.fit(X, y)
    except ValueError as exc:
        # e.g. empty vocabulary, or only one distinct label in the target.
        return f"❌ Training failed: {exc}"

    # Publish both together so predict_label never sees a vectorizer and a
    # model that come from different training runs.
    vectorizer, model = new_vectorizer, new_model
    return f"✅ Model trained on {len(df_filtered)} examples."
39
+
40
def predict_label(text_input):
    """Classify one piece of text with the trained model.

    Args:
        text_input: The text to classify. ``None`` is treated as an empty
            string.

    Returns:
        A message containing the predicted label, or an error message when
        no model has been trained yet.
    """
    if model is None or vectorizer is None:
        return "❌ Model is not trained yet."

    # transform() expects an iterable of documents, hence the 1-item list.
    features = vectorizer.transform([text_input or ""])
    prediction = model.predict(features)[0]
    return f"🔮 Prediction: {prediction}"
47
 
48
# UI layout: upload + load, column selection + train, then predict.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Text Classification App")

    with gr.Row():
        file_input = gr.File(label="Upload Excel File (.xlsx)", file_types=[".xlsx"])
        load_button = gr.Button("📂 Load File")

    status_output = gr.Markdown()
    with gr.Row():
        text_column_dropdown = gr.Dropdown(label="Text column")
        target_column_dropdown = gr.Dropdown(label="Target column")

    train_button = gr.Button("🚀 Train Model")
    training_status = gr.Markdown()

    with gr.Row():
        input_text = gr.Textbox(label="Enter text to classify")
        predict_button = gr.Button("🔍 Predict")

    prediction_output = gr.Markdown()

    # Event wiring must happen inside the Blocks context.
    # Loading fills the status line and both column dropdowns.
    load_button.click(
        fn=load_training_file,
        inputs=file_input,
        outputs=[status_output, text_column_dropdown, target_column_dropdown],
    )

    train_button.click(
        fn=train_model,
        inputs=[text_column_dropdown, target_column_dropdown],
        outputs=training_status,
    )

    predict_button.click(
        fn=predict_label,
        inputs=input_text,
        outputs=prediction_output,
    )

# Launch only when run as a script, so importing the module has no side effect.
if __name__ == "__main__":
    demo.launch()