clementBE committed on
Commit
16b89ff
·
verified ·
1 Parent(s): 489d682

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -13
app.py CHANGED
@@ -4,7 +4,6 @@ from sklearn.feature_extraction.text import TfidfVectorizer
4
  from sklearn.linear_model import LogisticRegression
5
  from sklearn.model_selection import train_test_split
6
  from sklearn.metrics import classification_report, accuracy_score, precision_score
7
- import os
8
 
9
  df_train = None
10
  model = None
@@ -24,6 +23,15 @@ def load_training_file(file):
24
 
25
  return f"βœ… Loaded training file with {len(df_train)} rows", gr.update(choices=col_names, value=col_names[0]), gr.update(choices=col_names, value=col_names[-1])
26
 
 
 
 
 
 
 
 
 
 
27
  def train_model(text_column, target_column):
28
  global model, vectorizer, test_metrics, df_train
29
 
@@ -35,6 +43,9 @@ def train_model(text_column, target_column):
35
 
36
  df_filtered = df_train.dropna(subset=[text_column, target_column])
37
 
 
 
 
38
  X_train, X_test, y_train, y_test = train_test_split(
39
  df_filtered[text_column], df_filtered[target_column], test_size=0.2, random_state=42
40
  )
@@ -52,7 +63,14 @@ def train_model(text_column, target_column):
52
  precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
53
  report = classification_report(y_test, y_pred, zero_division=0)
54
 
55
- test_metrics = f"Accuracy: {accuracy:.2%}\nPrecision (weighted): {precision:.2%}\n\nClassification Report:\n{report}"
 
 
 
 
 
 
 
56
 
57
  return f"βœ… Model trained on {len(df_filtered)} examples.\n\nTest set evaluation:\n{test_metrics}"
58
 
@@ -100,47 +118,85 @@ def export_predictions():
100
  global df_predict_results
101
  if df_predict_results is None:
102
  return None
103
- # Save file locally - current working directory
104
- export_path = "predictions_output.xlsx"
105
  df_predict_results.to_excel(export_path, index=False)
106
  return export_path
107
 
108
  with gr.Blocks() as demo:
109
  gr.Markdown("# 🧠 Text Classification App")
110
 
111
- # Training data upload
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  with gr.Row():
113
- file_input = gr.File(label="Upload Training Excel File (.xlsx)", file_types=[".xlsx"])
 
114
  load_button = gr.Button("πŸ“‚ Load Training File")
115
 
116
  status_output = gr.Markdown()
 
 
 
 
 
117
  with gr.Row():
118
- text_column_dropdown = gr.Dropdown(label="Text column")
119
- target_column_dropdown = gr.Dropdown(label="Target column")
 
 
 
 
120
 
121
  train_button = gr.Button("πŸš€ Train Model")
122
  training_status = gr.Markdown()
123
 
124
- # Single prediction
 
 
 
 
125
  with gr.Row():
126
- input_text = gr.Textbox(label="Enter text to classify")
127
  predict_button = gr.Button("πŸ” Predict Single")
128
 
129
  prediction_output = gr.Markdown()
130
 
131
- # Batch prediction upload
 
 
 
 
132
  with gr.Row():
133
  pred_file_input = gr.File(label="Upload Prediction Excel File (.xlsx)", file_types=[".xlsx"])
134
  load_pred_button = gr.Button("πŸ“‚ Load Prediction File")
135
 
136
  pred_status = gr.Markdown()
137
- pred_text_column_dropdown = gr.Dropdown(label="Text column for Prediction")
 
 
138
 
139
  batch_pred_button = gr.Button("⚑ Run Batch Prediction")
140
  batch_pred_status = gr.Markdown()
141
  batch_pred_preview = gr.Dataframe(headers=None, interactive=False)
142
 
143
  export_button = gr.Button("⬇️ Export Predictions")
 
 
 
144
 
145
  # Button connections
146
  load_button.click(
@@ -173,7 +229,6 @@ with gr.Blocks() as demo:
173
  outputs=[batch_pred_status, batch_pred_preview]
174
  )
175
 
176
- # Export returns a downloadable file
177
  export_button.click(
178
  fn=export_predictions,
179
  inputs=[],
 
4
  from sklearn.linear_model import LogisticRegression
5
  from sklearn.model_selection import train_test_split
6
  from sklearn.metrics import classification_report, accuracy_score, precision_score
 
7
 
8
  df_train = None
9
  model = None
 
23
 
24
  return f"βœ… Loaded training file with {len(df_train)} rows", gr.update(choices=col_names, value=col_names[0]), gr.update(choices=col_names, value=col_names[-1])
25
 
26
def interpret_score(score):
    """Return a short, human-readable verdict for a model's accuracy.

    Args:
        score: Accuracy-like value compared against the 0.6 and 0.8
            thresholds (presumably a fraction in the range 0-1; the caller
            is expected to pass the test-set accuracy).

    Returns:
        A one-line message prefixed with a coloured status emoji:
        LOW for scores below 0.6, MODERATE for scores from 0.6 up to (but
        not including) 0.8, and STRONG for 0.8 and above.
    """
    # Guard clauses, ordered from worst to best band.
    if score < 0.6:
        return "🔴 The model performance is LOW. Consider improving your data or features."
    if score < 0.8:
        return "🟠 The model performance is MODERATE. It may work but could be improved."
    return "🟢 The model performance is STRONG. The model is reliable."
34
+
35
  def train_model(text_column, target_column):
36
  global model, vectorizer, test_metrics, df_train
37
 
 
43
 
44
  df_filtered = df_train.dropna(subset=[text_column, target_column])
45
 
46
+ if len(df_filtered) < 10:
47
+ return "❌ Not enough data after filtering for training. Need at least 10 samples."
48
+
49
  X_train, X_test, y_train, y_test = train_test_split(
50
  df_filtered[text_column], df_filtered[target_column], test_size=0.2, random_state=42
51
  )
 
63
  precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
64
  report = classification_report(y_test, y_pred, zero_division=0)
65
 
66
+ performance_msg = interpret_score(accuracy)
67
+
68
+ test_metrics = (
69
+ f"Accuracy: {accuracy:.2%}\n"
70
+ f"Precision (weighted): {precision:.2%}\n\n"
71
+ f"{performance_msg}\n\n"
72
+ f"Classification Report:\n{report}"
73
+ )
74
 
75
  return f"βœ… Model trained on {len(df_filtered)} examples.\n\nTest set evaluation:\n{test_metrics}"
76
 
 
118
  global df_predict_results
119
  if df_predict_results is None:
120
  return None
121
+ export_path = "/mnt/data/predictions_output.xlsx" # Gradio environment allows writing here
 
122
  df_predict_results.to_excel(export_path, index=False)
123
  return export_path
124
 
125
  with gr.Blocks() as demo:
126
  gr.Markdown("# 🧠 Text Classification App")
127
 
128
+ gr.Markdown(
129
+ """
130
+ ### How does this model work?
131
+ This app uses a **Logistic Regression** model trained on your text data.
132
+ - Text data is transformed into numbers using **TF-IDF vectorization**, which converts text into features based on word importance.
133
+ - The model learns patterns from labeled examples you provide.
134
+ - After training, it can predict the label/category of new text inputs.
135
+ \n
136
+ **Note:** Model performance depends heavily on quality and quantity of your data.
137
+ """
138
+ )
139
+
140
+ gr.Markdown(
141
+ "### Step 1: Upload your training data\n"
142
+ "Upload an Excel file (`.xlsx`) containing your texts and corresponding labels."
143
+ )
144
+
145
  with gr.Row():
146
+ file_input = gr.File(label="Upload Training Excel File (.xlsx)", file_types=[".xlsx"],
147
+ interactive=True)
148
  load_button = gr.Button("πŸ“‚ Load Training File")
149
 
150
  status_output = gr.Markdown()
151
+
152
+ gr.Markdown(
153
+ "After loading, select the text and target columns for training."
154
+ )
155
+
156
  with gr.Row():
157
+ text_column_dropdown = gr.Dropdown(label="Text column",
158
+ interactive=True,
159
+ info="Select the column that contains the text data.")
160
+ target_column_dropdown = gr.Dropdown(label="Target column",
161
+ interactive=True,
162
+ info="Select the column that contains the labels to predict.")
163
 
164
  train_button = gr.Button("πŸš€ Train Model")
165
  training_status = gr.Markdown()
166
 
167
+ gr.Markdown(
168
+ "### Step 2: Predict on single texts\n"
169
+ "Enter a text below to get the model's predicted label."
170
+ )
171
+
172
  with gr.Row():
173
+ input_text = gr.Textbox(label="Enter text to classify", placeholder="Type some text here...")
174
  predict_button = gr.Button("πŸ” Predict Single")
175
 
176
  prediction_output = gr.Markdown()
177
 
178
+ gr.Markdown(
179
+ "### Step 3: Batch prediction\n"
180
+ "Upload a new Excel file with texts to predict multiple labels at once."
181
+ )
182
+
183
  with gr.Row():
184
  pred_file_input = gr.File(label="Upload Prediction Excel File (.xlsx)", file_types=[".xlsx"])
185
  load_pred_button = gr.Button("πŸ“‚ Load Prediction File")
186
 
187
  pred_status = gr.Markdown()
188
+
189
+ pred_text_column_dropdown = gr.Dropdown(label="Text column for Prediction",
190
+ info="Select the column in your prediction file containing text to classify.")
191
 
192
  batch_pred_button = gr.Button("⚑ Run Batch Prediction")
193
  batch_pred_status = gr.Markdown()
194
  batch_pred_preview = gr.Dataframe(headers=None, interactive=False)
195
 
196
  export_button = gr.Button("⬇️ Export Predictions")
197
+ gr.Markdown(
198
+ "Click **Export Predictions** to download the batch prediction results as an Excel file."
199
+ )
200
 
201
  # Button connections
202
  load_button.click(
 
229
  outputs=[batch_pred_status, batch_pred_preview]
230
  )
231
 
 
232
  export_button.click(
233
  fn=export_predictions,
234
  inputs=[],