Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,7 +4,6 @@ from sklearn.feature_extraction.text import TfidfVectorizer
|
|
| 4 |
from sklearn.linear_model import LogisticRegression
|
| 5 |
from sklearn.model_selection import train_test_split
|
| 6 |
from sklearn.metrics import classification_report, accuracy_score, precision_score
|
| 7 |
-
import os
|
| 8 |
|
| 9 |
df_train = None
|
| 10 |
model = None
|
|
@@ -24,6 +23,15 @@ def load_training_file(file):
|
|
| 24 |
|
| 25 |
return f"✅ Loaded training file with {len(df_train)} rows", gr.update(choices=col_names, value=col_names[0]), gr.update(choices=col_names, value=col_names[-1])
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
def train_model(text_column, target_column):
|
| 28 |
global model, vectorizer, test_metrics, df_train
|
| 29 |
|
|
@@ -35,6 +43,9 @@ def train_model(text_column, target_column):
|
|
| 35 |
|
| 36 |
df_filtered = df_train.dropna(subset=[text_column, target_column])
|
| 37 |
|
|
|
|
|
|
|
|
|
|
| 38 |
X_train, X_test, y_train, y_test = train_test_split(
|
| 39 |
df_filtered[text_column], df_filtered[target_column], test_size=0.2, random_state=42
|
| 40 |
)
|
|
@@ -52,7 +63,14 @@ def train_model(text_column, target_column):
|
|
| 52 |
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
|
| 53 |
report = classification_report(y_test, y_pred, zero_division=0)
|
| 54 |
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
return f"✅ Model trained on {len(df_filtered)} examples.\n\nTest set evaluation:\n{test_metrics}"
|
| 58 |
|
|
@@ -100,47 +118,85 @@ def export_predictions():
|
|
| 100 |
global df_predict_results
|
| 101 |
if df_predict_results is None:
|
| 102 |
return None
|
| 103 |
-
|
| 104 |
-
export_path = "predictions_output.xlsx"
|
| 105 |
df_predict_results.to_excel(export_path, index=False)
|
| 106 |
return export_path
|
| 107 |
|
| 108 |
with gr.Blocks() as demo:
|
| 109 |
gr.Markdown("# 🧠 Text Classification App")
|
| 110 |
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
with gr.Row():
|
| 113 |
-
file_input = gr.File(label="Upload Training Excel File (.xlsx)", file_types=[".xlsx"]
|
|
|
|
| 114 |
load_button = gr.Button("π Load Training File")
|
| 115 |
|
| 116 |
status_output = gr.Markdown()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
with gr.Row():
|
| 118 |
-
text_column_dropdown = gr.Dropdown(label="Text column"
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
train_button = gr.Button("π Train Model")
|
| 122 |
training_status = gr.Markdown()
|
| 123 |
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
with gr.Row():
|
| 126 |
-
input_text = gr.Textbox(label="Enter text to classify")
|
| 127 |
predict_button = gr.Button("π Predict Single")
|
| 128 |
|
| 129 |
prediction_output = gr.Markdown()
|
| 130 |
|
| 131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
with gr.Row():
|
| 133 |
pred_file_input = gr.File(label="Upload Prediction Excel File (.xlsx)", file_types=[".xlsx"])
|
| 134 |
load_pred_button = gr.Button("π Load Prediction File")
|
| 135 |
|
| 136 |
pred_status = gr.Markdown()
|
| 137 |
-
|
|
|
|
|
|
|
| 138 |
|
| 139 |
batch_pred_button = gr.Button("⚡ Run Batch Prediction")
|
| 140 |
batch_pred_status = gr.Markdown()
|
| 141 |
batch_pred_preview = gr.Dataframe(headers=None, interactive=False)
|
| 142 |
|
| 143 |
export_button = gr.Button("⬇️ Export Predictions")
|
|
|
|
|
|
|
|
|
|
| 144 |
|
| 145 |
# Button connections
|
| 146 |
load_button.click(
|
|
@@ -173,7 +229,6 @@ with gr.Blocks() as demo:
|
|
| 173 |
outputs=[batch_pred_status, batch_pred_preview]
|
| 174 |
)
|
| 175 |
|
| 176 |
-
# Export returns a downloadable file
|
| 177 |
export_button.click(
|
| 178 |
fn=export_predictions,
|
| 179 |
inputs=[],
|
|
|
|
| 4 |
from sklearn.linear_model import LogisticRegression
|
| 5 |
from sklearn.model_selection import train_test_split
|
| 6 |
from sklearn.metrics import classification_report, accuracy_score, precision_score
|
|
|
|
| 7 |
|
| 8 |
df_train = None
|
| 9 |
model = None
|
|
|
|
| 23 |
|
| 24 |
return f"✅ Loaded training file with {len(df_train)} rows", gr.update(choices=col_names, value=col_names[0]), gr.update(choices=col_names, value=col_names[-1])
|
| 25 |
|
| 26 |
+
def interpret_score(score):
|
| 27 |
+
# Simple interpretation based on accuracy score
|
| 28 |
+
if score < 0.6:
|
| 29 |
+
return "🔴 The model performance is LOW. Consider improving your data or features."
|
| 30 |
+
elif score < 0.8:
|
| 31 |
+
return "🟠 The model performance is MODERATE. It may work but could be improved."
|
| 32 |
+
else:
|
| 33 |
+
return "🟢 The model performance is STRONG. The model is reliable."
|
| 34 |
+
|
| 35 |
def train_model(text_column, target_column):
|
| 36 |
global model, vectorizer, test_metrics, df_train
|
| 37 |
|
|
|
|
| 43 |
|
| 44 |
df_filtered = df_train.dropna(subset=[text_column, target_column])
|
| 45 |
|
| 46 |
+
if len(df_filtered) < 10:
|
| 47 |
+
return "❌ Not enough data after filtering for training. Need at least 10 samples."
|
| 48 |
+
|
| 49 |
X_train, X_test, y_train, y_test = train_test_split(
|
| 50 |
df_filtered[text_column], df_filtered[target_column], test_size=0.2, random_state=42
|
| 51 |
)
|
|
|
|
| 63 |
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
|
| 64 |
report = classification_report(y_test, y_pred, zero_division=0)
|
| 65 |
|
| 66 |
+
performance_msg = interpret_score(accuracy)
|
| 67 |
+
|
| 68 |
+
test_metrics = (
|
| 69 |
+
f"Accuracy: {accuracy:.2%}\n"
|
| 70 |
+
f"Precision (weighted): {precision:.2%}\n\n"
|
| 71 |
+
f"{performance_msg}\n\n"
|
| 72 |
+
f"Classification Report:\n{report}"
|
| 73 |
+
)
|
| 74 |
|
| 75 |
return f"✅ Model trained on {len(df_filtered)} examples.\n\nTest set evaluation:\n{test_metrics}"
|
| 76 |
|
|
|
|
| 118 |
global df_predict_results
|
| 119 |
if df_predict_results is None:
|
| 120 |
return None
|
| 121 |
+
export_path = "/mnt/data/predictions_output.xlsx" # Gradio environment allows writing here
|
|
|
|
| 122 |
df_predict_results.to_excel(export_path, index=False)
|
| 123 |
return export_path
|
| 124 |
|
| 125 |
with gr.Blocks() as demo:
|
| 126 |
gr.Markdown("# 🧠 Text Classification App")
|
| 127 |
|
| 128 |
+
gr.Markdown(
|
| 129 |
+
"""
|
| 130 |
+
### How does this model work?
|
| 131 |
+
This app uses a **Logistic Regression** model trained on your text data.
|
| 132 |
+
- Text data is transformed into numbers using **TF-IDF vectorization**, which converts text into features based on word importance.
|
| 133 |
+
- The model learns patterns from labeled examples you provide.
|
| 134 |
+
- After training, it can predict the label/category of new text inputs.
|
| 135 |
+
\n
|
| 136 |
+
**Note:** Model performance depends heavily on quality and quantity of your data.
|
| 137 |
+
"""
|
| 138 |
+
)
|
| 139 |
+
|
| 140 |
+
gr.Markdown(
|
| 141 |
+
"### Step 1: Upload your training data\n"
|
| 142 |
+
"Upload an Excel file (`.xlsx`) containing your texts and corresponding labels."
|
| 143 |
+
)
|
| 144 |
+
|
| 145 |
with gr.Row():
|
| 146 |
+
file_input = gr.File(label="Upload Training Excel File (.xlsx)", file_types=[".xlsx"],
|
| 147 |
+
interactive=True)
|
| 148 |
load_button = gr.Button("π Load Training File")
|
| 149 |
|
| 150 |
status_output = gr.Markdown()
|
| 151 |
+
|
| 152 |
+
gr.Markdown(
|
| 153 |
+
"After loading, select the text and target columns for training."
|
| 154 |
+
)
|
| 155 |
+
|
| 156 |
with gr.Row():
|
| 157 |
+
text_column_dropdown = gr.Dropdown(label="Text column",
|
| 158 |
+
interactive=True,
|
| 159 |
+
info="Select the column that contains the text data.")
|
| 160 |
+
target_column_dropdown = gr.Dropdown(label="Target column",
|
| 161 |
+
interactive=True,
|
| 162 |
+
info="Select the column that contains the labels to predict.")
|
| 163 |
|
| 164 |
train_button = gr.Button("π Train Model")
|
| 165 |
training_status = gr.Markdown()
|
| 166 |
|
| 167 |
+
gr.Markdown(
|
| 168 |
+
"### Step 2: Predict on single texts\n"
|
| 169 |
+
"Enter a text below to get the model's predicted label."
|
| 170 |
+
)
|
| 171 |
+
|
| 172 |
with gr.Row():
|
| 173 |
+
input_text = gr.Textbox(label="Enter text to classify", placeholder="Type some text here...")
|
| 174 |
predict_button = gr.Button("π Predict Single")
|
| 175 |
|
| 176 |
prediction_output = gr.Markdown()
|
| 177 |
|
| 178 |
+
gr.Markdown(
|
| 179 |
+
"### Step 3: Batch prediction\n"
|
| 180 |
+
"Upload a new Excel file with texts to predict multiple labels at once."
|
| 181 |
+
)
|
| 182 |
+
|
| 183 |
with gr.Row():
|
| 184 |
pred_file_input = gr.File(label="Upload Prediction Excel File (.xlsx)", file_types=[".xlsx"])
|
| 185 |
load_pred_button = gr.Button("π Load Prediction File")
|
| 186 |
|
| 187 |
pred_status = gr.Markdown()
|
| 188 |
+
|
| 189 |
+
pred_text_column_dropdown = gr.Dropdown(label="Text column for Prediction",
|
| 190 |
+
info="Select the column in your prediction file containing text to classify.")
|
| 191 |
|
| 192 |
batch_pred_button = gr.Button("⚡ Run Batch Prediction")
|
| 193 |
batch_pred_status = gr.Markdown()
|
| 194 |
batch_pred_preview = gr.Dataframe(headers=None, interactive=False)
|
| 195 |
|
| 196 |
export_button = gr.Button("⬇️ Export Predictions")
|
| 197 |
+
gr.Markdown(
|
| 198 |
+
"Click **Export Predictions** to download the batch prediction results as an Excel file."
|
| 199 |
+
)
|
| 200 |
|
| 201 |
# Button connections
|
| 202 |
load_button.click(
|
|
|
|
| 229 |
outputs=[batch_pred_status, batch_pred_preview]
|
| 230 |
)
|
| 231 |
|
|
|
|
| 232 |
export_button.click(
|
| 233 |
fn=export_predictions,
|
| 234 |
inputs=[],
|