Spaces:
Paused
Paused
File size: 8,548 Bytes
ea1fb77 98af5f3 95316bb ea1fb77 aa123f2 6c51406 b412fe9 6849a4f 94e6de7 98af5f3 6c51406 aa123f2 6c51406 b412fe9 6c51406 b412fe9 6849a4f 6c51406 16b89ff 6c51406 95316bb b412fe9 6c51406 b412fe9 98af5f3 6c51406 b412fe9 6c51406 16b89ff 95316bb b412fe9 95316bb 6c51406 b412fe9 95316bb 6849a4f 95316bb 16b89ff 98af5f3 95316bb b412fe9 6849a4f def006a ea1fb77 6849a4f 94e6de7 6849a4f 94e6de7 6849a4f 94e6de7 6849a4f 94e6de7 6849a4f 94e6de7 16b89ff d3db3d3 6849a4f ea1fb77 b412fe9 16b89ff 2b8217b 16b89ff 6849a4f ea1fb77 b412fe9 16b89ff 98af5f3 16b89ff b412fe9 aa123f2 16b89ff 6c51406 16b89ff 6849a4f b412fe9 16b89ff 6849a4f 16b89ff 6849a4f 94e6de7 6849a4f 94e6de7 16b89ff 94e6de7 b412fe9 98af5f3 b412fe9 ea1fb77 6849a4f 94e6de7 6849a4f b412fe9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 |
import gradio as gr
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, precision_score
# --- Training state -------------------------------------------------------
df_train = None            # DataFrame read from the uploaded training workbook
model = vectorizer = None  # fitted classifier and the vectorizer it was fitted with
test_metrics = None        # formatted evaluation text from the most recent training run

# --- Batch-prediction state -----------------------------------------------
df_predict = None          # DataFrame read from the uploaded prediction workbook
df_predict_results = None  # most recent batch output, kept so it can be exported
def load_training_file(file):
    """Read the uploaded training workbook and refresh both column pickers.

    Args:
        file: The value handed over by the upload widget. Either an object
            exposing the path as ``.name`` or a plain path string is
            accepted; ``None`` means nothing was uploaded.

    Returns:
        A 3-tuple ``(status_message, text_dropdown_update,
        target_dropdown_update)``. On success the text dropdown is
        pre-selected to the first column and the target dropdown to the last
        one. On any failure both dropdowns are cleared and the previously
        loaded ``df_train`` (if any) is left untouched.
    """
    global df_train

    def _failure(message):
        # Every failure path clears both dropdowns in the same way.
        return (
            message,
            gr.update(choices=[], value=None),
            gr.update(choices=[], value=None),
        )

    if file is None:
        return _failure("❌ Please upload a file.")

    # Accept both file-like wrappers (``.name``) and bare path strings.
    path = getattr(file, "name", file)
    try:
        loaded = pd.read_excel(path)
    except Exception as exc:  # UI boundary: report the problem instead of crashing
        return _failure(f"❌ Could not read the Excel file: {exc}")

    col_names = list(loaded.columns)
    if not col_names:
        # Indexing col_names[0] below would raise IndexError on an empty sheet.
        return _failure("❌ The uploaded file has no columns.")

    df_train = loaded
    return (
        f"✅ Loaded training file with {len(df_train)} rows",
        gr.update(choices=col_names, value=col_names[0]),
        gr.update(choices=col_names, value=col_names[-1]),
    )
def interpret_score(score):
    """Turn an accuracy value into a one-line, user-facing verdict.

    Args:
        score: Accuracy as a fraction (compared against 0.6 and 0.8).

    Returns:
        A message rating the model as LOW (``score < 0.6``), MODERATE
        (``0.6 <= score < 0.8``) or STRONG (``score >= 0.8``).
    """
    if score < 0.6:
        return "🔴 The model performance is LOW. Consider improving your data or features."
    if score < 0.8:
        return "🟠 The model performance is MODERATE. It may work but could be improved."
    return "🟢 The model performance is STRONG. The model is reliable."
def train_model(text_column, target_column):
    """Fit a TF-IDF + logistic-regression classifier on the loaded training data.

    Rows with a missing value in either selected column are dropped. The
    remaining rows are split 80/20 (``random_state=42``); the model is fitted
    on the larger part and evaluated on the smaller one.

    Args:
        text_column: Name of the ``df_train`` column holding the texts.
        target_column: Name of the ``df_train`` column holding the labels.

    Returns:
        A status string: either an error message explaining why training was
        not attempted, or a success message followed by the evaluation
        summary (accuracy, weighted precision, verdict, classification
        report). On success the globals ``model``, ``vectorizer`` and
        ``test_metrics`` are replaced together.
    """
    global model, vectorizer, test_metrics, df_train

    if df_train is None:
        return "❌ No training data loaded."
    if text_column not in df_train.columns or target_column not in df_train.columns:
        return "❌ Invalid column selection."

    df_filtered = df_train.dropna(subset=[text_column, target_column])
    if len(df_filtered) < 10:
        return "❌ Not enough data after filtering for training. Need at least 10 samples."

    single_class_msg = (
        "❌ The target column needs at least two distinct labels to train a classifier."
    )
    if df_filtered[target_column].nunique() < 2:
        return single_class_msg

    # Spreadsheet cells may hold numbers or dates; the vectorizer needs strings.
    texts = df_filtered[text_column].astype(str)
    labels = df_filtered[target_column]

    X_train, X_test, y_train, y_test = train_test_split(
        texts, labels, test_size=0.2, random_state=42
    )
    if y_train.nunique() < 2:
        # A rare label can end up entirely in the held-out part of the split.
        return single_class_msg

    # Fit into locals first so a failure cannot leave the globals holding a
    # new vectorizer next to an old model.
    new_vectorizer = TfidfVectorizer()
    X_train_vec = new_vectorizer.fit_transform(X_train)
    X_test_vec = new_vectorizer.transform(X_test)

    new_model = LogisticRegression(max_iter=1000)
    new_model.fit(X_train_vec, y_train)

    y_pred = new_model.predict(X_test_vec)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    report = classification_report(y_test, y_pred, zero_division=0)
    performance_msg = interpret_score(accuracy)

    model = new_model
    vectorizer = new_vectorizer
    test_metrics = (
        f"Accuracy: {accuracy:.2%}\n"
        f"Precision (weighted): {precision:.2%}\n\n"
        f"{performance_msg}\n\n"
        f"Classification Report:\n{report}"
    )
    return (
        f"✅ Model trained on {len(df_filtered)} examples.\n\n"
        f"Test set evaluation:\n{test_metrics}"
    )
def predict_label(text_input):
    """Classify a single text with the currently trained model.

    Args:
        text_input: The text to classify. ``None`` and whitespace-only input
            are rejected rather than being scored as an empty document.

    Returns:
        A message containing the predicted label and the highest class
        probability, or an error message when no model has been trained or
        no text was supplied.
    """
    if model is None or vectorizer is None:
        return "❌ Model is not trained yet."

    text = "" if text_input is None else str(text_input)
    if not text.strip():
        return "❌ Please enter some text to classify."

    X = vectorizer.transform([text])
    prediction = model.predict(X)[0]
    proba = model.predict_proba(X).max()
    return f"🔮 Prediction: {prediction} (confidence: {proba:.2%})"
def load_prediction_file(file):
    """Read the uploaded batch-prediction workbook and refresh its column picker.

    Args:
        file: The value handed over by the upload widget. Either an object
            exposing the path as ``.name`` or a plain path string is
            accepted; ``None`` means nothing was uploaded.

    Returns:
        A 2-tuple ``(status_message, text_dropdown_update)``. On success the
        dropdown lists every column and is pre-selected to the first one. On
        any failure the dropdown is cleared and the previously loaded
        ``df_predict`` (if any) is left untouched.
    """
    global df_predict

    def _failure(message):
        # Every failure path clears the dropdown in the same way.
        return message, gr.update(choices=[], value=None)

    if file is None:
        return _failure("❌ Please upload a prediction file.")

    # Accept both file-like wrappers (``.name``) and bare path strings.
    path = getattr(file, "name", file)
    try:
        loaded = pd.read_excel(path)
    except Exception as exc:  # UI boundary: report the problem instead of crashing
        return _failure(f"❌ Could not read the Excel file: {exc}")

    col_names = list(loaded.columns)
    if not col_names:
        # Indexing col_names[0] below would raise IndexError on an empty sheet.
        return _failure("❌ The uploaded file has no columns.")

    df_predict = loaded
    return (
        f"✅ Loaded prediction file with {len(df_predict)} rows",
        gr.update(choices=col_names, value=col_names[0]),
    )
def run_batch_prediction(text_column):
    """Classify every non-missing text in the loaded prediction file.

    Args:
        text_column: Name of the ``df_predict`` column holding the texts.

    Returns:
        A 2-tuple ``(status_message, preview)``. ``preview`` is the first 10
        result rows (the input columns plus ``Prediction`` and
        ``Confidence``) on success and ``None`` on any error. On success the
        full result is also stored in the global ``df_predict_results`` for
        later export.
    """
    global df_predict, model, vectorizer, df_predict_results

    if model is None or vectorizer is None:
        return "❌ Model is not trained yet.", None
    if df_predict is None:
        return "❌ No prediction file loaded.", None
    if text_column not in df_predict.columns:
        return "❌ Invalid text column selected.", None

    # Copy so the added columns never leak back into the loaded frame.
    df_filtered = df_predict.dropna(subset=[text_column]).copy()
    if df_filtered.empty:
        # Nothing to score: avoid handing the model zero samples.
        return "❌ No rows with text found in the selected column.", None

    # Spreadsheet cells may hold numbers or dates; the vectorizer needs strings.
    X = vectorizer.transform(df_filtered[text_column].astype(str))
    df_filtered["Prediction"] = model.predict(X)
    # Confidence is the probability of the most likely class for each row.
    df_filtered["Confidence"] = model.predict_proba(X).max(axis=1)

    df_predict_results = df_filtered  # kept for export

    return (
        f"✅ Batch prediction completed on {len(df_filtered)} rows.",
        df_filtered.head(10),
    )
def export_predictions():
    """Write the latest batch-prediction results to an Excel file.

    Returns:
        The path (as a string) of the written ``predictions_output.xlsx``,
        or ``None`` when no batch prediction has been run yet.
    """
    import tempfile
    from pathlib import Path

    global df_predict_results
    if df_predict_results is None:
        return None

    # A hard-coded directory may not exist on the host; a fresh temporary
    # directory is always writable and keeps the download's file name stable.
    export_dir = Path(tempfile.mkdtemp(prefix="predictions_"))
    export_path = export_dir / "predictions_output.xlsx"
    df_predict_results.to_excel(export_path, index=False)
    return str(export_path)
# User interface: three steps (train, single prediction, batch prediction with
# export), followed by the wiring of every button to its callback.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Text Classification App")
    gr.Markdown(
        "### How does this model work?\n"
        "This app uses a **Logistic Regression** model trained on your text data.\n"
        "- Text data is transformed into numbers using **TF-IDF vectorization**, "
        "which converts text into features based on word importance.\n"
        "- The model learns patterns from labeled examples you provide.\n"
        "- After training, it can predict the label/category of new text inputs.\n"
        "\n"
        "**Note:** Model performance depends heavily on quality and quantity of your data."
    )

    # ---- Step 1: training data and model fitting ----
    gr.Markdown(
        "### Step 1: Upload your training data\n"
        "Upload an Excel file (`.xlsx`) containing your texts and corresponding labels."
    )
    with gr.Row():
        file_input = gr.File(
            label="Upload Training Excel File (.xlsx)",
            file_types=[".xlsx"],
            interactive=True,
        )
        load_button = gr.Button("📂 Load Training File")
    status_output = gr.Markdown()
    gr.Markdown("After loading, select the text and target columns for training.")
    with gr.Row():
        text_column_dropdown = gr.Dropdown(
            label="Text column",
            interactive=True,
            info="Select the column that contains the text data.",
        )
        target_column_dropdown = gr.Dropdown(
            label="Target column",
            interactive=True,
            info="Select the column that contains the labels to predict.",
        )
    train_button = gr.Button("🚀 Train Model")
    training_status = gr.Markdown()

    # ---- Step 2: single-text prediction ----
    gr.Markdown(
        "### Step 2: Predict on single texts\n"
        "Enter a text below to get the model's predicted label."
    )
    with gr.Row():
        input_text = gr.Textbox(
            label="Enter text to classify",
            placeholder="Type some text here...",
        )
        predict_button = gr.Button("🔍 Predict Single")
    prediction_output = gr.Markdown()

    # ---- Step 3: batch prediction and export ----
    gr.Markdown(
        "### Step 3: Batch prediction\n"
        "Upload a new Excel file with texts to predict multiple labels at once."
    )
    with gr.Row():
        pred_file_input = gr.File(
            label="Upload Prediction Excel File (.xlsx)",
            file_types=[".xlsx"],
        )
        load_pred_button = gr.Button("📂 Load Prediction File")
    pred_status = gr.Markdown()
    pred_text_column_dropdown = gr.Dropdown(
        label="Text column for Prediction",
        info="Select the column in your prediction file containing text to classify.",
    )
    batch_pred_button = gr.Button("⚡ Run Batch Prediction")
    batch_pred_status = gr.Markdown()
    batch_pred_preview = gr.Dataframe(headers=None, interactive=False)
    export_button = gr.Button("⬇️ Export Predictions")
    gr.Markdown(
        "Click **Export Predictions** to download the batch prediction results as an Excel file."
    )
    # Named download slot, so the export wiring below refers to a visible
    # component instead of creating one inside the click() call.
    export_file = gr.File(label="Download Predictions", file_types=[".xlsx"])

    # ---- Button connections ----
    load_button.click(
        fn=load_training_file,
        inputs=file_input,
        outputs=[status_output, text_column_dropdown, target_column_dropdown],
    )
    train_button.click(
        fn=train_model,
        inputs=[text_column_dropdown, target_column_dropdown],
        outputs=training_status,
    )
    predict_button.click(
        fn=predict_label,
        inputs=input_text,
        outputs=prediction_output,
    )
    load_pred_button.click(
        fn=load_prediction_file,
        inputs=pred_file_input,
        outputs=[pred_status, pred_text_column_dropdown],
    )
    batch_pred_button.click(
        fn=run_batch_prediction,
        inputs=pred_text_column_dropdown,
        outputs=[batch_pred_status, batch_pred_preview],
    )
    export_button.click(
        fn=export_predictions,
        inputs=[],
        outputs=export_file,
    )

if __name__ == "__main__":
    demo.launch()
|