Spaces:
Paused
Paused
import tempfile

import gradio as gr
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, precision_score
# ---------------------------------------------------------------------------
# Module-level application state, shared by the Gradio callbacks below.
# Everything starts as None and is filled in by the corresponding callback.
# ---------------------------------------------------------------------------

# Training side: the uploaded training table and the artefacts fitted from it.
df_train = None
vectorizer = None
model = None
test_metrics = None

# Batch-prediction side.
df_predict = None  # for batch prediction file
df_predict_results = None  # to store batch prediction results for export
def load_training_file(file):
    """Read the uploaded training workbook and refresh the column dropdowns.

    Args:
        file: Value delivered by the training upload component. ``None``
            means nothing was uploaded. Otherwise it is treated as either a
            path string or an object exposing the path as ``.name``
            (NOTE(review): the exact type depends on the Gradio version and
            the component's ``type`` setting -- confirm).

    Returns:
        A 3-tuple ``(status_message, text_dropdown_update,
        target_dropdown_update)``. On any failure both dropdowns are
        cleared. On success the text dropdown defaults to the first column
        and the target dropdown to the last column.
    """
    global df_train

    def cleared():
        # A fresh update object per output: the same object is never handed
        # to two different components.
        return gr.update(choices=[], value=None)

    if file is None:
        return "โ Please upload a file.", cleared(), cleared()

    path = file if isinstance(file, str) else file.name
    try:
        loaded = pd.read_excel(path)
    except Exception as exc:
        # UI boundary: a corrupt or non-Excel upload should produce a
        # readable status message, not an unhandled callback error.
        return f"โ Could not read the Excel file: {exc}", cleared(), cleared()

    col_names = list(loaded.columns)
    if not col_names:
        # Without this guard, col_names[0] below would raise IndexError.
        return "โ The uploaded file has no columns.", cleared(), cleared()

    # Only replace the shared training table once the upload is known good.
    df_train = loaded
    return (
        f"โ Loaded training file with {len(df_train)} rows",
        gr.update(choices=col_names, value=col_names[0]),
        gr.update(choices=col_names, value=col_names[-1]),
    )
def interpret_score(score):
    """Translate an accuracy score into a short human-readable verdict.

    Args:
        score: Accuracy as a fraction (the caller passes the value returned
            by ``accuracy_score``).

    Returns:
        The LOW message for scores below 0.6, the MODERATE message for
        scores below 0.8, and the STRONG message otherwise.
    """
    # (exclusive upper bound, verdict) pairs, checked in ascending order.
    bands = (
        (0.6, "๐ด The model performance is LOW. Consider improving your data or features."),
        (0.8, "๐ The model performance is MODERATE. It may work but could be improved."),
    )
    for upper_bound, verdict in bands:
        if score < upper_bound:
            return verdict
    # Nothing matched: the score is at or above the highest bound.
    return "๐ข The model performance is STRONG. The model is reliable."
def train_model(text_column, target_column):
    """Fit a TF-IDF + logistic-regression classifier on the loaded training data.

    Rows with a missing value in either selected column are dropped, the
    remainder is split 80/20 (``random_state=42``), and the model is
    evaluated on the held-out 20 %.

    The module-level ``model``, ``vectorizer`` and ``test_metrics`` are only
    replaced after fitting and evaluation succeed, so a failed training run
    never leaves a half-initialised model behind for the prediction callbacks.

    Args:
        text_column: Name of the column in ``df_train`` holding the texts.
        target_column: Name of the column in ``df_train`` holding the labels.

    Returns:
        A status string: either an error message or a summary containing
        accuracy, weighted precision, a verdict and the classification report.
    """
    global model, vectorizer, test_metrics

    if df_train is None:
        return "โ No training data loaded."
    if text_column not in df_train.columns or target_column not in df_train.columns:
        return "โ Invalid column selection."
    if text_column == target_column:
        # Training on the label column itself would report a meaninglessly
        # perfect score.
        return "โ Text and target columns must be different."

    df_filtered = df_train.dropna(subset=[text_column, target_column])
    if len(df_filtered) < 10:
        return "โ Not enough data after filtering for training. Need at least 10 samples."

    # Spreadsheet cells can hold numbers or dates; the vectorizer needs strings.
    texts = df_filtered[text_column].astype(str)
    labels = df_filtered[target_column]

    X_train, X_test, y_train, y_test = train_test_split(
        texts, labels, test_size=0.2, random_state=42
    )
    if y_train.nunique() < 2:
        return "โ Training data must contain at least two different labels."

    # Fit into locals first; the globals are swapped in only on success.
    new_vectorizer = TfidfVectorizer()
    new_model = LogisticRegression(max_iter=1000)
    try:
        X_train_vec = new_vectorizer.fit_transform(X_train)
        X_test_vec = new_vectorizer.transform(X_test)
        new_model.fit(X_train_vec, y_train)
    except ValueError as exc:
        # e.g. an empty vocabulary when no usable tokens are found.
        return f"โ Training failed: {exc}"

    y_pred = new_model.predict(X_test_vec)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    report = classification_report(y_test, y_pred, zero_division=0)
    performance_msg = interpret_score(accuracy)

    vectorizer = new_vectorizer
    model = new_model
    test_metrics = (
        f"Accuracy: {accuracy:.2%}\n"
        f"Precision (weighted): {precision:.2%}\n\n"
        f"{performance_msg}\n\n"
        f"Classification Report:\n{report}"
    )
    return f"โ Model trained on {len(df_filtered)} examples.\n\nTest set evaluation:\n{test_metrics}"
def predict_label(text_input):
    """Classify a single text with the trained model.

    Args:
        text_input: The text entered by the user.

    Returns:
        A status string: the predicted label with the highest class
        probability as "confidence", or an error message when no model is
        trained or the input is empty.
    """
    if model is None or vectorizer is None:
        return "โ Model is not trained yet."

    # Blank input vectorizes to an all-zero row, so the "prediction" would
    # come from the model intercept alone; reject it instead of showing a
    # misleading confidence. None would crash the vectorizer outright.
    if text_input is None or not str(text_input).strip():
        return "โ Please enter some text to classify."

    X = vectorizer.transform([str(text_input)])
    prediction = model.predict(X)[0]
    proba = model.predict_proba(X).max()
    return f"๐ฎ Prediction: {prediction} (confidence: {proba:.2%})"
def load_prediction_file(file):
    """Read the uploaded batch-prediction workbook and refresh its column dropdown.

    Args:
        file: Value delivered by the prediction upload component. ``None``
            means nothing was uploaded. Otherwise it is treated as either a
            path string or an object exposing the path as ``.name``
            (NOTE(review): the exact type depends on the Gradio version and
            the component's ``type`` setting -- confirm).

    Returns:
        A 2-tuple ``(status_message, text_dropdown_update)``. On any failure
        the dropdown is cleared; on success it lists every column and
        defaults to the first one.
    """
    global df_predict
    if file is None:
        return "โ Please upload a prediction file.", gr.update(choices=[], value=None)

    path = file if isinstance(file, str) else file.name
    try:
        loaded = pd.read_excel(path)
    except Exception as exc:
        # UI boundary: a corrupt or non-Excel upload should produce a
        # readable status message, not an unhandled callback error.
        return f"โ Could not read the Excel file: {exc}", gr.update(choices=[], value=None)

    col_names = list(loaded.columns)
    if not col_names:
        # Without this guard, col_names[0] below would raise IndexError.
        return "โ The uploaded file has no columns.", gr.update(choices=[], value=None)

    # Only replace the shared prediction table once the upload is known good.
    df_predict = loaded
    return (
        f"โ Loaded prediction file with {len(df_predict)} rows",
        gr.update(choices=col_names, value=col_names[0]),
    )
def run_batch_prediction(text_column):
    """Predict a label for every row of the loaded prediction file.

    Rows whose text cell is missing are dropped. The remaining rows get two
    added columns, ``Prediction`` and ``Confidence`` (highest class
    probability), and the result is stored in ``df_predict_results`` for
    later export.

    Args:
        text_column: Name of the column in ``df_predict`` holding the texts.

    Returns:
        A 2-tuple ``(status_message, preview)`` where ``preview`` is the
        first 10 result rows, or ``None`` when prediction could not run.
    """
    global df_predict_results

    if model is None or vectorizer is None:
        return "โ Model is not trained yet.", None
    if df_predict is None:
        return "โ No prediction file loaded.", None
    if text_column not in df_predict.columns:
        return "โ Invalid text column selected.", None

    df_filtered = df_predict.dropna(subset=[text_column]).copy()
    if df_filtered.empty:
        # Predicting on zero samples raises inside scikit-learn.
        return "โ No rows with text found in the selected column.", None

    # Spreadsheet cells can hold numbers or dates; the vectorizer needs strings.
    X = vectorizer.transform(df_filtered[text_column].astype(str))
    df_filtered["Prediction"] = model.predict(X)
    df_filtered["Confidence"] = model.predict_proba(X).max(axis=1)

    df_predict_results = df_filtered  # save for export
    # Show preview of first 10 rows
    return f"โ Batch prediction completed on {len(df_filtered)} rows.", df_filtered.head(10)
def export_predictions():
    """Write the latest batch-prediction results to an Excel file for download.

    Returns:
        Path of the written ``.xlsx`` file, or ``None`` when no batch
        prediction has been run yet.
    """
    if df_predict_results is None:
        return None

    # Reserve a unique file in the system temp directory rather than a
    # hard-coded location that may not exist or be writable on the host.
    # The handle is closed before pandas writes to the path.
    with tempfile.NamedTemporaryFile(
        prefix="predictions_output_", suffix=".xlsx", delete=False
    ) as handle:
        export_path = handle.name

    df_predict_results.to_excel(export_path, index=False)
    return export_path
with gr.Blocks() as demo:
    # ---- Header and explanation -------------------------------------------
    gr.Markdown("# ๐ง Text Classification App")
    gr.Markdown(
        """
        ### How does this model work?
        This app uses a **Logistic Regression** model trained on your text data.
        - Text data is transformed into numbers using **TF-IDF vectorization**, which converts text into features based on word importance.
        - The model learns patterns from labeled examples you provide.
        - After training, it can predict the label/category of new text inputs.
        \n
        **Note:** Model performance depends heavily on quality and quantity of your data.
        """
    )

    # ---- Step 1: training data upload, column selection, training ----------
    gr.Markdown(
        "### Step 1: Upload your training data\n"
        "Upload an Excel file (`.xlsx`) containing your texts and corresponding labels."
    )
    with gr.Row():
        file_input = gr.File(
            label="Upload Training Excel File (.xlsx)",
            file_types=[".xlsx"],
            interactive=True,
        )
        load_button = gr.Button("๐ Load Training File")
    status_output = gr.Markdown()
    gr.Markdown("After loading, select the text and target columns for training.")
    with gr.Row():
        text_column_dropdown = gr.Dropdown(
            label="Text column",
            interactive=True,
            info="Select the column that contains the text data.",
        )
        target_column_dropdown = gr.Dropdown(
            label="Target column",
            interactive=True,
            info="Select the column that contains the labels to predict.",
        )
    train_button = gr.Button("๐ Train Model")
    training_status = gr.Markdown()

    # ---- Step 2: single-text prediction -------------------------------------
    gr.Markdown(
        "### Step 2: Predict on single texts\n"
        "Enter a text below to get the model's predicted label."
    )
    with gr.Row():
        input_text = gr.Textbox(
            label="Enter text to classify",
            placeholder="Type some text here...",
        )
        predict_button = gr.Button("๐ Predict Single")
    prediction_output = gr.Markdown()

    # ---- Step 3: batch prediction and export --------------------------------
    gr.Markdown(
        "### Step 3: Batch prediction\n"
        "Upload a new Excel file with texts to predict multiple labels at once."
    )
    with gr.Row():
        pred_file_input = gr.File(
            label="Upload Prediction Excel File (.xlsx)",
            file_types=[".xlsx"],
        )
        load_pred_button = gr.Button("๐ Load Prediction File")
    pred_status = gr.Markdown()
    pred_text_column_dropdown = gr.Dropdown(
        label="Text column for Prediction",
        info="Select the column in your prediction file containing text to classify.",
    )
    batch_pred_button = gr.Button("โก Run Batch Prediction")
    batch_pred_status = gr.Markdown()
    batch_pred_preview = gr.Dataframe(headers=None, interactive=False)
    export_button = gr.Button("โฌ๏ธ Export Predictions")
    gr.Markdown(
        "Click **Export Predictions** to download the batch prediction results as an Excel file."
    )

    # ---- Event wiring: each button calls one callback defined above ---------
    load_button.click(
        fn=load_training_file,
        inputs=file_input,
        outputs=[status_output, text_column_dropdown, target_column_dropdown],
    )
    train_button.click(
        fn=train_model,
        inputs=[text_column_dropdown, target_column_dropdown],
        outputs=training_status,
    )
    predict_button.click(
        fn=predict_label,
        inputs=input_text,
        outputs=prediction_output,
    )
    load_pred_button.click(
        fn=load_prediction_file,
        inputs=pred_file_input,
        outputs=[pred_status, pred_text_column_dropdown],
    )
    batch_pred_button.click(
        fn=run_batch_prediction,
        inputs=pred_text_column_dropdown,
        outputs=[batch_pred_status, batch_pred_preview],
    )

    # The download component is created here, after every other component,
    # so it is the last one added to the layout.
    export_file = gr.File(file_types=[".xlsx"])
    export_button.click(
        fn=export_predictions,
        inputs=[],
        outputs=export_file,
    )
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()