Spaces:

clementBE
/

Trainer4Xlsx

Paused

App Files Files Community

clementBE committed on Jul 11, 2025

Commit

16b89ff

verified ·

1 Parent(s): 489d682

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -13

app.py CHANGED Viewed

@@ -4,7 +4,6 @@ from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import classification_report, accuracy_score, precision_score
-import os
 df_train = None
 model = None
@@ -24,6 +23,15 @@ def load_training_file(file):
     return f"✅ Loaded training file with {len(df_train)} rows", gr.update(choices=col_names, value=col_names[0]), gr.update(choices=col_names, value=col_names[-1])
 def train_model(text_column, target_column):
     global model, vectorizer, test_metrics, df_train
@@ -35,6 +43,9 @@ def train_model(text_column, target_column):
     df_filtered = df_train.dropna(subset=[text_column, target_column])
     X_train, X_test, y_train, y_test = train_test_split(
         df_filtered[text_column], df_filtered[target_column], test_size=0.2, random_state=42
     )
@@ -52,7 +63,14 @@ def train_model(text_column, target_column):
     precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
     report = classification_report(y_test, y_pred, zero_division=0)
-    test_metrics = f"Accuracy: {accuracy:.2%}\nPrecision (weighted): {precision:.2%}\n\nClassification Report:\n{report}"
     return f"✅ Model trained on {len(df_filtered)} examples.\n\nTest set evaluation:\n{test_metrics}"
@@ -100,47 +118,85 @@ def export_predictions():
     global df_predict_results
     if df_predict_results is None:
         return None
-    # Save file locally - current working directory
-    export_path = "predictions_output.xlsx"
     df_predict_results.to_excel(export_path, index=False)
     return export_path
 with gr.Blocks() as demo:
     gr.Markdown("# 🧠 Text Classification App")
-    # Training data upload
     with gr.Row():
-        file_input = gr.File(label="Upload Training Excel File (.xlsx)", file_types=[".xlsx"])
         load_button = gr.Button("📂 Load Training File")
     status_output = gr.Markdown()
     with gr.Row():
-        text_column_dropdown = gr.Dropdown(label="Text column")
-        target_column_dropdown = gr.Dropdown(label="Target column")
     train_button = gr.Button("🚀 Train Model")
     training_status = gr.Markdown()
-    # Single prediction
     with gr.Row():
-        input_text = gr.Textbox(label="Enter text to classify")
         predict_button = gr.Button("🔍 Predict Single")
     prediction_output = gr.Markdown()
-    # Batch prediction upload
     with gr.Row():
         pred_file_input = gr.File(label="Upload Prediction Excel File (.xlsx)", file_types=[".xlsx"])
         load_pred_button = gr.Button("📂 Load Prediction File")
     pred_status = gr.Markdown()
-    pred_text_column_dropdown = gr.Dropdown(label="Text column for Prediction")
     batch_pred_button = gr.Button("⚡ Run Batch Prediction")
     batch_pred_status = gr.Markdown()
     batch_pred_preview = gr.Dataframe(headers=None, interactive=False)
     export_button = gr.Button("⬇️ Export Predictions")
     # Button connections
     load_button.click(
@@ -173,7 +229,6 @@ with gr.Blocks() as demo:
         outputs=[batch_pred_status, batch_pred_preview]
     )
-    # Export returns a downloadable file
     export_button.click(
         fn=export_predictions,
         inputs=[],

 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import classification_report, accuracy_score, precision_score
 df_train = None
 model = None
     return f"✅ Loaded training file with {len(df_train)} rows", gr.update(choices=col_names, value=col_names[0]), gr.update(choices=col_names, value=col_names[-1])
+def interpret_score(score):
+    # Translate a numeric score into a one-line, user-facing performance message.
+    # train_model calls this with its `accuracy` value and embeds the returned
+    # text in the metrics summary shown after training.
+    # Bands: score < 0.6 -> LOW, 0.6 <= score < 0.8 -> MODERATE, otherwise STRONG.
+    if score < 0.6:
+        return "🔴 The model performance is LOW. Consider improving your data or features."
+    elif score < 0.8:
+        return "🟠 The model performance is MODERATE. It may work but could be improved."
+    else:
+        return "🟢 The model performance is STRONG. The model is reliable."
 def train_model(text_column, target_column):
     global model, vectorizer, test_metrics, df_train
     df_filtered = df_train.dropna(subset=[text_column, target_column])
+    if len(df_filtered) < 10:
+        return "❌ Not enough data after filtering for training. Need at least 10 samples."
     X_train, X_test, y_train, y_test = train_test_split(
         df_filtered[text_column], df_filtered[target_column], test_size=0.2, random_state=42
     )
     precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
     report = classification_report(y_test, y_pred, zero_division=0)
+    performance_msg = interpret_score(accuracy)
+    test_metrics = (
+        f"Accuracy: {accuracy:.2%}\n"
+        f"Precision (weighted): {precision:.2%}\n\n"
+        f"{performance_msg}\n\n"
+        f"Classification Report:\n{report}"
+    )
     return f"✅ Model trained on {len(df_filtered)} examples.\n\nTest set evaluation:\n{test_metrics}"
     global df_predict_results
     if df_predict_results is None:
         return None
+    export_path = "/mnt/data/predictions_output.xlsx"  # NOTE(review): assumes /mnt/data exists and is writable in the deployment environment, and that Gradio may serve files from it — confirm
     df_predict_results.to_excel(export_path, index=False)
     return export_path
 with gr.Blocks() as demo:
     gr.Markdown("# 🧠 Text Classification App")
+    gr.Markdown(
+        """
+        ### How does this model work?
+        This app uses a **Logistic Regression** model trained on your text data.
+        - Text data is transformed into numbers using **TF-IDF vectorization**, which converts text into features based on word importance.
+        - The model learns patterns from labeled examples you provide.
+        - After training, it can predict the label/category of new text inputs.
+        \n
+        **Note:** Model performance depends heavily on quality and quantity of your data.
+        """
+    )
+    gr.Markdown(
+        "### Step 1: Upload your training data\n"
+        "Upload an Excel file (`.xlsx`) containing your texts and corresponding labels."
+    )
     with gr.Row():
+        file_input = gr.File(label="Upload Training Excel File (.xlsx)", file_types=[".xlsx"],
+                             interactive=True)
         load_button = gr.Button("📂 Load Training File")
     status_output = gr.Markdown()
+    gr.Markdown(
+        "After loading, select the text and target columns for training."
+    )
     with gr.Row():
+        text_column_dropdown = gr.Dropdown(label="Text column",
+                                           interactive=True,
+                                           info="Select the column that contains the text data.")
+        target_column_dropdown = gr.Dropdown(label="Target column",
+                                             interactive=True,
+                                             info="Select the column that contains the labels to predict.")
     train_button = gr.Button("🚀 Train Model")
     training_status = gr.Markdown()
+    gr.Markdown(
+        "### Step 2: Predict on single texts\n"
+        "Enter a text below to get the model's predicted label."
+    )
     with gr.Row():
+        input_text = gr.Textbox(label="Enter text to classify", placeholder="Type some text here...")
         predict_button = gr.Button("🔍 Predict Single")
     prediction_output = gr.Markdown()
+    gr.Markdown(
+        "### Step 3: Batch prediction\n"
+        "Upload a new Excel file with texts to predict multiple labels at once."
+    )
     with gr.Row():
         pred_file_input = gr.File(label="Upload Prediction Excel File (.xlsx)", file_types=[".xlsx"])
         load_pred_button = gr.Button("📂 Load Prediction File")
     pred_status = gr.Markdown()
+    pred_text_column_dropdown = gr.Dropdown(label="Text column for Prediction",
+                                            info="Select the column in your prediction file containing text to classify.")
     batch_pred_button = gr.Button("⚡ Run Batch Prediction")
     batch_pred_status = gr.Markdown()
     batch_pred_preview = gr.Dataframe(headers=None, interactive=False)
     export_button = gr.Button("⬇️ Export Predictions")
+    gr.Markdown(
+        "Click **Export Predictions** to download the batch prediction results as an Excel file."
+    )
     # Button connections
     load_button.click(
         outputs=[batch_pred_status, batch_pred_preview]
     )
     export_button.click(
         fn=export_predictions,
         inputs=[],