Spaces:

clementBE
/

Trainer4Xlsx

Paused

App Files Files Community

clementBE committed on Jul 11, 2025

Commit

6c51406

verified ·

1 Parent(s): e6b6ed3

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -45

app.py CHANGED Viewed

@@ -1,71 +1,72 @@
 import gradio as gr
 import pandas as pd
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.linear_model import LogisticRegression
-from sklearn.pipeline import Pipeline
-model = None
-target_column = None
 df_train = None
def load_training_file(file_path):
    """List the sheets of the uploaded workbook for the sheet dropdown.

    Args:
        file_path: Upload value handed to ``pd.ExcelFile``; ``None`` when no
            file has been uploaded.

    Returns:
        A ``gr.update`` carrying the sheet names as choices with the first
        sheet preselected, or an empty update when there is nothing to list.
    """
    if file_path is None:
        return gr.update(choices=[], value=None)

    # The context manager closes the workbook handle once the names are read;
    # the original left it open.
    with pd.ExcelFile(file_path) as workbook:
        sheet_names = list(workbook.sheet_names)

    if not sheet_names:
        return gr.update(choices=[], value=None)
    return gr.update(choices=sheet_names, value=sheet_names[0])  # return gr.update for dropdown
def load_columns(sheet_name, file_path):
    """Read the chosen sheet into ``df_train`` and list its columns.

    Args:
        sheet_name: Sheet selected in the dropdown; ``None`` when the
            dropdown has been cleared.
        file_path: Upload value handed to ``pd.read_excel``; ``None`` when no
            file has been uploaded.

    Returns:
        A ``gr.update`` carrying the column names as choices with the first
        column preselected, or an empty update when nothing could be loaded.
    """
    global df_train

    # ``sheet_name=None`` would make ``read_excel`` return a dict of every
    # sheet instead of a single DataFrame, so it is rejected up front.
    if file_path is None or sheet_name is None:
        df_train = None
        return gr.update(choices=[], value=None)

    df_train = pd.read_excel(file_path, sheet_name=sheet_name)
    column_names = df_train.columns.tolist()
    if not column_names:
        # An empty sheet has no first column to preselect.
        return gr.update(choices=[], value=None)
    return gr.update(choices=column_names, value=column_names[0])  # also update dropdown
def train_model(column):
    """Train a TF-IDF + logistic-regression classifier on ``df_train``.

    The text always comes from the ``Sentence`` column; ``column`` names the
    label column. On success the fitted pipeline is stored in the module-level
    ``model`` and ``column`` in ``target_column``.

    Args:
        column: Name of the label column in ``df_train``.

    Returns:
        A status message describing success or the reason training was
        skipped.
    """
    global model, target_column, df_train

    if df_train is None or column not in df_train.columns:
        return "Please load a valid training file and column."
    if "Sentence" not in df_train.columns:
        # Without this guard ``dropna`` raises KeyError on the missing column.
        return "❌ Training file must contain a 'Sentence' column."

    df_filtered = df_train.dropna(subset=["Sentence", column])
    if df_filtered.empty:
        return "❌ No valid data after dropping missing values."

    # Cast to str so numeric or date cells do not break the vectorizer.
    X = df_filtered["Sentence"].astype(str)
    y = df_filtered[column]
    if y.nunique() < 2:
        # LogisticRegression cannot be fitted on a single class.
        return "❌ Target column needs at least two distinct labels to train."

    candidate = Pipeline([
        ("tfidf", TfidfVectorizer()),
        ("clf", LogisticRegression(max_iter=1000))
    ])
    try:
        candidate.fit(X, y)
    except ValueError as exc:
        return f"❌ Training failed: {exc}"

    # Publish only after a successful fit so a failed run never leaves an
    # unfitted model or a mismatched target column behind.
    model = candidate
    target_column = column
    return f"✅ Model trained on {len(X)} samples for target column: {target_column}"
def test_model(file_path):
    """Predict labels for an uploaded test workbook and preview the result.

    Every path returns a DataFrame because the function is wired to a
    ``gr.Dataframe`` output. NOTE(review): that component is believed to read
    a plain string as a CSV path rather than display it — confirm against the
    installed Gradio version.

    Args:
        file_path: Upload value handed to ``pd.read_excel``; ``None`` when no
            file has been uploaded.

    Returns:
        The first 20 rows of the test sheet with an added
        ``Predicted_<target_column>`` column, or a one-row DataFrame whose
        ``Message`` column explains why no prediction was made.
    """
    def _notice(text):
        # Wrap a status message so it can be shown in the Dataframe output.
        return pd.DataFrame({"Message": [text]})

    if model is None or target_column is None:
        return _notice("⚠️ Please train a model first.")
    if file_path is None:
        return _notice("⚠️ Please upload a test file.")

    df_test = pd.read_excel(file_path)
    if "Sentence" not in df_test.columns:
        return _notice("❌ Test file must contain a 'Sentence' column.")
    if df_test.empty:
        return _notice("❌ Test file contains no rows.")

    # Blank cells become empty strings; other cells are cast to str so the
    # vectorizer only ever sees text.
    sentences = df_test["Sentence"].fillna("").astype(str)
    # str() keeps the concatenation valid when the target header is not text.
    df_test["Predicted_" + str(target_column)] = model.predict(sentences)
    return df_test.head(20)
 with gr.Blocks() as demo:
-    gr.Markdown("## 🧠 Text Classifier with XLSX (Gradio)")
     with gr.Row():
-        training_file = gr.File(label="Upload Training XLSX")
-        load_btn = gr.Button("Load Sheets")
-    sheet_dropdown = gr.Dropdown(label="Select Sheet", choices=[])
-    column_dropdown = gr.Dropdown(label="Select Target Column", choices=[])
-    train_btn = gr.Button("Train Model")
-    status_output = gr.Textbox(label="Training Output")
     with gr.Row():
-        test_file = gr.File(label="Upload Test XLSX")
-        test_btn = gr.Button("Test Model")
-    prediction_output = gr.Dataframe(label="Predictions")
-    # Events
-    load_btn.click(fn=load_training_file, inputs=training_file, outputs=sheet_dropdown)
-    sheet_dropdown.change(fn=load_columns, inputs=[sheet_dropdown, training_file], outputs=column_dropdown)
-    train_btn.click(fn=train_model, inputs=column_dropdown, outputs=status_output)
-    test_btn.click(fn=test_model, inputs=test_file, outputs=prediction_output)
 demo.launch()

 import gradio as gr
 import pandas as pd
+from sklearn.pipeline import Pipeline
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.linear_model import LogisticRegression
+import tempfile
 df_train = None
+model = None
def load_training_file(file):
    """Load the uploaded Excel workbook into the module-level ``df_train``.

    The upload is read in place instead of being copied to a second temporary
    file, so nothing is left behind on disk. ``df_train`` is reset to ``None``
    whenever loading fails, so stale data cannot be trained on.

    Args:
        file: Value delivered by the ``gr.File`` component: ``None`` when
            nothing is uploaded, otherwise a path string, an ``os.PathLike``,
            or an object whose ``name`` attribute holds the path of the
            uploaded copy. NOTE(review): the exact type depends on the
            installed Gradio version — confirm.

    Returns:
        A 3-tuple ``(status message, text-column dropdown update,
        target-column dropdown update)``.
    """
    import os  # local import: only needed here to normalise the upload path

    global df_train

    def _failure(message):
        # Report a problem and clear both column dropdowns.
        return (
            message,
            gr.update(choices=[], value=None),
            gr.update(choices=[], value=None),
        )

    if file is None:
        df_train = None
        return _failure("❌ Please upload a file.")

    # Plain paths are used directly; other upload objects expose the path of
    # their on-disk copy through ``.name``.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = getattr(file, "name", None)
    if not path:
        df_train = None
        return _failure("❌ Could not locate the uploaded file.")

    try:
        frame = pd.read_excel(path)
    except Exception as exc:  # UI boundary: report the problem instead of crashing the callback
        df_train = None
        return _failure(f"❌ Could not read the Excel file: {exc}")

    col_names = list(frame.columns)
    if not col_names:
        # An empty sheet has no first or last column to preselect.
        df_train = None
        return _failure("❌ The file contains no columns.")

    df_train = frame
    return (
        f"✅ Loaded file with {len(df_train)} rows",
        gr.update(choices=col_names, value=col_names[0]),
        gr.update(choices=col_names, value=col_names[-1]),
    )
def train_model(text_column, target_column):
    """Fit a TF-IDF + logistic-regression pipeline on the loaded data.

    Rows with a missing value in either selected column are dropped before
    training. The module-level ``model`` is replaced only after a successful
    fit, so a failed run keeps the previously trained model usable.

    Args:
        text_column: Name of the column in ``df_train`` holding the text.
        target_column: Name of the column in ``df_train`` holding the labels.

    Returns:
        A status message describing success or the reason training was
        skipped.
    """
    global model, df_train

    if df_train is None:
        return "⚠️ Please load a training file first."
    if text_column not in df_train.columns or target_column not in df_train.columns:
        return "❌ Selected columns not found in the data."

    df_filtered = df_train.dropna(subset=[text_column, target_column])
    if df_filtered.empty:
        return "❌ No valid data after dropping missing values."

    # Cast to str so numeric or date cells do not break the vectorizer.
    X = df_filtered[text_column].astype(str)
    y = df_filtered[target_column]
    if y.nunique() < 2:
        # LogisticRegression cannot be fitted on a single class.
        return "❌ Target column needs at least two distinct labels to train."

    candidate = Pipeline([
        ("tfidf", TfidfVectorizer()),
        ("clf", LogisticRegression(max_iter=1000))
    ])
    try:
        candidate.fit(X, y)
    except ValueError as exc:
        return f"❌ Training failed: {exc}"

    model = candidate
    return f"✅ Model trained with {len(X)} samples."
def predict(text):
    """Classify a single piece of text with the trained model.

    Args:
        text: The text to classify; may be empty or ``None``.

    Returns:
        The predicted label as a string, or a warning message when no model
        has been trained or no text was entered.
    """
    if model is None:
        return "⚠️ Please train the model first."
    # Blank input carries no features, so ask for text instead of predicting.
    if text is None or not str(text).strip():
        return "⚠️ Please enter some text to predict."
    # ``model.predict`` returns an array; convert its single element to a
    # plain string for the text output.
    return str(model.predict([str(text)])[0])
 with gr.Blocks() as demo:
+    gr.Markdown("## 🧠 Text Classification Trainer")
     with gr.Row():
+        training_file = gr.File(label="Upload Excel file (.xlsx)")
+        status = gr.Textbox(label="Status", interactive=False)
     with gr.Row():
+        text_column = gr.Dropdown(choices=[], label="Select Text Column")
+        target_column = gr.Dropdown(choices=[], label="Select Target Column")
+        train_btn = gr.Button("Train Model")
+    with gr.Row():
+        input_text = gr.Textbox(label="Enter text to predict")
+        output_label = gr.Textbox(label="Predicted Label", interactive=False)
+        predict_btn = gr.Button("Predict")
+    training_file.change(fn=load_training_file, inputs=[training_file], outputs=[status, text_column, target_column])
+    train_btn.click(fn=train_model, inputs=[text_column, target_column], outputs=[status])
+    predict_btn.click(fn=predict, inputs=[input_text], outputs=[output_label])
 demo.launch()