Spaces:

clementBE
/

Trainer4Xlsx

Paused

App Files Files Community

clementBE committed on Jul 11, 2025

Commit

2b8217b

verified ·

1 Parent(s): ea1fb77

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -41

app.py CHANGED Viewed

@@ -7,76 +7,75 @@ from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics import classification_report
 model = None
-X_test = None
-y_test = None
 def load_excel(file):
-    # Read Excel file
     xls = pd.ExcelFile(file.name)
-    # Just take first sheet to get columns
-    df = pd.read_excel(xls, xls.sheet_names[0])
-    columns = list(df.columns)
-    return columns, xls.sheet_names
-def load_sheet(file, sheet_name):
     xls = pd.ExcelFile(file.name)
-    df = pd.read_excel(xls, sheet_name)
-    return df.head().to_dict(), list(df.columns)
 def train_model(file, sheet_name, text_col, target_col):
-    global model, X_test, y_test
     xls = pd.ExcelFile(file.name)
-    df = pd.read_excel(xls, sheet_name)
-    # Drop rows with missing in selected columns
     df = df[[text_col, target_col]].dropna()
     X = df[text_col].astype(str)
     y = df[target_col].astype(str)
-    # Split train/test for evaluation
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-    # Simple pipeline TFIDF + Logistic Regression
     model = make_pipeline(TfidfVectorizer(), LogisticRegression(max_iter=200))
     model.fit(X_train, y_train)
     y_pred = model.predict(X_test)
-    report = classification_report(y_test, y_pred)
-    return report
-def predict_text(text):
     global model
     if model is None:
         return "Please train the model first."
-    pred = model.predict([text])
-    return pred[0]
 with gr.Blocks() as demo:
-    gr.Markdown("## Upload Excel training file")
-    upload = gr.File(label="Upload XLSX file")
-    cols_dropdown = gr.Dropdown(label="Select Category Column for Training")
-    sheet_dropdown = gr.Dropdown(label="Select Sheet", interactive=True)
     train_btn = gr.Button("Train Model")
-    output_train = gr.Textbox(label="Training Report", lines=10)
-    text_input = gr.Textbox(label="Text to Classify")
-    predict_btn = gr.Button("Predict")
-    output_pred = gr.Textbox(label="Prediction")
-    # When file uploaded, populate sheets dropdown
-    upload.change(lambda f: load_excel(f), inputs=upload, outputs=[cols_dropdown, sheet_dropdown])
-    # When sheet selected, load sheet to get columns for text + target
-    sheet_dropdown.change(lambda f, s: load_sheet(f, s), inputs=[upload, sheet_dropdown], outputs=[output_train, cols_dropdown])
-    # When train clicked, train the model using selected columns
-    train_btn.click(train_model, inputs=[upload, sheet_dropdown, cols_dropdown, cols_dropdown], outputs=output_train)
-    # Predict button
-    predict_btn.click(predict_text, inputs=text_input, outputs=output_pred)
 demo.launch()

 from sklearn.metrics import classification_report
 model = None
 def load_excel(file):
     xls = pd.ExcelFile(file.name)
+    sheets = xls.sheet_names
+    return gr.update(choices=sheets, value=sheets[0])  # Set dropdown choices
+def load_columns(file, sheet_name):
     xls = pd.ExcelFile(file.name)
+    df = pd.read_excel(xls, sheet_name=sheet_name)
+    columns = list(df.columns)
+    return (
+        gr.update(choices=columns, value=columns[0]),
+        gr.update(choices=columns, value=columns[1] if len(columns) > 1 else columns[0]),
+        df.head().to_markdown()
+    )
 def train_model(file, sheet_name, text_col, target_col):
+    global model
     xls = pd.ExcelFile(file.name)
+    df = pd.read_excel(xls, sheet_name=sheet_name)
     df = df[[text_col, target_col]].dropna()
     X = df[text_col].astype(str)
     y = df[target_col].astype(str)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
     model = make_pipeline(TfidfVectorizer(), LogisticRegression(max_iter=200))
     model.fit(X_train, y_train)
     y_pred = model.predict(X_test)
+    return classification_report(y_test, y_pred)
+def predict(text):
     global model
     if model is None:
         return "Please train the model first."
+    return model.predict([text])[0]
 with gr.Blocks() as demo:
+    gr.Markdown("## Excel Text Classifier")
+    with gr.Row():
+        file_input = gr.File(label="Upload Excel (.xlsx)")
+        sheet_dropdown = gr.Dropdown(label="Select Sheet")
+    with gr.Row():
+        text_col_dropdown = gr.Dropdown(label="Text Column")
+        target_col_dropdown = gr.Dropdown(label="Target Category Column")
+    sheet_preview = gr.Textbox(label="Sheet preview", lines=10)
     train_btn = gr.Button("Train Model")
+    train_output = gr.Textbox(label="Training Report", lines=10)
+    text_input = gr.Textbox(label="Enter text to classify")
+    pred_btn = gr.Button("Predict")
+    prediction_output = gr.Textbox(label="Prediction")
+    # File triggers sheet name dropdown
+    file_input.change(load_excel, inputs=file_input, outputs=sheet_dropdown)
+    # Sheet selection triggers column dropdowns and preview
+    sheet_dropdown.change(load_columns, inputs=[file_input, sheet_dropdown],
+                          outputs=[text_col_dropdown, target_col_dropdown, sheet_preview])
+    # Train model
+    train_btn.click(train_model, inputs=[file_input, sheet_dropdown, text_col_dropdown, target_col_dropdown],
+                    outputs=train_output)
+    # Predict
+    pred_btn.click(predict, inputs=text_input, outputs=prediction_output)
 demo.launch()