Spaces:

clementBE
/

smart_xlsx

Sleeping

App Files Files Community

clementBE committed on Jul 11, 2025

Commit

3fb95a5

verified ·

1 Parent(s): d3a453e

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -31

app.py CHANGED Viewed

@@ -1,55 +1,49 @@
 import gradio as gr
 import pandas as pd
-import matplotlib.pyplot as plt
 from sklearn.model_selection import train_test_split
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.metrics import classification_report
-import io
 def load_data(file):
     if file is None:
-        return None, []
     try:
         if file.name.endswith(".csv"):
             df = pd.read_csv(file.name)
         else:
             df = pd.read_excel(file.name)
-        return df, list(df.columns)
     except Exception as e:
-        return None, []
 def train_model(df, target_col, feature_cols):
-    if df is None:
         return "Please upload a valid dataset first."
     if target_col not in df.columns:
         return "Target column not found in dataset."
     if not feature_cols:
         return "Please select at least one feature column."
-    # Drop rows with NA in selected columns
     df_clean = df[[target_col] + feature_cols].dropna()
     if df_clean.empty:
-        return "After removing rows with missing values, no data left to train."
     X = df_clean[feature_cols]
     y = df_clean[target_col]
-    # Simple check for classification: target should be categorical or integer
     if y.nunique() < 2:
-        return "Target column must have at least 2 unique classes for classification."
-    # Encode categorical features if any
     X_enc = pd.get_dummies(X)
     try:
-        X_train, X_test, y_train, y_test = train_test_split(
-            X_enc, y, test_size=0.2, random_state=42
-        )
     except ValueError as e:
         return f"Error splitting data: {e}"
     if X_train.shape[0] == 0 or X_test.shape[0] == 0:
-        return "Train or test split resulted in empty dataset. Try reducing test size or adding more data."
     model = RandomForestClassifier(random_state=42)
     model.fit(X_train, y_train)
@@ -59,29 +53,36 @@ def train_model(df, target_col, feature_cols):
     return report
 with gr.Blocks() as demo:
-    gr.Markdown("# XLSX/CSV Classifier with Sklearn")
     df_state = gr.State(None)
-    cols_state = gr.State([])
     with gr.Row():
-        file_input = gr.File(label="Upload CSV or Excel")
-        column_selector = gr.Dropdown(label="Target Column", interactive=True)
     with gr.Row():
-        features_selector = gr.CheckboxGroup(label="Feature Columns", interactive=True)
-    train_btn = gr.Button("Train Classifier")
-    output_text = gr.Textbox(label="Classification Report", lines=10)
-    def on_file_upload(file):
-        df, columns = load_data(file)
-        return df, columns, columns, []
-    file_input.change(on_file_upload, inputs=file_input, outputs=[df_state, column_selector, features_selector])
     train_btn.click(
-        train_model,
-        inputs=[df_state, column_selector, features_selector],
-        outputs=output_text,
     )
 demo.launch()

 import gradio as gr
 import pandas as pd
 from sklearn.model_selection import train_test_split
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.metrics import classification_report
 def load_data(file):
     if file is None:
+        return None, [], pd.DataFrame()
     try:
         if file.name.endswith(".csv"):
             df = pd.read_csv(file.name)
         else:
             df = pd.read_excel(file.name)
+        columns = list(df.columns)
+        return df, columns, df.head(100)  # Show first 100 rows as preview
     except Exception as e:
+        return None, [], pd.DataFrame()
 def train_model(df, target_col, feature_cols):
+    if df is None or df.empty:
         return "Please upload a valid dataset first."
     if target_col not in df.columns:
         return "Target column not found in dataset."
     if not feature_cols:
         return "Please select at least one feature column."
     df_clean = df[[target_col] + feature_cols].dropna()
     if df_clean.empty:
+        return "No data left after removing missing values."
     X = df_clean[feature_cols]
     y = df_clean[target_col]
     if y.nunique() < 2:
+        return "Target must have at least 2 classes."
     X_enc = pd.get_dummies(X)
     try:
+        X_train, X_test, y_train, y_test = train_test_split(X_enc, y, test_size=0.2, random_state=42)
     except ValueError as e:
         return f"Error splitting data: {e}"
     if X_train.shape[0] == 0 or X_test.shape[0] == 0:
+        return "Empty train or test set after splitting."
     model = RandomForestClassifier(random_state=42)
     model.fit(X_train, y_train)
     return report
 with gr.Blocks() as demo:
+    gr.Markdown("# XLSX/CSV Classification App with Table Preview")
     df_state = gr.State(None)
     with gr.Row():
+        file_input = gr.File(label="Upload CSV or Excel file")
     with gr.Row():
+        table_preview = gr.DataFrame(headers=None, datatype=["str"], interactive=False, label="Data Preview")
+    with gr.Row():
+        target_col = gr.Dropdown(label="Select Target Column", choices=[])
+    with gr.Row():
+        feature_cols = gr.CheckboxGroup(label="Select Feature Columns", choices=[])
+    train_btn = gr.Button("Train Model")
+    output = gr.Textbox(label="Classification Report", lines=10)
+    def on_file_change(file):
+        df, columns, preview = load_data(file)
+        # Store df in state
+        return df, columns, columns, preview
+    file_input.change(
+        fn=on_file_change,
+        inputs=file_input,
+        outputs=[df_state, target_col, feature_cols, table_preview]
+    )
     train_btn.click(
+        fn=train_model,
+        inputs=[df_state, target_col, feature_cols],
+        outputs=output
     )
 demo.launch()