clementBE committed on
Commit
2b8217b
·
verified ·
1 Parent(s): ea1fb77

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -41
app.py CHANGED
@@ -7,76 +7,75 @@ from sklearn.feature_extraction.text import TfidfVectorizer
7
  from sklearn.metrics import classification_report
8
 
9
  model = None
10
- X_test = None
11
- y_test = None
12
 
13
  def load_excel(file):
14
- # Read Excel file
15
  xls = pd.ExcelFile(file.name)
16
- # Just take first sheet to get columns
17
- df = pd.read_excel(xls, xls.sheet_names[0])
18
- columns = list(df.columns)
19
- return columns, xls.sheet_names
20
 
21
- def load_sheet(file, sheet_name):
22
  xls = pd.ExcelFile(file.name)
23
- df = pd.read_excel(xls, sheet_name)
24
- return df.head().to_dict(), list(df.columns)
 
 
 
 
 
25
 
26
  def train_model(file, sheet_name, text_col, target_col):
27
- global model, X_test, y_test
28
-
29
  xls = pd.ExcelFile(file.name)
30
- df = pd.read_excel(xls, sheet_name)
31
-
32
- # Drop rows with missing in selected columns
33
  df = df[[text_col, target_col]].dropna()
34
-
35
  X = df[text_col].astype(str)
36
  y = df[target_col].astype(str)
37
-
38
- # Split train/test for evaluation
39
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
40
 
41
- # Simple pipeline TFIDF + Logistic Regression
42
  model = make_pipeline(TfidfVectorizer(), LogisticRegression(max_iter=200))
43
  model.fit(X_train, y_train)
44
 
45
  y_pred = model.predict(X_test)
46
- report = classification_report(y_test, y_pred)
47
- return report
48
 
49
- def predict_text(text):
50
  global model
51
  if model is None:
52
  return "Please train the model first."
53
- pred = model.predict([text])
54
- return pred[0]
55
 
56
  with gr.Blocks() as demo:
57
- gr.Markdown("## Upload Excel training file")
58
- upload = gr.File(label="Upload XLSX file")
 
 
 
 
 
 
 
59
 
60
- cols_dropdown = gr.Dropdown(label="Select Category Column for Training")
61
- sheet_dropdown = gr.Dropdown(label="Select Sheet", interactive=True)
62
-
63
  train_btn = gr.Button("Train Model")
64
- output_train = gr.Textbox(label="Training Report", lines=10)
65
 
66
- text_input = gr.Textbox(label="Text to Classify")
67
- predict_btn = gr.Button("Predict")
68
- output_pred = gr.Textbox(label="Prediction")
69
 
70
- # When file uploaded, populate sheets dropdown
71
- upload.change(lambda f: load_excel(f), inputs=upload, outputs=[cols_dropdown, sheet_dropdown])
72
 
73
- # When sheet selected, load sheet to get columns for text + target
74
- sheet_dropdown.change(lambda f, s: load_sheet(f, s), inputs=[upload, sheet_dropdown], outputs=[output_train, cols_dropdown])
 
75
 
76
- # When train clicked, train the model using selected columns
77
- train_btn.click(train_model, inputs=[upload, sheet_dropdown, cols_dropdown, cols_dropdown], outputs=output_train)
 
78
 
79
- # Predict button
80
- predict_btn.click(predict_text, inputs=text_input, outputs=output_pred)
81
 
82
  demo.launch()
 
7
  from sklearn.metrics import classification_report
8
 
9
  model = None
 
 
10
 
11
  def load_excel(file):
 
12
  xls = pd.ExcelFile(file.name)
13
+ sheets = xls.sheet_names
14
+ return gr.update(choices=sheets, value=sheets[0]) # Set dropdown choices
 
 
15
 
16
+ def load_columns(file, sheet_name):
17
  xls = pd.ExcelFile(file.name)
18
+ df = pd.read_excel(xls, sheet_name=sheet_name)
19
+ columns = list(df.columns)
20
+ return (
21
+ gr.update(choices=columns, value=columns[0]),
22
+ gr.update(choices=columns, value=columns[1] if len(columns) > 1 else columns[0]),
23
+ df.head().to_markdown()
24
+ )
25
 
26
  def train_model(file, sheet_name, text_col, target_col):
27
+ global model
 
28
  xls = pd.ExcelFile(file.name)
29
+ df = pd.read_excel(xls, sheet_name=sheet_name)
 
 
30
  df = df[[text_col, target_col]].dropna()
 
31
  X = df[text_col].astype(str)
32
  y = df[target_col].astype(str)
33
+
 
34
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
35
 
 
36
  model = make_pipeline(TfidfVectorizer(), LogisticRegression(max_iter=200))
37
  model.fit(X_train, y_train)
38
 
39
  y_pred = model.predict(X_test)
40
+ return classification_report(y_test, y_pred)
 
41
 
42
+ def predict(text):
43
  global model
44
  if model is None:
45
  return "Please train the model first."
46
+ return model.predict([text])[0]
 
47
 
48
  with gr.Blocks() as demo:
49
+ gr.Markdown("## Excel Text Classifier")
50
+
51
+ with gr.Row():
52
+ file_input = gr.File(label="Upload Excel (.xlsx)")
53
+ sheet_dropdown = gr.Dropdown(label="Select Sheet")
54
+
55
+ with gr.Row():
56
+ text_col_dropdown = gr.Dropdown(label="Text Column")
57
+ target_col_dropdown = gr.Dropdown(label="Target Category Column")
58
 
59
+ sheet_preview = gr.Textbox(label="Sheet preview", lines=10)
 
 
60
  train_btn = gr.Button("Train Model")
61
+ train_output = gr.Textbox(label="Training Report", lines=10)
62
 
63
+ text_input = gr.Textbox(label="Enter text to classify")
64
+ pred_btn = gr.Button("Predict")
65
+ prediction_output = gr.Textbox(label="Prediction")
66
 
67
+ # File triggers sheet name dropdown
68
+ file_input.change(load_excel, inputs=file_input, outputs=sheet_dropdown)
69
 
70
+ # Sheet selection triggers column dropdowns and preview
71
+ sheet_dropdown.change(load_columns, inputs=[file_input, sheet_dropdown],
72
+ outputs=[text_col_dropdown, target_col_dropdown, sheet_preview])
73
 
74
+ # Train model
75
+ train_btn.click(train_model, inputs=[file_input, sheet_dropdown, text_col_dropdown, target_col_dropdown],
76
+ outputs=train_output)
77
 
78
+ # Predict
79
+ pred_btn.click(predict, inputs=text_input, outputs=prediction_output)
80
 
81
  demo.launch()