clementBE committed on
Commit
95316bb
·
verified ·
1 Parent(s): def006a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -5
app.py CHANGED
@@ -2,10 +2,13 @@ import gradio as gr
2
  import pandas as pd
3
  from sklearn.feature_extraction.text import TfidfVectorizer
4
  from sklearn.linear_model import LogisticRegression
 
 
5
 
6
  df_train = None
7
  model = None
8
  vectorizer = None
 
9
 
10
  def load_training_file(file):
11
  global df_train
@@ -18,7 +21,7 @@ def load_training_file(file):
18
  return f"✅ Loaded file with {len(df_train)} rows", gr.update(choices=col_names, value=col_names[0]), gr.update(choices=col_names, value=col_names[-1])
19
 
20
  def train_model(text_column, target_column):
21
- global model, vectorizer
22
 
23
  if df_train is None:
24
  return "❌ No training data loaded."
@@ -28,14 +31,29 @@ def train_model(text_column, target_column):
28
 
29
  df_filtered = df_train.dropna(subset=[text_column, target_column])
30
 
 
 
 
 
 
31
  vectorizer = TfidfVectorizer()
32
- X = vectorizer.fit_transform(df_filtered[text_column])
33
- y = df_filtered[target_column]
34
 
35
  model = LogisticRegression(max_iter=1000)
36
- model.fit(X, y)
 
 
 
 
 
 
 
 
 
 
37
 
38
- return f"✅ Model trained on {len(df_filtered)} examples."
39
 
40
  def predict_label(text_input):
41
  if model is None or vectorizer is None:
 
2
  import pandas as pd
3
  from sklearn.feature_extraction.text import TfidfVectorizer
4
  from sklearn.linear_model import LogisticRegression
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.metrics import classification_report, accuracy_score, precision_score
7
 
8
  df_train = None
9
  model = None
10
  vectorizer = None
11
+ test_metrics = None # To store metrics after training
12
 
13
  def load_training_file(file):
14
  global df_train
 
21
  return f"✅ Loaded file with {len(df_train)} rows", gr.update(choices=col_names, value=col_names[0]), gr.update(choices=col_names, value=col_names[-1])
22
 
23
  def train_model(text_column, target_column):
24
+ global model, vectorizer, test_metrics, df_train
25
 
26
  if df_train is None:
27
  return "❌ No training data loaded."
 
31
 
32
  df_filtered = df_train.dropna(subset=[text_column, target_column])
33
 
34
+ # Split train/test
35
+ X_train, X_test, y_train, y_test = train_test_split(
36
+ df_filtered[text_column], df_filtered[target_column], test_size=0.2, random_state=42
37
+ )
38
+
39
  vectorizer = TfidfVectorizer()
40
+ X_train_vec = vectorizer.fit_transform(X_train)
41
+ X_test_vec = vectorizer.transform(X_test)
42
 
43
  model = LogisticRegression(max_iter=1000)
44
+ model.fit(X_train_vec, y_train)
45
+
46
+ # Predict on test set
47
+ y_pred = model.predict(X_test_vec)
48
+
49
+ # Compute metrics
50
+ accuracy = accuracy_score(y_test, y_pred)
51
+ precision = precision_score(y_test, y_pred, average='weighted', zero_division=0) # weighted average for multiclass
52
+ report = classification_report(y_test, y_pred, zero_division=0)
53
+
54
+ test_metrics = f"Accuracy: {accuracy:.2%}\nPrecision (weighted): {precision:.2%}\n\nClassification Report:\n{report}"
55
 
56
+ return f"✅ Model trained on {len(df_filtered)} examples.\n\nTest set evaluation:\n{test_metrics}"
57
 
58
  def predict_label(text_input):
59
  if model is None or vectorizer is None: