Update app.py

app.py CHANGED
@@ -10,14 +10,15 @@ from pathlib import Path
 from sklearn.metrics import accuracy_score, classification_report
 from sklearn.model_selection import train_test_split
 
-from huggingface_hub import login
+from huggingface_hub import login
 from transformers import (
     AutoTokenizer,
     BertForSequenceClassification,
     TrainingArguments,
     Trainer,
     DataCollatorWithPadding,
-    EarlyStoppingCallback
+    EarlyStoppingCallback,
+    TrainerCallback
 )
 from datasets import Dataset, DatasetDict
 
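The newly imported EarlyStoppingCallback only runs if TrainingArguments enables periodic evaluation and best-model tracking; transformers raises an assertion error at train start otherwise. A minimal sketch of the arguments it needs (values are hypothetical, not taken from this app):

from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./results",                # hypothetical path
    num_train_epochs=3,                    # stand-in for the UI's num_epochs
    per_device_train_batch_size=16,        # stand-in for the UI's batch_size
    eval_strategy="epoch",                 # named evaluation_strategy in older releases
    save_strategy="epoch",                 # must align with eval for best-model loading
    load_best_model_at_end=True,           # required by EarlyStoppingCallback
    metric_for_best_model="eval_accuracy", # metric the patience counter watches
)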
@@ -361,6 +362,42 @@ def train_model_inline(uploaded_file, text_column, label_column, num_epochs, bat
 
     # Data collator
     data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
+
+    # Corrected callback class, built on transformers' TrainerCallback
+    class ProgressCallback(TrainerCallback):
+        def __init__(self, logs_list, total_steps):
+            self.logs = logs_list
+            self.total_steps = total_steps
+
+        def on_train_begin(self, args, state, control, **kwargs):
+            self.logs.append("🚀 Starting training...")
+            self.log_update()
+
+        def on_step_end(self, args, state, control, **kwargs):
+            if state.global_step % args.logging_steps == 0:
+                self.logs.append(f"Step {state.global_step}/{self.total_steps}")
+                self.log_update()
+
+        def on_epoch_end(self, args, state, control, **kwargs):
+            epoch = int(state.epoch)
+            self.logs.append(f"✅ Epoch {epoch} completed")
+            self.log_update()
+
+        def on_evaluate(self, args, state, control, metrics=None, **kwargs):
+            if metrics:
+                acc = metrics.get('eval_accuracy', 0)
+                loss = metrics.get('eval_loss', 0)
+                self.logs.append(f"📊 Eval - Accuracy: {acc:.4f}, Loss: {loss:.4f}")
+                self.log_update()
+
+        def log_update(self):
+            # Helper hook for pushing updates to the Gradio UI. The original
+            # code yielded log lines manually, but a TrainerCallback runs
+            # inside trainer.train() and cannot yield to the UI directly,
+            # so entries are appended to the shared list and the interface
+            # refreshes from it. Real-time output would need Gradio's
+            # streaming support; this is sufficient for the current setup.
+            pass
 
     # Create trainer
     trainer = Trainer(
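The log_update comment above notes that a TrainerCallback cannot yield to the Gradio UI by itself. One way to get live updates anyway, sketched here under the assumption that the surrounding function is a Gradio generator (as the removed yield-based code below suggests); stream_training and its polling interval are illustrative, not part of the app:

import threading

def stream_training(trainer, logs):
    done = threading.Event()

    def _run():
        try:
            trainer.train()        # ProgressCallback appends to logs as it runs
        finally:
            done.set()             # unblock the polling loop even on failure

    threading.Thread(target=_run, daemon=True).start()
    while not done.is_set():
        yield "\n".join(logs)      # each yield re-renders the Gradio output
        done.wait(1.0)             # poll roughly once per second
    yield "\n".join(logs)          # emit the final log state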
@@ -371,36 +408,9 @@ def train_model_inline(uploaded_file, text_column, label_column, num_epochs, bat
         tokenizer=tokenizer,
         data_collator=data_collator,
         compute_metrics=compute_metrics,
-        callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
+        callbacks=[EarlyStoppingCallback(early_stopping_patience=3), ProgressCallback(TRAINING_LOGS, total_steps)]
     )
 
-    TRAINING_LOGS.append("🚀 Starting training...")
-    yield "\n".join(TRAINING_LOGS)
-
-    # Custom training loop with progress updates
-    class ProgressCallback:
-        def __init__(self, logs_list):
-            self.logs = logs_list
-            self.step_count = 0
-
-        def on_step_end(self, args, state, control, model=None, **kwargs):
-            self.step_count += 1
-            if self.step_count % logging_steps == 0:
-                self.logs.append(f"Step {self.step_count}/{total_steps}")
-
-        def on_epoch_end(self, args, state, control, model=None, **kwargs):
-            epoch = int(state.epoch)
-            self.logs.append(f"✅ Epoch {epoch} completed")
-
-        def on_evaluate(self, args, state, control, model=None, logs=None, **kwargs):
-            if logs:
-                acc = logs.get('eval_accuracy', 0)
-                loss = logs.get('eval_loss', 0)
-                self.logs.append(f"📊 Eval - Accuracy: {acc:.4f}, Loss: {loss:.4f}")
-
-    progress_callback = ProgressCallback(TRAINING_LOGS)
-    trainer.add_callback(progress_callback)
-
     # Train the model
     try:
         trainer.train()
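The new callbacks line passes a total_steps value into ProgressCallback, but its definition sits outside the hunks shown here. A plausible derivation, assuming a single device and no gradient accumulation (train_dataset, batch_size, and num_epochs are taken from the surrounding function):

import math

steps_per_epoch = math.ceil(len(train_dataset) / batch_size)  # optimizer steps per pass
total_steps = steps_per_epoch * num_epochs                    # what ProgressCallback reports against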