Spaces:

msmaje
/

bert-complain-classifier

Sleeping

msmaje committed on Aug 24, 2025

Commit

e2aef65

verified ·

1 Parent(s): e0cb164

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -89,7 +89,7 @@ def load_and_prepare_local_dataset(file_path, text_column, label_column, test_si
         # Create train/validation split
         train_df, val_df = train_test_split(
             df,
-            test_size=test_size,
             random_state=42,
             stratify=df['label_idx']
         )
@@ -635,8 +635,23 @@ def push_to_hub_after_training(model_path, username, model_name, token):
 def count_tokens(text):
     """Count tokens in input text"""
-    if not text or CURRENT_TOKENIZER is None:
         return "Enter text to see token count"
     tokens = CURRENT_TOKENIZER(text, truncation=False)
     count = len(tokens['input_ids'])
     if count > 512:
@@ -673,7 +688,7 @@ if CURRENT_TOKENIZER is None:
         CURRENT_TOKENIZER = AutoTokenizer.from_pretrained("bert-base-uncased")
         print("✅ Tokenizer initialized successfully")
     except Exception as e:
-        print(f"⚠️ Warning: Could not initialize tokenizer: {e}")
 print("🚀 Launching BERT Complaint Classifier...")
 print("📍 Available at: http://localhost:7860")

         # Create train/validation split
         train_df, val_df = train_test_split(
             df,
+            test_size=0.2,
             random_state=42,
             stratify=df['label_idx']
         )
 def count_tokens(text):
     """Count tokens in input text"""
+    global CURRENT_TOKENIZER
+    if not text:
         return "Enter text to see token count"
+    # Attempt to load a default tokenizer if it's not set
+    if CURRENT_TOKENIZER is None:
+        try:
+            CURRENT_TOKENIZER = AutoTokenizer.from_pretrained("bert-base-uncased")
+            logger.info("Fallback: Tokenizer loaded in count_tokens function.")
+        except Exception as e:
+            logger.error(f"Failed to load tokenizer in count_tokens fallback: {e}")
+            return "❌ Error: Tokenizer not loaded. Please load a model or check logs."
+    # If tokenizer is still None after fallback, something is seriously wrong
+    if CURRENT_TOKENIZER is None:
+        return "❌ Error: Tokenizer is still not available."
     tokens = CURRENT_TOKENIZER(text, truncation=False)
     count = len(tokens['input_ids'])
     if count > 512:
         CURRENT_TOKENIZER = AutoTokenizer.from_pretrained("bert-base-uncased")
         print("✅ Tokenizer initialized successfully")
     except Exception as e:
+        print(f"⚠️ Warning: Could not initialize tokenizer globally: {e}")
 print("🚀 Launching BERT Complaint Classifier...")
 print("📍 Available at: http://localhost:7860")