msmaje committed on
Commit
e2aef65
·
verified ·
1 Parent(s): e0cb164

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -3
app.py CHANGED
@@ -89,7 +89,7 @@ def load_and_prepare_local_dataset(file_path, text_column, label_column, test_si
89
  # Create train/validation split
90
  train_df, val_df = train_test_split(
91
  df,
92
- test_size=test_size,
93
  random_state=42,
94
  stratify=df['label_idx']
95
  )
@@ -635,8 +635,23 @@ def push_to_hub_after_training(model_path, username, model_name, token):
635
 
636
  def count_tokens(text):
637
  """Count tokens in input text"""
638
- if not text or CURRENT_TOKENIZER is None:
 
639
  return "Enter text to see token count"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
640
  tokens = CURRENT_TOKENIZER(text, truncation=False)
641
  count = len(tokens['input_ids'])
642
  if count > 512:
@@ -673,7 +688,7 @@ if CURRENT_TOKENIZER is None:
673
  CURRENT_TOKENIZER = AutoTokenizer.from_pretrained("bert-base-uncased")
674
  print("✅ Tokenizer initialized successfully")
675
  except Exception as e:
676
- print(f"⚠️ Warning: Could not initialize tokenizer: {e}")
677
 
678
  print("🚀 Launching BERT Complaint Classifier...")
679
  print("📍 Available at: http://localhost:7860")
 
89
  # Create train/validation split
90
  train_df, val_df = train_test_split(
91
  df,
92
+ test_size=0.2,
93
  random_state=42,
94
  stratify=df['label_idx']
95
  )
 
635
 
636
  def count_tokens(text):
637
  """Count tokens in input text"""
638
+ global CURRENT_TOKENIZER
639
+ if not text:
640
  return "Enter text to see token count"
641
+
642
+ # Attempt to load a default tokenizer if it's not set
643
+ if CURRENT_TOKENIZER is None:
644
+ try:
645
+ CURRENT_TOKENIZER = AutoTokenizer.from_pretrained("bert-base-uncased")
646
+ logger.info("Fallback: Tokenizer loaded in count_tokens function.")
647
+ except Exception as e:
648
+ logger.error(f"Failed to load tokenizer in count_tokens fallback: {e}")
649
+ return "❌ Error: Tokenizer not loaded. Please load a model or check logs."
650
+
651
+ # If tokenizer is still None after fallback, something is seriously wrong
652
+ if CURRENT_TOKENIZER is None:
653
+ return "❌ Error: Tokenizer is still not available."
654
+
655
  tokens = CURRENT_TOKENIZER(text, truncation=False)
656
  count = len(tokens['input_ids'])
657
  if count > 512:
 
688
  CURRENT_TOKENIZER = AutoTokenizer.from_pretrained("bert-base-uncased")
689
  print("✅ Tokenizer initialized successfully")
690
  except Exception as e:
691
+ print(f"⚠️ Warning: Could not initialize tokenizer globally: {e}")
692
 
693
  print("🚀 Launching BERT Complaint Classifier...")
694
  print("📍 Available at: http://localhost:7860")