Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -89,7 +89,7 @@ def load_and_prepare_local_dataset(file_path, text_column, label_column, test_si
|
|
| 89 |
# Create train/validation split
|
| 90 |
train_df, val_df = train_test_split(
|
| 91 |
df,
|
| 92 |
-
test_size=
|
| 93 |
random_state=42,
|
| 94 |
stratify=df['label_idx']
|
| 95 |
)
|
|
@@ -635,8 +635,23 @@ def push_to_hub_after_training(model_path, username, model_name, token):
|
|
| 635 |
|
| 636 |
def count_tokens(text):
|
| 637 |
"""Count tokens in input text"""
|
| 638 |
-
|
|
|
|
| 639 |
return "Enter text to see token count"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 640 |
tokens = CURRENT_TOKENIZER(text, truncation=False)
|
| 641 |
count = len(tokens['input_ids'])
|
| 642 |
if count > 512:
|
|
@@ -673,7 +688,7 @@ if CURRENT_TOKENIZER is None:
|
|
| 673 |
CURRENT_TOKENIZER = AutoTokenizer.from_pretrained("bert-base-uncased")
|
| 674 |
print("✅ Tokenizer initialized successfully")
|
| 675 |
except Exception as e:
|
| 676 |
-
print(f"⚠️ Warning: Could not initialize tokenizer: {e}")
|
| 677 |
|
| 678 |
print("π Launching BERT Complaint Classifier...")
|
| 679 |
print("π Available at: http://localhost:7860")
|
|
|
|
| 89 |
# Create train/validation split
|
| 90 |
train_df, val_df = train_test_split(
|
| 91 |
df,
|
| 92 |
+
test_size=0.2,
|
| 93 |
random_state=42,
|
| 94 |
stratify=df['label_idx']
|
| 95 |
)
|
|
|
|
| 635 |
|
| 636 |
def count_tokens(text):
|
| 637 |
"""Count tokens in input text"""
|
| 638 |
+
global CURRENT_TOKENIZER
|
| 639 |
+
if not text:
|
| 640 |
return "Enter text to see token count"
|
| 641 |
+
|
| 642 |
+
# Attempt to load a default tokenizer if it's not set
|
| 643 |
+
if CURRENT_TOKENIZER is None:
|
| 644 |
+
try:
|
| 645 |
+
CURRENT_TOKENIZER = AutoTokenizer.from_pretrained("bert-base-uncased")
|
| 646 |
+
logger.info("Fallback: Tokenizer loaded in count_tokens function.")
|
| 647 |
+
except Exception as e:
|
| 648 |
+
logger.error(f"Failed to load tokenizer in count_tokens fallback: {e}")
|
| 649 |
+
return "β Error: Tokenizer not loaded. Please load a model or check logs."
|
| 650 |
+
|
| 651 |
+
# If tokenizer is still None after fallback, something is seriously wrong
|
| 652 |
+
if CURRENT_TOKENIZER is None:
|
| 653 |
+
return "β Error: Tokenizer is still not available."
|
| 654 |
+
|
| 655 |
tokens = CURRENT_TOKENIZER(text, truncation=False)
|
| 656 |
count = len(tokens['input_ids'])
|
| 657 |
if count > 512:
|
|
|
|
| 688 |
CURRENT_TOKENIZER = AutoTokenizer.from_pretrained("bert-base-uncased")
|
| 689 |
print("✅ Tokenizer initialized successfully")
|
| 690 |
except Exception as e:
|
| 691 |
+
print(f"⚠️ Warning: Could not initialize tokenizer globally: {e}")
|
| 692 |
|
| 693 |
print("π Launching BERT Complaint Classifier...")
|
| 694 |
print("π Available at: http://localhost:7860")
|