Update app.py
Browse files
app.py
CHANGED
@@ -40,6 +40,11 @@ def setup_training():
     # Print dataset structure
     logging.info(f"Dataset columns: {dataset['train'].column_names}")
 
+    # Determine the number of unique labels
+    unique_labels = dataset['train'].unique(config['target_column'])
+    num_labels = len(unique_labels)
+    logging.info(f"Number of unique labels: {num_labels}")
+
     logging.info(f"Dataset loaded. Train size: {len(dataset['train'])}, Test size: {len(dataset['test'])}")
 
     # Load tokenizer and model
@@ -47,7 +52,7 @@ def setup_training():
     tokenizer = AutoTokenizer.from_pretrained(config['model_name'])
     model = AutoModelForSequenceClassification.from_pretrained(
         config['model_name'],
-        num_labels=
+        num_labels=num_labels
     )
 
     # Tokenize the dataset