Oranblock committed on
Commit
af5b0b6
·
verified ·
1 Parent(s): 249cbf3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -1
app.py CHANGED
@@ -40,6 +40,11 @@ def setup_training():
40
  # Print dataset structure
41
  logging.info(f"Dataset columns: {dataset['train'].column_names}")
42
 
 
 
 
 
 
43
  logging.info(f"Dataset loaded. Train size: {len(dataset['train'])}, Test size: {len(dataset['test'])}")
44
 
45
  # Load tokenizer and model
@@ -47,7 +52,7 @@ def setup_training():
47
  tokenizer = AutoTokenizer.from_pretrained(config['model_name'])
48
  model = AutoModelForSequenceClassification.from_pretrained(
49
  config['model_name'],
50
- num_labels=len(dataset['train'].features[config['target_column']].names)
51
  )
52
 
53
  # Tokenize the dataset
 
40
  # Print dataset structure
41
  logging.info(f"Dataset columns: {dataset['train'].column_names}")
42
 
43
+ # Determine the number of unique labels
44
+ unique_labels = dataset['train'].unique(config['target_column'])
45
+ num_labels = len(unique_labels)
46
+ logging.info(f"Number of unique labels: {num_labels}")
47
+
48
  logging.info(f"Dataset loaded. Train size: {len(dataset['train'])}, Test size: {len(dataset['test'])}")
49
 
50
  # Load tokenizer and model
 
52
  tokenizer = AutoTokenizer.from_pretrained(config['model_name'])
53
  model = AutoModelForSequenceClassification.from_pretrained(
54
  config['model_name'],
55
+ num_labels=num_labels
56
  )
57
 
58
  # Tokenize the dataset