Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -55,7 +55,9 @@ if uploaded_file is not None:
|
|
| 55 |
|
| 56 |
# Tokenize the dataset
|
| 57 |
def tokenize_function(examples):
|
| 58 |
-
|
|
|
|
|
|
|
| 59 |
|
| 60 |
tokenized_tweets = tweet_dataset.map(tokenize_function, batched=True)
|
| 61 |
|
|
|
|
| 55 |
|
| 56 |
# Tokenize the dataset
|
| 57 |
def tokenize_function(examples):
    """Tokenize a batch of examples and attach language-modeling labels.

    Args:
        examples: Mapping with a "text" entry holding the raw strings of the
            batch (the function is applied with ``batched=True``).

    Returns:
        The tokenizer output with an added "labels" entry. Labels mirror
        ``input_ids``, except that padding positions are set to -100 so they
        are excluded from the loss.
    """
    tokens = tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)

    if "attention_mask" in tokens:
        # Every sequence is padded to 128 tokens. Copying input_ids verbatim
        # would make the model learn to predict padding. -100 is the
        # ignore_index of the cross-entropy loss, so masked positions do not
        # contribute. The attention mask (not the pad token id) is used so a
        # genuine EOS token is kept even when pad and EOS share an id.
        tokens["labels"] = [
            [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
            for ids, mask in zip(tokens["input_ids"], tokens["attention_mask"])
        ]
    else:
        # No mask available (tokenizer configured not to return one):
        # fall back to using input_ids as labels unchanged.
        tokens["labels"] = tokens["input_ids"].copy()
    return tokens
|
| 61 |
|
| 62 |
# Run tokenize_function over tweet_dataset; batched=True is passed so the
# function receives groups of examples rather than one at a time
# (presumably tweet_dataset is a Hugging Face `datasets.Dataset` — confirm).
tokenized_tweets = tweet_dataset.map(tokenize_function, batched=True)
|
| 63 |
|