Spaces:
Sleeping
Sleeping
update
Browse files
- tasks/text.py +4 -2
tasks/text.py
CHANGED
|
@@ -51,6 +51,8 @@ async def evaluate_text(request: TextEvaluationRequest):
|
|
| 51 |
# Split dataset
|
| 52 |
train_test = dataset["train"]
|
| 53 |
test_dataset = dataset["test"]
|
|
|
|
|
|
|
| 54 |
|
| 55 |
# Start tracking emissions
|
| 56 |
tracker.start()
|
|
@@ -68,10 +70,10 @@ async def evaluate_text(request: TextEvaluationRequest):
|
|
| 68 |
|
| 69 |
def tokenize_function(examples):
|
| 70 |
return tokenizer(examples["quote"], padding=True, truncation=True, return_tensors='pt')
|
| 71 |
-
|
| 72 |
# Tokenize the test dataset
|
| 73 |
tokenized_test = test_dataset.map(tokenize_function, batched=True)
|
| 74 |
-
|
| 75 |
# Create DataLoader
|
| 76 |
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
|
| 77 |
dataloader = DataLoader(tokenized_test, batch_size=16, shuffle=False, collate_fn=data_collator)
|
|
|
|
| 51 |
# Split dataset
|
| 52 |
train_test = dataset["train"]
|
| 53 |
test_dataset = dataset["test"]
|
| 54 |
+
print('dataset type: ' , test_dataset.column_names) # Debugging step
|
| 55 |
+
print('dataset type: ' , test_dataset['quote'][:5]) # Debugging step
|
| 56 |
|
| 57 |
# Start tracking emissions
|
| 58 |
tracker.start()
|
|
|
|
| 70 |
|
| 71 |
def tokenize_function(examples):
|
| 72 |
return tokenizer(examples["quote"], padding=True, truncation=True, return_tensors='pt')
|
| 73 |
+
print('BEFORE TOKENIZING')
|
| 74 |
# Tokenize the test dataset
|
| 75 |
tokenized_test = test_dataset.map(tokenize_function, batched=True)
|
| 76 |
+
print('AFTER TOKENIZING')
|
| 77 |
# Create DataLoader
|
| 78 |
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
|
| 79 |
dataloader = DataLoader(tokenized_test, batch_size=16, shuffle=False, collate_fn=data_collator)
|