submission-template

Sleeping

App Files Community

NaolTaye committed on Feb 3, 2025

Commit

8021f3c

verified ·

1 Parent(s): 1814075

Update tasks/text.py

Browse files

Files changed (1) hide show

tasks/text.py +29 -6

tasks/text.py CHANGED Viewed

@@ -75,21 +75,44 @@ async def evaluate_text(request: TextEvaluationRequest):
     tokenized_test = test_dataset.map(lambda batch: tokenize_frugal(batch, tokenizer), batched=True)
-    dataloader = DataLoader(tokenized_test, batch_size=16, shuffle=False)
-    print("Started prediction run")
     model.eval()
     with torch.no_grad():
-        predictions = np.array([])
         for batch in dataloader:
-            test_input_ids = batch["input_ids"].to(device)
-            test_attention_mask = batch["attention_mask"].to(device)
-            outputs = model(test_input_ids, test_attention_mask)
             p = torch.argmax(outputs.logits, dim=1)
             predictions = np.append(predictions, p.cpu().numpy())
     print("Finished prediction run")
     # Make random predictions (placeholder for actual model inference)

     tokenized_test = test_dataset.map(lambda batch: tokenize_frugal(batch, tokenizer), batched=True)
+    # dataloader = DataLoader(tokenized_test, batch_size=16, shuffle=False)
+    # print("Started prediction run")
+    # model.eval()
+    # with torch.no_grad():
+    #     predictions = np.array([])
+    #     for batch in dataloader:
+    #         test_input_ids = batch["input_ids"].to(device)
+    #         test_attention_mask = batch["attention_mask"].to(device)
+    #         outputs = model(test_input_ids, test_attention_mask)
+    #         p = torch.argmax(outputs.logits, dim=1)
+    #         predictions = np.append(predictions, p.cpu().numpy())
+    # print("Finished prediction run")
+    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
+    # Create DataLoader
+    dataloader = DataLoader(tokenized_test, batch_size=16, shuffle=False, collate_fn=data_collator)
+    print("Started prediction run")
+    # Model inference
     model.eval()
+    predictions = np.array([])
     with torch.no_grad():
         for batch in dataloader:
+            batch = {k: v.to(device) for k, v in batch.items()}  # Move batch to GPU
+            outputs = model(**batch)  # Correct way to pass inputs
             p = torch.argmax(outputs.logits, dim=1)
             predictions = np.append(predictions, p.cpu().numpy())
     print("Finished prediction run")
+    # Print the dataset's column names to check whether a "label" column is present
+    print(test_dataset.column_names)  # Debugging step
     # Make random predictions (placeholder for actual model inference)