Update tasks/text.py
Browse files
- tasks/text.py +16 -15
tasks/text.py
CHANGED
|
@@ -38,27 +38,13 @@ async def evaluate_text(request: TextEvaluationRequest):
|
|
| 38 |
}
|
| 39 |
|
| 40 |
# Load and prepare the dataset
|
| 41 |
-
dataset = load_dataset(request.dataset_name)
|
| 42 |
|
| 43 |
# Convert string labels to integers
|
| 44 |
dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
|
| 45 |
|
| 46 |
# Split dataset
|
| 47 |
-
train_test = dataset["train"]
|
| 48 |
test_dataset = dataset["test"]
|
| 49 |
-
|
| 50 |
-
# Start tracking emissions
|
| 51 |
-
tracker.start()
|
| 52 |
-
tracker.start_task("inference")
|
| 53 |
-
|
| 54 |
-
#--------------------------------------------------------------------------------------------
|
| 55 |
-
# YOUR MODEL INFERENCE CODE HERE
|
| 56 |
-
# Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
|
| 57 |
-
#--------------------------------------------------------------------------------------------
|
| 58 |
-
|
| 59 |
-
# Make random predictions (placeholder for actual model inference)
|
| 60 |
-
#true_labels = test_dataset["label"]
|
| 61 |
-
#predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
|
| 62 |
|
| 63 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 64 |
import torch
|
|
@@ -77,6 +63,21 @@ async def evaluate_text(request: TextEvaluationRequest):
|
|
| 77 |
model.to(device)
|
| 78 |
model.eval() # Set to evaluation mode
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
# tokenize texts
|
| 82 |
test_encodings = tokenizer(test_dataset["quote"], padding='max_length', truncation=True, max_length=MAX_LENGTH, return_tensors="pt")
|
|
|
|
| 38 |
}
|
| 39 |
|
| 40 |
# Load and prepare the dataset
|
| 41 |
+
dataset = load_dataset(request.dataset_name, token=os.getenv("HF_TOKEN"))
|
| 42 |
|
| 43 |
# Convert string labels to integers
|
| 44 |
dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
|
| 45 |
|
| 46 |
# Split dataset
|
|
|
|
| 47 |
test_dataset = dataset["test"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 50 |
import torch
|
|
|
|
| 63 |
model.to(device)
|
| 64 |
model.eval() # Set to evaluation mode
|
| 65 |
|
| 66 |
+
|
| 67 |
+
# Start tracking emissions
|
| 68 |
+
tracker.start()
|
| 69 |
+
tracker.start_task("inference")
|
| 70 |
+
|
| 71 |
+
#--------------------------------------------------------------------------------------------
|
| 72 |
+
# YOUR MODEL INFERENCE CODE HERE
|
| 73 |
+
# Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
|
| 74 |
+
#--------------------------------------------------------------------------------------------
|
| 75 |
+
|
| 76 |
+
# Make random predictions (placeholder for actual model inference)
|
| 77 |
+
#true_labels = test_dataset["label"]
|
| 78 |
+
#predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
|
| 79 |
+
|
| 80 |
+
|
| 81 |
|
| 82 |
# tokenize texts
|
| 83 |
test_encodings = tokenizer(test_dataset["quote"], padding='max_length', truncation=True, max_length=MAX_LENGTH, return_tensors="pt")
|