Spaces:
Runtime error
Runtime error
File size: 1,596 Bytes
69914b2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import os
import mlflow
import yaml
from transformers import (
Trainer,
TrainingArguments,
AutoModelForSequenceClassification,
AutoTokenizer
)
from datasets import load_dataset
from sklearn.metrics import accuracy_score
import torch
def compute_metrics(eval_pred):
logits, labels = eval_pred
preds = torch.argmax(torch.tensor(logits), axis=1)
acc = accuracy_score(labels, preds)
return {"accuracy": acc}
def load_config():
with open("configs/training_config.yaml") as f:
return yaml.safe_load(f)
def main():
cfg = load_config()
mlflow.set_experiment("huggingface-fulltrack-clone")
with mlflow.start_run():
tokenizer = AutoTokenizer.from_pretrained(cfg["model_name"])
model = AutoModelForSequenceClassification.from_pretrained(cfg["model_name"], num_labels=2)
dataset = load_dataset("imdb").map(lambda e: tokenizer(e["text"], truncation=True, padding="max_length"), batched=True)
dataset = dataset.rename_column("label", "labels").with_format("torch")
training_args = TrainingArguments(**cfg)
trainer = Trainer(
model=model,
args=training_args,
train_dataset=dataset["train"].shuffle(seed=42).select(range(2000)),
eval_dataset=dataset["test"].select(range(1000)),
tokenizer=tokenizer,
compute_metrics=compute_metrics,
)
trainer.train()
trainer.evaluate()
mlflow.log_params(cfg)
mlflow.pytorch.log_model(model, artifact_path="model")
if __name__ == "__main__":
main()
|