"""Example script: launch an AutoTrain multi-class text classification project.

The script loads the English configuration of the ``amazon_reviews_multi``
dataset, converts two of its splits to pandas DataFrames, wraps them in an
``AutoTrainDataset``, and then creates and approves a ``Project`` that is
configured with three hyper-parameter sets.

Required environment variables (a ``KeyError`` is raised at import time if
either one is missing):

* ``AUTOTRAIN_USERNAME`` -- passed as ``username`` to ``AutoTrainDataset``.
* ``HF_TOKEN`` -- passed as ``token`` to ``AutoTrainDataset``.
"""

import os
from uuid import uuid4

from datasets import load_dataset

from autotrain.dataset import AutoTrainDataset
from autotrain.project import Project


# A fresh UUID per run, embedded in the project name so that repeated runs
# do not reuse the same name.
RANDOM_ID = str(uuid4())
DATASET = "amazon_reviews_multi"
PROJECT_NAME = f"amazon_reviews_multi_{RANDOM_ID}"
TASK = "text_multi_class_classification"
MODEL = "bert-base-uncased"

# Credentials are read eagerly; a missing variable fails fast with KeyError.
USERNAME = os.environ["AUTOTRAIN_USERNAME"]
TOKEN = os.environ["HF_TOKEN"]


if __name__ == "__main__":
    # Load the "en" configuration and pick the splits used for training and
    # validation.
    # NOTE(review): the "test" split is what gets passed as validation data
    # below -- confirm this is intended.
    dataset = load_dataset(DATASET, "en")
    train = dataset["train"]
    validation = dataset["test"]

    # AutoTrainDataset is given pandas DataFrames, so convert both splits.
    train_df = train.to_pandas()
    validation_df = validation.to_pandas()

    # Wrap the frames for AutoTrain. The column mapping points the "text"
    # role at the "review_body" column and the "label" role at "stars".
    # percent_valid is None here; presumably no automatic split is wanted
    # because validation data is supplied explicitly -- TODO confirm.
    dset = AutoTrainDataset(
        train_data=[train_df],
        valid_data=[validation_df],
        task=TASK,
        token=TOKEN,
        project_name=PROJECT_NAME,
        username=USERNAME,
        column_mapping={"text": "review_body", "label": "stars"},
        percent_valid=None,
    )
    dset.prepare()

    # Three hyper-parameter sets, one dict per job. They differ in learning
    # rate, optimizer and scheduler; the task and epoch count are shared.
    job1 = {
        "task": TASK,
        "learning_rate": 1e-5,
        "optimizer": "adamw_torch",
        "scheduler": "linear",
        "epochs": 5,
    }

    job2 = {
        "task": TASK,
        "learning_rate": 3e-5,
        "optimizer": "adamw_torch",
        "scheduler": "cosine",
        "epochs": 5,
    }

    job3 = {
        "task": TASK,
        "learning_rate": 5e-5,
        "optimizer": "sgd",
        "scheduler": "cosine",
        "epochs": 5,
    }

    # Create the project from the prepared dataset and the job list, then
    # approve it using the id returned by create().
    jobs = [job1, job2, job3]
    project = Project(dataset=dset, hub_model=MODEL, job_params=jobs)
    project_id = project.create()
    project.approve(project_id)