# Third-party dependencies: data handling, Hub access, and model fine-tuning.
import pandas as pd
from huggingface_hub import HfApi, login
from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    Trainer,
    TrainingArguments,
)
from datasets import Dataset

# Authenticate with the Hugging Face Hub (interactively prompts for a token).
login()
| |
|
| | |
# Tiny hard-coded sentiment dataset: label 1 = positive, 0 = negative.
raw_examples = {
    'text': [
        "I love programming!",
        "I hate bugs.",
        "Python is great.",
        "I dislike syntax errors.",
    ],
    'label': [1, 0, 1, 0],
}
df = pd.DataFrame(raw_examples)
| |
|
| | |
# Wrap the DataFrame as a Hugging Face Dataset and hold out 20% for evaluation.
dataset = Dataset.from_pandas(df)
split_datasets = dataset.train_test_split(test_size=0.2)
train_dataset = split_datasets['train']
eval_dataset = split_datasets['test']
| |
|
| | |
# Tokenizer must match the pretrained checkpoint used for the model below.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


def tokenize_function(examples):
    """Tokenize a batch of examples, padding/truncating to the model's max length."""
    return tokenizer(examples["text"], padding="max_length", truncation=True)


# Apply the tokenizer to both splits; batched mode processes many rows per call.
tokenized_train = train_dataset.map(tokenize_function, batched=True)
tokenized_eval = eval_dataset.map(tokenize_function, batched=True)
| |
|
| | |
# Binary classification head (num_labels=2) on top of the pretrained encoder.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=2,
)

# Hyperparameters for a quick demo run; checkpoints land in ./results.
# NOTE(review): `evaluation_strategy` was renamed to `eval_strategy` in newer
# transformers releases — confirm which name your installed version expects.
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    num_train_epochs=3,
    weight_decay=0.01,
)

# Trainer wires together model, hyperparameters, and the tokenized splits.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
)
| |
|
# Run fine-tuning, then persist both model weights and tokenizer files
# locally so the directory can be reloaded or pushed to the Hub later.
trainer.train()

model.save_pretrained('./mytest-model')
tokenizer.save_pretrained('./mytest-model')
| |
|
| | |
# Target repository on the Hugging Face Hub (namespace/repo-name).
model_id = "aslan-asilon3/mytest-model"
api = HfApi()

try:
    # exist_ok=True makes repo creation idempotent: re-running the script
    # against an existing repo is not an error. The broad guard remains only
    # for genuine failures (auth/network); push_to_hub below would surface
    # a hard failure anyway, so this stays best-effort.
    api.create_repo(repo_id=model_id, exist_ok=True)
except Exception as e:
    print(f"Repo mungkin sudah ada: {e}")

# Upload the fine-tuned weights and the matching tokenizer to the Hub.
model.push_to_hub(model_id)
tokenizer.push_to_hub(model_id)

print(f"Model berhasil diunggah ke Hugging Face: {model_id}")
| |
|