aslan-asilon3 commited on
Commit
f739329
·
verified ·
1 Parent(s): e4bd0da

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +75 -0
main.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from huggingface_hub import HfApi, login
3
+ from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
4
+ from datasets import Dataset
5
+
6
# Log in to Hugging Face (interactive prompt, or a previously cached token).
login()

# Toy sentiment dataset: label 1 = positive sentiment, 0 = negative.
data = {
    'text': ["I love programming!", "I hate bugs.", "Python is great.", "I dislike syntax errors."],
    'label': [1, 0, 1, 0],
}
df = pd.DataFrame(data)

# Convert the pandas DataFrame into a Hugging Face Dataset.
dataset = Dataset.from_pandas(df)

# Split into training and evaluation sets (80% train, 20% eval).
# A fixed seed makes the split reproducible across runs; the original
# unseeded call produced a different split every execution.
train_test_split = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_test_split['train']
eval_dataset = train_test_split['test']

# Tokenizer matching the BERT checkpoint fine-tuned below.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
26
+
27
def tokenize_function(examples):
    """Tokenize a batch of examples into fixed-length BERT inputs.

    Pads every sequence to the tokenizer's max length and truncates
    longer ones, so all encoded examples share the same shape.
    Relies on the module-level ``tokenizer`` defined above.
    """
    texts = examples["text"]
    return tokenizer(texts, padding="max_length", truncation=True)
29
+
30
# Encode both splits with the batched tokenizer defined above.
tokenized_train = train_dataset.map(tokenize_function, batched=True)
tokenized_eval = eval_dataset.map(tokenize_function, batched=True)

# Binary sentiment classification head on top of BERT base.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Hyperparameters for the fine-tuning run.
# NOTE(review): `evaluation_strategy` was renamed `eval_strategy` in newer
# transformers releases — confirm against the pinned library version.
run_config = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",  # run evaluation once per epoch
    num_train_epochs=3,
    per_device_train_batch_size=2,
    learning_rate=2e-5,
    weight_decay=0.01,
)

# Wire model, arguments and the two encoded splits together, then fine-tune.
bert_trainer = Trainer(
    model=model,
    args=run_config,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
)
bert_trainer.train()
56
+
57
# Persist the fine-tuned weights and the tokenizer side by side so the
# saved directory is directly loadable with from_pretrained().
model.save_pretrained('./mytest-model')
tokenizer.save_pretrained('./mytest-model')

# Target repository on the Hugging Face Hub.
model_id = "aslan-asilon3/mytest-model"
api = HfApi()

# exist_ok=True makes repo creation idempotent. The original wrapped this
# call in a broad `except Exception`, which also hid real failures
# (authentication errors, network problems) behind a print.
api.create_repo(repo_id=model_id, exist_ok=True)

# Upload model and tokenizer to the repo.
model.push_to_hub(model_id)
tokenizer.push_to_hub(model_id)

print(f"Model berhasil diunggah ke Hugging Face: {model_id}")