Spaces:

NightPrince
/

FinetuneForTextClassfication

Sleeping

App Files Files Community

NightPrince committed on Jun 22, 2025

Commit

8abaade

verified ·

1 Parent(s): da6e172

Create train.py

Browse files

Files changed (1) hide show

train.py +46 -0

train.py ADDED Viewed

	@@ -0,0 +1,46 @@

+from datasets import load_dataset
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
+from peft import get_peft_model, LoraConfig, TaskType
+import os
+# SST-2 (binary sentiment classification) is loaded from the GLUE benchmark.
+dataset = load_dataset("glue", "sst2")
+# Only the first 500 training examples are kept so the run fits the 25-minute budget.
+train_split = dataset["train"]
+small_train = train_split.select(range(500))
+# Tokenizer matching the DistilBERT checkpoint used for the model.
+tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
+def tokenize_fn(batch):
+    """Tokenize the "sentence" field of a batch with the module-level tokenizer.
+
+    Args:
+        batch: Mapping that provides the raw text under the "sentence" key.
+
+    Returns:
+        The tokenizer output for those sentences, with truncation enabled and
+        padding applied across the sentences passed in this call.
+    """
+    sentences = batch["sentence"]
+    encoded = tokenizer(sentences, truncation=True, padding=True)
+    return encoded
+# Run the tokenizer over the training subset in batched mode.
+tokenized_train = small_train.map(tokenize_fn, batched=True)
+# Two-label DistilBERT classifier that will be wrapped with a LoRA adapter.
+model = AutoModelForSequenceClassification.from_pretrained(
+    "distilbert-base-uncased",
+    num_labels=2,
+)
+# LoRA settings for a sequence-classification task, in training (non-inference) mode.
+peft_config = LoraConfig(
+    task_type=TaskType.SEQ_CLS,
+    inference_mode=False,
+    r=8,
+    lora_alpha=32,
+    lora_dropout=0.1,
+)
+model = get_peft_model(model, peft_config)
+# Hub credentials come from the HF_TOKEN environment variable (e.g. a Space secret).
+# Never hard-code a token in this file. When HF_TOKEN is unset or empty, None is passed
+# so TrainingArguments can fall back to the credentials cached by `huggingface-cli login`
+# instead of sending a placeholder string that is not a valid token.
+hf_token = os.environ.get("HF_TOKEN") or None
+# Training arguments: one epoch, batch size 8 per device, a log entry every 10 steps,
+# a checkpoint at the end of each epoch, and upload to the Hub repository named below.
+training_args = TrainingArguments(
+    output_dir="results",
+    per_device_train_batch_size=8,
+    num_train_epochs=1,
+    logging_dir="./logs",
+    logging_steps=10,
+    save_strategy="epoch",
+    push_to_hub=True,
+    hub_model_id="NightPrince/peft-distilbert-sst2",
+    hub_token=hf_token,
+)
+# Build the trainer from the model, the training arguments and the tokenized subset,
+# then start the training run.
+trainer = Trainer(model=model, args=training_args, train_dataset=tokenized_train)
+trainer.train()