Spaces:

Ahsamkk
/

SentimentAnalysisRomanUrdu

No application file

App Files Files Community

SentimentAnalysisRomanUrdu / train.py

Ahsamkk

Update train.py

68ef9e6 verified about 1 year ago

raw

history blame contribute delete

1.96 kB

	# Import necessary libraries
	import inspect

	from datasets import load_dataset
	from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments

	# Fetch the corpus by its Hugging Face dataset identifier.
	# NOTE(review): the rest of the script reads a 'Text' column and indexes
	# 'train'/'test' splits -- confirm the dataset actually provides them.
	dataset = load_dataset("romanurdu_dataset")

	# One multilingual BERT checkpoint name is shared by tokenizer and model.
	checkpoint = 'bert-base-multilingual-cased'
	tokenizer = AutoTokenizer.from_pretrained(checkpoint)
	# Sequence-classification model configured for two labels.
	model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

	# Tokenize the dataset (adjust based on your dataset's structure)
	def tokenize_function(examples):
	    """Tokenize the 'Text' entry of ``examples`` with the module-level tokenizer.

	    Args:
	        examples: Mapping that has a 'Text' key. The script passes this
	            function to ``dataset.map(..., batched=True)``, so 'Text' is
	            presumably a list of strings -- confirm against the dataset.

	    Returns:
	        The tokenizer's output for the given text, padded with
	        ``padding="max_length"`` and truncated (``truncation=True``).
	    """
	    return tokenizer(examples['Text'], padding="max_length", truncation=True)

	# Tokenize the datasets; batched=True passes batches of rows to tokenize_function.
	tokenized_datasets = dataset.map(tokenize_function, batched=True)

	# Split into train and test datasets (if not already split)
	if 'test' in tokenized_datasets:
	    # The dataset already ships a held-out split: use both as they are.
	    train_dataset = tokenized_datasets['train']
	    test_dataset = tokenized_datasets['test']
	else:
	    # Only 'train' exists: hold out 20% of it for evaluation instead of
	    # failing with a KeyError. The fixed seed keeps the split reproducible.
	    split_datasets = tokenized_datasets['train'].train_test_split(test_size=0.2, seed=42)
	    train_dataset = split_datasets['train']
	    test_dataset = split_datasets['test']

	# Define training arguments
	# Newer transformers releases name the evaluation-schedule option
	# `eval_strategy`, while older ones only accept `evaluation_strategy`.
	# Use whichever keyword the installed TrainingArguments actually declares
	# so the script runs on both.
	eval_strategy_key = (
	    "eval_strategy"
	    if "eval_strategy" in inspect.signature(TrainingArguments).parameters
	    else "evaluation_strategy"
	)
	training_args = TrainingArguments(
	    output_dir='./results',          # output directory for model checkpoints
	    learning_rate=2e-5,              # learning rate
	    per_device_train_batch_size=16,  # batch size for training
	    per_device_eval_batch_size=64,   # batch size for evaluation
	    num_train_epochs=3,              # number of epochs
	    weight_decay=0.01,               # strength of weight decay
	    logging_dir='./logs',            # directory to store logs
	    **{eval_strategy_key: "epoch"},  # evaluate after each epoch
	)

	# Build the Trainer from the model, the training arguments and both splits.
	trainer = Trainer(
	    args=training_args,           # settings defined in training_args
	    model=model,                  # classifier to be trained
	    eval_dataset=test_dataset,    # split used for evaluation
	    train_dataset=train_dataset,  # split used for training
	)

	# Run training.
	trainer.train()

	# Upload the model first, then the tokenizer, to the same Hub repository.
	# NOTE(review): presumably requires a logged-in Hugging Face account -- confirm.
	hub_repo = "SentimentAnalysisRomanUrdu"
	for artifact in (model, tokenizer):
	    artifact.push_to_hub(hub_repo)