|
|
import torch
from transformers import AutoTokenizer  # used below but was never imported

from model import FakeBERT
|
|
|
|
|
# HuggingFace checkpoint used as the encoder backbone.
# NOTE(review): this is a 2-class *sentiment* checkpoint; presumably FakeBERT
# reuses only its base encoder and attaches a fresh head — confirm in model.py.
MODEL_NAME = "distilbert/distilbert-base-uncased-finetuned-sst-2-english"

# Path to the fine-tuned weights restored via load_state_dict below.
MODEL_PATH = "distilbert_best.pth"

# Maximum tokens per input; longer texts are truncated by the tokenizer.
MAX_LENGTH = 512

# Three output classes, matching id2label in predict_veracity:
# 0 = "F" (fake), 1 = "U" (unverified), 2 = "T" (true).
NUM_CLASSES = 3

# Prefer GPU when available, otherwise fall back to CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
|
|
|
|
def predict_veracity(texts, model, tokenizer, device, max_length=None):
    """Classify each text as fake ("F"), unverified ("U"), or true ("T").

    Args:
        texts: List of input strings to classify.
        model: Classifier called as ``model(input_ids, attention_mask,
            token_type_ids)``, returning logits of shape (batch, num_classes).
        tokenizer: HuggingFace-style tokenizer producing "input_ids",
            "attention_mask", and (for some models) "token_type_ids".
        device: torch.device to run inference on.
        max_length: Truncation length for tokenization; defaults to the
            module-level MAX_LENGTH when None (late-bound sentinel).

    Returns:
        List of label strings ("F", "U", or "T"), one per input text.
    """
    # Robustness: short-circuit an empty batch instead of handing the
    # tokenizer/model a zero-length input.
    if not texts:
        return []

    if max_length is None:
        max_length = MAX_LENGTH

    model.eval()

    id2label = {0: "F", 1: "U", 2: "T"}

    encodings = tokenizer(
        texts,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )

    input_ids = encodings["input_ids"].to(device)
    attention_mask = encodings["attention_mask"].to(device)
    # DistilBERT tokenizers emit no token_type_ids; forward None in that case.
    token_type_ids = encodings.get("token_type_ids")
    if token_type_ids is not None:
        token_type_ids = token_type_ids.to(device)

    # inference_mode disables autograd bookkeeping for faster, safer eval.
    with torch.inference_mode():
        logits = model(input_ids, attention_mask, token_type_ids)
        preds = torch.argmax(logits, dim=1).tolist()

    # Any class id outside the mapping falls back to "U" (unverified).
    return [id2label.get(p, "U") for p in preds]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Script entry: load artifacts, then classify the input batch. ---

# Tokenizer matching the encoder checkpoint (AutoTokenizer imported at top).
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Build the classifier and restore the fine-tuned weights onto DEVICE.
model = FakeBERT(model_name=MODEL_NAME, num_classes=NUM_CLASSES).to(DEVICE)
state_dict = torch.load(MODEL_PATH, map_location=DEVICE)
model.load_state_dict(state_dict)

# NOTE(review): `texts` was used but never defined anywhere in this file;
# this placeholder batch makes the script runnable — replace with real input.
texts = [
    "Example claim to classify.",
]

# BUG FIX: the function defined above is `predict_veracity`; the original
# called a nonexistent `predict_sentiment`, which raised NameError.
labels = predict_veracity(texts, model, tokenizer, DEVICE)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|