uploaded files for tokenizer and handler

f026936 verified over 1 year ago

6.73 kB

	from typing import Dict, List, Any
	import torch
	import torch.nn as nn
	import json
	from transformers import pipeline, BertModel, AutoTokenizer, PretrainedConfig
	from huggingface_hub import PyTorchModelHubMixin

	class EndpointHandler():
	    """Callable request handler that forwards a payload to ``CustomModel``.

	    The weights are loaded from the ``AbidHasan95/smsner_model2`` Hub
	    repository and the BERT architecture is read from
	    ``test_bert_config.json``; both names are hard-coded.
	    """

	    def __init__(self, path=""):
	        """Load the model once so every request reuses the same instance.

	        Args:
	            path: Accepted for interface compatibility; it is not used by
	                this implementation.
	        """
	        config = {"bert_config": "test_bert_config.json"}
	        self.model = CustomModel.from_pretrained("AbidHasan95/smsner_model2", **config)
	        # This handler only predicts. The model contains Dropout(0.2) layers
	        # and from_pretrained is not guaranteed to leave the module in eval
	        # mode, so switch explicitly to keep predictions deterministic.
	        self.model.eval()

	    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
	        """Run one prediction.

	        Args:
	            data: Request payload. The value stored under ``"inputs"`` is
	                removed from the dict and handed to the model; when the key
	                is missing the whole payload is handed to the model instead.

	        Returns:
	            Whatever the model returns, unchanged.
	            NOTE(review): ``CustomModel.forward`` in this file returns a
	            ``str``, which does not match the annotated return type --
	            confirm what the serving layer expects before changing either.
	        """
	        inputs = data.pop("inputs", data)
	        # Inference only: no gradients are needed, so skip autograd tracking.
	        with torch.no_grad():
	            return self.model(inputs)

	class CustomModelOld(nn.Module):
	    """BERT encoder with a per-token head and a whole-sequence head.

	    ``forward`` returns the raw logits of both heads; ``classify`` tokenizes
	    text and converts the arg-max predictions into label strings.
	    """

	    def __init__(self, bert_config):
	        """Build the encoder, both heads, the label tables and the tokenizer.

	        Args:
	            bert_config: Path to a JSON file holding the BERT configuration.
	        """
	        # Zero-argument super(): naming CustomModel here raised TypeError,
	        # because this class is not a subclass of CustomModel.
	        super().__init__()

	        # Position i in each list is the label for class index i of the
	        # corresponding head.
	        self.seq_labels = [
	            "Transaction",
	            "Courier",
	            "OTP",
	            "Expiry",
	            "Misc",
	            "Tele Marketing",
	            "Spam",
	        ]
	        self.token_class_labels = [
	            'O',
	            'Courier Service',
	            'Credit',
	            'Date',
	            'Debit',
	            'Email',
	            'Expiry',
	            'Item',
	            'Order ID',
	            'Organization',
	            'OTP',
	            'Phone Number',
	            'Refund',
	            'Time',
	            'Tracking ID',
	            'URL',
	        ]

	        self.bert = BertModel._from_config(PretrainedConfig.from_json_file(bert_config))
	        self.dropout = nn.Dropout(0.2)
	        # Head widths follow the label tables (16 token labels, 7 sequence
	        # labels) so the two can never drift apart.
	        hidden_size = self.bert.config.hidden_size
	        self.token_classifier = nn.Linear(hidden_size, len(self.token_class_labels))
	        self.sequence_classifier = nn.Linear(hidden_size, len(self.seq_labels))

	        # Initialize weights (only the weight matrices; biases keep the
	        # nn.Linear default).
	        nn.init.kaiming_normal_(self.token_classifier.weight, mode='fan_in', nonlinearity='linear')
	        nn.init.kaiming_normal_(self.sequence_classifier.weight, mode='fan_in', nonlinearity='linear')

	        # The tokenizer files are read from the current working directory.
	        base_model_path = '.'
	        self.tokenizer = AutoTokenizer.from_pretrained(base_model_path)

	    def forward(self, input_ids : torch.Tensor, attention_mask: torch.Tensor, token_type_ids: torch.Tensor):
	        """Return ``(token_logits, sequence_logits)`` for an encoded batch.

	        Args:
	            input_ids: Token ids passed straight to the BERT encoder.
	            attention_mask: Attention mask passed straight to the encoder.
	            token_type_ids: Segment ids passed straight to the encoder.

	        Returns:
	            A tuple of the token-head logits (computed from the encoder's
	            last hidden state) and the sequence-head logits (computed from
	            the encoder's pooled output). Dropout is applied before each
	            head.
	        """
	        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
	        sequence_output, pooled_output = outputs.last_hidden_state, outputs.pooler_output

	        token_logits = self.token_classifier(self.dropout(sequence_output))
	        sequence_logits = self.sequence_classifier(self.dropout(pooled_output))

	        return token_logits, sequence_logits

	    def classify(self, inputs):
	        """Predict token labels and a sequence label for ``inputs``.

	        Args:
	            inputs: Text handed to the tokenizer.

	        Returns:
	            A dict with ``"token_classfier"`` (one label per produced token,
	            in tokenizer order) and ``"sequence_classfier"`` (one label).
	            Only the first sequence of the encoded batch is decoded.
	        """
	        # Truncate to the encoder's configured position limit so long texts
	        # do not run past the position embeddings.
	        encoded = self.tokenizer(
	            inputs,
	            return_tensors="pt",
	            truncation=True,
	            max_length=self.bert.config.max_position_embeddings,
	        )

	        # Predict with dropout disabled and without autograd tracking, then
	        # restore whichever mode the module was in.
	        was_training = self.training
	        self.eval()
	        try:
	            with torch.no_grad():
	                token_logits, sequence_logits = self.forward(**encoded)
	        finally:
	            self.train(was_training)

	        token_ids = token_logits.argmax(2)[0].tolist()
	        sequence_id = sequence_logits.argmax(1)[0].item()
	        return {
	            "token_classfier": [self.token_class_labels[i] for i in token_ids],
	            "sequence_classfier": self.seq_labels[sequence_id],
	        }

	class CustomModel(nn.Module, PyTorchModelHubMixin):
	    """BERT encoder with a token head and a sequence head, loadable from the Hub.

	    Unlike a conventional ``nn.Module``, ``forward`` takes raw text and
	    returns the decoded labels as a string, so the module is usable for
	    prediction only.
	    """

	    def __init__(self, bert_config: str):
	        """Build the encoder, both heads, the label tables and the tokenizer.

	        Args:
	            bert_config: Path to a JSON file holding the BERT configuration.
	        """
	        super().__init__()

	        # Position i in each list is the label for class index i of the
	        # corresponding head.
	        self.seq_labels = [
	            "Transaction",
	            "Courier",
	            "OTP",
	            "Expiry",
	            "Misc",
	            "Tele Marketing",
	            "Spam",
	        ]
	        self.token_class_labels = [
	            'O',
	            'Courier Service',
	            'Credit',
	            'Date',
	            'Debit',
	            'Email',
	            'Expiry',
	            'Item',
	            'Order ID',
	            'Organization',
	            'OTP',
	            'Phone Number',
	            'Refund',
	            'Time',
	            'Tracking ID',
	            'URL',
	        ]

	        self.bert = BertModel._from_config(PretrainedConfig.from_json_file(bert_config))
	        self.dropout = nn.Dropout(0.2)
	        # Head widths follow the label tables (16 token labels, 7 sequence
	        # labels) so the two can never drift apart.
	        hidden_size = self.bert.config.hidden_size
	        self.token_classifier = nn.Linear(hidden_size, len(self.token_class_labels))
	        self.sequence_classifier = nn.Linear(hidden_size, len(self.seq_labels))

	        # Initialize weights (only the weight matrices; biases keep the
	        # nn.Linear default).
	        nn.init.kaiming_normal_(self.token_classifier.weight, mode='fan_in', nonlinearity='linear')
	        nn.init.kaiming_normal_(self.sequence_classifier.weight, mode='fan_in', nonlinearity='linear')

	        # The tokenizer files are read from the current working directory.
	        base_model_path = '.'
	        self.tokenizer = AutoTokenizer.from_pretrained(base_model_path)

	    def forward(self, inputs: str):
	        """Tokenize ``inputs``, run both heads and return the labels as text.

	        Args:
	            inputs: Text handed to the tokenizer.

	        Returns:
	            ``str()`` of a dict with ``"token_classfier"`` (one label per
	            produced token, in tokenizer order) and ``"sequence_classfier"``
	            (one label). This is a Python dict repr, not JSON. Only the
	            first sequence of the encoded batch is decoded.
	        """
	        # Truncate to the encoder's configured position limit so long texts
	        # do not run past the position embeddings.
	        encoded = self.tokenizer(
	            inputs,
	            return_tensors="pt",
	            truncation=True,
	            max_length=self.bert.config.max_position_embeddings,
	        )

	        # The return value is a label string, so this method is never used
	        # for training: run with dropout disabled and without autograd
	        # tracking, then restore whichever mode the module was in.
	        was_training = self.training
	        self.eval()
	        try:
	            with torch.no_grad():
	                outputs = self.bert(**encoded)
	                token_logits = self.token_classifier(self.dropout(outputs.last_hidden_state))
	                sequence_logits = self.sequence_classifier(self.dropout(outputs.pooler_output))
	        finally:
	            self.train(was_training)

	        token_ids = token_logits.argmax(2)[0].tolist()
	        sequence_id = sequence_logits.argmax(1)[0].item()
	        token_classification_out = [self.token_class_labels[i] for i in token_ids]
	        seq_classification_out = self.seq_labels[sequence_id]
	        return str({"token_classfier":token_classification_out, "sequence_classfier": seq_classification_out})