| |
|
| | import torch, pickle, json, string, nltk |
| | from pathlib import Path |
| | from lstm_model import LSTMClassifier |
| |
|
# Special token ids; must match the training-time preprocessing.
PAD = 0
UNK = 1
ROOT = Path(__file__).resolve().parent

# Load hyper-parameters and vocabulary with context managers so the file
# handles are closed deterministically (the original `json.load(open(...))`
# pattern leaked them).
with open(ROOT / 'config.json') as f:
    cfg = json.load(f)
# NOTE(review): pickle is only safe on trusted artifacts — vocab.pkl must be
# our own training output, never user-supplied data.
with open(ROOT / 'vocab.pkl', 'rb') as f:
    vocab = pickle.load(f)

# Build the classifier in inference mode and restore the trained weights.
# torch.load is pickle-based under the hood — the same trust caveat applies.
model = LSTMClassifier(**cfg).eval()
model.load_state_dict(torch.load(ROOT / 'pytorch_model.bin', map_location='cpu'))

# Stopword set and punctuation-stripping translation table used by preprocess().
nltk.download('stopwords', quiet=True)
STOP = set(nltk.corpus.stopwords.words('english'))
PUNC = str.maketrans('', '', string.punctuation)
|
def preprocess(text):
    """Tokenise *text* for the model.

    Lower-cases, strips punctuation in a single translate() pass, drops
    English stopwords, and truncates to the model's padded length.
    """
    cleaned = text.lower().translate(PUNC)
    tokens = [tok for tok in cleaned.split() if tok not in STOP]
    del tokens[cfg['pad_len']:]
    return tokens
| |
|
def encode(tokens):
    """Map tokens to vocabulary ids, padded/truncated to cfg['pad_len'].

    Returns a (1, pad_len) id tensor plus a 1-element length tensor holding
    the count of real (non-pad) tokens.
    """
    pad_len = cfg['pad_len']
    # Defensive truncation: preprocess() already caps at pad_len, but a
    # direct caller with a longer list would otherwise make the pad count
    # below negative and emit an oversized row with a mismatched length.
    tokens = tokens[:pad_len]
    ids = [vocab.get(w, UNK) for w in tokens]
    ids += [PAD] * (pad_len - len(ids))
    return torch.tensor(ids).unsqueeze(0), torch.tensor([len(tokens)])
| |
|
@torch.no_grad()
def predict(text):
    """Return the positive-class probability for *text* as a Python float."""
    tokens = preprocess(text)
    ids, length = encode(tokens)
    # Single logit -> probability via sigmoid; .item() unwraps the scalar.
    return torch.sigmoid(model(ids, length)).item()
| |
|