# bert-sentiment-analyzer / predictor.py
# Author: SreyaDvn — commit 043fd70 ("Update predictor.py")
# predictor.py
import os
import torch
import re
from transformers import BertTokenizer, BertForSequenceClassification
# ✅ Redirect the Hugging Face cache to a writable directory (needed on
# read-only hosts such as HF Spaces, where the default ~/.cache is not
# writable). NOTE(review): TRANSFORMERS_CACHE is deprecated in newer
# transformers releases in favor of HF_HOME — confirm library version.
os.environ["TRANSFORMERS_CACHE"] = "./hf_cache"

# ✅ Model repository on the Hugging Face Hub.
MODEL_NAME = "SreyaDvn/sentiment-model"

# Run inference on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

try:
    print("🚀 Loading tokenizer and model from Hugging Face Hub...")
    tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
    model = BertForSequenceClassification.from_pretrained(MODEL_NAME)
    model.to(device)
    model.eval()  # inference mode: disables dropout
    print("✅ Model & tokenizer loaded successfully from Hugging Face.")
except Exception as e:
    # Bug fix: the original fell through with `tokenizer`/`model` left
    # undefined, so the first prediction crashed with a NameError.
    # Define them as None so predict_sentiment's error handling can
    # return its ("Error", "0.00%") fallback instead.
    print("❌ Error loading model/tokenizer:", e)
    tokenizer = None
    model = None
# ๐Ÿ” Text cleaner
def clean_text(text):
text = str(text).lower()
text = re.sub(r"http\S+", "", text)
text = re.sub(r"@\w+", "", text)
text = re.sub(r"#\w+", "", text)
text = re.sub(r"[^\w\s]", "", text)
text = re.sub(r"\d+", "", text)
text = re.sub(r"\s+", " ", text).strip()
return text
# 🔮 Sentiment prediction
def predict_sentiment(text):
    """Classify *text* as Positive or Negative.

    Returns a ``(sentiment_label, confidence_string)`` tuple, e.g.
    ``("Positive 👍🏻", "97.42%")``. On any failure (model failed to
    load, tokenizer error, ...) it returns ``("Error", "0.00%")``
    instead of raising, so UI callers never see a traceback.
    """
    try:
        cleaned = clean_text(text)
        # Tokenize to model inputs; truncate/pad to 128 tokens
        # (presumably the training sequence length — verify).
        inputs = tokenizer(
            cleaned,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=128,
        )
        inputs = {k: v.to(device) for k, v in inputs.items()}
        with torch.no_grad():  # inference only: skip autograd bookkeeping
            outputs = model(**inputs)
        # Softmax over the class dimension; single example -> row 0.
        probs = torch.softmax(outputs.logits, dim=1).cpu().numpy()[0]
        pred_class = probs.argmax()
        # Assumes a binary head: index 1 = positive, index 0 = negative.
        # Encoding fix: label strings restored to proper UTF-8 emoji
        # (the previous file carried double-decoded mojibake).
        sentiment = "Positive 👍🏻" if pred_class == 1 else "Negative 👎🏻"
        confidence = f"{probs[pred_class] * 100:.2f}%"
        return sentiment, confidence
    except Exception as e:
        # Deliberately broad boundary handler: the caller always gets a
        # well-formed tuple rather than an exception.
        print("❌ Prediction error:", e)
        return "Error", "0.00%"