# Review-RAG / src/inference.py
# Author: HariHaran9597 — initial commit (1d70196)
import torch
from transformers import RobertaTokenizer, RobertaForSequenceClassification
# Standard Categories for our ABSA Model
ASPECT_CATEGORIES = ['food', 'service', 'ambiance', 'price', 'anecdotes/miscellaneous']
LABELS = {0: 'positive', 1: 'negative', 2: 'neutral', 3: 'conflict'}
class ABSAPredictor:
    """Aspect-Based Sentiment Analysis predictor backed by a fine-tuned RoBERTa model.

    Queries the classifier once per aspect category (text + aspect as a
    sentence pair) and keeps only predictions above a confidence threshold.
    If the model cannot be loaded, the predictor degrades to a keyword-based
    MOCK mode so the surrounding application keeps working before training.
    """

    def __init__(self, model_path: str = 'models/absa-roberta-final'):
        """
        Initializes the ABSA predictor by loading the fine-tuned RoBERTa model and tokenizer.

        Args:
            model_path (str): Directory holding the saved model and tokenizer.
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        try:
            self.tokenizer = RobertaTokenizer.from_pretrained(model_path)
            self.model = RobertaForSequenceClassification.from_pretrained(model_path)
            self.model = self.model.to(self.device)
            self.model.eval()
            self.is_loaded = True
            print(f"Model successfully loaded from {model_path} onto {self.device}")
        except Exception as e:
            # Deliberate broad catch: any load failure (missing files, bad
            # weights, missing dependency) falls back to mock mode rather
            # than crashing the caller.
            print(f"Warning: Could not load model from {model_path}. Error: {e}")
            print("The predictor will run in MOCK mode until you place the trained model in the directory.")
            self.is_loaded = False

    def predict(self, review_text: str, confidence_threshold: float = 0.6) -> dict:
        """
        Predicts sentiments for all aspect categories in a single review.

        Args:
            review_text (str): The raw text of the review.
            confidence_threshold (float): Only return aspects where model confidence >= threshold.

        Returns:
            dict: A dictionary mapping aspect -> {'sentiment': str, 'confidence': float}
        """
        # Fallback Mock Mode if model isn't built yet
        if not self.is_loaded:
            return self._mock_predict(review_text)

        results = {}
        # Actual Inference Loop: one forward pass per aspect, scoring the
        # (review, aspect) sentence pair.
        with torch.no_grad():
            for aspect in ASPECT_CATEGORIES:
                # No padding needed for a single-sequence batch; truncation
                # alone keeps the pair within the 128-token budget.
                inputs = self.tokenizer(
                    review_text,
                    aspect,
                    truncation=True,
                    max_length=128,
                    return_tensors='pt'
                )
                inputs = {k: v.to(self.device) for k, v in inputs.items()}
                outputs = self.model(**inputs)
                probs = torch.softmax(outputs.logits, dim=1)
                # Batch size is 1: index row 0 explicitly, then take the
                # top label and its probability in a single call.
                confidence_t, pred_label_idx_t = torch.max(probs[0], dim=0)
                confidence = confidence_t.item()
                # We only say an aspect is "mentioned" if the confidence of
                # the winning sentiment clears the threshold.
                if confidence >= confidence_threshold:
                    results[aspect] = {
                        'sentiment': LABELS[pred_label_idx_t.item()],
                        'confidence': round(confidence, 3)
                    }
        return results

    def _mock_predict(self, review_text: str) -> dict:
        """Keyword-based stand-in used before a trained model is available."""
        results = {}
        # Simple mock logic for demonstration before actual model is downloaded
        lower_text = review_text.lower()
        if 'pizza' in lower_text or 'food' in lower_text or 'delicious' in lower_text:
            results['food'] = {'sentiment': 'positive', 'confidence': 0.95}
        if 'waiter' in lower_text or 'slow' in lower_text or 'rude' in lower_text:
            results['service'] = {'sentiment': 'negative', 'confidence': 0.88}
        if 'expensive' in lower_text or 'cheap' in lower_text:
            results['price'] = {'sentiment': 'negative' if 'expensive' in lower_text else 'positive', 'confidence': 0.82}
        return results
if __name__ == "__main__":
    import argparse

    # Minimal CLI: analyze a single review supplied via --text.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--text', type=str, required=True, help="Review text to analyze")
    cli_args = arg_parser.parse_args()

    predictor = ABSAPredictor()
    predictions = predictor.predict(cli_args.text)

    print(f"\nReview: '{cli_args.text}'")
    print("Detected Aspects & Sentiments:")
    for aspect_name, info in predictions.items():
        print(f" - {aspect_name.ljust(15)}: {info['sentiment'].ljust(10)} (conf: {info['confidence']})")
    if not predictions:
        print(" - No aspects detected above the confidence threshold.")