# Review-RAG / src/inference.py
# Author: HariHaran9597 — initial commit (1d70196)
import torch
from transformers import RobertaTokenizer, RobertaForSequenceClassification
# Standard Categories for our ABSA Model
ASPECT_CATEGORIES = ['food', 'service', 'ambiance', 'price', 'anecdotes/miscellaneous']
LABELS = {0: 'positive', 1: 'negative', 2: 'neutral', 3: 'conflict'}
class ABSAPredictor:
    """Aspect-Based Sentiment Analysis predictor backed by a fine-tuned RoBERTa model.

    Queries the classifier once per aspect category (text + aspect as a
    sentence pair) and keeps only predictions above a confidence threshold.
    If the model cannot be loaded, the predictor degrades to a keyword-based
    MOCK mode so the surrounding application keeps working before training.
    """

    def __init__(self, model_path: str = 'models/absa-roberta-final'):
        """
        Initializes the ABSA predictor by loading the fine-tuned RoBERTa model and tokenizer.

        Args:
            model_path (str): Directory holding the saved model and tokenizer.
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        try:
            self.tokenizer = RobertaTokenizer.from_pretrained(model_path)
            self.model = RobertaForSequenceClassification.from_pretrained(model_path)
            self.model = self.model.to(self.device)
            self.model.eval()
            self.is_loaded = True
            print(f"Model successfully loaded from {model_path} onto {self.device}")
        except Exception as e:
            # Deliberate broad catch: any load failure (missing files, bad
            # weights, missing dependency) falls back to mock mode rather
            # than crashing the caller.
            print(f"Warning: Could not load model from {model_path}. Error: {e}")
            print("The predictor will run in MOCK mode until you place the trained model in the directory.")
            self.is_loaded = False

    def predict(self, review_text: str, confidence_threshold: float = 0.6) -> dict:
        """
        Predicts sentiments for all aspect categories in a single review.

        Args:
            review_text (str): The raw text of the review.
            confidence_threshold (float): Only return aspects where model confidence >= threshold.

        Returns:
            dict: A dictionary mapping aspect -> {'sentiment': str, 'confidence': float}
        """
        # Fallback Mock Mode if model isn't built yet
        if not self.is_loaded:
            return self._mock_predict(review_text)

        results = {}
        # Actual Inference Loop: one forward pass per aspect, scoring the
        # (review, aspect) sentence pair.
        with torch.no_grad():
            for aspect in ASPECT_CATEGORIES:
                # No padding needed for a single-sequence batch; truncation
                # alone keeps the pair within the 128-token budget.
                inputs = self.tokenizer(
                    review_text,
                    aspect,
                    truncation=True,
                    max_length=128,
                    return_tensors='pt'
                )
                inputs = {k: v.to(self.device) for k, v in inputs.items()}
                outputs = self.model(**inputs)
                probs = torch.softmax(outputs.logits, dim=1)
                # Batch size is 1: index row 0 explicitly, then take the
                # top label and its probability in a single call.
                confidence_t, pred_label_idx_t = torch.max(probs[0], dim=0)
                confidence = confidence_t.item()
                # We only say an aspect is "mentioned" if the confidence of
                # the winning sentiment clears the threshold.
                if confidence >= confidence_threshold:
                    results[aspect] = {
                        'sentiment': LABELS[pred_label_idx_t.item()],
                        'confidence': round(confidence, 3)
                    }
        return results

    def _mock_predict(self, review_text: str) -> dict:
        """Keyword-based stand-in used before a trained model is available."""
        results = {}
        # Simple mock logic for demonstration before actual model is downloaded
        lower_text = review_text.lower()
        if 'pizza' in lower_text or 'food' in lower_text or 'delicious' in lower_text:
            results['food'] = {'sentiment': 'positive', 'confidence': 0.95}
        if 'waiter' in lower_text or 'slow' in lower_text or 'rude' in lower_text:
            results['service'] = {'sentiment': 'negative', 'confidence': 0.88}
        if 'expensive' in lower_text or 'cheap' in lower_text:
            results['price'] = {'sentiment': 'negative' if 'expensive' in lower_text else 'positive', 'confidence': 0.82}
        return results
if __name__ == "__main__":
    import argparse

    # Minimal CLI: analyze a single review supplied via --text.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--text', type=str, required=True, help="Review text to analyze")
    cli_args = arg_parser.parse_args()

    predictor = ABSAPredictor()
    predictions = predictor.predict(cli_args.text)

    print(f"\nReview: '{cli_args.text}'")
    print("Detected Aspects & Sentiments:")
    for aspect_name, info in predictions.items():
        print(f" - {aspect_name.ljust(15)}: {info['sentiment'].ljust(10)} (conf: {info['confidence']})")
    if not predictions:
        print(" - No aspects detected above the confidence threshold.")