"""Shared constants and helpers for AspectBERT. Centralized here so data preparation, training, and inference all agree on aspect names, label encoding, and the model input format. """ import re ASPECTS = [ "battery", "display", "camera", "price", "performance", "design", "software", "customer_service", ] ASPECT_KEYWORDS = { "battery": [ "battery", "batteries", "charge", "charging", "charger", "battery life", "drain", "recharge", "mah", "power bank", ], "display": [ "screen", "display", "resolution", "brightness", "touchscreen", "touch screen", "pixel", "lcd", "oled", "monitor", ], "camera": [ "camera", "photo", "photos", "picture", "pictures", "lens", "video", "zoom", "image", "selfie", "megapixel", ], "price": [ "price", "priced", "cost", "expensive", "cheap", "affordable", "value", "worth", "money", "overpriced", "budget", "deal", ], "performance": [ "performance", "speed", "fast", "slow", "lag", "laggy", "processor", "smooth", "responsive", "cpu", "ram", "freeze", "hang", "loading", ], "design": [ "design", "look", "looks", "build quality", "feel", "weight", "size", "color", "colour", "material", "sleek", "compact", "appearance", "style", ], "software": [ "software", "app", "apps", "update", "interface", "ui", "os", "operating system", "bug", "glitch", "firmware", "android", "ios", ], "customer_service": [ "customer service", "support", "return", "returned", "refund", "warranty", "replacement", "seller", "customer support", "exchange", "delivery", ], } _ASPECT_PATTERNS = { aspect: re.compile( r"\b(?:" + "|".join(re.escape(kw) for kw in keywords) + r")\b", re.IGNORECASE, ) for aspect, keywords in ASPECT_KEYWORDS.items() } LABEL2ID = {"negative": 0, "neutral": 1, "positive": 2} ID2LABEL = {v: k for k, v in LABEL2ID.items()} NUM_LABELS = len(LABEL2ID) MODEL_NAME = "distilbert-base-uncased" MAX_LENGTH = 128 def rating_to_label(rating): """Map a 1-5 star rating to a sentiment label.""" rating = float(rating) if rating >= 4: return "positive" if rating == 3: return "neutral" return "negative" def detect_aspects(text): """Return the list of aspects whose keywords appear in `text` as whole words.""" return [aspect for aspect, pattern in _ASPECT_PATTERNS.items() if pattern.search(text)] def format_input(text, aspect): """Build the model input string: '{review_text} aspect: {aspect_name}'.""" return f"{text} aspect: {aspect}"