"""Shared constants and helpers for AspectBERT.

Centralized here so data preparation, training, and inference all agree on
aspect names, label encoding, and the model input format.
"""

import re
# Canonical aspect names. Kept as a list so the order is stable for any
# consumer that iterates or indexes it.
ASPECTS = [
    "battery",
    "display",
    "camera",
    "price",
    "performance",
    "design",
    "software",
    "customer_service",
]
# Lower-case keywords and phrases that signal each aspect in review text.
# Multi-word entries (e.g. "battery life") are matched as literal phrases.
ASPECT_KEYWORDS = {
    "battery": [
        "battery", "batteries", "charge", "charging", "charger",
        "battery life", "drain", "recharge", "mah", "power bank",
    ],
    "display": [
        "screen", "display", "resolution", "brightness", "touchscreen",
        "touch screen", "pixel", "lcd", "oled", "monitor",
    ],
    "camera": [
        "camera", "photo", "photos", "picture", "pictures", "lens",
        "video", "zoom", "image", "selfie", "megapixel",
    ],
    "price": [
        "price", "priced", "cost", "expensive", "cheap", "affordable",
        "value", "worth", "money", "overpriced", "budget", "deal",
    ],
    "performance": [
        "performance", "speed", "fast", "slow", "lag", "laggy",
        "processor", "smooth", "responsive", "cpu", "ram", "freeze",
        "hang", "loading",
    ],
    "design": [
        "design", "look", "looks", "build quality", "feel", "weight",
        "size", "color", "colour", "material", "sleek", "compact",
        "appearance", "style",
    ],
    "software": [
        "software", "app", "apps", "update", "interface", "ui", "os",
        "operating system", "bug", "glitch", "firmware", "android", "ios",
    ],
    "customer_service": [
        "customer service", "support", "return", "returned", "refund",
        "warranty", "replacement", "seller", "customer support",
        "exchange", "delivery",
    ],
}
# One precompiled, case-insensitive regex per aspect, built once at import.
# Each keyword is escaped so it matches literally, and the alternation is
# wrapped in \b...\b so short keywords such as "ui" or "os" only match as
# whole words rather than inside longer words.
_ASPECT_PATTERNS = {
    aspect: re.compile(
        r"\b(?:" + "|".join(re.escape(kw) for kw in keywords) + r")\b",
        re.IGNORECASE,
    )
    for aspect, keywords in ASPECT_KEYWORDS.items()
}
# Sentiment label <-> integer id mappings; ID2LABEL is derived from LABEL2ID
# so the two cannot drift apart.
LABEL2ID = {"negative": 0, "neutral": 1, "positive": 2}
ID2LABEL = {v: k for k, v in LABEL2ID.items()}
NUM_LABELS = len(LABEL2ID)

# NOTE(review): presumably the pretrained checkpoint identifier and the
# maximum tokenized sequence length -- confirm against the training code.
MODEL_NAME = "distilbert-base-uncased"
MAX_LENGTH = 128
def rating_to_label(rating):
    """Map a 1-5 star rating to a sentiment label.

    Args:
        rating: Star rating as a number or numeric string; it is converted
            with ``float()`` before comparison.

    Returns:
        ``"positive"`` for ratings of 4 and above, ``"neutral"`` for ratings
        from 3 up to (but not including) 4, and ``"negative"`` otherwise.

    Raises:
        ValueError: If ``rating`` is a string that cannot be parsed as a
            number.
        TypeError: If ``rating`` is of a type ``float()`` does not accept.

    Note:
        NaN compares false against both thresholds and is therefore labelled
        ``"negative"``; filter out missing ratings before calling.
    """
    rating = float(rating)
    if rating >= 4:
        return "positive"
    # Compare against a range rather than ``== 3`` so fractional ratings such
    # as 3.5 are labelled neutral instead of falling through to "negative".
    if rating >= 3:
        return "neutral"
    return "negative"
def detect_aspects(text):
    """Return the list of aspects whose keywords appear in `text` as whole words.

    Args:
        text: Review text to scan. ``None`` or an empty string yields an
            empty list.

    Returns:
        Aspect names whose pattern in ``_ASPECT_PATTERNS`` matches ``text``,
        in the iteration order of ``_ASPECT_PATTERNS``. A text can match
        several aspects, or none.
    """
    # Missing or empty text cannot mention any aspect; returning early also
    # avoids passing None to ``pattern.search``, which would raise TypeError.
    if not text:
        return []
    return [
        aspect
        for aspect, pattern in _ASPECT_PATTERNS.items()
        if pattern.search(text)
    ]
def format_input(text, aspect):
    """Build the model input string: '{review_text} aspect: {aspect_name}'.

    Args:
        text: The review text.
        aspect: The aspect name appended after the review text.

    Returns:
        ``text`` followed by the literal separator ``" aspect: "`` and then
        ``aspect``. Both arguments are interpolated as-is, with no stripping
        or validation.
    """
    return f"{text} aspect: {aspect}"