# AspectBERT / src / constants.py
# Page header captured along with the file (not part of the module):
# itismeTithi, commit 31f6bcb "Deploy AspectBERT Streamlit app", 2.7 kB.
"""Shared constants and helpers for AspectBERT.
Centralized here so data preparation, training, and inference all agree on
aspect names, label encoding, and the model input format.
"""
import re
# Aspect names, in a fixed order; the same strings are used as the keys of
# ASPECT_KEYWORDS.
ASPECTS = [
    "battery",
    "display",
    "camera",
    "price",
    "performance",
    "design",
    "software",
    "customer_service",
]
# Lower-case trigger words and phrases for each aspect. A review is taken to
# mention an aspect when any of its entries occurs in the text.
ASPECT_KEYWORDS = {
    # Power and charging vocabulary.
    "battery": [
        "battery", "batteries", "charge", "charging",
        "charger", "battery life", "drain", "recharge",
        "mah", "power bank",
    ],
    # Screen and panel vocabulary.
    "display": [
        "screen", "display", "resolution", "brightness",
        "touchscreen", "touch screen", "pixel", "lcd",
        "oled", "monitor",
    ],
    # Photo and video vocabulary.
    "camera": [
        "camera", "photo", "photos", "picture",
        "pictures", "lens", "video", "zoom",
        "image", "selfie", "megapixel",
    ],
    # Cost and value-for-money vocabulary.
    "price": [
        "price", "priced", "cost", "expensive",
        "cheap", "affordable", "value", "worth",
        "money", "overpriced", "budget", "deal",
    ],
    # Speed and responsiveness vocabulary.
    "performance": [
        "performance", "speed", "fast", "slow",
        "lag", "laggy", "processor", "smooth",
        "responsive", "cpu", "ram", "freeze",
        "hang", "loading",
    ],
    # Look-and-feel vocabulary.
    "design": [
        "design", "look", "looks", "build quality",
        "feel", "weight", "size", "color",
        "colour", "material", "sleek", "compact",
        "appearance", "style",
    ],
    # Apps, operating system and firmware vocabulary.
    "software": [
        "software", "app", "apps", "update",
        "interface", "ui", "os", "operating system",
        "bug", "glitch", "firmware", "android",
        "ios",
    ],
    # Support, returns and delivery vocabulary.
    "customer_service": [
        "customer service", "support", "return", "returned",
        "refund", "warranty", "replacement", "seller",
        "customer support", "exchange", "delivery",
    ],
}
# One precompiled, case-insensitive regex per aspect. Each pattern is an
# alternation of that aspect's escaped keywords wrapped in word boundaries,
# so a keyword only matches as a whole word or phrase.
_ASPECT_PATTERNS = {
    name: re.compile(
        r"\b(?:" + "|".join(map(re.escape, terms)) + r")\b",
        flags=re.IGNORECASE,
    )
    for name, terms in ASPECT_KEYWORDS.items()
}
# Sentiment class names mapped to integer ids; the id is the position of the
# name in the tuple below.
LABEL2ID = {
    name: idx for idx, name in enumerate(("negative", "neutral", "positive"))
}
# Reverse lookup: integer id back to class name.
ID2LABEL = dict(zip(LABEL2ID.values(), LABEL2ID.keys()))
NUM_LABELS = len(ID2LABEL)

# Presumably the pretrained checkpoint identifier and the maximum tokenized
# input length used by training and inference; verify against the callers.
MODEL_NAME = "distilbert-base-uncased"
MAX_LENGTH = 128
def rating_to_label(rating):
    """Map a 1-5 star rating to a sentiment label.

    Args:
        rating: Star rating as a number or a numeric string; it is converted
            with ``float()`` before comparison.

    Returns:
        ``"positive"`` for ratings of 4 and above, ``"neutral"`` for ratings
        from 3 up to (but not including) 4, and ``"negative"`` otherwise.

    Raises:
        ValueError: If ``rating`` is a string that does not parse as a number.
        TypeError: If ``rating`` is of a type ``float()`` cannot convert
            (for example ``None``).
    """
    score = float(rating)
    if score >= 4:
        return "positive"
    # Compare against a range rather than `== 3`: a fractional rating such as
    # 3.5 must not fall through to "negative".
    if score >= 3:
        return "neutral"
    # NOTE: NaN compares false against both thresholds and therefore ends up
    # here; filter out missing ratings before calling.
    return "negative"
def detect_aspects(text):
    """List every aspect with at least one keyword present in `text`.

    Each aspect's precompiled pattern in `_ASPECT_PATTERNS` is searched
    against `text`; aspects are reported in that mapping's iteration order.
    An empty list means no aspect matched.
    """
    found = []
    for name, matcher in _ASPECT_PATTERNS.items():
        # `search` returns a match object on a hit and None otherwise.
        if matcher.search(text) is not None:
            found.append(name)
    return found
def format_input(text, aspect):
    """Compose the model input string for one (review, aspect) pair.

    The result is the review text, the literal separator ' aspect: ', and
    then the aspect name, i.e. '{review_text} aspect: {aspect_name}'.
    """
    template = "{} aspect: {}"
    return template.format(text, aspect)