Spaces:
Paused
Paused
| from __future__ import annotations | |
| import re | |
| from datetime import date | |
| from typing import Dict, List, Optional, Sequence | |
# Holiday keyword -> calendar month number (1-12).
# Keys are lowercase alphanumerics with no spaces or punctuation.
# Entry order is significant to any caller that iterates the dict and stops
# at the first hit, so the original ordering is kept exactly.
# NOTE(review): "nye" is mapped to January although New Year's Eve falls on
# December 31 - confirm this is intended.
HOLIDAY_MONTH_MAP: Dict[str, int] = {
    # January
    "newyear": 1, "newyears": 1, "nye": 1,
    # February
    "valentine": 2, "valentines": 2,
    # March
    "stpatrick": 3,
    # April
    "easter": 4,
    # May
    "mothersday": 5, "memorial": 5,
    # June
    "juneteenth": 6, "pride": 6, "father": 6,
    # July
    "independence": 7, "july4": 7,
    # September
    "labor": 9,
    # October
    "halloween": 10,
    # November
    "thanksgiving": 11, "blackfriday": 11, "cybermonday": 11,
    # December
    "christmas": 12, "xmas": 12, "hanukkah": 12,
}
# Month name / abbreviation -> calendar month number (1-12).
# Each row holds the full name followed by its abbreviation(s); "may" has
# no separate abbreviation and September has two ("sept" and "sep").
MONTH_KEYWORDS: Dict[str, int] = {
    "january": 1, "jan": 1,
    "february": 2, "feb": 2,
    "march": 3, "mar": 3,
    "april": 4, "apr": 4,
    "may": 5,
    "june": 6, "jun": 6,
    "july": 7, "jul": 7,
    "august": 8, "aug": 8,
    "september": 9, "sept": 9, "sep": 9,
    "october": 10, "oct": 10,
    "november": 11, "nov": 11,
    "december": 12, "dec": 12,
}
# Season / school-calendar terms, stored in already-normalized form
# (lowercase alphanumerics, no separators).
SEASON_TERMS = {
    "spring", "summer", "fall", "autumn", "winter",
    "backtoschool", "graduation",
}
# Generic "virality" tags, stored in already-normalized form
# (lowercase alphanumerics, no separators).
VIRAL_TOKENS = {
    "fyp", "foryou", "foryoupage",
    "viral", "trending", "trend",
    "xyzbca",
}
# Matches every run of characters other than lowercase ASCII letters and digits.
ALNUM_RE = re.compile(r"[^a-z0-9]+")


def normalize_token(token: str) -> str:
    """Return *token* lowercased with everything except a-z and 0-9 removed.

    A falsy token (``None``, ``""``) yields the empty string.
    """
    lowered = token.lower() if token else ""
    return ALNUM_RE.sub("", lowered)
def parse_created_month(created_date: Optional[str]) -> Optional[int]:
    """Extract the month number from a ``YYYY-MM[-DD][...]`` style date string.

    The value is stringified, surrounding whitespace is stripped, and any
    time portion following a ``T`` or a space is discarded. The second
    ``-``-separated field is then read as the month.

    Args:
        created_date: Date-like value such as ``"2023-10-05"`` or
            ``"2023-10-05T12:00:00Z"``; falsy values are treated as missing.

    Returns:
        The month as an int in ``1..12``, or ``None`` when the value is
        missing, has no month field, or the month is non-numeric or out of
        range.
    """
    if not created_date:
        return None

    # Strip first: with leading whitespace the first space-split piece would
    # be empty and an otherwise valid date would be rejected.
    text = str(created_date).strip()
    date_part = text.split("T", 1)[0].split(" ", 1)[0]
    fields = date_part.split("-", 2)
    if len(fields) < 2:
        return None

    try:
        month = int(fields[1])
    except (TypeError, ValueError):
        # Non-numeric month field: best-effort parser, so report "unknown".
        return None

    return month if 1 <= month <= 12 else None
def detect_month_from_token(token: str) -> Optional[int]:
    """Map a single token to a month number, if it names a month or holiday.

    The token is normalized first. An exact match against ``MONTH_KEYWORDS``
    wins; otherwise the first ``HOLIDAY_MONTH_MAP`` key (in dict order) that
    occurs anywhere inside the normalized token decides the month.

    NOTE(review): holiday matching is by substring, so unrelated words that
    merely contain a key (e.g. "collaboration" contains "labor") also match.

    Returns:
        The month number, or ``None`` when the token is empty after
        normalization or nothing matches.
    """
    cleaned = normalize_token(token)
    if not cleaned:
        return None

    exact_month = MONTH_KEYWORDS.get(cleaned)
    if exact_month is not None:
        return exact_month

    # First holiday keyword contained in the token, honoring dict order.
    return next(
        (month for holiday, month in HOLIDAY_MONTH_MAP.items() if holiday in cleaned),
        None,
    )
| def _squash_hits(hits: int, *, base: float = 0.35, step: float = 0.15) -> float: | |
| if hits <= 0: | |
| return 0.0 | |
| return float(min(1.0, base + step * (hits - 1))) | |
def compute_time_scores(
    tokens: Sequence[str],
    created_date: Optional[str],
    label_order: Sequence[str],
) -> List[float]:
    """
    Derived time scores (not lexicon-mined).
    Supports the default System 7.1 time labels: ['seasonal', 'viral'].

    Each token is normalized and counted as:
      * a viral hit when it is in ``VIRAL_TOKENS``;
      * a seasonal hit when it is in ``SEASON_TERMS``;
      * a seasonal hit (also tallied per month) when
        ``detect_month_from_token`` resolves it to a month.

    Hit counts are squashed into scores with ``_squash_hits``. When the month
    parsed from ``created_date`` is one of the most frequently mentioned
    months, the seasonal score gets a +0.05 bonus (capped at 1.0).

    Args:
        tokens: Raw tokens; ``None`` or empty is treated as no tokens.
        created_date: Date string handed to ``parse_created_month``.
        label_order: Labels that define the order of the returned scores.

    Returns:
        One float per entry of ``label_order``; labels other than
        ``'seasonal'`` and ``'viral'`` score 0.0.
    """
    seasonal_hits = 0
    viral_hits = 0
    token_month_hits: Dict[int, int] = {}

    for tok in tokens or []:
        norm = normalize_token(str(tok))
        if not norm:
            continue
        if norm in VIRAL_TOKENS:
            viral_hits += 1
        if norm in SEASON_TERMS:
            seasonal_hits += 1
        month = detect_month_from_token(norm)
        if month is not None:
            seasonal_hits += 1
            token_month_hits[month] = token_month_hits.get(month, 0) + 1

    seasonal = _squash_hits(seasonal_hits)
    viral = _squash_hits(viral_hits)

    created_month = parse_created_month(created_date)
    if created_month and token_month_hits:
        # Compare against the top count rather than a single arg-max month:
        # max() returns the first maximal item, which made the bonus depend
        # on token order whenever two months were tied.
        top_count = max(token_month_hits.values())
        if token_month_hits.get(created_month, 0) == top_count:
            # Any month hit also counted as a seasonal hit, so the seasonal
            # score is already positive here.
            seasonal = float(min(1.0, seasonal + 0.05))

    by_label = {"seasonal": seasonal, "viral": viral}
    return [float(by_label.get(label, 0.0)) for label in label_order]