|
|
import re |
|
|
from collections import Counter |
|
|
|
|
|
# Labels of the time-period buckets a text can be assigned to, listed in
# chronological order. Each label spells out the inclusive year range it covers.
BUCKETS = [
    "pre-1900",
    "1900–1945",
    "1946–1990",
    "1991–2008",
    "2009–2015",
    "2016–2018",
    "2019–2022",
    "2023–present",
]
|
|
|
|
|
|
|
|
# Lower-case keyword -> bucket label that a mention of the keyword votes for.
# Insertion order is significant: votes are cast in this order, and ties
# between buckets are resolved in favour of the bucket that was voted first.
# NOTE(review): keywords are looked up as plain substrings of the lower-cased
# text, so short or generic entries such as "zoom" can match unrelated words.
LEXICON = {
    # 2019–2022
    "covid": "2019–2022",
    "covid-19": "2019–2022",
    "sars-cov-2": "2019–2022",
    "lockdown": "2019–2022",
    "n95": "2019–2022",
    "zoom": "2019–2022",
    # 1991–2008
    "myspace": "1991–2008",
    "iraq war": "1991–2008",
    "y2k": "1991–2008",
    # 2023–present
    "tik tok": "2023–present",
    "tiktok": "2023–present",
    "chatgpt": "2023–present",
    # 1946–1990
    "vietnam war": "1946–1990",
    "sputnik": "1946–1990",
    "cold war": "1946–1990",
}
|
|
|
|
|
def year_to_bucket(y: int) -> str:
    """Return the label of the time-period bucket that contains year ``y``.

    Args:
        y: A calendar year.

    Returns:
        One of the bucket labels ``"pre-1900"``, ``"1900–1945"``,
        ``"1946–1990"``, ``"1991–2008"``, ``"2009–2015"``, ``"2016–2018"``,
        ``"2019–2022"`` or ``"2023–present"``. Years below 1900 map to
        ``"pre-1900"`` and years above 2022 map to ``"2023–present"``.
    """
    if y < 1900:
        return "pre-1900"

    # (inclusive upper bound, label) pairs in ascending order; the first
    # bound that is >= y identifies the bucket.
    bounded_buckets = (
        (1945, "1900–1945"),
        (1990, "1946–1990"),
        (2008, "1991–2008"),
        (2015, "2009–2015"),
        (2018, "2016–2018"),
        (2022, "2019–2022"),
    )
    for upper_bound, label in bounded_buckets:
        if y <= upper_bound:
            return label

    # Anything later than the last bounded bucket is open-ended.
    return "2023–present"
|
|
|
|
|
def predict_period(text: str):
    """Guess which time-period bucket ``text`` refers to from explicit clues.

    Two kinds of clue cast votes for a bucket:

    * every four-digit year from 1800 to 2099 found in the text, one vote per
      occurrence, mapped to a bucket with ``year_to_bucket``;
    * every ``LEXICON`` keyword found in the lower-cased text, one vote per
      distinct keyword no matter how often it occurs. Keywords are matched
      as substrings, so ``"lockdowns"`` still counts as ``"lockdown"``.

    The bucket with the most votes wins. On a tie, the bucket that received
    its first vote earliest wins (year votes come first, then keyword votes
    in ``LEXICON`` order).

    Args:
        text: The text to classify. Matching is case-insensitive.

    Returns:
        A ``(bucket, explanation)`` tuple. ``explanation`` is a dict with a
        ``"reason"`` string and a ``"votes"`` dict mapping bucket label to
        vote count. When no clue is found the bucket defaults to
        ``"2023–present"``, the reason is ``"no explicit clues"`` and
        ``"votes"`` is empty.
    """
    lowered = text.lower()

    # Explicit year mentions: one vote each.
    bucket_votes = [
        year_to_bucket(int(year))
        for year in re.findall(r"\b(1[89]\d{2}|20\d{2})\b", lowered)
    ]

    # Scan for all keywords in a single pass with the longest alternative
    # first, so a mention such as "covid-19" is claimed by the "covid-19"
    # entry and does not cast a second vote through its substring "covid".
    longest_first = sorted(LEXICON, key=len, reverse=True)
    keyword_pattern = "|".join(re.escape(keyword) for keyword in longest_first)
    found = set(re.findall(keyword_pattern, lowered)) if keyword_pattern else set()

    # Iterate LEXICON rather than `found` so the vote order, and therefore
    # the tie-breaking below, stays deterministic.
    bucket_votes.extend(
        bucket for keyword, bucket in LEXICON.items() if keyword in found
    )

    if not bucket_votes:
        # Nothing datable in the text: fall back to the most recent bucket.
        return "2023–present", {"reason": "no explicit clues", "votes": {}}

    counts = Counter(bucket_votes)
    best, _ = counts.most_common(1)[0]
    return best, {"reason": "votes", "votes": dict(counts)}
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: classify one sample sentence and print the predicted
    # bucket together with the explanation dict.
    txt = "Schools went remote during the pandemic and everyone wore N95 masks."
    pred, expl = predict_period(txt)
    print(pred, expl)
|
|
|