"""Heuristic guess of the time period a piece of text refers to.

Two kinds of clues are collected from the text and counted as votes:
explicit four-digit years (1800-2099) and keywords listed in ``LEXICON``.
The bucket with the most votes wins.
"""
import re
from collections import Counter
from functools import lru_cache

BUCKETS = [
    "pre-1900",
    "1900–1945",
    "1946–1990",
    "1991–2008",
    "2009–2015",
    "2016–2018",
    "2019–2022",
    "2023–present",
]

# Very small keyword→bucket map to start (extend this over time).
# Keys must be lowercase: they are matched against the lowercased text.
LEXICON = {
    "covid": "2019–2022",
    "covid-19": "2019–2022",
    "sars-cov-2": "2019–2022",
    "lockdown": "2019–2022",
    "n95": "2019–2022",
    "zoom": "2019–2022",
    "myspace": "1991–2008",
    "iraq war": "1991–2008",
    "y2k": "1991–2008",
    "tik tok": "2023–present",
    "tiktok": "2023–present",
    "chatgpt": "2023–present",
    "vietnam war": "1946–1990",
    "sputnik": "1946–1990",
    "cold war": "1946–1990",
}

# Inclusive upper year for every bucket between "pre-1900" and "2023–present",
# in ascending order.
_UPPER_BOUNDS = (
    (1945, "1900–1945"),
    (1990, "1946–1990"),
    (2008, "1991–2008"),
    (2015, "2009–2015"),
    (2018, "2016–2018"),
    (2022, "2019–2022"),
)

# Standalone four-digit years from 1800 through 2099.
_YEAR_RE = re.compile(r"\b(1[89]\d{2}|20\d{2})\b")


def year_to_bucket(y: int) -> str:
    """Return the label of the bucket that contains year *y*.

    Years below 1900 map to "pre-1900"; years above 2022 map to
    "2023–present".
    """
    if y < 1900:
        return "pre-1900"
    for upper, label in _UPPER_BOUNDS:
        if y <= upper:
            return label
    return "2023–present"


@lru_cache(maxsize=8)
def _keyword_pattern(keywords: tuple) -> "re.Pattern":
    """Compile one regex that finds any of *keywords* as a whole token.

    Longer keywords are tried first so that "covid-19" is consumed as a
    single hit instead of also counting its prefix "covid". A trailing
    plural "s" is tolerated ("lockdowns"), but a keyword embedded in a
    longer word ("zoomed") is not a hit.
    """
    longest_first = sorted(keywords, key=len, reverse=True)
    alternatives = "|".join(re.escape(k) for k in longest_first)
    return re.compile(rf"(?<!\w)({alternatives})s?(?!\w)")


def predict_period(text: str):
    """Guess which time bucket *text* is about.

    Every explicit year occurrence casts one vote for its bucket, and every
    distinct ``LEXICON`` keyword found in the text casts one vote for the
    bucket it maps to (no matter how often it occurs).

    Args:
        text: Free-form text. ``None`` or an empty string is treated as
            text without clues.

    Returns:
        A ``(bucket, explanation)`` tuple. ``explanation`` is a dict with a
        ``"reason"`` string and a ``"votes"`` dict mapping bucket label to
        vote count. Without any clue the bucket defaults to "2023–present"
        and ``"votes"`` is empty. On a tie, the bucket that received its
        first vote earliest wins (year votes are cast before keyword votes).
    """
    t = (text or "").lower()

    # 1) explicit years
    bucket_votes = [year_to_bucket(int(y)) for y in _YEAR_RE.findall(t)]

    # 2) keyword hits
    if LEXICON:
        # Keyed on the current keys so runtime additions to LEXICON are seen.
        pattern = _keyword_pattern(tuple(LEXICON))
        found = {m.group(1) for m in pattern.finditer(t)}
        # Walk LEXICON (not `found`) to keep the vote order deterministic.
        bucket_votes.extend(b for k, b in LEXICON.items() if k in found)

    if not bucket_votes:
        # no clues → default to a broad recent bucket
        return "2023–present", {"reason": "no explicit clues", "votes": {}}

    counts = Counter(bucket_votes)
    best = counts.most_common(1)[0][0]
    return best, {"reason": "votes", "votes": dict(counts)}


if __name__ == "__main__":
    txt = "Schools went remote during the pandemic and everyone wore N95 masks."
    pred, expl = predict_period(txt)
    print(pred, expl)