|
|
|
|
|
import re
from collections import defaultdict
|
|
|
|
|
|
|
|
# Era-bucket label -> keyword hints for that era.
# Terms are written in lowercase because extract_evidence() lowercases the
# input text before matching. Entries are not in chronological order; the
# insertion order below is the order in which buckets are scanned and in
# which they appear in the "keyword_hits" result.
BUCKET_HINTS = {
    "2019–2022": ["covid", "covid-19", "sars-cov-2", "lockdown", "n95", "zoom", "pfizer", "pcr", "remote school"],
    "2023–present": ["chatgpt", "gpt-4", "llm", "generative ai", "tiktok", "threads", "hybrid work"],
    "1991–2008": ["myspace", "y2k", "dial-up", "iraq war", "windows xp"],
    "1946–1990": ["cold war", "sputnik", "berlin wall", "vietnam", "ussr"],
    "1900–1945": ["world war i", "world war ii", "blitz", "rationing", "great depression"],
    "2009–2015": ["iphone 4", "ipad", "app store", "snapchat", "vine"],
    "2016–2018": ["fidget spinner", "fortnite", "instagram stories"],
    "pre-1900": ["telegraph", "steam locomotive", "victorian"],
}
|
|
|
|
|
# Matches a standalone four-digit year from 1800 through 2099. The \b anchors
# reject digits embedded in longer tokens (e.g. "12021" or "1990s").
YEAR_RE = re.compile(r"\b(1[89]\d{2}|20\d{2})\b")
|
|
|
|
|
def extract_evidence(text: str) -> dict:
    """Collect explicit years and era keyword hits from *text*.

    The text is lowercased, then scanned for four-digit years with
    ``YEAR_RE`` and for every term listed in ``BUCKET_HINTS``.

    Args:
        text: Free-form text to inspect. ``None`` or an empty string yields
            an empty result instead of raising.

    Returns:
        A dict with two keys:

        * ``"years"``: list of matched year strings, in order of appearance
          (duplicates are kept).
        * ``"keyword_hits"``: dict mapping each bucket label that had at
          least one match to the list of its matched terms; buckets with no
          match are omitted.
    """
    if not text:
        return {"years": [], "keyword_hits": {}}

    lowered = text.lower()
    years = YEAR_RE.findall(lowered)

    hits = defaultdict(list)
    for bucket, terms in BUCKET_HINTS.items():
        for term in terms:
            # Require the term to start at a word boundary so short hints do
            # not fire inside unrelated words ("vine" in "divine"). The end
            # is left open so suffixed forms ("lockdowns") still count.
            if re.search(r"(?<!\w)" + re.escape(term), lowered):
                hits[bucket].append(term)

    # Convert to a plain dict so callers do not get auto-created keys.
    return {"years": years, "keyword_hits": dict(hits)}
|
|
|