DelaliScratchwerk committed on
Commit
5e572ba
·
verified ·
1 Parent(s): 3133f26

Create evidence.py

Browse files
Files changed (1) hide show
  1. evidence.py +27 -0
evidence.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # evidence.py
2
+ import re
3
+ from collections import defaultdict
4
+
5
# Seed hints – extend over time.
# Maps an era bucket label to lowercase keywords/phrases; extract_evidence()
# lowercases its input before matching, so entries must stay lowercase.
BUCKET_HINTS = {
    "2019–2022": ["covid", "covid-19", "sars-cov-2", "lockdown", "n95", "zoom", "pfizer", "pcr", "remote school"],
    "2023–present": ["chatgpt", "gpt-4", "llm", "generative ai", "tiktok", "threads", "hybrid work"],
    "1991–2008": ["myspace", "y2k", "dial-up", "iraq war", "windows xp"],
    "1946–1990": ["cold war", "sputnik", "berlin wall", "vietnam", "ussr"],
    "1900–1945": ["world war i", "world war ii", "blitz", "rationing", "great depression"],
    "2009–2015": ["iphone 4", "ipad", "app store", "snapchat", "vine"],
    "2016–2018": ["fidget spinner", "fortnite", "instagram stories"],
    "pre-1900": ["telegraph", "steam locomotive", "victorian"],
}

# Standalone four-digit years from 1800 through 2099.
YEAR_RE = re.compile(r"\b(1[89]\d{2}|20\d{2})\b")

# Compiled whole-token patterns, keyed by hint term. Filled lazily so terms
# added to BUCKET_HINTS later are picked up on their first use.
_TERM_PATTERNS = {}


def _term_pattern(term):
    """Return a cached regex that matches *term* as a whole token."""
    pattern = _TERM_PATTERNS.get(term)
    if pattern is None:
        # The lookarounds stop short terms from matching inside longer words
        # ("llm" in "enrollment", "vine" in "divine") and stop "world war i"
        # from matching inside "world war ii". The optional "s" keeps simple
        # plurals ("lockdowns", "ipads") counting as hits.
        pattern = re.compile(r"(?<!\w)" + re.escape(term) + r"s?(?!\w)")
        _TERM_PATTERNS[term] = pattern
    return pattern


def extract_evidence(text: str) -> dict:
    """Collect year mentions and era keyword hits from *text*.

    Matching is case-insensitive (the text is lowercased first) and hint
    terms must appear as whole tokens, optionally followed by a plural "s".

    Args:
        text: Free-form text to scan. ``None`` or an empty string yields an
            empty result instead of raising.

    Returns:
        A dict with two keys:
        ``"years"`` - list of four-digit year strings (1800-2099) in order
        of appearance, duplicates included;
        ``"keyword_hits"`` - dict mapping each bucket label that had at least
        one match to the list of matched hint terms, in BUCKET_HINTS order.
    """
    if not text:
        return {"years": [], "keyword_hits": {}}

    lowered = text.lower()
    hits = defaultdict(list)
    for bucket, terms in BUCKET_HINTS.items():
        for term in terms:
            if _term_pattern(term).search(lowered):
                hits[bucket].append(term)
    # Convert to a plain dict so callers do not get defaultdict's
    # insert-on-lookup behaviour.
    return {"years": YEAR_RE.findall(lowered), "keyword_hits": dict(hits)}