Spaces:

DelaliScratchwerk
/

TextPeriod_Summarization

Sleeping

App Files Files Community

TextPeriod_Summarization / mvp_temporal.py

DelaliScratchwerk

Upload 5 files

410a5d4 verified 5 months ago

raw

history blame contribute delete

1.8 kB

	import re
	from collections import Counter

	# Period labels, oldest to newest.
	BUCKETS = [
	    "pre-1900",
	    "1900–1945",
	    "1946–1990",
	    "1991–2008",
	    "2009–2015",
	    "2016–2018",
	    "2019–2022",
	    "2023–present",
	]

	# Starter keyword → period-label map; deliberately tiny, meant to grow over time.
	# Keys are lowercase. Entry order is kept stable on purpose.
	LEXICON = {
	    # pandemic-era terms
	    "covid": "2019–2022",
	    "covid-19": "2019–2022",
	    "sars-cov-2": "2019–2022",
	    "lockdown": "2019–2022",
	    "n95": "2019–2022",
	    "zoom": "2019–2022",
	    # turn-of-the-millennium terms
	    "myspace": "1991–2008",
	    "iraq war": "1991–2008",
	    "y2k": "1991–2008",
	    # most recent terms
	    "tik tok": "2023–present",
	    "tiktok": "2023–present",
	    "chatgpt": "2023–present",
	    # post-war / Cold War terms
	    "vietnam war": "1946–1990",
	    "sputnik": "1946–1990",
	    "cold war": "1946–1990",
	}

	def year_to_bucket(y: int) -> str:
	    """Return the period label whose year range contains ``y``.

	    Anything before 1900 is labelled "pre-1900"; anything after 2022 is
	    labelled "2023–present". Range end years are inclusive.
	    """
	    if y < 1900:
	        return "pre-1900"

	    # (last year covered, label) pairs in ascending order; first match wins.
	    range_ends = (
	        (1945, "1900–1945"),
	        (1990, "1946–1990"),
	        (2008, "1991–2008"),
	        (2015, "2009–2015"),
	        (2018, "2016–2018"),
	        (2022, "2019–2022"),
	    )
	    for last_year, label in range_ends:
	        if y <= last_year:
	            return label
	    return "2023–present"

	def predict_period(text: str):
	    """Guess which time period ``text`` refers to by simple voting.

	    Two kinds of clues each cast one vote for a period label:

	    1. every standalone four-digit year from 1800 to 2099 found in the text,
	       mapped through ``year_to_bucket``;
	    2. every ``LEXICON`` keyword that occurs as a substring of the
	       lower-cased text.

	    Args:
	        text: The text to classify.

	    Returns:
	        A ``(label, explanation)`` tuple. ``explanation`` is a dict with a
	        ``"reason"`` string and a ``"votes"`` dict mapping label → vote
	        count. With no clues at all the label defaults to "2023–present"
	        and ``"votes"`` is empty. On a tie, the label that received its
	        first vote earliest wins (year votes are cast before keyword votes).
	    """
	    lowered = text.lower()

	    # 1) explicit years
	    # The alternation must be an unescaped "|": an escaped "\|" matches a
	    # literal pipe character, so plain years such as "1999" would never be
	    # found and "1999|2020" would crash int().
	    bucket_votes = [
	        year_to_bucket(int(y))
	        for y in re.findall(r"\b(1[89]\d{2}|20\d{2})\b", lowered)
	    ]

	    # 2) keyword hits (substring match, so "lockdowns" still hits "lockdown")
	    bucket_votes.extend(b for k, b in LEXICON.items() if k in lowered)

	    if not bucket_votes:
	        # no clues → default to a broad recent bucket
	        return "2023–present", {"reason": "no explicit clues", "votes": {}}

	    counts = Counter(bucket_votes)
	    best = counts.most_common(1)[0][0]
	    return best, {"reason": "votes", "votes": dict(counts)}

	if __name__ == "__main__":
	    # Quick manual check: classify one sample sentence and print the result.
	    sample = "Schools went remote during the pandemic and everyone wore N95 masks."
	    period, details = predict_period(sample)
	    print(period, details)