Spaces:

DelaliScratchwerk
/

TextPeriod_Summarization

Sleeping

File size: 1,801 Bytes

410a5d4

import re
from collections import Counter

# Period labels, listed from oldest to most recent.
BUCKETS = [
    "pre-1900",
    "1900–1945",
    "1946–1990",
    "1991–2008",
    "2009–2015",
    "2016–2018",
    "2019–2022",
    "2023–present",
]

# Starter keyword table, grouped by the bucket each keyword votes for
# (extend this over time). Keywords are written in lower case. Group and
# keyword order is significant: it fixes the iteration order of LEXICON.
_KEYWORDS_BY_BUCKET = (
    ("2019–2022", ("covid", "covid-19", "sars-cov-2", "lockdown", "n95", "zoom")),
    ("1991–2008", ("myspace", "iraq war", "y2k")),
    ("2023–present", ("tik tok", "tiktok", "chatgpt")),
    ("1946–1990", ("vietnam war", "sputnik", "cold war")),
)

# Flat keyword→bucket map derived from the grouped table above.
LEXICON = {
    keyword: bucket
    for bucket, keywords in _KEYWORDS_BY_BUCKET
    for keyword in keywords
}

def year_to_bucket(y: int) -> str:
    """Return the period label for year ``y``.

    Years before 1900 map to "pre-1900" and years after 2022 map to
    "2023–present"; every range in between includes both of its end years.
    """
    if y < 1900:
        return "pre-1900"

    # (last year of the range, label), in ascending order; the first range
    # whose last year is not exceeded wins.
    ranges = (
        (1945, "1900–1945"),
        (1990, "1946–1990"),
        (2008, "1991–2008"),
        (2015, "2009–2015"),
        (2018, "2016–2018"),
        (2022, "2019–2022"),
    )
    for last_year, label in ranges:
        if y <= last_year:
            return label
    return "2023–present"

# Stand-alone four-digit years from 1800 through 2099.
_YEAR_PATTERN = re.compile(r"\b(1[89]\d{2}|20\d{2})\b")


def predict_period(text: str):
    """Guess the period bucket of ``text`` by voting on years and keywords.

    Each occurrence of a four-digit year between 1800 and 2099 casts one vote
    for that year's bucket. Each LEXICON keyword contained in the lower-cased
    text casts one vote for its bucket, once per keyword no matter how often
    it appears. The bucket with the most votes wins; on a tie the bucket that
    received its first vote earliest wins (year votes are cast before keyword
    votes).

    Args:
        text: The text to classify. ``None`` or an empty string is treated as
            text without clues.

    Returns:
        A ``(bucket, explanation)`` tuple. ``explanation`` is a dict with a
        ``"reason"`` string and a ``"votes"`` dict mapping bucket to vote
        count. Without any clue the result is ``"2023–present"`` with reason
        ``"no explicit clues"`` and empty votes.
    """
    # Guard first: None has no .lower(), and an empty text cannot hold clues.
    if not text:
        return "2023–present", {"reason": "no explicit clues", "votes": {}}

    lowered = text.lower()

    # 1) explicit years
    votes = Counter(
        year_to_bucket(int(match)) for match in _YEAR_PATTERN.findall(lowered)
    )

    # 2) keyword hits
    # NOTE(review): this is plain substring containment, so "zoom" also
    # matches inside "zoomed", and a text containing "covid-19" also matches
    # "covid" (two votes for one mention). Confirm whether that is intended.
    votes.update(
        bucket for keyword, bucket in LEXICON.items() if keyword in lowered
    )

    if not votes:
        # no clues → default to a broad recent bucket
        return "2023–present", {"reason": "no explicit clues", "votes": {}}

    winner = votes.most_common(1)[0][0]
    return winner, {"reason": "votes", "votes": dict(votes)}

if __name__ == "__main__":
    # Smoke test when run as a script: the sample contains no four-digit
    # year, so the prediction has to come from keyword matching alone.
    sample = "Schools went remote during the pandemic and everyone wore N95 masks."
    print(*predict_period(sample))