| """Configuration for the ML pipeline.""" |
|
|
| import os |
|
|
# Database connection settings.  Every value can be overridden through the
# environment variable named in its os.getenv() call; the literal is the
# fallback used when that variable is not set.
# NOTE(review): the key names and the 5432 default look like PostgreSQL/libpq
# connection keywords -- confirm against the code that consumes this dict.
DB = {
    "host": os.getenv("DB_HOST", "127.0.0.1"),
    # A getenv() default only applies when the variable is *unset*.  Using
    # `or` also covers DB_PORT being set to an empty/blank string, which would
    # otherwise abort the import with int("") -> ValueError.  A non-numeric
    # value still raises ValueError, which is the desired loud failure.
    "port": int((os.getenv("DB_PORT") or "").strip() or "5432"),
    "dbname": os.getenv("DB_NAME", "epstein_research"),
    "user": os.getenv("DB_USER", "epstein"),
    # SECURITY: a real-looking credential is committed here as the fallback.
    # It is kept only so existing deployments keep working; rotate it and
    # supply DB_PASSWORD via the environment, then drop this default.
    "password": os.getenv("DB_PASSWORD", "di9vrLaJGskZrvlDRQJPtDYiFf3UCPl"),
}
|
|
# Absolute filesystem path used as the raw-data directory.
# NOTE(review): presumably where unprocessed input documents are stored --
# confirm against the code that reads it.
RAW_DIR = "/data/raw"


# Name of the spaCy pipeline package to load ("en_core_web_lg" is spaCy's
# large English model).
SPACY_MODEL = "en_core_web_lg"


# Hugging Face model identifier for the zero-shot classifier.
ZERO_SHOT_MODEL = "facebook/bart-large-mnli"
|
|
| |
# Topic label vocabulary (20 lower-case phrases, order preserved).
# NOTE(review): presumably the candidate labels handed to the zero-shot
# classifier -- confirm at the call site.
TOPIC_LABELS = [
    "national security", "intelligence operations",
    "assassination", "military operations",
    "civil rights", "public health",
    "government oversight", "foreign policy",
    "law enforcement", "judicial proceedings",
    "congressional legislation", "scientific research",
    "financial regulation", "nuclear weapons",
    "surveillance", "propaganda",
    "human experimentation", "space exploration",
    "terrorism", "organized crime",
]
|
|
| |
# Maps a U.S. Congress number to a (start, end) pair of ISO "YYYY-MM-DD"
# date strings.  Only Congresses 80-82 and 103-119 are listed.
# NOTE(review): Congresses 83-102 are absent -- confirm the gap is intentional.
CONGRESS_DATES = {
    congress: (first_day, last_day)
    for congress, first_day, last_day in (
        (80, "1947-01-03", "1949-01-03"),
        (81, "1949-01-03", "1951-01-03"),
        (82, "1951-01-03", "1953-01-03"),
        (103, "1993-01-05", "1995-01-03"),
        (104, "1995-01-04", "1997-01-03"),
        (105, "1997-01-07", "1999-01-03"),
        (106, "1999-01-06", "2001-01-03"),
        (107, "2001-01-03", "2003-01-03"),
        (108, "2003-01-07", "2005-01-03"),
        (109, "2005-01-04", "2007-01-03"),
        (110, "2007-01-04", "2009-01-03"),
        (111, "2009-01-06", "2011-01-03"),
        (112, "2011-01-05", "2013-01-03"),
        (113, "2013-01-03", "2015-01-03"),
        (114, "2015-01-06", "2017-01-03"),
        (115, "2017-01-03", "2019-01-03"),
        (116, "2019-01-03", "2021-01-03"),
        (117, "2021-01-03", "2023-01-03"),
        (118, "2023-01-03", "2025-01-03"),
        (119, "2025-01-03", "2027-01-03"),
    )
}
|
|
| |
# Catalogue of historical events.  Each entry is a dict with the keys
# "name", "start", "end" (ISO "YYYY-MM-DD" strings), "category" and
# "keywords" (a list of lower-case phrases).  The rows below are positional
# (name, start, end, category, keywords); list order is kept exactly as
# authored and is not strictly chronological.
HISTORICAL_EVENTS = [
    {
        "name": title,
        "start": begins,
        "end": finishes,
        "category": kind,
        "keywords": terms,
    }
    for title, begins, finishes, kind, terms in (
        (
            "Lincoln Assassination", "1865-04-14", "1865-07-07", "assassination",
            ["lincoln", "booth", "ford's theatre", "assassination", "conspirator"],
        ),
        (
            "Civil War End / Reconstruction", "1865-04-09", "1877-03-31", "war",
            ["reconstruction", "appomattox", "confederate", "freedmen", "civil war"],
        ),
        (
            "Bay of Pigs Invasion", "1961-04-17", "1961-04-20", "military",
            ["bay of pigs", "cuba", "castro", "brigade 2506"],
        ),
        (
            "Cuban Missile Crisis", "1962-10-16", "1962-10-28", "nuclear",
            ["cuban missile", "nuclear", "blockade", "khrushchev"],
        ),
        (
            "JFK Assassination", "1963-11-22", "1964-09-24", "assassination",
            ["kennedy", "oswald", "dallas", "warren commission", "grassy knoll", "dealey plaza"],
        ),
        (
            "Gulf of Tonkin Incident", "1964-08-02", "1964-08-07", "military",
            ["gulf of tonkin", "vietnam", "tonkin resolution"],
        ),
        (
            "MLK Assassination", "1968-04-04", "1968-04-04", "assassination",
            ["martin luther king", "mlk", "james earl ray", "memphis"],
        ),
        (
            "RFK Assassination", "1968-06-05", "1968-06-06", "assassination",
            ["robert kennedy", "rfk", "sirhan", "ambassador hotel"],
        ),
        (
            "Watergate Scandal", "1972-06-17", "1974-08-09", "scandal",
            ["watergate", "nixon", "impeach", "cover-up", "plumbers"],
        ),
        (
            "Church Committee Investigations", "1975-01-27", "1976-04-29", "oversight",
            ["church committee", "intelligence abuses", "cointelpro", "mkultra", "assassination plots"],
        ),
        (
            "MKUltra Program", "1953-04-13", "1973-01-01", "human_experimentation",
            ["mkultra", "mind control", "lsd", "behavioral", "gottlieb", "subproject"],
        ),
        (
            "CIA Stargate / Remote Viewing Program", "1978-01-01", "1995-06-30", "intelligence",
            ["stargate", "remote viewing", "psychic", "grill flame", "sun streak"],
        ),
        (
            "Iran-Contra Affair", "1985-08-01", "1987-11-18", "scandal",
            ["iran-contra", "contras", "nicaragua", "oliver north", "arms sales"],
        ),
        (
            "Area 51 / U-2 Program", "1955-01-01", "1998-12-31", "intelligence",
            ["area 51", "groom lake", "u-2", "oxcart", "a-12", "classified aircraft"],
        ),
        (
            "September 11 Attacks", "2001-09-11", "2001-12-31", "terrorism",
            ["september 11", "9/11", "world trade center", "pentagon", "al-qaeda", "bin laden"],
        ),
        (
            "PATRIOT Act Passage", "2001-10-26", "2001-10-26", "legislation",
            ["patriot act", "surveillance", "domestic spying", "fisa"],
        ),
        (
            "Iraq War Authorization", "2002-10-10", "2003-05-01", "military",
            ["iraq war", "weapons of mass destruction", "wmd", "saddam", "authorization for use"],
        ),
        (
            "Snowden NSA Revelations", "2013-06-05", "2013-12-31", "surveillance",
            ["snowden", "nsa", "prism", "mass surveillance", "metadata"],
        ),
        (
            "COVID-19 Pandemic", "2020-01-20", "2023-05-11", "pandemic",
            ["covid", "coronavirus", "pandemic", "lockdown", "vaccine"],
        ),
        (
            "January 6 Capitol Attack", "2021-01-06", "2022-12-22", "insurrection",
            ["january 6", "capitol", "insurrection", "electoral college", "certification"],
        ),
    )
]
|
|
# Number of items handled per batch.
# NOTE(review): the unit (rows, documents, ...) is not visible in this file --
# confirm against the code that uses it.
BATCH_SIZE = 500
|
|