| |
| |
| |
|
|
| BEGIN; |
|
|
| |
| CREATE TABLE IF NOT EXISTS historical_events ( |
| id SERIAL PRIMARY KEY, |
| event_name TEXT NOT NULL UNIQUE, |
| start_date DATE NOT NULL, |
| end_date DATE, |
| category VARCHAR(50), |
| description TEXT, |
| keywords JSONB DEFAULT '[]' |
| ); |
|
|
| |
| CREATE TABLE IF NOT EXISTS document_events ( |
| document_id INT NOT NULL REFERENCES documents(id) ON DELETE CASCADE, |
| event_id INT NOT NULL REFERENCES historical_events(id) ON DELETE CASCADE, |
| relevance_score REAL NOT NULL DEFAULT 0, |
| match_methods JSONB DEFAULT '[]', |
| details JSONB DEFAULT '{}', |
| created_at TIMESTAMP DEFAULT NOW(), |
| PRIMARY KEY (document_id, event_id) |
| ); |
|
|
| CREATE INDEX IF NOT EXISTS idx_document_events_event ON document_events(event_id); |
| CREATE INDEX IF NOT EXISTS idx_document_events_score ON document_events(relevance_score DESC); |
|
|
| |
| CREATE TABLE IF NOT EXISTS document_dates ( |
| document_id INT PRIMARY KEY REFERENCES documents(id) ON DELETE CASCADE, |
| estimated_date DATE, |
| date_source VARCHAR(30), |
| date_confidence REAL, |
| date_range_start DATE, |
| date_range_end DATE, |
| congress_session INT, |
| created_at TIMESTAMP DEFAULT NOW() |
| ); |
|
|
| CREATE INDEX IF NOT EXISTS idx_document_dates_date ON document_dates(estimated_date); |
| CREATE INDEX IF NOT EXISTS idx_document_dates_congress ON document_dates(congress_session); |
|
|
| |
| CREATE TABLE IF NOT EXISTS ml_pipeline_log ( |
| id SERIAL PRIMARY KEY, |
| pipeline TEXT NOT NULL, |
| started_at TIMESTAMP DEFAULT NOW(), |
| finished_at TIMESTAMP, |
| docs_processed INT DEFAULT 0, |
| status TEXT DEFAULT 'running' |
| ); |
|
|
| COMMIT; |
|
|