File size: 5,198 Bytes
9daab04
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48486c6
 
 
 
 
 
 
 
 
330e927
bc1ac55
 
 
 
48486c6
 
330e927
bc1ac55
48486c6
 
9daab04
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import logging
from api.database import db
import asyncio

log = logging.getLogger("api.bootstrap")

TABLE_ACTIVE_CONFLICTS = """
CREATE TABLE IF NOT EXISTS active_conflicts (
  conflict_id   SERIAL PRIMARY KEY,
  name          VARCHAR(200),
  countries     TEXT[],
  region        VARCHAR(100),
  start_date    DATE,
  status        VARCHAR(20) DEFAULT 'ACTIVE',
  intensity     VARCHAR(20) DEFAULT 'CRISIS',
  total_events  INTEGER DEFAULT 0,
  last_event_at TIMESTAMP DEFAULT NOW()
);
"""

TABLE_CONFLICT_EVENTS = """
CREATE TABLE IF NOT EXISTS conflict_events (
  id              BIGSERIAL PRIMARY KEY,
  event_id        VARCHAR(100) UNIQUE NOT NULL,
  source          VARCHAR(20)  NOT NULL,
  source_reliability VARCHAR(10) DEFAULT 'MEDIUM',
  event_time      TIMESTAMP NOT NULL,
  event_date      DATE      NOT NULL,
  country         VARCHAR(100) NOT NULL,
  country_iso3    CHAR(3)      NOT NULL,
  region          VARCHAR(100),
  admin1          VARCHAR(100),
  admin2          VARCHAR(100),
  city            VARCHAR(100),
  lat             DECIMAL(9,6),
  lon             DECIMAL(10,6),
  geom            GEOGRAPHY(POINT,4326),
  geo_precision   SMALLINT DEFAULT 3,
  event_type      VARCHAR(50),
  event_subtype   VARCHAR(80),
  interaction_code VARCHAR(60),
  actor1          VARCHAR(200),
  actor1_type     VARCHAR(50),
  actor2          VARCHAR(200),
  actor2_type     VARCHAR(50),
  fatalities      INTEGER DEFAULT 0,
  fatalities_civilians INTEGER DEFAULT 0,
  fatalities_confidence VARCHAR(10) DEFAULT 'LOW',
  severity        VARCHAR(20),
  severity_score  DECIMAL(4,2),
  title           VARCHAR(500),
  notes           TEXT,
  tags            TEXT[],
  source_url      TEXT,
  conflict_name   VARCHAR(200),
  conflict_id     INTEGER REFERENCES active_conflicts(conflict_id),
  category        VARCHAR(20) DEFAULT 'GENERAL',
  ingested_at     TIMESTAMP DEFAULT NOW()
);
"""

TABLE_INTEL_ARTICLES = """
CREATE TABLE IF NOT EXISTS intel_articles (
  id              SERIAL PRIMARY KEY,
  title           VARCHAR(255) NOT NULL,
  content         TEXT NOT NULL,
  author          VARCHAR(100),
  tags            TEXT[],
  created_at      TIMESTAMP DEFAULT NOW(),
  updated_at      TIMESTAMP DEFAULT NOW()
);
"""

SEARCH_VECTOR_ADD = """
DO $$ 
BEGIN 
  IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='intel_articles' AND column_name='search_vector') THEN
    ALTER TABLE intel_articles ADD COLUMN search_vector TSVECTOR 
    GENERATED ALWAYS AS (to_tsvector('english', title || ' ' || content)) STORED;
  END IF;
END $$;
"""

async def bootstrap_db():
    log.info("Starting database bootstrap process...")
    try:
        async with db.pool.acquire() as conn:
            # 1. Extensions
            await conn.execute("CREATE EXTENSION IF NOT EXISTS postgis;")
            
            # 2. Tables
            await conn.execute(TABLE_ACTIVE_CONFLICTS)
            await conn.execute(TABLE_CONFLICT_EVENTS)
            await conn.execute(TABLE_INTEL_ARTICLES)
            
            # 3. Features
            await conn.execute(SEARCH_VECTOR_ADD)
            
            # 4. Indexes (Safe Creation)
            await conn.execute("CREATE INDEX IF NOT EXISTS idx_event_time ON conflict_events(event_time DESC);")
            await conn.execute("CREATE INDEX IF NOT EXISTS idx_country_time ON conflict_events(country_iso3, event_time DESC);")
            await conn.execute("CREATE INDEX IF NOT EXISTS idx_geom ON conflict_events USING GIST(geom);")
            await conn.execute("CREATE INDEX IF NOT EXISTS idx_articles_search ON intel_articles USING GIN(search_vector);")
            
            # 5. Geocoding Metadata V2 Migration
            log.info("Checking for Geospatial Metadata V2 columns...")
            await conn.execute("""
                ALTER TABLE conflict_events 
                ADD COLUMN IF NOT EXISTS geo_confidence NUMERIC(4,3),
                ADD COLUMN IF NOT EXISTS geo_method VARCHAR(40),
                ADD COLUMN IF NOT EXISTS geocode_provider VARCHAR(40),
                ADD COLUMN IF NOT EXISTS location_raw TEXT,
                ADD COLUMN IF NOT EXISTS location_admin1 VARCHAR(100),
                ADD COLUMN IF NOT EXISTS geo_validation_flags TEXT[],
                ADD COLUMN IF NOT EXISTS ai_analysis TEXT,
                ADD COLUMN IF NOT EXISTS verification_count INTEGER DEFAULT 1,
                ADD COLUMN IF NOT EXISTS source_urls TEXT[] DEFAULT '{}',
                ADD COLUMN IF NOT EXISTS strategic_relevance VARCHAR(15) DEFAULT 'LOW';
                
                CREATE INDEX IF NOT EXISTS idx_geo_confidence ON conflict_events(geo_confidence);
                CREATE INDEX IF NOT EXISTS idx_ai_analysis ON conflict_events USING GIN(to_tsvector('english', COALESCE(ai_analysis, '')));
                CREATE INDEX IF NOT EXISTS idx_verification ON conflict_events(verification_count DESC);
            """)
            
            log.info("[Bootstrap] Database schema validation and initialization complete.")
    except Exception as e:
        log.error(f"[Bootstrap] Critical Failure: {e}")
        # We don't raise here to allow the API to heartbeat even if DB is partially broken