LeonceNsh committed on
Commit
83031fa
·
verified ·
1 Parent(s): b80db6a

Upload database.py

Browse files
Files changed (1) hide show
  1. database.py +167 -0
database.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ DuckDB database connection manager for the Gradio dashboard.
3
+ """
4
+
5
+ from pathlib import Path
6
+ from typing import Optional
7
+
8
+ import duckdb
9
+ import pandas as pd
10
+
11
+
12
class Database:
    """Manage a lazily opened DuckDB connection and run queries on it.

    If no database file can be found or opened, an in-memory database seeded
    with synthetic demo tables in the ``main_marts`` schema is used instead.
    """

    def __init__(self, db_path: Optional[str] = None):
        """
        Initialize the manager; no connection is opened yet.

        Args:
            db_path: Path to DuckDB file. When None, the first existing file
                among ``warehouse.duckdb`` in the parent of this module's
                directory, ``./warehouse.duckdb`` and ``../warehouse.duckdb``
                is used; when none of them exists, ``db_path`` stays None.
        """
        if db_path is None:
            # Look for database in standard locations
            candidates = (
                Path(__file__).parent.parent / "warehouse.duckdb",
                Path("warehouse.duckdb"),
                Path("../warehouse.duckdb"),
            )
            db_path = next((str(p) for p in candidates if p.exists()), None)
        else:
            # Callers may hand in a pathlib.Path; always store a plain string.
            db_path = str(db_path)

        self.db_path = db_path
        self._connection: Optional["duckdb.DuckDBPyConnection"] = None

    @property
    def connection(self) -> "duckdb.DuckDBPyConnection":
        """Get or create database connection.

        The file at ``db_path`` is opened read-only when it exists. If it is
        missing, or opening it fails, an in-memory database populated by
        ``_create_sample_data`` is used instead.

        Raises:
            Exception: whatever ``_create_sample_data`` raises; in that case
                nothing is cached, so the next access starts from scratch.
        """
        if self._connection is not None:
            return self._connection

        if self.db_path and Path(self.db_path).exists():
            try:
                self._connection = duckdb.connect(self.db_path, read_only=True)
            except Exception as e:
                # Deliberate best-effort: any failure to open the file falls
                # through to the demo database below.
                print(f"Warning: Could not connect to {self.db_path}: {e}")
                print("Falling back to in-memory database with sample data.")
            else:
                return self._connection

        # Create in-memory database with sample data for demo
        self._connection = duckdb.connect(":memory:")
        try:
            self._create_sample_data()
        except Exception:
            # Do not keep a half-populated database cached: later queries
            # would fail with "table does not exist" instead of the real error.
            broken, self._connection = self._connection, None
            broken.close()
            raise
        return self._connection

    def close(self) -> None:
        """Close the connection if one is open; the next access reopens it."""
        if self._connection is not None:
            self._connection.close()
            self._connection = None

    def query(self, sql: str) -> pd.DataFrame:
        """Execute SQL query and return DataFrame."""
        return self.connection.execute(sql).fetchdf()

    def _create_sample_data(self) -> None:
        """Create sample data for demo when no database file exists.

        Builds four tables in schema ``main_marts`` on ``self._connection``:
        ``fct_corridor_flows``, ``fct_incentive_events``,
        ``metrics_elasticity`` and ``fct_simulation_runs``. Values are
        random, but columns that describe the same fact are derived from a
        single draw so that every row is internally consistent.
        """
        conn = self._connection

        # Create schema
        conn.execute("CREATE SCHEMA IF NOT EXISTS main_marts")

        # Create sample corridor flows.
        # The series starts at 07:00, so the peak classification must use the
        # hour of hour_bucket itself rather than the raw series index.
        conn.execute("""
            CREATE TABLE main_marts.fct_corridor_flows AS
            SELECT
                'I-24' as corridor_id,
                'I-24 Main' as zone_name,
                hour_bucket,
                CASE
                    WHEN hour_of_day BETWEEN 7 AND 9 THEN 'AM_PEAK'
                    WHEN hour_of_day BETWEEN 17 AND 19 THEN 'PM_PEAK'
                    ELSE 'OFF_PEAK'
                END as time_period,
                (random() * 500 + 200)::int as vehicle_count,
                CASE
                    WHEN hour_of_day BETWEEN 7 AND 9 THEN 25 + random() * 15
                    WHEN hour_of_day BETWEEN 17 AND 19 THEN 20 + random() * 15
                    ELSE 55 + random() * 10
                END as avg_speed_mph,
                CASE
                    WHEN hour_of_day BETWEEN 7 AND 9 THEN 'D'
                    WHEN hour_of_day BETWEEN 17 AND 19 THEN 'E'
                    ELSE 'B'
                END as level_of_service
            FROM (
                SELECT
                    hour_bucket,
                    date_part('hour', hour_bucket) as hour_of_day
                FROM (
                    SELECT
                        timestamp '2024-01-15 07:00:00' + interval (i) hour as hour_bucket
                    FROM generate_series(0, 167) as t(i)
                ) buckets
            ) hours
        """)

        # Create sample incentive events.
        # Acceptance and completion are drawn once per row; the outcome label
        # and the payout are derived from them (completed implies accepted,
        # and only completed incentives carry a payout).
        conn.execute("""
            CREATE TABLE main_marts.fct_incentive_events AS
            SELECT
                'alloc_' || i as incentive_key,
                'agent_' || (random() * 1000)::int as agent_id,
                'run_001' as simulation_run_id,
                CASE (i % 4)
                    WHEN 0 THEN 'CARPOOL'
                    WHEN 1 THEN 'PACER'
                    WHEN 2 THEN 'DEPARTURE_SHIFT'
                    ELSE 'TRANSIT'
                END as incentive_type,
                2.0 + random() * 8 as offered_amount,
                was_accepted,
                was_completed,
                CASE
                    WHEN was_completed THEN 'COMPLETED'
                    WHEN was_accepted THEN 'ACCEPTED_PENDING'
                    ELSE 'REJECTED'
                END as final_outcome,
                CASE
                    WHEN was_completed THEN (random() * 5)::decimal(10,2)
                    ELSE 0::decimal(10,2)
                END as actual_payout
            FROM (
                SELECT
                    i,
                    accept_draw > 0.3 as was_accepted,
                    accept_draw > 0.3 AND complete_draw > 0.5 as was_completed
                FROM (
                    SELECT
                        i,
                        random() as accept_draw,
                        random() as complete_draw
                    FROM generate_series(1, 500) as t(i)
                ) draws
            ) outcomes
        """)

        # Create sample elasticity metrics
        conn.execute("""
            CREATE TABLE main_marts.metrics_elasticity AS
            SELECT
                bucket as incentive_bucket,
                (100 + idx * 50) as n_trips,
                0.1 + idx * 0.08 as carpool_rate,
                idx * 1.5 as avg_incentive
            FROM (
                SELECT unnest(['NONE', 'LOW', 'MEDIUM', 'HIGH']) as bucket,
                       unnest([0, 1, 2, 3]) as idx
            ) t
            ORDER BY idx
        """)

        # Create sample scenario comparison.
        # The treatment speed is drawn once so that speed_improvement_pct is
        # computed from the same value that is stored in treatment_avg_speed.
        conn.execute("""
            CREATE TABLE main_marts.fct_simulation_runs AS
            SELECT
                'run_' || i as run_key,
                CASE (i % 3)
                    WHEN 0 THEN 'Carpool Incentive'
                    WHEN 1 THEN 'Pacer Program'
                    ELSE 'Transit Promotion'
                END as scenario_name,
                10000 as n_agents,
                treatment_avg_speed,
                42.0 as baseline_avg_speed,
                (treatment_avg_speed - 42.0) / 42.0 * 100 as speed_improvement_pct,
                5 + random() * 15 as vmt_reduction_pct,
                3 + random() * 7 as peak_reduction_pct,
                5000 + random() * 5000 as treatment_spend
            FROM (
                SELECT i, 45 + random() * 10 as treatment_avg_speed
                FROM generate_series(1, 10) as t(i)
            ) runs
        """)
151
+
152
+
153
# Module-wide Database singleton; stays None until get_database() is first called.
_db: Optional[Database] = None


def get_database() -> Database:
    """Return the shared Database, constructing it with defaults on first use."""
    global _db
    if _db is not None:
        return _db
    _db = Database()
    return _db
163
+
164
+
165
def query(sql: str) -> pd.DataFrame:
    """Run ``sql`` against the shared module-level database and return a DataFrame."""
    database = get_database()
    return database.query(sql)