Spaces:
Sleeping
Sleeping
File size: 6,831 Bytes
558db1e | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 | import os
import sqlite3
import time
import pandas as pd
from typing import Optional
from sqlalchemy import create_engine, Column, String, Float, Date, UniqueConstraint, DateTime, JSON
from sqlalchemy.orm import declarative_base, sessionmaker
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.dialects.postgresql import insert
from config import logger, OUTPUT_DIR
Base = declarative_base()
class DailyPrice(Base):
__tablename__ = 'daily_prices'
ticker = Column(String, primary_key=True)
date = Column(Date, primary_key=True)
close_price = Column(Float, nullable=False)
__table_args__ = (UniqueConstraint('ticker', 'date', name='uq_daily_prices_ticker_date'),)
class DailyYield(Base):
__tablename__ = 'daily_yields'
ticker = Column(String, primary_key=True)
date = Column(Date, primary_key=True)
yield_pct = Column(Float)
__table_args__ = (UniqueConstraint('ticker', 'date', name='uq_daily_yields_ticker_date'),)
class StitchMetadata(Base):
__tablename__ = 'stitch_metadata'
ticker = Column(String, primary_key=True)
date = Column(Date, primary_key=True)
source = Column(String) # 'direct', 'proxy_stitched', 'synthetic'
proxy_used = Column(String)
adjustment_factor = Column(Float)
__table_args__ = (UniqueConstraint('ticker', 'date', name='uq_stitch_metadata_ticker_date'),)
import uuid
import datetime
class AuditLog(Base):
__tablename__ = 'audit_log'
id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
user_id = Column(String, nullable=True)
endpoint = Column(String, nullable=False)
request_hash = Column(String, nullable=True)
request_body = Column(JSON, nullable=True)
response_weights = Column(JSON, nullable=True)
timestamp = Column(DateTime, default=datetime.datetime.utcnow)
ip_address = Column(String, nullable=True)
class ApiKey(Base):
__tablename__ = 'api_keys'
key = Column(String, primary_key=True)
created_at = Column(DateTime, nullable=False, default=datetime.datetime.utcnow)
expires_at = Column(DateTime, nullable=False)
revoked = Column(String, default="false") # SQLite boolean compat
used_at = Column(DateTime, nullable=True)
used_by_ip = Column(String, nullable=True)
_ENGINE = None
def with_db_retry(max_retries=3):
"""Decorator to retry database operations on transient failures."""
def decorator(func):
def wrapper(*args, **kwargs):
for attempt in range(max_retries):
try:
return func(*args, **kwargs)
except SQLAlchemyError as e:
if attempt == max_retries - 1:
raise
logger.warning(f"Database operation failed: {e}. Retrying ({attempt + 1}/{max_retries})...")
time.sleep(1.0 * (2 ** attempt)) # Exponential backoff
return wrapper
return decorator
def get_pg_engine():
"""
Creates and returns a singleton SQLAlchemy engine for PostgreSQL.
Expects DATABASE_URL to be set in the environment, falling back to local defaults if missing.
"""
global _ENGINE
if _ENGINE is not None:
return _ENGINE
db_url = os.getenv("DATABASE_URL")
if not db_url:
db_url = f"sqlite:///{os.path.join(OUTPUT_DIR, 'portfolio_db.sqlite3')}"
if db_url.startswith("sqlite"):
_ENGINE = create_engine(db_url, echo=False)
else:
_ENGINE = create_engine(db_url, echo=False, pool_size=10, max_overflow=20, pool_pre_ping=True, pool_recycle=3600)
return _ENGINE
def init_db():
"""Initializes the database schema (Creates tables if they don't exist)."""
engine = get_pg_engine()
Base.metadata.create_all(engine)
logger.info("PostgreSQL Database schema initialized.")
def migrate_sqlite_to_postgres(sqlite_path: Optional[str] = None):
"""
Reads the legacy SQLite finance database and bulk inserts all historical
price and yield records into the new PostgreSQL database.
"""
if sqlite_path is None:
sqlite_path = os.path.join(OUTPUT_DIR, "finance_data.db")
if not os.path.exists(sqlite_path):
logger.warning(f"Legacy SQLite database not found at {sqlite_path}. Nothing to migrate.")
return
logger.info(f"Starting migration from SQLite ({sqlite_path}) to PostgreSQL...")
# 1. Connect to SQLite
sqlite_conn = sqlite3.connect(sqlite_path)
# 2. Extract Data
try:
prices_df = pd.read_sql("SELECT ticker, date, close_price FROM daily_prices", sqlite_conn)
logger.info(f"Extracted {len(prices_df)} records from SQLite daily_prices.")
except Exception as e:
logger.warning(f"Could not read daily_prices from SQLite: {e}")
prices_df = pd.DataFrame()
try:
yields_df = pd.read_sql("SELECT ticker, date, yield_pct FROM daily_yields", sqlite_conn)
logger.info(f"Extracted {len(yields_df)} records from SQLite daily_yields.")
except Exception as e:
logger.warning(f"Could not read daily_yields from SQLite: {e}")
yields_df = pd.DataFrame()
sqlite_conn.close()
# 3. Connect to Postgres & Initialize schema
init_db()
pg_engine = get_pg_engine()
# 4. Transform and Load
Session = sessionmaker(bind=pg_engine)
session = Session()
try:
# We use pd.DataFrame.to_sql for massive bulk insert performance.
# Convert date strings to actual dates first
def insert_on_conflict_nothing(table, conn, keys, data_iter):
data = [dict(zip(keys, row)) for row in data_iter]
stmt = insert(table.table).values(data).on_conflict_do_nothing()
result = conn.execute(stmt)
return result.rowcount
if not prices_df.empty:
prices_df['date'] = pd.to_datetime(prices_df['date']).dt.date
prices_df.to_sql('daily_prices', pg_engine, if_exists='append', index=False, method=insert_on_conflict_nothing, chunksize=10000)
logger.info("Successfully migrated daily_prices to PostgreSQL.")
if not yields_df.empty:
yields_df['date'] = pd.to_datetime(yields_df['date']).dt.date
yields_df.to_sql('daily_yields', pg_engine, if_exists='append', index=False, method=insert_on_conflict_nothing, chunksize=10000)
logger.info("Successfully migrated daily_yields to PostgreSQL.")
except Exception as e:
logger.error(f"Migration failed during PostgreSQL insertion: {e}")
session.rollback()
finally:
session.close()
logger.info("Migration routine complete.")
if __name__ == "__main__":
# If run standalone, execute the migration
migrate_sqlite_to_postgres()
|